# Import Data

In [None]:
import os
import researchpy as rp
import pandas as pd
import pickle

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Columns every experiment pickle is expected to provide; any other
# columns stored in the pickled frames are dropped.
EXPERIMENT_COLUMNS = ['max_epochs', 'executor_memory', 'executor_cores',
                      'model', 'accuracy', 'time']

# Collect the experiment result files. The context manager closes the
# directory iterator; the DirEntry objects stay usable afterwards.
with os.scandir('data/') as entries:
    experiments = [entry for entry in entries if entry.name.endswith(".pickle")]

# Load every experiment first and concatenate once at the end, instead of
# growing the frame inside the loop (which re-copies all rows each time).
frames = []
for experiment in experiments:
    # NOTE(security): pickle.load can execute arbitrary code. Only load
    # experiment files that come from a trusted source.
    with open(experiment.path, 'rb') as dffile:
        read_df = pickle.load(dffile)
    frames.append(read_df[EXPERIMENT_COLUMNS])

# pd.concat refuses an empty list, so fall back to an empty frame that
# still carries the expected columns when no experiment was found.
df = pd.concat(frames) if frames else pd.DataFrame(columns=EXPERIMENT_COLUMNS)

# Experimental factors shared by both analysis frames, renamed to the
# short names used in the ANOVA formula.
factors = {'epochs': df["max_epochs"],
           'memory': df["executor_memory"],
           'cores': df["executor_cores"],
           'model': df["model"]}

# Same factors, different response variable 'y': accuracy vs. time.
acc_df = pd.DataFrame({'y': df["accuracy"], **factors})

time_df = pd.DataFrame({'y': df["time"], **factors})

print(f"Imported {len(experiments)} experiments!")

## DataFrame Summaries

In [None]:
# Group the accuracy observations by every factor combination, then keep
# the 'y' part of the summary produced by rp.summary_cont.
acc_groups = acc_df.groupby(['model', 'epochs', 'cores', 'memory'])
summary_acc_df = rp.summary_cont(acc_groups)['y']
summary_acc_df

In [None]:
# Group the time observations by every factor combination, then keep
# the 'y' part of the summary produced by rp.summary_cont.
time_groups = time_df.groupby(['model', 'epochs', 'cores', 'memory'])
summary_time_df = rp.summary_cont(time_groups)['y']
summary_time_df

## ANOVA Analysis

In [None]:
def anova_analysis(input_df, alpha=0.05):
    """Fit a full-factorial OLS model and return its ANOVA table.

    The model regresses ``y`` on the categorical factors ``model``,
    ``epochs``, ``cores`` and ``memory`` including all of their
    interactions, then runs ``anova_lm`` with ``typ=2`` on the fit.

    Args:
        input_df: DataFrame with the columns ``y``, ``model``, ``epochs``,
            ``cores`` and ``memory``.
        alpha: Significance threshold the p-values are compared against.
            Defaults to 0.05.

    Returns:
        The ANOVA table returned by ``anova_lm`` with one extra boolean
        column named ``'PR(>F) < <alpha>'`` (``'PR(>F) < 0.05'`` by
        default) that is True where the p-value is below ``alpha``.
    """
    model = ols('y ~ C(model)*C(epochs)*C(cores)*C(memory)', input_df).fit()

    res = sm.stats.anova_lm(model, typ=2)
    # Rows without a p-value (NaN) compare as False, i.e. not significant.
    res[f'PR(>F) < {alpha}'] = res['PR(>F)'] < alpha
    return res

In [None]:
# Run the ANOVA with accuracy as the response variable. The bare
# expression on the last line makes the notebook display the table.
acc_res = anova_analysis(acc_df)
print("Accuracy ANOVA Analysis")
acc_res

In [None]:
# Run the ANOVA with time as the response variable. The bare
# expression on the last line makes the notebook display the table.
time_res = anova_analysis(time_df)
print("Time ANOVA Analysis")
time_res