# Calculate average results

This notebook uses the results from the experiments and calculates the average results per run.

In [1]:
import os
import shutil
import json
import pandas as pd
import numpy as np
from scipy import stats

In [None]:
# Root directory (relative to the notebook's working directory) where the
# per-dataset CSV summaries are written.
save_path = os.path.join('..', '..', 'data', 'results-csv')
# exist_ok=True creates the directory only when it is missing, which keeps the
# cell safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(save_path, exist_ok=True)

In [None]:
# Dataset names. Each one is used as a sub-directory name, both when reading
# the experiment results and when writing the CSV summaries.
datasets = [
    'Amazon_employee_access',
    'Australian',
    'adult',
    'bank-marketing',
    'cnae-9',
    'credit-g',
    'mfeat-factors',
]

# Framework identifiers. Each one names a directory in the source results tree
# and prefixes the generated '<framework>-results.csv' file.
frameworks = [
    'edca-1-0-0',
    'flaml',
    'tpot',
]

In [None]:
# Make sure every dataset has its own output folder under save_path.
for dataset in datasets:
    # exist_ok=True: create the folder only if it is missing, without a
    # separate (race-prone) os.path.exists() check.
    os.makedirs(os.path.join(save_path, dataset), exist_ok=True)

In [None]:
def _mean_of_numeric_lists(data):
    """Return ``{key: [mean]}`` for every numeric list found in *data*.

    A value is kept only when it is a non-empty list whose first element is
    an ``int`` or a ``float``; every other entry is ignored. The mean is taken
    with ``np.nanmean`` over a ``float32`` array, so NaN entries do not
    contribute. Each mean is wrapped in a one-element list so the mapping can
    be turned directly into a single-row ``DataFrame``.
    """
    return {
        key: [np.nanmean(np.array(value, dtype=np.float32))]
        for key, value in data.items()
        # `value and ...` guards against empty lists, for which value[0]
        # would raise IndexError.
        if isinstance(value, list) and value and isinstance(value[0], (int, float))
    }


# For every (dataset, framework) pair, reduce each run's results.json to one
# row of per-metric averages and store all rows in
# '<save_path>/<dataset>/<framework>-results.csv'.
for dataset in datasets:
    for framework in frameworks:
        source_path = os.path.join(
            '..', '..', 'thesis-results', 'datasets-divided', framework, dataset
        )
        if not os.path.exists(source_path):
            # This framework has no results for this dataset.
            continue

        # Entries are sorted as plain strings, so e.g. 'run10' comes before 'run2'.
        run_dirs = sorted(exp for exp in os.listdir(source_path) if exp.startswith('run'))

        values = []
        for exp in run_dirs:
            # Read the JSON as UTF-8 explicitly instead of relying on the
            # platform's default encoding.
            with open(os.path.join(source_path, exp, 'results.json'), encoding='utf-8') as f:
                data = json.load(f)
            values.append(pd.DataFrame(_mean_of_numeric_lists(data)))

        if not values:
            # pd.concat raises ValueError on an empty list; with no runs
            # there is nothing to summarise for this pair.
            continue

        df = pd.concat(values)
        df.to_csv(os.path.join(save_path, dataset, f'{framework}-results.csv'), index=False)