# Analyze experiments

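Aggregates the results of each listed experiment: for every model and metric, the mean and standard deviation over all runs are computed and exported to CSV and Excel.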

In [None]:
import pandas as pd
import numpy as np
import json
import os
from openpyxl import load_workbook

In [None]:
# Legend table (semicolon-separated); it is exported as the 'Legend' sheet
# of the results workbook.
legend = pd.read_csv(
    '../data/results/legend.csv',
    sep=';',
)

In [None]:
# Experiment result directories to aggregate (relative paths). Each one is
# expected to hold one sub-directory per run.
experiments = [
    # all features
    '../tests/test1/cv-PT',
    '../tests/test1/train-PT-test-US',
    # reduced features
    '../tests/test1/cv-PT-reduced-13',
    '../tests/test1/train-PT-test-US-reduced-13',
]

In [None]:
# Excel workbook that receives the legend, one sheet per experiment and the
# combined 'Geral' sheet.
output_file = '../data/results/results.xlsx'

In [None]:
def _load_run_results(experiment_path):
    """Load ``results.json`` from every entry of *experiment_path*.

    Entries without a readable, valid ``results.json`` (stray files,
    unfinished runs) are reported and skipped instead of aborting.

    Returns:
        A list with the parsed JSON content of each readable run.
    """
    runs = []
    # Sorted so the aggregation order does not depend on the file system.
    for run in sorted(os.listdir(experiment_path)):
        results_file = os.path.join(experiment_path, run, 'results.json')
        try:
            with open(results_file) as f:
                runs.append(json.load(f))
        except (OSError, ValueError) as error:
            # OSError: missing file / entry is not a directory;
            # ValueError: invalid JSON or undecodable content.
            print(f'Skipping {results_file}: {error}')
    return runs


def _summarise_runs(runs):
    """Return ``{model: {metric: mean, metric + '_std': std}}`` over *runs*.

    The models and metrics are taken from the last loaded run; every run
    must provide all of them. Metric values may be lists (pooled across
    runs) or single values (one sample per run).
    """
    reference = runs[-1]
    models = list(reference.keys())
    metrics = list(reference[models[0]].keys())
    summary = {}
    for model in models:
        pooled = {metric: [] for metric in metrics}
        for run in runs:
            for metric in metrics:
                value = run[model][metric]
                if isinstance(value, list):
                    pooled[metric].extend(value)
                else:
                    pooled[metric].append(value)
        # All means first, then all standard deviations, so the resulting
        # columns read: metric_1 .. metric_k, metric_1_std .. metric_k_std.
        means = {metric: np.mean(pooled[metric]) for metric in metrics}
        stds = {metric + '_std': np.std(pooled[metric]) for metric in metrics}
        summary[model] = {**means, **stds}
    return summary


def write_experiment(experiment_path, writer, dataframes):
    """Aggregate all runs of one experiment and export the summary.

    For each model and metric found in the runs' ``results.json`` files, the
    mean and standard deviation over all runs are computed. The summary is
    written to ``../data/results/<name>.csv`` and to a sheet called
    ``<name>`` in *writer*, where ``<name>`` is the part of
    *experiment_path* after ``tests/`` with ``/`` replaced by ``-``.

    Args:
        experiment_path: Directory holding one sub-directory per run; must
            contain ``tests/``.
        writer: Open ``pd.ExcelWriter`` that receives the experiment sheet.
        dataframes: List to which the summary frame (with an extra leading
            ``experiment`` column) is appended.

    Returns:
        None. When the directory is missing or holds no readable run, a
        message is printed and nothing is written or appended.

    Raises:
        IndexError: If *experiment_path* does not contain ``tests/``.
    """
    test_file = experiment_path.split('tests/')[1].replace('/', '-')
    runs = []
    if os.path.isdir(experiment_path):
        runs = _load_run_results(experiment_path)
    if not runs:
        print(f'No results for {test_file}')
        return

    df = pd.DataFrame(_summarise_runs(runs)).T
    df.reset_index(inplace=True, names='model')
    df.to_csv(f'../data/results/{test_file}.csv', index=False)
    # NOTE(review): Excel caps sheet names at 31 characters; longer names
    # may be warned about or rejected depending on the writer engine.
    df.to_excel(writer, sheet_name=test_file, index=False)
    # Added after exporting so the per-experiment outputs do not carry it.
    df.insert(0, 'experiment', test_file)
    dataframes.append(df)

In [None]:
# Per-experiment summary frames; write_experiment appends to this list and the
# export cell concatenates it into the combined table.
dataframes = []

In [None]:
with pd.ExcelWriter(output_file) as workbook_writer:
    # The legend is written first, so it becomes the first sheet.
    legend.to_excel(workbook_writer, sheet_name='Legend', index=False)

    # One sheet per experiment; each call also appends its summary frame
    # to `dataframes`.
    for experiment_path in experiments:
        write_experiment(experiment_path, workbook_writer, dataframes)

    # Stack all per-experiment summaries into one combined table.
    geral_df = pd.concat(dataframes, ignore_index=True)
    geral_df.to_csv('../data/results/geral.csv', index=False)

    # Written last here, so the sheet is appended after the experiment sheets.
    geral_df.to_excel(workbook_writer, sheet_name='Geral', index=False)

In [None]:
# Reopen the saved workbook and move the 'Legend' and 'Geral' sheets to the
# front (in that order); the remaining sheets keep their relative order.
wb = load_workbook(output_file)
for position, title in enumerate(['Legend', 'Geral']):
    # move_sheet takes a relative offset, so compute it from the sheet's
    # current position. This uses the public API instead of assigning to the
    # private `_sheets` list.
    wb.move_sheet(title, offset=position - wb.sheetnames.index(title))
wb.save(output_file)