# History Summary

In [1]:
import os
import torch
import numpy as np
import pandas as pd

### Get all History File Paths

In [3]:
output_dir = './summary/'
history_dir = './history/'

# Parallel column lists: index i of every list describes the same history file.
versions, datasets, fx_models, clf_models, runs, paths = [], [], [], [], [], []


def _sorted_subdirs(parent):
    """Return the names of the immediate sub-directories of `parent`, sorted.

    Plain files (e.g. `.DS_Store`, notes, stray checkpoints) are skipped so the
    nested directory walk below never calls `os.listdir` on a non-directory.
    Sorting makes the resulting row order independent of the arbitrary order
    in which the file system lists entries.
    """
    return sorted(
        name for name in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, name))
    )


# Walk <history_dir>/<version>/<dataset>/<model>/<run>/history.pt
for version in _sorted_subdirs(history_dir):
    version_dir = os.path.join(history_dir, version)
    for dataset in _sorted_subdirs(version_dir):
        dataset_dir = os.path.join(version_dir, dataset)
        for model in _sorted_subdirs(dataset_dir):
            # The classifier name is whatever follows the LAST underscore; the
            # remainder (which may itself contain underscores) is the feature
            # extractor. A name without any underscore yields an empty fx_model.
            fx_model, clf_model = model.rpartition('_')[::2]
            model_dir = os.path.join(dataset_dir, model)
            for run in _sorted_subdirs(model_dir):
                path = os.path.join(model_dir, run, 'history.pt')
                # Run folders that never wrote a history file are ignored.
                if not os.path.isfile(path):
                    continue
                versions.append(version)
                datasets.append(dataset)
                fx_models.append(fx_model)
                clf_models.append(clf_model)
                runs.append(run)
                paths.append(path)

# One row per history file found on disk.
history_files = pd.DataFrame.from_dict(dict(
    version=versions,
    dataset=datasets,
    fx_model=fx_models,
    clf_model=clf_models,
    run=runs,
    path=paths,
))
history_files.describe()

Unnamed: 0,version,dataset,fx_model,clf_model,run,path
count,420,420,420,420,420,420
unique,2,7,6,2,5,420
top,v1.2,RAVDESS,WAV2VEC2_BASE,DENSE,Run1,./history/v1.2/RAVDESS/WAV2VEC2_BASE_DENSE/Run...
freq,210,60,70,210,84,1


### Split History Files by Classifier

In [4]:
# Partition the history index on the `clf_model` column so that each
# classifier family can be summarised on its own further down.
is_dense = history_files['clf_model'] == 'DENSE'
is_probing = history_files['clf_model'] == 'PROBING'

dense_history_files = history_files.loc[is_dense]
probing_history_files = history_files.loc[is_probing]

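#### DENSE

For every (feature extractor, dataset) pair, each run contributes the test accuracy recorded at the position of its highest validation accuracy (presumably the best epoch); the mean and standard deviation over the runs are written to `dense_summary.csv`.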

In [5]:
# Summarise the DENSE runs: one row per (feature extractor, dataset) pair.
# The four lists below are parallel columns of the summary table.
mean_accs = []
std_accs = []
models = []
datasets = []

for model in dense_history_files.fx_model.unique():
    for dataset in dense_history_files.dataset.unique():
        run_df = dense_history_files[
            (dense_history_files.dataset == dataset)
            & (dense_history_files.fx_model == model)
        ]
        # Not every (model, dataset) pair necessarily has runs; averaging an
        # empty list would only add a NaN row and a NumPy RuntimeWarning.
        if run_df.empty:
            continue

        test_accs = []
        for path in run_df['path']:
            # NOTE(review): torch.load unpickles the file - only load history
            # files you trust. map_location='cpu' lets histories that hold
            # CUDA tensors be read on a machine without a GPU.
            history = torch.load(path, map_location='cpu')
            # Model selection: report the test accuracy at the first position
            # where validation accuracy peaks.
            # Assumes history['val_acc'] is a list aligned with
            # history['test_acc'] - TODO confirm against the training script.
            val_acc = history['val_acc']
            best_idx = val_acc.index(max(val_acc))
            test_accs.append(history['test_acc'][best_idx])

        mean_accs.append(np.mean(test_accs))
        # np.std defaults to ddof=0, i.e. the population standard deviation.
        std_accs.append(np.std(test_accs))
        models.append(model)
        datasets.append(dataset)

dense_summary = pd.DataFrame.from_dict(dict(
    dataset=datasets,
    model=models,
    mean_accuracy=mean_accs,
    std_accuracy=std_accs,
))

# to_csv does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)
dense_summary.to_csv(os.path.join(output_dir,'dense_summary.csv'))

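#### PROBING

For every (feature extractor, dataset) pair, each run contributes, for each column of its accuracy history (presumably one column per probed layer), the test accuracy at the position of that column's highest validation accuracy; the per-column mean and standard deviation over the runs are written to `probing_summary.csv`.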

In [6]:
# Summarise the PROBING runs: one row per (feature extractor, dataset) pair.
# The four lists below are parallel columns of the summary table.
mean_accs = []
std_accs = []
models = []
datasets = []

for model in probing_history_files.fx_model.unique():
    for dataset in probing_history_files.dataset.unique():
        run_df = probing_history_files[
            (probing_history_files.dataset == dataset)
            & (probing_history_files.fx_model == model)
        ]
        # Not every (model, dataset) pair necessarily has runs; averaging an
        # empty list would only add a NaN row and a NumPy RuntimeWarning.
        if run_df.empty:
            continue

        test_accs = []
        for path in run_df['path']:
            # NOTE(review): torch.load unpickles the file - only load history
            # files you trust. map_location='cpu' lets histories that hold
            # CUDA tensors be read on a machine without a GPU.
            history = torch.load(path, map_location='cpu')
            # For each column of val_acc, the row index where it peaks.
            # Presumably rows are epochs and columns are probed layers -
            # TODO confirm against the training script.
            best_rows = np.argmax(history['val_acc'], axis=0)
            # Transpose so that iterating yields one test-accuracy curve per column.
            test_by_column = np.array(history['test_acc']).transpose()
            test_accs.append([
                curve[row] for curve, row in zip(test_by_column, best_rows)
            ])

        # axis=0 aggregates over runs, leaving one value per column.
        mean_accs.append(np.mean(test_accs, axis=0))
        # np.std defaults to ddof=0, i.e. the population standard deviation.
        std_accs.append(np.std(test_accs, axis=0))
        models.append(model)
        datasets.append(dataset)

# NOTE: mean_accuracy / std_accuracy cells hold NumPy arrays, so the CSV
# stores their string representation rather than one number per cell.
probing_summary = pd.DataFrame.from_dict(dict(
    dataset=datasets,
    model=models,
    mean_accuracy=mean_accs,
    std_accuracy=std_accs,
))

# to_csv does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)
probing_summary.to_csv(os.path.join(output_dir,'probing_summary.csv'))