In [None]:
# Magic functions -- Run Once
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import os
if os.getcwd().split('/')[-1] != "film-aqa":
    # Move up one folder to reach the repo root
    %cd ..

from utils.notebook.generic import full_width_notebook

full_width_notebook()

In [None]:
# Paths, Imports & Configs
import re
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from copy import deepcopy

from utils.notebook.experiment_explorer import get_experiments, get_format_dicts
from utils.notebook.pandas import color_by_multi_attribute
from utils.notebook.pandas import sub_cols_with_cond_and_create_new_col, grouped_scatter, groupby_mean, convert_cols_to_int
from utils.notebook.result_analysis import show_table

# --- Paths ---
root_data_path = "data"
root_output_path = "output_synced/training"

# --- Experiments ---
# Retrieve all experiments infos
experiments = get_experiments(
    root_output_path,
    min_date="2020-09-15_00h00",
    question_type_analysis=True,
)

all_random_seeds = {189369, 876944, 682421, 175326, 427438}

# --- Per family columns ---
# Order in which the question families are sorted
family_order = [
    'instrument', 'note', 'brightness', 'loudness',
    'boolean', 'exist',
    'position', 'position_global', 'position_rel',
    'count', 'count_compare', 'count_diff',
]
# Matches the "_[<variant>_]test_acc" tail of a column name; removing the
# match leaves the prefix that is looked up in `family_order`.
reg = r'(_(?:with_rel_)?(?:no_rel_)?(?:no_or_)?(?:with_or_)?(?:with_.*_)?)test_acc'

# Columns containing 'all' (excluding train/val ones), plus the overall 'test_acc'
global_test_acc_cols = [
    col for col in experiments.columns
    if ('all' in col and 'train' not in col and 'val' not in col) or col == 'test_acc'
]
# Every other test accuracy column
all_families_test_acc_cols = [
    col for col in experiments.columns
    if 'test_acc' in col and col != 'test_acc' and 'all' not in col
]


def _family_rank(col_name):
    """Return the index in `family_order` of `col_name` stripped of its test_acc tail.

    Raises ValueError (from `list.index`) when the stripped name is not in `family_order`.
    """
    return family_order.index(re.sub(reg, '', col_name))


def _family_cols_containing(marker):
    """Return the columns of `all_families_test_acc_cols` containing `marker`, sorted by family rank."""
    return sorted(
        [col for col in all_families_test_acc_cols if marker in col],
        key=_family_rank,
    )


# One sorted list per column-name variant
no_rel_family_test_acc_cols = _family_cols_containing('no_rel_test_acc')
no_rel_with_filter_family_test_acc_cols = _family_cols_containing('no_rel_with')
with_rel_family_test_acc_cols = _family_cols_containing('with_rel')
no_or_family_test_acc_cols = _family_cols_containing('no_or')
with_or_family_test_acc_cols = _family_cols_containing('with_or')

# Whatever is left once every variant list above has been removed
family_test_acc_cols = sorted(
    set(all_families_test_acc_cols)
    - set(no_rel_family_test_acc_cols)
    - set(with_rel_family_test_acc_cols)
    - set(no_rel_with_filter_family_test_acc_cols)
    - set(no_or_family_test_acc_cols)
    - set(with_or_family_test_acc_cols),
    key=_family_rank,
)

# --- Pretty printing ---
format_dict, latex_format_dict = get_format_dicts()

# Never truncate the content of wide cells
pd.set_option('display.max_colwidth', None)

# Cell output: alphabetical list of the available columns
sorted(experiments.columns.values)



In [None]:
# Most recent Individual experiments

# Columns displayed for each individual run
cols = [
    'config', 'random_seed', 'test_acc', 'n_mels', 'nb_trainable_param',
    'date', 'train_time', 'mean_epoch_time', 'nb_epoch_trained',
    'gpu_name', 'device', 'note',
]
# Newest experiments first
exp = experiments.sort_values(by='date', ascending=False)
# Cell output: styled table restricted to the selected columns
exp.loc[:, cols].style.format(format_dict)


In [None]:
# Experiments grouped by mel

# Keep runs dated 2021-10-21 or later that have an `n_mels` value
filters = (experiments['date'] >= '2021-10-21') & experiments['n_mels'].notnull()

grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['extractor_type', 'n_mels'],
    acc_columns=['test_acc'] + list(family_test_acc_cols),
    extra_columns=['nb_trainable_param', 'gpu_name'],
    # Same colormap for every per-family accuracy column
    attribute_by_color=dict.fromkeys(family_test_acc_cols, 'CMRmap'),
    display_all=False,
    show_count_col=False,
    format_dict=latex_format_dict,
    inplace_std=False,
    # Currently disabled arguments:
    # hardcoded_cols=hardcoded_columns,
    # remove_outliers=remove_outliers,
    # nb_to_keep=1,  # nb_results_to_keep
    # all_seeds=all_random_seeds if show_missing_seeds else None,
)