In [None]:
# Magic functions -- Run Once
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import os
if os.getcwd().split('/')[-1] != "film-aqa":
    # Move up one folder to reach the repo root
    %cd ..

from utils.notebook.generic import full_width_notebook

full_width_notebook()

In [None]:
# Paths, Imports & Configs
import re
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from copy import deepcopy

from utils.notebook.experiment_explorer import get_experiments, get_format_dicts
from utils.notebook.pandas import color_by_multi_attribute
from utils.notebook.pandas import sub_cols_with_cond_and_create_new_col, grouped_scatter, groupby_mean, convert_cols_to_int
from utils.notebook.result_analysis import show_table

# Data / output locations and the earliest experiment date to load
root_data_path = "data"
root_output_path = "output_synced/training"
experiment_start_date = '2021-10-21'


# Retrieve all experiments infos
experiments = get_experiments(root_output_path, min_date=experiment_start_date, question_type_analysis=True)

all_random_seeds = {189369, 876944, 682421, 175326, 427438}

# Per family columns
family_order = [
    'instrument', 'note', 'brightness', 'loudness',
    'boolean', 'exist',
    'position', 'position_global', 'position_rel',
    'count', 'count_compare', 'count_diff',
]
# Matches the variant + 'test_acc' suffix of a column name; removing the match leaves the family name
reg = r'(_(?:with_rel_)?(?:no_rel_)?(?:no_or_)?(?:with_or_)?(?:with_.*_)?)test_acc'


def _family_rank(column_name):
    """Return the index in `family_order` of the family left after removing the `reg` match."""
    return family_order.index(re.sub(reg, '', column_name))


def _family_cols_containing(marker):
    """Return the per-family test accuracy columns containing `marker`, sorted by family order."""
    return sorted([col for col in all_families_test_acc_cols if marker in col], key=_family_rank)


# Columns mentioning 'all' (excluding train/val ones), plus the plain 'test_acc' column
global_test_acc_cols = [
    col for col in experiments.columns
    if ('all' in col and 'train' not in col and 'val' not in col) or col == 'test_acc'
]
# Every '*test_acc*' column except the plain one, the "cogent" ones and the 'all' ones
all_families_test_acc_cols = [
    col for col in experiments.columns
    if 'test_acc' in col and col != 'test_acc' and "cogent" not in col and 'all' not in col
]

no_rel_family_test_acc_cols = _family_cols_containing('no_rel_test_acc')
no_rel_with_filter_family_test_acc_cols = _family_cols_containing('no_rel_with')
with_rel_family_test_acc_cols = _family_cols_containing('with_rel')
no_or_family_test_acc_cols = _family_cols_containing('no_or')
with_or_family_test_acc_cols = _family_cols_containing('with_or')

# Whatever is left once every variant list above has been removed
_plain_family_cols = set(all_families_test_acc_cols).difference(
    no_rel_family_test_acc_cols,
    with_rel_family_test_acc_cols,
    no_rel_with_filter_family_test_acc_cols,
    no_or_family_test_acc_cols,
    with_or_family_test_acc_cols,
)
family_test_acc_cols = sorted(_plain_family_cols, key=_family_rank)

# Pretty printing
format_dict, latex_format_dict = get_format_dicts()

pd.set_option('display.max_colwidth', None)

# Last expression of the cell: displays the alphabetically sorted column names
sorted(experiments.columns.values)



In [None]:
# Generic table parameters
display_all_exp = False
nb_results_to_keep = None
remove_outliers = False
show_missing_seeds = True
show_count_col = True
# Standard deviation is only enabled when more than one result is kept per group
show_std = False if nb_results_to_keep is None else nb_results_to_keep > 1

In [None]:
# Columns displayed in the quick overview of the DAQA runs.
# Each column is listed once: a repeated name (previously 'n_mels') yields a duplicated
# column in the displayed frame.
cols = [
    'extractor_type', 'hop_length', 'n_mels', 'queue', 'config', 'random_seed', 'test_acc',
    'nb_trainable_param', 'date', 'train_time', 'mean_epoch_time', 'nb_epoch_trained',
    'gpu_name', 'device', 'note',
]
# Most recent experiments first
exp = experiments.sort_values('date', ascending=False)
#exp = exp[exp['config'].str.contains('config_fix')]
#exp.sort_values('config')[['config'] + cols]
#exp.groupby('config').mean()[cols]
# Last expression of the cell: rows whose prefix is 'DAQA', restricted to `cols`
exp.loc[exp['prefix'] == 'DAQA', cols]#.style.format(format_dict)

## NAAQA on DAQA Dataset

In [None]:
def get_table_extractor_type_mask(df, start_date, *, prefix='DAQA', rnn_state_size=512, nb_resblock=4,
                                  resblocks_out_chan=64, classifier_conv_out=256,
                                  classifier_projection_out=1024):
    """Build the boolean row mask for Table 3 (extractor types, per question type analysis).

    A row is kept when it matches every static parameter below. The extractor type and the
    input parameters (hop_length, n_fft, n_mels) are not constrained here, so they are free
    to vary between the selected rows.

    Static parameters enforced (defaults):
        G = rnn_state_size            = 512
        J = nb_resblock               = 4
        M = resblocks_out_chan        = 64
        C = classifier_conv_out       = 256
        H = classifier_projection_out = 1024
        Coordconv locations: extractor 'None'; stem, resblock and classifier 'Both'
        reduce_lr_on_plateau == True

    NOTE(review): the previous header comment listed G = 4096, J = 3 and C = 512, which did
    not match the filters; the values above are the ones the code actually applies -- confirm
    against the published table. It also listed ClassifierTopology = FCN, which is not
    filtered on here.

    Args:
        df: experiments DataFrame containing the columns referenced below.
        start_date: lower bound (inclusive) compared against df['date'].
        prefix: required value of df['prefix'].
        rnn_state_size: required value of df['rnn_state_size'] (G).
        nb_resblock: required value of df['nb_resblock'] (J).
        resblocks_out_chan: required value of df['resblocks_out_chan'] (M).
        classifier_conv_out: required value of df['classifier_conv_out'] (C).
        classifier_projection_out: required value of df['classifier_projection_out'] (H).

    Returns:
        Boolean mask indexed like `df`; True for the rows satisfying every constraint.

    Raises:
        KeyError: if `df` lacks one of the referenced columns.
    """
    # Column -> value each selected row must have (all combined with a logical AND)
    required_values = {
        # -- Text Processing
        'rnn_state_size': rnn_state_size,                        # G
        # -- Coordconv
        'extractor_spatial_location': 'None',
        'stem_spatial_location': 'Both',
        'resblock_spatial_location': 'Both',
        'classifier_spatial_location': 'Both',
        # -- Resblocks
        'nb_resblock': nb_resblock,                              # J
        'resblocks_out_chan': resblocks_out_chan,                # M
        # -- Classifier
        'classifier_conv_out': classifier_conv_out,              # C
        'classifier_projection_out': classifier_projection_out,  # H
        # -- Other
        'reduce_lr_on_plateau': True,
    }

    # -- Input parameters
    mask = (df['prefix'] == prefix) & (df['date'] >= start_date)

    for column, expected in required_values.items():
        mask &= (df[column] == expected)

    return mask

# Row mask for the extractor-type table, computed over every loaded experiment
filters = get_table_extractor_type_mask(df=experiments, start_date=experiment_start_date)

# Replacement labels for the 'extractor' column (LaTeX flavoured).
# Raw strings are required: in a normal string literal '\r' (as in '\ref') is parsed as a
# carriage return, which would corrupt the LaTeX cross-references.
hardcoded_columns = {'extractor': {
    'type': 'replace_groupby',
    'values': [
        r'Interleaved Time (Fig \ref{fig:interlaced_extractor})',
        r'Parallel (Fig \ref{fig:parallel_extractor})',
        '2D Conv',
        'Resnet101 (Baseline)',
        r'Interleaved Freq (Fig \ref{fig:interlaced_extractor})',
        'Resnet101 - Resized 224',
    ]
}}

# Arguments of the results table: selected runs grouped by the input parameters.
table_kwargs = dict(
    df=experiments,
    filters=filters,
    groupby_columns=['n_fft', 'hop_length', 'n_mels'],
    acc_columns=['test_acc'],
    extra_columns=['extractor_type', 'nb_trainable_param', 'train_time'],
    # `or True` makes the next two values always truthy, whatever the flags from the
    # generic table parameters cell say.
    display_all=display_all_exp or True,
    show_count_col=show_count_col or True,
    format_dict=latex_format_dict,
    inplace_std=show_std,
    # Options currently left disabled:
    #attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
    #hardcoded_cols= hardcoded_columns,
    #remove_outliers=remove_outliers,
    #nb_to_keep=100,#nb_results_to_keep,
    #all_seeds=all_random_seeds if show_missing_seeds else None
)

grouped_df = show_table(**table_kwargs)