In [None]:
# Magic functions -- Run Once
# autoreload mode 2: project modules edited on disk are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import os
# The `utils.*` imports and the relative data/output paths used below are resolved from the
# working directory, which is expected to be the repository root ("film-aqa").
# NOTE(review): splitting on '/' assumes POSIX-style paths.
if os.getcwd().split('/')[-1] != "film-aqa":
    # Move up one folder to reach the repo root
    %cd ..

from utils.notebook.generic import full_width_notebook

# Project helper; presumably widens the notebook layout -- confirm in utils.notebook.generic.
full_width_notebook()

In [None]:
# Paths, Imports & Configs
import re
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from copy import deepcopy

from utils.notebook.experiment_explorer import get_experiments, get_format_dicts
from utils.notebook.pandas import color_by_multi_attribute
from utils.notebook.pandas import sub_cols_with_cond_and_create_new_col, grouped_scatter, groupby_mean, convert_cols_to_int
from utils.notebook.result_analysis import show_table

# ---- Paths ----
root_data_path = "data"
root_output_path = "output_synced/training"

# Passed as `min_date` to get_experiments and reused as `start_date` by the table filters below.
# ('2021-10-21' was used previously.)
experiment_start_date = '2022-01-01'
#experiment_start_date = None

# Dataset folder passed to get_experiments ("data/CLEAR_50k_4_inst_audio" was used previously).
data_folder = "data/CLEAR_FINAL_50k_4_inst_audio"

# ---- Load every experiment ----
experiments = get_experiments(
    root_output_path,
    data_folder=data_folder,
    min_date=experiment_start_date,
    question_type_analysis=True,
    cogent_analysis=True,
)

# Seeds each configuration is expected to have been run with (used to report missing seeds).
all_random_seeds = {189369, 876944, 682421, 175326, 427438}

# ---- Per question-family accuracy columns ----
family_order = ['instrument', 'note', 'brightness', 'loudness', 'boolean', 'exist', 'position', 'position_global', 'position_rel', 'count', 'count_compare', 'count_diff']
# Strips the "_[variant_]test_acc" suffix so only the family name remains.
reg = r'(_(?:with_rel_)?(?:no_rel_)?(?:no_or_)?(?:with_or_)?(?:with_.*_)?)test_acc'


def _family_position(col):
    """Index in `family_order` of the family `col` belongs to (ValueError if unknown)."""
    return family_order.index(re.sub(reg, '', col))


def _family_cols_with(tag):
    """Per-family accuracy columns whose name contains `tag`, in `family_order` order."""
    return sorted((c for c in all_families_test_acc_cols if tag in c), key=_family_position)


global_test_acc_cols = [
    c for c in experiments.columns
    if ('all' in c and 'train' not in c and 'val' not in c) or c == 'test_acc'
]
all_families_test_acc_cols = [
    c for c in experiments.columns
    if 'test_acc' in c and c != 'test_acc' and 'all' not in c and 'cogent' not in c
]
no_rel_family_test_acc_cols = _family_cols_with('no_rel_test_acc')
no_rel_with_filter_family_test_acc_cols = _family_cols_with('no_rel_with')
with_rel_family_test_acc_cols = _family_cols_with('with_rel')
no_or_family_test_acc_cols = _family_cols_with('no_or')
with_or_family_test_acc_cols = _family_cols_with('with_or')

# Whatever is left once every variant column is removed: the plain per-family columns.
family_test_acc_cols = sorted(
    set(all_families_test_acc_cols)
    - set(no_rel_family_test_acc_cols)
    - set(with_rel_family_test_acc_cols)
    - set(no_rel_with_filter_family_test_acc_cols)
    - set(no_or_family_test_acc_cols)
    - set(with_or_family_test_acc_cols),
    key=_family_position,
)

# ---- Pretty printing ----
format_dict, latex_format_dict = get_format_dicts()

pd.set_option('display.max_colwidth', None)

# Cell output: every available column name.
sorted(experiments.columns.values)



In [None]:
# Generic table parameters (shared by the show_table calls below)
display_all_exp = False      # forwarded as `display_all`
nb_results_to_keep = None    # forwarded as `nb_to_keep`
remove_outliers = False      # forwarded as `remove_outliers`
show_missing_seeds = True    # when True, `all_random_seeds` is passed as `all_seeds`
show_count_col = True        # forwarded as `show_count_col`
# Only enabled when more than one result per group is kept.
show_std = nb_results_to_keep is not None and nb_results_to_keep > 1

In [None]:
# Overview of every loaded experiment, most recent first.
# Column sets previously used for this view:
#   ['train_acc', 'test_acc', 'random_seed']
#   ['prefix', 'malimo', 'hop_length', 'extractor_type', 'config', 'random_seed', 'test_acc', 'cogent_test_acc', 'n_mels',
#    'nb_trainable_param', 'date', 'train_time', 'mean_epoch_time', 'nb_epoch_trained', 'gpu_name', 'device', 'folder_dated']
cols = [
    'prefix',
    'malimo',
    'hop_length',
    'extractor_type',
    'cogent_test_acc',
    'nb_trainable_param',
    'config',
    'date',
]
exp = experiments.sort_values(by='date', ascending=False)
exp[cols].style.format(format_dict)

In [None]:
# Conv_2d runs on CLEAR_FINAL dated on/after `experiment_start_date`.
colss = ['config', 'extractor_type', 'input_type', 'cogent_test_acc', 'date']

exp[
    exp['prefix'].str.contains('CLEAR_FINAL')
    & exp['extractor_type'].str.contains('Conv_2d')
    & (exp['date'] >= experiment_start_date)
][colss]

In [None]:
# `n_mels` of every CLEAR_FINAL run whose config name contains "table_4", most recent first.
t = experiments

(
    t[t['prefix'].str.contains('CLEAR_FINAL') & t['config'].str.contains("table_4")]
    .sort_values('date', ascending=False)
    [['config', 'n_mels']]
)

In [None]:
# Cell output: the plain per-family test accuracy columns (sorted following `family_order`) used by the tables below.
family_test_acc_cols

## Table 3

In [None]:
# Table 3 - Extractor types - Per question type analysis

def get_experiments_filters(df, start_date, dataset="CLEAR_FINAL", g=256, j=4, m=128, malimo=False, n_mels=64, extractors=None, config_filter=None):
    """Build the boolean row mask selecting the runs of the extractor comparison (Table 3).

    Every selected run matches the fixed settings below; ``extractors`` optionally
    restricts the selection to some extractor families.

    Fixed settings (hard-coded):
        * ``extractor_spatial_location == 'None'``; stem, resblock and classifier
          spatial locations ``== 'Both'``
        * ``classifier_conv_out == 512`` (C), ``classifier_projection_out == 1024`` (H)
        * ``reduce_lr_on_plateau == True``

    Args:
        df: Experiments DataFrame. Every column referenced in this function must exist
            (including the extractor-specific ones, which are always evaluated).
        start_date: Inclusive lower bound compared against ``df['date']``.
        dataset: Pattern that ``df['prefix']`` must contain (``str.contains``, regex).
        g: Required ``rnn_state_size`` (G).
        j: Required ``nb_resblock`` (J).
        m: Required ``resblocks_out_chan`` (M).
        malimo: Required value of the ``malimo`` column.
        n_mels: Required ``n_mels``.
        extractors: Optional iterable of extractor names (case-insensitive) among
            ``'parallel'``, ``'interleaved'``, ``'conv_2d'`` and ``'resnet'``. Rows matching
            ANY requested extractor are kept. ``None``, an empty iterable, or only
            unknown names leave the extractor unconstrained.
        config_filter: Optional dict ``{'key': column, 'value': pattern}``; keeps the rows
            whose ``df[column]`` contains ``pattern``.

    Returns:
        Boolean Series aligned with ``df``.
    """
    # -- Input parameters
    filters = (df['prefix'].str.contains(dataset))
    filters &= (df['date'] >= start_date)
    filters &= (df['n_mels'] == n_mels)

    # -- Text Processing
    filters &= (df['rnn_state_size'] == g)                  # G

    # -- Coordconv
    filters &= (df['extractor_spatial_location'] == 'None')
    filters &= (df['stem_spatial_location'] == 'Both')
    filters &= (df['resblock_spatial_location'] == 'Both')
    filters &= (df['classifier_spatial_location'] == 'Both')

    # -- Resblocks
    filters &= (df['nb_resblock'] == j)                     # J
    filters &= (df['resblocks_out_chan'] == m)              # M

    # -- Classifier
    filters &= (df['classifier_conv_out'] == 512)           # C
    filters &= (df['classifier_projection_out'] == 1024)    # H

    # -- Other
    filters &= (df['reduce_lr_on_plateau'] == True)
    filters &= (df['malimo'] == malimo)

    # -- Variable parameters (input type & extractor)
    # The 1D-input extractors share the same spectrogram settings.
    input_1d_filter = (df['input_type'].str.contains('1D'))
    input_1d_filter &= (df['n_fft'] == 512)
    input_1d_filter &= (df['hop_length'] == 2048)

    extractor_masks = {
        'parallel': df['extractor_type'].str.contains('Parallel') & input_1d_filter,
        'interleaved': df['extractor_type'].str.contains('Interleaved') & input_1d_filter,
        'conv_2d': df['extractor_type'].str.contains('Conv_2d') & input_1d_filter & ~df['config'].str.contains('resnet'),
        'resnet': df['extractor_type'].str.contains('Resnet') & (df['spectrogram_repeat_channels'] == True),
    }

    # OR together every requested extractor. The former if/elif chain only covered
    # some combinations and silently dropped e.g. 'resnet' from ['parallel', 'resnet'].
    requested = {name.lower() for name in (extractors or [])}
    selected_masks = [mask for name, mask in extractor_masks.items() if name in requested]
    if selected_masks:
        extractors_filter = selected_masks[0]
        for mask in selected_masks[1:]:
            extractors_filter = extractors_filter | mask
        filters &= extractors_filter

    if config_filter is not None:
        filters &= (df[config_filter['key']].str.contains(config_filter['value']))

    return filters


# Mask of the runs compared in Table 3 (extractors=None: no restriction on the extractor family).
extractors_results_filters = get_experiments_filters(experiments, experiment_start_date, g=4096, j=4, m=128, malimo=False, n_mels=64, extractors=None)

# Labels handed to show_table through `hardcoded_cols`.
# Raw strings are required: in a plain literal the "\r" of "\ref" is parsed as a carriage
# return, which silently corrupts the LaTeX reference.
# NOTE(review): the labels are presumably matched positionally to the grouped rows --
# confirm the expected order against show_table.
hardcoded_columns = {'extractor': {
    'type': 'replace_groupby',
    'values': [
        r'Parallel (Fig \ref{fig:parallel_extractor})',
        r'Interleaved Time (Fig \ref{fig:interlaced_extractor})',
        '2D Conv',
        r'Interleaved Freq (Fig \ref{fig:interlaced_extractor})',
        'Resnet101 (Baseline)'
    ]
}}

# `display_all_exp or True` / `show_std or True` always evaluate to True: both options are forced on here.
grouped_df = show_table(df=experiments,
           filters=extractors_results_filters,
           groupby_columns = ['extractor_type'],
           acc_columns = ['cogent_test_acc'],# *family_test_acc_cols],
           extra_columns = ['nb_trainable_param', 'malimo', 'config', 'queue', 'random_seed', 'date', 'gpu_name'],
           #attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
           hardcoded_cols= hardcoded_columns,
           display_all=display_all_exp or True,
           show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=show_std or True,
           remove_outliers=remove_outliers,
           #nb_to_keep=100,#nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None
          )

## Table 3 -- Malimo

In [None]:
# Table 3 - Extractor types - Per question type analysis

def get_table_extractor_type_malimo_mask(df, start_date, n_mels=64):
    """Row mask of the MALiMo runs compared across extractor types (Table 3 -- Malimo).

    Fixed settings: prefix contains 'CLEAR_FINAL', no resized width, no
    "imagenet_stats" normalisation, rnn_state_size == 4096 (G), CoordConv at
    stem/resblock/classifier ('Both') but not in the extractor ('None'),
    nb_resblock == 4 (J), resblocks_out_chan == 128 (M), classifier_conv_out == 512 (C),
    classifier_projection_out == 1024 (H), reduce_lr_on_plateau and malimo enabled,
    RGB_colormap not containing 'Blues'.

    The extractor must be one of Parallel / Interleaved / Conv_2d (1D input,
    n_fft == 512, hop_length == 2048) or Resnet (with repeated spectrogram channels).

    Args:
        df: Experiments DataFrame containing every column referenced below.
        start_date: Inclusive lower bound compared against ``df['date']``.
        n_mels: Required ``n_mels``.

    Returns:
        Boolean Series aligned with ``df``.
    """
    static_conditions = [
        # -- Input parameters
        df['prefix'].str.contains('CLEAR_FINAL'),
        df['date'] >= start_date,
        df['n_mels'] == n_mels,
        df['resized_width'].isnull(),
        ~df['normalisation'].str.contains("imagenet_stats", na=False),
        # -- Text processing (G)
        df['rnn_state_size'] == 4096,
        # -- Coordconv
        df['extractor_spatial_location'] == 'None',
        df['stem_spatial_location'] == 'Both',
        df['resblock_spatial_location'] == 'Both',
        df['classifier_spatial_location'] == 'Both',
        # -- Resblocks (J, M)
        df['nb_resblock'] == 4,
        df['resblocks_out_chan'] == 128,
        # -- Classifier (C, H)
        df['classifier_conv_out'] == 512,
        df['classifier_projection_out'] == 1024,
        # -- Other
        df['reduce_lr_on_plateau'] == True,
        df['malimo'] == True,
        ~df['RGB_colormap'].str.contains('Blues', na=False),
    ]

    filters = static_conditions[0]
    for condition in static_conditions[1:]:
        filters = filters & condition

    # Spectrogram settings shared by the 1D-input extractors.
    is_1d_input = df['input_type'].str.contains('1D')
    is_1d_input = is_1d_input & (df['n_fft'] == 512)
    is_1d_input = is_1d_input & (df['hop_length'] == 2048)

    extractor_name = df['extractor_type']
    is_resnet = extractor_name.str.contains('Resnet') & (df['spectrogram_repeat_channels'] == True)
    is_parallel = extractor_name.str.contains('Parallel') & is_1d_input
    is_interleaved = extractor_name.str.contains('Interleaved') & is_1d_input
    is_conv_2d = (
        extractor_name.str.contains('Conv_2d')
        & is_1d_input
        & df['prefix'].str.contains("CLEAR_FINAL")
        & ~df['config'].str.contains('resnet')
    )

    any_extractor = is_parallel | is_interleaved | is_resnet | is_conv_2d

    return filters & any_extractor

# Mask of the MALiMo runs compared in Table 3.
filters = get_table_extractor_type_malimo_mask(experiments, experiment_start_date)

# Labels for the extractor column (currently not passed to show_table: `hardcoded_cols` is disabled below).
# Raw strings are required: in a plain literal the "\r" of "\ref" is parsed as a carriage
# return, which silently corrupts the LaTeX reference.
hardcoded_columns = {'extractor': {
    'type': 'replace_groupby',
    'values': [
        r'Parallel (Fig \ref{fig:parallel_extractor})',
        r'Interleaved Time (Fig \ref{fig:interlaced_extractor})',
        'Resnet101 (Baseline)',
        r'Interleaved Freq (Fig \ref{fig:interlaced_extractor})',
        '2D Conv'
    ]
}}

grouped_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['extractor_type'],
           acc_columns = ['cogent_test_acc'],# *family_test_acc_cols],
           extra_columns = ['nb_trainable_param'],
           #attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
           #hardcoded_cols= hardcoded_columns,
           #display_all=display_all_exp or True,
           show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=show_std,
           remove_outliers=remove_outliers,
           #nb_to_keep=100,#nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None
          )

## Table 4

In [None]:
# Table 4 - Coordinate maps placement - Interleaved

def get_table_coordconv_placement_filters(df, start_date, allow_multi_both=False, only_final=True, n_mels=64):
    """Row mask of the Parallel-extractor runs used to compare CoordConv placements (Table 4).

    Fixed settings: 1D input (n_fft == 512, hop_length == 2048, no resized width),
    rnn_state_size == 4096 (G), nb_resblock == 4 (J), resblocks_out_chan == 128 (M),
    classifier_conv_out == 512 (C), classifier_projection_out == 1024 (H),
    reduce_lr_on_plateau enabled, malimo not enabled.
    NOTE(review): the original code flagged the J/M/C/H values with FIXMEs -- confirm them.

    Args:
        df: Experiments DataFrame containing every column referenced below.
        start_date: Currently unused (the date filter is disabled).
        allow_multi_both: When False, runs with 'Both' at the resblock AND the
            classifier locations are excluded.
        only_final: When True only the 'CLEAR_FINAL' prefix is accepted, otherwise
            'CLEAR' is accepted as well (exact match).
        n_mels: Required ``n_mels``.

    Returns:
        Boolean Series aligned with ``df``.
    """
    accepted_prefixes = ['CLEAR_FINAL'] if only_final else ['CLEAR', 'CLEAR_FINAL']
    location_columns = (
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
    )

    conditions = [
        df['prefix'].isin(accepted_prefixes),
        # -- Input parameters
        df['input_type'].str.contains('1D'),
        df['n_fft'] == 512,
        df['hop_length'] == 2048,
        df['resized_width'].isnull(),
        df['n_mels'] == n_mels,
        # -- Text processing (G)
        df['rnn_state_size'] == 4096,
        # -- Extractor
        df['extractor_type'].str.contains('Parallel'),
        # -- Resblocks (J, M)
        df['nb_resblock'] == 4,
        df['resblocks_out_chan'] == 128,
        # -- Classifier (C, H)
        df['classifier_conv_out'] == 512,
        df['classifier_projection_out'] == 1024,
    ]
    # Variable parameters: any known CoordConv type at each location.
    conditions.extend(df[column].str.contains('None|Time|Freq|Both') for column in location_columns)

    if not allow_multi_both:
        both_in_resblock = df['resblock_spatial_location'].str.contains('Both')
        both_in_classifier = df['classifier_spatial_location'].str.contains('Both')
        conditions.append(~(both_in_resblock & both_in_classifier))

    # -- Other
    conditions.append(df['reduce_lr_on_plateau'] == True)
    conditions.append(df['malimo'] != True)

    filters = conditions[0]
    for condition in conditions[1:]:
        filters = filters & condition

    return filters

# Table 4 - CoordConv placement.
# The fixed hyper-parameters are the ones hard-coded in get_table_coordconv_placement_filters;
# runs with 'Both' at the resblock AND the classifier are left out (allow_multi_both=False).
filters = get_table_coordconv_placement_filters(experiments, experiment_start_date, allow_multi_both=False)

# One row per combination of CoordConv placements.
coordconv_location_columns = [
    'extractor_spatial_location',
    'stem_spatial_location',
    'resblock_spatial_location',
    'classifier_spatial_location',
]

grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=coordconv_location_columns,
    acc_columns=['train_acc', 'best_val_acc', 'cogent_test_acc'],
    sort_by_col='cogent_test_acc',
    extra_columns=['config', 'date'],
    show_count_col=show_count_col,
    format_dict=latex_format_dict,
    inplace_std=show_std or True,  # always True
    remove_outliers=remove_outliers,
    nb_to_keep=nb_results_to_keep,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

## Table 5

In [None]:
# Table 5 - Coordinate maps type - Interleaved -> Resblock

def get_per_q_type_coordconv_mask(df, coordconv_placement_mask, position):
    """Restrict a placement mask to the runs where CoordConv is used at ``position`` only.

    A run is kept when:
        * the extractor spatial location contains 'None',
        * the two locations other than ``position`` contain 'None',
        * the location at ``position`` contains 'Time', 'Freq', 'Both' or 'None'.

    Args:
        df: Experiments DataFrame with the ``*_spatial_location`` columns.
        coordconv_placement_mask: Boolean Series aligned with ``df``. It is NOT modified;
            a new Series is returned.
        position: One of "Stem", "Resblock" or "Classifier".

    Returns:
        New boolean Series: ``coordconv_placement_mask`` AND the conditions above.

    Raises:
        AssertionError: if ``position`` is not one of the supported values.
    """
    location_columns = {
        "Stem": 'stem_spatial_location',
        "Resblock": 'resblock_spatial_location',
        "Classifier": 'classifier_spatial_location',
    }

    # Raised explicitly (same exception type as the former `assert False`) so the
    # check also holds when Python runs with assertions disabled (-O).
    if position not in location_columns:
        raise AssertionError("Invalid position")

    # The studied location may hold any CoordConv type...
    specific_configs = df[location_columns[position]].str.contains('Time|Freq|Both|None')
    # ...while the other locations must not use CoordConv.
    for name, column in location_columns.items():
        if name != position:
            specific_configs = specific_configs & df[column].str.contains('None')

    extractor_without_coordconv = df['extractor_spatial_location'].str.contains('None')

    # `&` (not `&=`) so the caller's mask is left untouched.
    return coordconv_placement_mask & extractor_without_coordconv & specific_configs


# Table 5: start from the Table 4 selection (multi-'Both' runs allowed), then keep only the
# runs where CoordConv is placed in the stem.
filters = get_table_coordconv_placement_filters(experiments, experiment_start_date, allow_multi_both=True)
filters = get_per_q_type_coordconv_mask(experiments, filters, position="Stem")

# Raw string for the LaTeX "\&": "\&" is an invalid escape sequence in a plain literal
# (SyntaxWarning on recent Python versions). The resulting value is unchanged.
hardcoded_columns={'configuration':{
                   'type': 'replace_groupby',
                   'values': ["Time only", r"Time \& Freq", "None", "Freq only"]
               }
           }

# `show_std or True` always evaluates to True.
grouped_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location'],
           acc_columns = ['test_acc', *family_test_acc_cols],
           attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
           hardcoded_cols= hardcoded_columns,
           display_all=display_all_exp,
           #show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=show_std or True,
           remove_outliers=remove_outliers,
           nb_to_keep=nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None
          )


## Table 6

In [None]:
# Table 6 - GRU Reduction - Interleaved

def get_gru_reduction_df_mask(df, start_date, extractor_type="interleaved", only_final=True, n_mels=64):
    """Row mask of the runs used to study the reduction of GRU units (Table 6).

    Fixed settings: 1D input (n_fft == 512, hop_length == 2048, no resized width),
    CoordConv at stem/resblock/classifier ('Both') but not in the extractor ('None'),
    extractor_nb_block == 3 (K), extractor_projection_size == 64 (P),
    nb_resblock == 4 (J), resblocks_out_chan == 128 (M), classifier_conv_out == 512 (C),
    classifier_projection_out == 1024 (H), reduce_lr_on_plateau enabled, malimo not
    enabled, config name containing 'table_6' or 'table_3'.

    Variable setting: rnn_state_size (G) in {4096, 2048, 1024, 512, 256}.

    Args:
        df: Experiments DataFrame containing every column referenced below.
        start_date: Inclusive lower bound compared against ``df['date']``.
        extractor_type: "interleaved" selects extractors containing
            'Interleaved_Time_First'; any other value selects 'Parallel'.
        only_final: When True only the 'CLEAR_FINAL' prefix is accepted, otherwise
            'CLEAR' is accepted as well (exact match).
        n_mels: Required ``n_mels``.

    Returns:
        Boolean Series aligned with ``df``.
    """
    prefixes = ['CLEAR', 'CLEAR_FINAL']
    if only_final:
        prefixes = prefixes[1:]

    filters = (df['prefix'].isin(prefixes))
    filters &= (df['date'] >= start_date)

    # -- Input parameters
    filters &= (df['input_type'].str.contains('1D'))
    filters &= (df['n_fft'] == 512)
    filters &= (df['hop_length'] == 2048)
    filters &= (df['resized_width'].isnull())
    filters &= (df['n_mels'] == n_mels)

    # -- Coordconv
    filters &= (df['extractor_spatial_location'] == 'None')
    filters &= (df['stem_spatial_location'] == 'Both')
    filters &= (df['resblock_spatial_location'] == 'Both')
    filters &= (df['classifier_spatial_location'] == 'Both')

    # -- Extractor
    extractor_pattern = 'Interleaved_Time_First' if extractor_type == "interleaved" else 'Parallel'

    filters &= (df['extractor_type'].str.contains(extractor_pattern))
    # Read from `df` (not the notebook-global `experiments`) so the mask always
    # describes the frame that was passed in.
    filters &= (df['extractor_nb_block'] == 3)             # K
    filters &= (df['extractor_projection_size'] == 64)     # P      <---- FIXME : Might want to try with 32

    # -- Resblocks
    filters &= (df['nb_resblock'] == 4)                    # J
    filters &= (df['resblocks_out_chan'] == 128)           # M

    # -- Classifier
    filters &= (df['classifier_conv_out'] == 512)           # C
    filters &= (df['classifier_projection_out'] == 1024)    # H

    # Variable Parameters (Text-Processing GRU units)
    filters &= (df['rnn_state_size'].isin([4096, 2048, 1024, 512, 256])) # G

    # -- Other
    filters &= (df['reduce_lr_on_plateau'] == True)
    filters &= ((df['config'].str.contains('table_6')) | (df['config'].str.contains('table_3')))
    filters &= (df['malimo'] != True)

    return filters


# Extractor studied in this table ("interleaved" is the other supported value).
extractor_type = "parallel"
filters = get_gru_reduction_df_mask(experiments, experiment_start_date, extractor_type=extractor_type)

# One row per GRU size, sorted by CoGenT test accuracy; only 2 results are kept per group.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['rnn_state_size'],
    acc_columns=['train_acc', 'best_val_acc', 'cogent_test_acc'],
    extra_columns=['nb_trainable_param_million'],
    sort_by_col='cogent_test_acc',
    display_all=display_all_exp or True,  # always True
    format_dict=latex_format_dict,
    inplace_std=show_std or True,  # always True
    remove_outliers=remove_outliers,
    nb_to_keep=2,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

## Table 7

In [None]:
# Table 7 - Classifier Reduction - Interleaved

def get_classifier_reduction_df_mask(df, start_date, nb_gru=512, extractor_type='interleaved', only_final=True, n_mels=64):
    """Row mask of the runs used to study the reduction of the classifier (Table 7).

    Fixed settings: 1D input (n_fft == 512, hop_length == 2048, no resized width),
    rnn_state_size == ``nb_gru`` (G), CoordConv at stem/resblock/classifier ('Both')
    but not in the extractor ('None'), nb_resblock == 4 (J), resblocks_out_chan == 128 (M),
    classifier_type containing 'fcn', reduce_lr_on_plateau enabled.

    Variable settings: classifier_conv_out (C) in {512, 256, 128} and
    classifier_projection_out (H) in {1024, 512, 256}.

    Args:
        df: Experiments DataFrame containing every column referenced below.
        start_date: Inclusive lower bound compared against ``df['date']``.
        nb_gru: Required ``rnn_state_size``.
        extractor_type: "interleaved" selects extractors containing
            'Interleaved_Time_First'; any other value selects 'Parallel'.
        only_final: When True only the 'CLEAR_FINAL' prefix is accepted, otherwise
            'CLEAR' is accepted as well (exact match).
        n_mels: Required ``n_mels``.

    Returns:
        Boolean Series aligned with ``df``.
    """
    accepted_prefixes = ['CLEAR_FINAL'] if only_final else ['CLEAR', 'CLEAR_FINAL']

    if extractor_type == "interleaved":
        extractor_pattern = 'Interleaved_Time_First'
    else:
        extractor_pattern = 'Parallel'

    conditions = [
        df['prefix'].isin(accepted_prefixes),
        df['date'] >= start_date,
        # -- Input parameters
        df['input_type'].str.contains('1D'),
        df['n_fft'] == 512,
        df['hop_length'] == 2048,
        df['resized_width'].isnull(),
        df['n_mels'] == n_mels,
        # -- Text processing (G)
        df['rnn_state_size'] == nb_gru,
        # -- Coordconv
        df['extractor_spatial_location'] == 'None',
        df['stem_spatial_location'] == 'Both',
        df['resblock_spatial_location'] == 'Both',
        df['classifier_spatial_location'] == 'Both',
        # -- Extractor
        df['extractor_type'].str.contains(extractor_pattern),
        # -- Resblocks (J, M)
        df['nb_resblock'] == 4,
        df['resblocks_out_chan'] == 128,
        # -- Variable parameters: classifier topology (C, H)
        df['classifier_type'].str.contains('fcn'),
        df['classifier_conv_out'].isin([512, 256, 128]),
        df['classifier_projection_out'].isin([1024, 512, 256]),
        # -- Other
        df['reduce_lr_on_plateau'] == True,
    ]

    filters = conditions[0]
    for condition in conditions[1:]:
        filters = filters & condition

    return filters

# -----------------------------------------------------------------------------------------------------

# Extractor studied in this table ("interleaved" is the other supported value).
extractor_type = "parallel"
filters = get_classifier_reduction_df_mask(experiments, experiment_start_date, nb_gru=512, extractor_type=extractor_type)

# One row per (G, C, H) combination, sorted by CoGenT test accuracy.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['rnn_state_size', 'classifier_conv_out', 'classifier_projection_out'],
    acc_columns=['train_acc', 'best_val_acc', 'cogent_test_acc'],
    extra_columns=['nb_trainable_param_million', 'queue'],
    sort_by_col='cogent_test_acc',
    display_all=display_all_exp,
    show_count_col=show_count_col,
    format_dict=latex_format_dict,
    inplace_std=show_std or True,  # always True
    remove_outliers=remove_outliers,
    nb_to_keep=nb_results_to_keep,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

## Table 8

In [None]:
# Table 8 - Resblock Reduction - Interleaved
def get_resblock_reduction_df_mask(df, start_date, extractor_type, nb_gru=1024, c=256, h=512, only_final=True, n_mels=64):
    """Row mask of the runs used to study the reduction of the Resblocks (Table 8).

    Fixed settings: 1D input (n_fft == 512, hop_length == 2048, no resized width),
    rnn_state_size == ``nb_gru`` (G), CoordConv at stem/resblock/classifier ('Both')
    but not in the extractor ('None'), classifier_conv_out == ``c`` (C),
    classifier_projection_out == ``h`` (H), reduce_lr_on_plateau enabled, config name
    not containing 'last_review_table_6_256_gru'. For 'Parallel'/'Interleaved'
    extractors: extractor_nb_block == 3 (K) and extractor_projection_size == 64 (P).

    Variable settings: nb_resblock (J) <= 4 and resblocks_out_chan (M) in {128, 64, 32}.

    Args:
        df: Experiments DataFrame containing every column referenced below.
        start_date: Inclusive lower bound compared against ``df['date']``.
        extractor_type: Pattern that ``df['extractor_type']`` must contain
            (``str.contains``), e.g. 'Parallel'.
        nb_gru: Required ``rnn_state_size``.
        c: Required ``classifier_conv_out``.
        h: Required ``classifier_projection_out``.
        only_final: When True only the 'CLEAR_FINAL' prefix is accepted, otherwise
            'CLEAR' is accepted as well (exact match).
        n_mels: Required ``n_mels``.

    Returns:
        Boolean Series aligned with ``df``.
    """
    prefixes = ['CLEAR', 'CLEAR_FINAL']
    if only_final:
        prefixes = prefixes[1:]
    filters = (df['prefix'].isin(prefixes))
    filters &= (df['date'] >= start_date)

    # -- Input parameters
    filters &= (df['input_type'].str.contains('1D'))
    filters &= (df['n_fft'] == 512)
    filters &= (df['hop_length'] == 2048)
    filters &= (df['resized_width'].isnull())
    filters &= (df['n_mels'] == n_mels)

    # -- Text Processing
    # Read from `df` (not the notebook-global `experiments`) so the mask always
    # describes the frame that was passed in.
    filters &= (df['rnn_state_size'] == nb_gru)              # G

    # -- Coordconv
    filters &= (df['extractor_spatial_location'] == 'None')
    filters &= (df['stem_spatial_location'] == 'Both')
    filters &= (df['resblock_spatial_location'] == 'Both')
    filters &= (df['classifier_spatial_location'] == 'Both')

    # -- Extractor
    filters &= df['extractor_type'].str.contains(extractor_type)

    # K and P are only constrained for the Parallel / Interleaved extractors.
    if 'Parallel' in extractor_type or 'Interleaved' in extractor_type:
        filters &= (df['extractor_nb_block'] == 3)             # K
        filters &= (df['extractor_projection_size'] == 64)     # P      <---- FIXME : Might want to try with 32

    # -- Classifier
    filters &= (df['classifier_conv_out'] == c)          # C      <---- FIXME: Make sure this is the good values
    filters &= (df['classifier_projection_out'] == h)    # H      <---- FIXME: Make sure this is the good values

    # Variable Parameters (Resblocks)
    filters &= (df['nb_resblock'] <= 4)                    # J
    filters &= (df['resblocks_out_chan'].isin([128, 64, 32]))           # M

    # -- Other
    filters &= (df['reduce_lr_on_plateau'] == True)
    filters &= (~df['config'].str.contains('last_review_table_6_256_gru'))

    return filters

# Table 8 selection: Parallel extractor with G=512, C=512, H=1024.
filters = get_resblock_reduction_df_mask(experiments, experiment_start_date, 'Parallel', nb_gru=512, c=512, h=1024)

# One row per (J, M) combination, sorted by test accuracy.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['nb_resblock', 'resblocks_out_chan'],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    sort_by_col='test_acc',
    extra_columns=['nb_trainable_param_million'],
    format_dict=latex_format_dict,
    inplace_std=show_std or True,  # always True
    remove_outliers=remove_outliers,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

## Table 9

In [None]:
# Table 9 - Interleaved reduction
def get_interleaved_reduction_df_mask(df, start_date, nb_resblocks=4, m=128, only_final=True, n_mels=64):
    """Build the boolean row mask used for Table 9 (interleaved reduction).

    Rows are kept when they satisfy every criterion below:
      * prefix is 'CLEAR_FINAL' (or also 'CLEAR' when `only_final` is falsy) and date >= `start_date`
      * input: '1D' input type, n_fft = 512, hop_length = 2048, no resized width, n_mels = `n_mels`
      * text processing: rnn_state_size (G) = 512
      * coordconv: 'None' on the extractor, 'Both' on stem / resblocks / classifier
      * resblocks: nb_resblock (J) = `nb_resblocks`, resblocks_out_chan (M) = `m`
      * reduce_lr_on_plateau is True

    The extractor type, K, P, C and H are deliberately NOT constrained here (the
    corresponding filters are disabled, see the commented lines at the end).

    Args:
        df: experiments DataFrame holding the columns listed above.
        start_date: lower bound (inclusive) compared against df['date'].
        nb_resblocks: required number of resblocks (J).
        m: required resblock output channels (M).
        only_final: when truthy, only the 'CLEAR_FINAL' prefix is accepted.
        n_mels: required number of mel bands.

    Returns:
        Boolean Series aligned with `df`.
    """
    accepted_prefixes = ['CLEAR_FINAL'] if only_final else ['CLEAR', 'CLEAR_FINAL']

    mask = df['prefix'].isin(accepted_prefixes)

    criteria = [
        df['date'] >= start_date,
        # -- Input parameters
        df['input_type'].str.contains('1D'),
        df['n_fft'] == 512,
        df['hop_length'] == 2048,
        df['resized_width'].isnull(),
        df['n_mels'] == n_mels,
        # -- Text processing (G)
        df['rnn_state_size'] == 512,
        # -- Coordconv
        df['extractor_spatial_location'] == 'None',
        df['stem_spatial_location'] == 'Both',
        df['resblock_spatial_location'] == 'Both',
        df['classifier_spatial_location'] == 'Both',
        # -- Resblocks (J, M)
        df['nb_resblock'] == nb_resblocks,
        df['resblocks_out_chan'] == m,
        # -- Other
        df['reduce_lr_on_plateau'] == True,
    ]
    # Criteria are AND-ed in the order listed above.
    for criterion in criteria:
        mask &= criterion

    # Disabled filters, kept for reference:
    #   df['extractor_type'].str.contains('Parallel')
    #   df['classifier_conv_out'] == 256            # C   <---- FIXME: Make sure this is the good values
    #   df['classifier_projection_out'] == 1024     # H   <---- FIXME: Make sure this is the good values
    #   df['extractor_nb_block'].isin([4,3,2])                       # K
    #   df['extractor_projection_size'].isin([128, 64, 32, None])    # P
    #   df['queue'].str.contains('reduction_mel')

    return mask

# Table 9 selection: J = 4 resblocks, M = 128 resblock output channels.
filters = get_interleaved_reduction_df_mask(
    experiments,
    experiment_start_date,
    nb_resblocks=4,
    m=128,
)

# Grouped by (K, P) = (extractor_nb_block, extractor_projection_size).
# `display_all_exp or True` is always truthy, so `display_all` is effectively forced on.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['extractor_nb_block', 'extractor_projection_size'],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    extra_columns=['nb_trainable_param_million', 'date', 'random_seed', 'config', 'queue'],
    display_all=display_all_exp or True,
    show_count_col=show_count_col,
    format_dict=latex_format_dict,
    inplace_std=show_std,
    remove_outliers=remove_outliers,
    #nb_to_keep=2,#nb_results_to_keep,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

In [None]:
# Inspect the Table 9 runs whose config name contains "P_0".
cols = ['prefix', 'malimo', 'config', 'extractor_projection_size']
exp = experiments[
    experiments["config"].str.contains("table_9")
    & experiments['config'].str.contains("P_0")
]
exp[cols].style.format(format_dict)

## Figure 5

In [None]:
# Use best config from resblock reduction (J = 4 resblocks, M = 64 output channels).
# `experiment_start_date` is passed as the 2nd positional argument, like the Table 8 call;
# without it 'Interleaved_Time' would be consumed as the start date and the extractor type
# argument would be missing.
filters = get_resblock_reduction_df_mask(experiments, experiment_start_date, 'Interleaved_Time', nb_gru=512, c=256, h=1024) & (experiments['nb_resblock'] == 4) & (experiments['resblocks_out_chan'] == 64)

# Overall per-family accuracies (kept in `grouped_overall_df`, displayed in the next cell).
grouped_overall_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['nb_resblock', 'resblocks_out_chan'],
           acc_columns = ['test_acc', *family_test_acc_cols],
           attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
           #extra_columns = ['nb_trainable_param_million'],
           #hardcoded_cols=hardcoded_columns,
           display_all=display_all_exp,
           show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=False,
           remove_outliers=remove_outliers,
           nb_to_keep=nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None,
           print_latex=False
          )


# Per-family accuracies split into "no relation" / "with relation" questions; this is the
# frame the bar plot below is built from.
grouped_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['nb_resblock', 'resblocks_out_chan'],
           acc_columns = ['test_acc', *no_rel_family_test_acc_cols, *with_rel_family_test_acc_cols],
           #extra_columns = ['nb_trainable_param_million'],
           #hardcoded_cols=hardcoded_columns,
           display_all=display_all_exp | True,     # always truthy for a boolean flag
           show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=False,
           remove_outliers=remove_outliers,
           nb_to_keep=nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None,
           print_latex=False
          )


import seaborn as sns

def show_values_on_bars(axs):
    """Write each bar's height, formatted without decimals, next to the bar.

    Bars whose height is exactly 0 are labelled "N/A" (in an extra-small font)
    instead of "0".

    Args:
        axs: a single axes-like object exposing `.patches` and `.text(...)`,
            or a numpy array of such objects (every element is annotated).
    """
    def _annotate(axis):
        for bar in axis.patches:
            height = bar.get_height()
            x_center = bar.get_x() + bar.get_width() / 2
            # The label is placed 0.5 (data units) beyond the end of the bar.
            y_label = bar.get_y() + height + 0.5
            if height == 0:
                axis.text(x_center, y_label, "N/A", ha="center", size='x-small')
                continue
            axis.text(x_center, y_label, '{:.0f}'.format(height), ha="center")

    targets = axs.flat if isinstance(axs, np.ndarray) else [axs]
    for axis in targets:
        _annotate(axis)

# Display labels for the per-family "with relation" accuracy columns.
cols = {
    'instrument_with_rel_test_acc' : "Instrument",
    'note_with_rel_test_acc': 'Note',
    'brightness_with_rel_test_acc': 'Brightness',
    'loudness_with_rel_test_acc': 'Loudness',
    'exist_with_rel_test_acc': 'Exist',
    'position_with_rel_test_acc': 'Absolute Pos',
    'position_global_with_rel_test_acc': 'Global Pos',
    'position_rel_with_rel_test_acc':'Relative Pos',
    'count_with_rel_test_acc':'Count',
    'count_compare_with_rel_test_acc':'Count compare',
    'count_diff_with_rel_test_acc': 'Count Instrument'
}

# Row label 0 becomes 'acc'; column names become the display labels above.
with_relation_df = grouped_df[with_rel_family_test_acc_cols].rename({0:'acc'}).rename(cols,axis=1)
#no_relation_df[list(no_relation_df.T.sort_values('acc', ascending=False).index)]

#.T.assign(nb_relation=0)


#with_relation_df.columns = no_relation_df.columns

no_relation_df = grouped_df[no_rel_family_test_acc_cols].rename({0:'acc'})
# Labels are copied positionally.
# NOTE(review): assumes both column lists follow the same family order -- confirm.
no_relation_df.columns = with_relation_df.columns

# Transpose so each question family is a row, tagged with its relation group.
no_relation_df = no_relation_df.T.assign(nb_relation=0)
with_relation_df = with_relation_df.T.assign(nb_relation=1)

# -1 values are turned into 0, which `show_values_on_bars` then labels "N/A"
# (presumably -1 marks a missing accuracy -- TODO confirm). Accuracies are scaled to percent.
merged = pd.concat([no_relation_df, with_relation_df]).reset_index().replace(-1,0).rename({'nb_relation':"Nb Relation"}, axis=1)
merged['acc'] = merged.apply(lambda x: x['acc'] * 100, axis=1)

# Bars are ordered by decreasing "with relation" accuracy.
bar_order = list(with_relation_df.sort_values('acc', ascending=False).index)

fig = plt.figure()
ax = sns.barplot(x='index', hue='Nb Relation', y='acc', data=merged, order=bar_order, palette=['dodgerblue','forestgreen'])
for item in ax.get_xticklabels():
    item.set_rotation(90)
    
show_values_on_bars(ax)

ax.set_ylabel('Accuracy (%)')
ax.set_xlabel('Question Type')
# Replace the hue values shown in the legend with readable labels.
legend_texts = ax.legend().get_texts()
legend_texts[0].set_text('No relation')
legend_texts[1].set_text('With relation')
    
plt.tight_layout()

fig.savefig(f"stats/q_family_acc_by_relation.pdf", bbox_inches='tight')

In [None]:
grouped_overall_df.style.format(latex_format_dict)

## Figure 5 - With 3rd col

In [None]:
# Use best config from resblock reduction (J = 4 resblocks, M = 64 output channels).
# `experiment_start_date` is passed as the 2nd positional argument, like the Table 8 call;
# without it 'Interleaved_Time' would be consumed as the start date and the extractor type
# argument would be missing.
filters = get_resblock_reduction_df_mask(experiments, experiment_start_date, 'Interleaved_Time', nb_gru=512, c=256, h=1024) & (experiments['nb_resblock'] == 4) & (experiments['resblocks_out_chan'] == 64)

# Overall per-family table. The returned frame is overwritten by the next call, so this
# call only matters for whatever `show_table` displays.
grouped_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['nb_resblock', 'resblocks_out_chan'],
           acc_columns = ['test_acc', *family_test_acc_cols],
           attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
           #extra_columns = ['nb_trainable_param_million'],
           #hardcoded_cols=hardcoded_columns,
           display_all=display_all_exp,
           show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=False,
           remove_outliers=remove_outliers,
           nb_to_keep=1,#nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None,
           print_latex=False
          )


# Overall + "no relation" + "with relation" per-family accuracies; this is the frame the
# three-group bar plot below is built from.
grouped_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['nb_resblock', 'resblocks_out_chan'],
           acc_columns = ['test_acc', *family_test_acc_cols, *no_rel_family_test_acc_cols, *with_rel_family_test_acc_cols],
           #extra_columns = ['nb_trainable_param_million'],
           #hardcoded_cols=hardcoded_columns,
           display_all=display_all_exp,
           show_count_col=show_count_col,
           format_dict=latex_format_dict,
           inplace_std=False,
           remove_outliers=remove_outliers,
           nb_to_keep=1,#nb_results_to_keep,
           all_seeds=all_random_seeds if show_missing_seeds else None,
           print_latex=False
          )


import seaborn as sns

def show_values_on_bars(axs):
    """Write each bar's height, formatted without decimals, next to the bar.

    Bars whose height is exactly 0 are labelled "N/A" (extra-small font); all
    other labels use the 'small' font size.

    Args:
        axs: a single axes-like object exposing `.patches` and `.text(...)`,
            or a numpy array of such objects (every element is annotated).
    """
    def _annotate(axis):
        for bar in axis.patches:
            height = bar.get_height()
            x_center = bar.get_x() + bar.get_width() / 2
            # The label is placed 0.5 (data units) beyond the end of the bar.
            y_label = bar.get_y() + height + 0.5
            if height == 0:
                axis.text(x_center, y_label, "N/A", ha="center", size='x-small')
                continue
            axis.text(x_center, y_label, '{:.0f}'.format(height), ha="center", size='small')

    targets = axs.flat if isinstance(axs, np.ndarray) else [axs]
    for axis in targets:
        _annotate(axis)

# Display labels for the per-family "with relation" accuracy columns.
cols = {
    'instrument_with_rel_test_acc' : "Instrument",
    'note_with_rel_test_acc': 'Note',
    'brightness_with_rel_test_acc': 'Brightness',
    'loudness_with_rel_test_acc': 'Loudness',
    'exist_with_rel_test_acc': 'Exist',
    'position_with_rel_test_acc': 'Absolute Pos',
    'position_global_with_rel_test_acc': 'Global Pos',
    'position_rel_with_rel_test_acc':'Relative Pos',
    'count_with_rel_test_acc':'Count',
    'count_compare_with_rel_test_acc':'Count compare',
    'count_diff_with_rel_test_acc': 'Count Instrument'
}

# Row label 0 becomes 'acc'; column names become the display labels above.
with_relation_df = grouped_df[with_rel_family_test_acc_cols].rename({0:'acc'}).rename(cols,axis=1)
#no_relation_df[list(no_relation_df.T.sort_values('acc', ascending=False).index)]

#.T.assign(nb_relation=0)


#with_relation_df.columns = no_relation_df.columns

# The overall and "no relation" frames reuse the display labels positionally.
# NOTE(review): assumes all three column lists follow the same family order -- confirm.
overall_df = grouped_df[family_test_acc_cols].rename({0: 'acc'})
overall_df.columns = with_relation_df.columns

no_relation_df = grouped_df[no_rel_family_test_acc_cols].rename({0:'acc'})
no_relation_df.columns = with_relation_df.columns

# Transpose so each question family is a row, tagged with its group name (used as hue).
no_relation_df = no_relation_df.T.assign(nb_relation="None")
with_relation_df = with_relation_df.T.assign(nb_relation="One")
overall_df = overall_df.T.assign(nb_relation='Overall')

# -1 values are turned into 0, which `show_values_on_bars` then labels "N/A"
# (presumably -1 marks a missing accuracy -- TODO confirm). Accuracies are scaled to percent.
merged = pd.concat([overall_df, no_relation_df, with_relation_df]).reset_index().replace(-1,0).rename({'nb_relation':"Nb Relation"}, axis=1)
merged['acc'] = merged.apply(lambda x: x['acc'] * 100, axis=1)

# Bars are ordered by decreasing "with relation" accuracy.
bar_order = list(with_relation_df.sort_values('acc', ascending=False).index)

fig = plt.figure()
ax = sns.barplot(x='index', hue='Nb Relation', y='acc', data=merged, order=bar_order, palette=['gold', 'forestgreen', 'dodgerblue'])
for item in ax.get_xticklabels():
    item.set_rotation(90)
    
show_values_on_bars(ax)

ax.set_ylabel('Accuracy (%)')
ax.set_xlabel('Question Type')
# Legend anchored at the axes' (1, 1) corner.
ax.legend(title="Nb relation", bbox_to_anchor=(1,1), labelspacing=0.3)
    
plt.tight_layout()

fig.savefig(f"stats/q_family_acc_by_relation_with_overall.pdf", bbox_inches='tight')

## Resized to 224

In [None]:
# Best config with input resized to 224x224
def get_best_df_mask(df, m=64, resized=False):
    """Build the boolean row mask selecting the "best configuration" runs.

    Static parameters enforced by the mask:
        Extractor = Interleaved_Time, K = 3, P = 64
        J = 4, M = `m`
        G = 512
        C = 256, H = 1024
        1D input, n_fft = 512, keep_freq_point = 256, hop_length = 2048
        coordconv: 'None' on the extractor, 'Both' on stem / resblocks / classifier

    The mask is computed from `df` itself (previously the argument was ignored and
    the notebook-global `experiments` was read instead), so the function also works
    on any other frame with the same columns.

    Args:
        df: experiments DataFrame to filter.
        m: required resblock output channels (M).
        resized: when truthy keep only runs with a resized input (non-null
            'resized_width'); otherwise keep only runs without resizing.

    Returns:
        Boolean Series aligned with `df`.
    """
    # -- Input parameters
    filters = (df['input_type'].str.contains('1D'))
    filters &= (df['n_fft'] == 512)
    filters &= (df['keep_freq_point'] == 256)
    filters &= (df['hop_length'] == 2048)
    if resized:
        filters &= (df['resized_width'].notna())
    else:
        filters &= (df['resized_width'].isnull())

    # -- Text Processing
    filters &= (df['rnn_state_size'] == 512)              # G

    # -- Coordconv
    filters &= (df['extractor_spatial_location'] == 'None')
    filters &= (df['stem_spatial_location'] == 'Both')
    filters &= (df['resblock_spatial_location'] == 'Both')
    filters &= (df['classifier_spatial_location'] == 'Both')

    # -- Extractor
    filters &= df['extractor_type'].str.contains('Interleaved_Time')

    # Resblocks
    filters &= (df['nb_resblock'] == 4)                   # J
    filters &= (df['resblocks_out_chan'] == m)            # M

    # -- Classifier
    filters &= (df['classifier_conv_out'] == 256)          # C      <---- FIXME: Make sure this is the good values
    filters &= (df['classifier_projection_out'] == 1024)   # H      <---- FIXME: Make sure this is the good values

    # -- Extractor size
    filters &= (df['extractor_nb_block'] == 3)             # K
    # P is 64 for every value of `m`: the original code first assigned 32 for m == 64
    # and immediately overwrote it with 64, so 64 is the value that was actually used.
    p = 64
    filters &= (df['extractor_projection_size'] == p)      # P

    return filters

# Best configuration (M = 64) on the non-resized input.
filters = get_best_df_mask(experiments, m=64, resized=False)

# `display_all_exp | True` is always truthy for a boolean flag.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['resblocks_out_chan'],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    extra_columns=['nb_trainable_param_million'],
    display_all=display_all_exp | True,
    show_count_col=show_count_col,
    format_dict=latex_format_dict,
    inplace_std=show_std,
    remove_outliers=remove_outliers,
    nb_to_keep=nb_results_to_keep,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

In [None]:
# Best configuration (M = 64) restricted to runs with a resized input.
filters = get_best_df_mask(experiments, m=64, resized=True)

# `display_all_exp | True` is always truthy for a boolean flag.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['resblocks_out_chan'],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    extra_columns=['resized_width', 'random_seed', 'nb_trainable_param_million'],
    display_all=display_all_exp | True,
    show_count_col=show_count_col,
    format_dict=latex_format_dict,
    inplace_std=show_std,
    remove_outliers=remove_outliers,
    nb_to_keep=nb_results_to_keep,
    all_seeds=all_random_seeds if show_missing_seeds else None,
)

In [None]:
experiments[(experiments['note'].str.contains('new_resize'))].sort_values("date", ascending=False)

In [None]:
# Scratch cell: rebuilds the with-relation / no-relation / overall frames from the
# current `grouped_df`, using whichever `cols` mapping was defined last.
# NOTE(review): depends on `grouped_df` containing the per-family columns -- confirm the
# preceding `show_table` call returns them.
with_relation_df = grouped_df[with_rel_family_test_acc_cols].rename({0:'acc'}).rename(cols,axis=1)
#no_relation_df[list(no_relation_df.T.sort_values('acc', ascending=False).index)]

#.T.assign(nb_relation=0)


#with_relation_df.columns = no_relation_df.columns

no_relation_df = grouped_df[no_rel_family_test_acc_cols].rename({0:'acc'})
#no_relation_df.columns = with_relation_df.columns

# Only the no-relation frame is transposed and tagged here.
no_relation_df = no_relation_df.T.assign(nb_relation=0)
#with_relation_df = with_relation_df.T.assign(nb_relation=1)

# Overall frame reuses the with-relation column labels positionally.
overall_df = grouped_df[family_test_acc_cols].rename({0: 'acc'})
overall_df.columns = with_relation_df.columns
#overall_df = overall_df.T.assign(nb_relation='All')

In [None]:
with_relation_df

In [None]:
overall_df

In [None]:
# Table X - Extractor types - Per question type analysis
# Static parameters :
    # G = 4096
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024

# Variable parameters :
    # Extractors Type = {Parallel, Interleaved, Resnet}

filters = get_table_extractor_type_filters(experiments)

# Labels substituted for the groupby values in the rendered table.
# Raw strings are required: in a normal string literal '\r' is the carriage-return escape,
# so '\ref{...}' would be stored as CR + 'ef{...}' and the LaTeX reference would be broken.
hardcoded_columns = {'extractor': {
    'type': 'replace_groupby',
    'values': [
        r'Interleaved Time 1st (Fig \ref{fig:interlaced_extractor})',
        r'Parallel (Fig \ref{fig:parallel_extractor})',
        '2D Conv',
        'Resnet101 (Baseline)',
        r'Interleaved Freq 1st (Fig \ref{fig:interlaced_extractor})'
    ]
}}

grouped_df = show_table(df=experiments,
           filters=filters,
           groupby_columns = ['extractor_type'],
           acc_columns = ['test_acc', *family_test_acc_cols],
           extra_columns = ['random_seed', 'note', 'extractor_out_chan', 'extractor_nb_block','config'],
           attribute_by_color = {c: 'CMRmap' for c in family_test_acc_cols},
           hardcoded_cols= hardcoded_columns,
           display_all=True,
           show_count_col=False,
           format_dict=latex_format_dict,
           inplace_std=False,
           remove_outliers=False,
           nb_to_keep=None
          )

In [None]:
# Extractor-type selection narrowed down to the Parallel extractor.
filters = (
    get_table_extractor_type_filters(experiments)
    & (experiments['extractor_type'].str.contains('Parallel'))
)

# Overall and "with relation" per-family test accuracies, outliers removed.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['extractor_type'],
    acc_columns=['test_acc', *family_test_acc_cols, *with_rel_family_test_acc_cols],
    #extra_columns = ['train_acc', 'best_val_acc','nb_epoch_trained', 'train_time'],
    attribute_by_color={c: 'CMRmap' for c in family_test_acc_cols},
    display_all=False,
    show_count_col=False,
    format_dict=latex_format_dict,
    inplace_std=False,
    remove_outliers=True,
    print_latex=False,
)

In [None]:
# Display labels for the overall per-family accuracy columns.
cols = {
    'instrument_test_acc': "Instrument",
    'note_test_acc': 'Note',
    'brightness_test_acc': 'Brightness',
    'loudness_test_acc': 'Loudness',
    'exist_test_acc': 'Exist',
    'position_test_acc': 'Abs Pos',
    'position_global_test_acc': 'Global Pos',
    'position_rel_test_acc': 'Rel Pos',
    'count_test_acc': 'Count',
    'count_compare_test_acc': 'Count compare',
    'count_diff_test_acc': 'Count Instr',
}

# Bar plot of the row labelled 0 of `grouped_df`, one bar per question family.
plt.figure()
grouped_df.loc[0][list(cols)].rename(cols).plot.bar(rot=45)


In [None]:
list(experiments.columns)

In [None]:
# Per-family accuracies of the row labelled 0, relabelled with `cols` and turned into a
# two-column frame ('index' = family label, 'acc' = accuracy).
# (A first assignment of `renamed` that was immediately overwritten has been removed.)
renamed = grouped_df.loc[0, list(cols.keys())].rename(cols).T.reset_index().rename({0:'acc'}, axis=1)

#grouped_df.T

#idx = pd.MultiIndex.from_tuples([('instrument_test_acc', 'instrument_with_rel_test_acc'), ('brightness_test_acc', 'brightness_with_rel_test_acc')])

#grouped_df[['instrument_test_acc', 'instrument_with_rel_test_acc']].reindex(idx)

# Two-family sample: overall accuracies tagged nb_relation=0, with-relation ones tagged 1.
# `df2` takes over `df1`'s column names so that both frames can be stacked later.
df1 = grouped_df[['instrument_test_acc', 'brightness_test_acc']].assign(nb_relation=0)
df2 = grouped_df[['instrument_with_rel_test_acc', 'brightness_with_rel_test_acc']].assign(nb_relation=1)
df2.columns = df1.columns

#plt.figure()
#ax = sns.barplot(x='index', y='acc',data=renamed)

In [None]:
merged

In [None]:
def show_values_on_bars(axs):
    """Write each bar's height, formatted without decimals, next to the bar.

    Bars whose height is exactly 0 are labelled "N/A" (in an extra-small font)
    instead of "0".

    Args:
        axs: a single axes-like object exposing `.patches` and `.text(...)`,
            or a numpy array of such objects (every element is annotated).
    """
    def _annotate(axis):
        for bar in axis.patches:
            height = bar.get_height()
            x_center = bar.get_x() + bar.get_width() / 2
            # The label is placed 0.5 (data units) beyond the end of the bar.
            y_label = bar.get_y() + height + 0.5
            if height == 0:
                axis.text(x_center, y_label, "N/A", ha="center", size='x-small')
                continue
            axis.text(x_center, y_label, '{:.0f}'.format(height), ha="center")

    targets = axs.flat if isinstance(axs, np.ndarray) else [axs]
    for axis in targets:
        _annotate(axis)

In [None]:
import seaborn as sns

def show_values_on_bars(axs):
    """Write each bar's height, formatted without decimals, next to the bar.

    Bars whose height is exactly 0 are labelled "N/A" (in an extra-small font)
    instead of "0".

    Args:
        axs: a single axes-like object exposing `.patches` and `.text(...)`,
            or a numpy array of such objects (every element is annotated).
    """
    def _annotate(axis):
        for bar in axis.patches:
            height = bar.get_height()
            x_center = bar.get_x() + bar.get_width() / 2
            # The label is placed 0.5 (data units) beyond the end of the bar.
            y_label = bar.get_y() + height + 0.5
            if height == 0:
                axis.text(x_center, y_label, "N/A", ha="center", size='x-small')
                continue
            axis.text(x_center, y_label, '{:.0f}'.format(height), ha="center")

    targets = axs.flat if isinstance(axs, np.ndarray) else [axs]
    for axis in targets:
        _annotate(axis)

# Display labels for the overall per-family accuracy columns.
cols = {'instrument_test_acc' : "Instrument",
 'note_test_acc': 'Note',
 'brightness_test_acc': 'Brightness',
 'loudness_test_acc': 'Loudness',
 'exist_test_acc': 'Exist',
 'position_test_acc': 'Abs Pos',
 'position_global_test_acc': 'Global Pos',
 'position_rel_test_acc':'Rel Pos',
 'count_test_acc':'Count',
 'count_compare_test_acc':'Count compare',
 'count_diff_test_acc': 'Count Instr'}


# NOTE: despite its name, `no_relation_df` is built from the OVERALL family columns
# (`family_test_acc_cols`), not from the "no relation" ones.
no_relation_df = grouped_df[family_test_acc_cols].rename({0:'acc'}).rename(cols,axis=1)
#no_relation_df[list(no_relation_df.T.sort_values('acc', ascending=False).index)]

#.T.assign(nb_relation=0)

# The with-relation frame reuses the display labels positionally.
# NOTE(review): assumes both column lists follow the same family order -- confirm.
with_relation_df = grouped_df[with_rel_family_test_acc_cols].rename({0:'acc'})
with_relation_df.columns = no_relation_df.columns

# Transpose so each question family is a row, tagged with its group (used as hue).
no_relation_df = no_relation_df.T.assign(nb_relation=0)
with_relation_df = with_relation_df.T.assign(nb_relation=1)

# -1 values are turned into 0, which `show_values_on_bars` then labels "N/A"
# (presumably -1 marks a missing accuracy -- TODO confirm). Accuracies are scaled to percent.
merged = pd.concat([no_relation_df, with_relation_df]).reset_index().replace(-1,0).rename({'nb_relation':"Nb Relation"}, axis=1)
merged['acc'] = merged.apply(lambda x: x['acc'] * 100, axis=1)

# Bars are ordered by decreasing accuracy of the nb_relation=0 group.
bar_order = list(no_relation_df.sort_values('acc', ascending=False).index)

fig = plt.figure()
ax = sns.barplot(x='index', hue='Nb Relation', y='acc', data=merged, order=bar_order, palette=['dodgerblue','mediumseagreen'])
for item in ax.get_xticklabels():
    item.set_rotation(90)
    
show_values_on_bars(ax)

ax.set_ylabel('Accuracy (%)')
ax.set_xlabel('Question Type')
    
plt.tight_layout()

# WARNING: same output path as the "Figure 5" cell -- running this cell overwrites that PDF.
fig.savefig(f"stats/q_family_acc_by_relation.pdf", bbox_inches='tight')

In [None]:
# Alternative view: one bar group per `nb_relation` value, one hue per question family
# (hues ordered by decreasing accuracy in `no_relation_df`).
# NOTE(review): expects `merged` to still have an 'nb_relation' column -- confirm which
# cell produced the current `merged`.
plt.figure()
sns.barplot(x='nb_relation', hue='index', y='acc', data=merged, hue_order=list(no_relation_df.sort_values('acc', ascending=False).index))

In [None]:
list(no_relation_df.sort_values('acc', ascending=False).index)

In [None]:
# Two-family sample (overall accuracies): row 0 renamed to 'acc', transposed so each
# family is a row, tagged nb_relation=0.
df3 = grouped_df[['instrument_test_acc', 'brightness_test_acc']]
df3_r = df3.rename({0:'acc'}).T.assign(nb_relation=0)

In [None]:
# Same two families for the "with relation" accuracies; the columns take `df3`'s names so
# both frames share row labels after the transpose. Tagged nb_relation=1.
df4 = grouped_df[['instrument_with_rel_test_acc', 'brightness_with_rel_test_acc']]
df4.columns = df3.columns
df4_r = df4.rename({0:'acc'}).T.assign(nb_relation=1)

In [None]:
# Stack both samples (this rebinds the notebook-global `merged`) and plot them with one
# hue per `nb_relation` value.
merged = pd.concat([df3_r, df4_r]).reset_index()

plt.figure()
sns.barplot(x='index', hue='nb_relation', y='acc', data=merged)

In [None]:
merged

In [None]:
#df1 = df1.rename({0:"no_relate"})
#df1.assign(nb_relation=0)
df1

In [None]:
#df2['nb_relation'] = 1
#df2.columns = df1.columns
df2
#df2 = df2.rename({0:"with_relate"})

In [None]:
# Stack the two tagged frames row-wise (rebinds the notebook-global `merged`) and show it.
merged = pd.concat([df1,df2])
merged

In [None]:
# Scratch plot on the transposed `merged` frame.
# NOTE(review): if `merged` is the df1/df2 concat, 'nb_relation' becomes a row label after
# `.T`, not a column -- confirm this call still works as intended.
plt.figure()
sns.barplot(hue='nb_relation', x='index', data=merged.T.reset_index())

In [None]:
import seaborn as sns

# Scratch plot: passes the relabelled row 0 of `grouped_df` (a Series) straight to seaborn.
plt.figure()
ax = sns.barplot(data=grouped_df.loc[0][list(cols.keys())].rename(cols))

In [None]:
# Table X - Per question analysis - COORDCONV
# Parameters listed for this table:
#   K = 3, N_0 = 8, P = 64, G = 1024, J = 3, M = 64
#   ClassifierTopology = FCN, C = 512, H = 1024

filters = get_table_coordconv_per_q_type_filters(experiments)

# Grouped by the four coordconv placements; the groupby values are replaced in the
# rendered table by the labels given under 'configuration'.
grouped_df = show_table(
    df=experiments,
    filters=filters,
    groupby_columns=[
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
    ],
    acc_columns=['test_acc', *family_test_acc_cols],
    extra_columns=[],
    attribute_by_color={c: 'CMRmap' for c in family_test_acc_cols},
    display_all=True,
    format_dict=latex_format_dict,
    hardcoded_cols={
        'configuration': {
            'type': 'replace_groupby',
            'values': ["NAAQA", "Time only", "Freq only", "None"],
        },
    },
)

In [None]:
# Table 7 - CoordConv placement
# Parameters listed for this table:
#   K = 3, N_0 = 8, P = 64, G = 1024, J = 3, M = 64
#   ClassifierTopology = FCN, C = 512, H = 1024

filters = get_table_coordconv_placement_filters(experiments)

# Grouped by the four coordconv placements; `all_seeds` is always passed here.
show_table(
    df=experiments,
    filters=filters,
    groupby_columns=[
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
    ],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    extra_columns=['note'],
    attribute_by_color=None,
    display_all=True,
    format_dict=latex_format_dict,
    all_seeds=all_random_seeds,
)

In [None]:
# Table 7 - CoordConv placement - BIGGER
# Parameters listed for this table:
#   K = 3, N_0 = 8, P = 64, G = 1024, J = 3, M = 64
#   ClassifierTopology = FCN, C = 512, H = 1024

# TODO : Update
# The helper's mask is computed but immediately replaced by the note-based selection below.
filters = get_table_coordconv_bigger_placement_filters(experiments)

filters = experiments['note'].str.contains('table_7_bigger')

# Grouped by the four coordconv placements plus the extractor type.
show_table(
    df=experiments,
    filters=filters,
    groupby_columns=[
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
        'extractor_type',
    ],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    extra_columns=['random_seed', 'train_time'],
    attribute_by_color=None,
    display_all=True,
    format_dict=latex_format_dict,
    show_count_col=True,
    all_seeds=all_random_seeds,
)

In [None]:
# Table 4 - Resblocks
# Static parameters
    # Extractor = Parallel
    # K = 3
    # N = [8, 16, 32]
    # P = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024
# Variable Parameters
    # J = {4, 3, 2, 1}
    # M = {128, 64, 32}
#
# NOTE(review): the header above lists C = 512 / H = 1024, but the classifier filters
# below select C = 128 / H = 512 -- confirm which values are intended.
# This cell only displays the minimum 'train_time' of the selected runs.

filters = (experiments['date'] >= '2020-11-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Coordconv
filters &= (experiments['extractor_spatial_location'] == 'None')
filters &= (experiments['stem_spatial_location'] == 'Both')
filters &= (experiments['resblock_spatial_location'] == 'Both')
filters &= (experiments['classifier_spatial_location'] == 'Both')

# -- Extractor
filters &= experiments['extractor_type'].str.contains('Parallel')
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P      <---- FIXME : Might want to try with 32

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (Resblocks)
filters &= (experiments['nb_resblock'] <= 4)                    # J
filters &= (experiments['resblocks_out_chan'].isin([128, 64, 32]))           # M

# -- Other
#filters &= (experiments['note'].str.contains('table_5_final|table_4.*1_worker'))    # Table 4 result for comparison
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -- Comparison
#filters |= ( (experiments['nb_resblock'] == 4) & (experiments['resblocks_out_chan'] == 128) & (experiments['note'].str.contains('table_4.*1_worker')) & experiments['extractor_type'].str.contains('Parallel') & (experiments['classifier_conv_out'] == 128) & (experiments['classifier_projection_out'] == 512) )

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
# (these three lists are assigned but not used further in this cell)
groupby_columns = ['nb_resblock', 'resblocks_out_chan']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']

exp[['train_time']].min()

In [None]:
# Table 7 - CoordConv placement
    # K = 3
    # N_0 = 8
    # P = 64
    # G = 1024
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024
#
# NOTE(review): the header above lists C = 512 / H = 1024, but the classifier filters
# below select C = 128 / H = 512 -- confirm which values are intended.
# This cell builds the table by hand (filter -> dedupe -> groupby mean/std -> coloured
# display -> LaTeX) instead of going through `show_table`.

# NOTE: this `columns` list is replaced further down, before it is ever read.
columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location' , 'train_acc', 'best_val_acc', 'test_acc', 'best_val_loss', 'test_loss', 'nb_trainable_param_round', 'nb_epoch_trained']
#columns = ['extractor_filters', 'classifier_conv_out', 'classifier_projection_out',  'keep_freq_point', 'hop_length', 'extractor_type', 'nb_resblock', 'resblocks_out_chan', 'train_acc', 'best_val_acc', 'test_acc', 'best_val_loss', 'test_loss', 'nb_trainable_param_round', 'nb_epoch_trained', 'folder_dated']


filters = (experiments['date'] >= '2020-11-20')
#filters = (experiments['date'] >= '2020-09-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Extractor
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P

# -- Resblocks
filters &= (experiments['nb_resblock'] == 3)                   # J       <---- FIXME: Make sure this is the good values
filters &= (experiments['resblocks_out_chan'] == 64)           # M       <---- FIXME: Make sure this is the good values

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (CoordConv)
# Each placement column must contain one of the four placement names.
filters &= (experiments['extractor_spatial_location'].str.contains('None|Time|Freq|Both'))
filters &= (experiments['stem_spatial_location'].str.contains('None|Time|Freq|Both'))
filters &= (experiments['resblock_spatial_location'].str.contains('None|Time|Freq|Both'))
filters &= (experiments['classifier_spatial_location'].str.contains('None|Time|Freq|Both'))

# -- Other
filters &= (experiments['note'].str.contains('table_7|table_5_final'))
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']#, 'folder_dated']

# Drop duplicates (Same experiment ran multiple time)
# Only the first row per (placement combination, random_seed) is kept.
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
# NOTE: `acc_std_columns` is computed but not used below.
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns

exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)
# Mean of the per-group std columns, scaled by 100, then averaged over train/val/test.
train_std_mean, val_std_mean, test_std_mean = exp_grouped['train_acc_std'].mean() * 100, exp_grouped['best_val_acc_std'].mean() * 100, exp_grouped['test_acc_std'].mean() * 100
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

# Best test accuracy first; restricting to `columns` drops every other column.
exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)[columns]

#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display
# NOTE(review): 'test_acc_std' is listed in `attributes` although `exp_grouped` was just
# restricted to `columns`, which does not include it -- confirm the helper tolerates this.
display(color_by_multi_attribute(exp_grouped, main_attribute='test_acc', 
                                 attributes=['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location', 'test_acc', 'test_acc_std'],
                                 cmaps=['Blues', 'YlOrRd', 'YlOrRd', 'YlOrRd', 'YlOrRd'], 
                                 format_dict=latex_format_dict))

# Latex code
# "\textasciitilde" in the generated LaTeX is swapped for "$\approx$".
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")

print("\n",latex)
print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")


In [None]:
# Overview of the Table 1 runs, grouped by extractor type.
# `display_all=True` is forwarded to show_table as-is; 'train_time' and
# 'random_seed' are requested as extra columns next to the accuracies.
table1_overview_kwargs = dict(
    df=experiments,
    filters=get_table1_filters(experiments),
    groupby_columns=['extractor_type'],
    acc_columns=['train_acc', 'best_val_acc', 'test_acc'],
    extra_columns=['train_time', 'random_seed'],  # optionally also 'folder_dated'
    attribute_by_color={'best_val_acc': None},
    display_all=True,
)
show_table(**table1_overview_kwargs)

In [None]:
# Table X - Per question type analysis
# Static parameters (author's notes; the actual selection is whatever
# get_table1_filters returns, which is not visible from this cell):
    # G = 4096
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024

# Variable parameters :
    # Extractors Type = {Parallel, Interleaved, Resnet}

filters = get_table1_filters(experiments)

# One accuracy column for the global test set, then one per question family;
# every family column is coloured with the same colormap.
per_family_colormaps = dict.fromkeys(family_test_acc_cols, 'CMRmap')

# NOTE(review): other cells use 'nb_trainable_param_million' /
# 'nb_trainable_param_round'; confirm that 'nb_trainable' is a real column.
show_table(
    df=experiments,
    filters=filters,
    groupby_columns=['extractor_type'],
    acc_columns=['test_acc'] + list(family_test_acc_cols),
    extra_columns=['nb_trainable'],  # optionally also 'folder_dated'
    attribute_by_color=per_family_colormaps,
    display_all=False,
    format_dict=latex_format_dict,
)

In [None]:
# Table X - Per question type analysis (filters spelled out inline)
# Static parameters (as enforced by the filters below):
    # G = 4096
    # J = 4
    # M = 128
    # ClassifierTopology = FCN   (not filtered on in this cell)
    # C = 512
    # H = 1024
    # CoordConv : extractor = None ; stem / resblocks / classifier = Both

# Variable parameters :
    # Extractors Type = {Parallel, Interleaved, Resnet}

filters = (experiments['date'] >= '2020-09-15')

# -- Input parameters
filters &= (experiments['resized_width'].isnull())
filters &= (~experiments['normalisation'].str.contains("imagenet_stats", na=False))

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 4096)              # G

# -- Coordconv
filters &= (experiments['extractor_spatial_location'] == 'None')
filters &= (experiments['stem_spatial_location'] == 'Both')
filters &= (experiments['resblock_spatial_location'] == 'Both')
filters &= (experiments['classifier_spatial_location'] == 'Both')

# -- Resblocks
filters &= (experiments['nb_resblock'] == 4)                    # J
filters &= (experiments['resblocks_out_chan'] == 128)           # M

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 512)          # C
filters &= (experiments['classifier_projection_out'] == 1024)    # H

# Variable Parameters (Input_type & Extractor)
filters &= (~experiments['RGB_colormap'].str.contains('Blues', na=False))

# 1D inputs are only accepted with this exact STFT configuration
input_1d_filter = (experiments['input_type'].str.contains('1D'))
input_1d_filter &= (experiments['n_fft'] == 512)
input_1d_filter &= (experiments['keep_freq_point'] == 256)
input_1d_filter &= (experiments['hop_length'] == 2048)

# Keep either (RGB input + Resnet extractor) runs or the 1D runs selected above
filters &= ((experiments['input_type'].str.contains('RGB') & (experiments['extractor_type'].str.contains('Resnet'))) | input_1d_filter )

filters &= (experiments['extractor_type'].str.contains('Parallel|Interleaved|Resnet'))

# -- Other
filters &= (experiments['note'].str.contains('table_1_final_final|table_2_final_final'))
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['extractor_type']
acc_columns = ['test_acc', *family_test_acc_cols]
extra_columns = []  # e.g. 'nb_trainable_param_million', 'folder_dated'

# Drop duplicates (same experiment ran multiple times) -- newest run wins
exp = exp.sort_values('date', ascending=False).drop_duplicates([*groupby_columns, 'random_seed'],keep='first')

# Grouping - Mean & Std calc
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns
columns_to_show = [*groupby_columns, *acc_columns, 'mean_std', *extra_columns]

exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True, inplace_std_str=False)

# The original read an undefined name (`acc_std_cols`) here and raised
# NameError. Select the std columns the same way the sibling per-question-type
# cell does, which also defines `std_cols` for the debug cell that follows.
std_cols = [c for c in exp_grouped.columns if 'std' in c]
exp_grouped['mean_std'] = exp_grouped[std_cols].mean(axis=1)
exp_grouped['mean_acc'] = exp_grouped[acc_columns].mean(axis=1)

# 'mean_acc' is computed for inspection but not part of columns_to_show
exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)[columns_to_show]

# All experiments
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='extractor_type', attributes=groupby_columns))

# Color display
display(color_by_multi_attribute(exp_grouped, main_attribute="extractor_type",
                                 attributes=acc_columns,
                                 cmaps=['Blues'] + ['CMRmap'] * len(acc_columns),
                                 format_dict=latex_format_dict))

# Latex code
latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")
# Collapse the padding pandas inserts after each column separator
latex = re.sub(r'&\s*', '& ', latex)

print("\n",latex)

In [None]:
# Debug check: do the std column names built from acc_columns match the ones
# discovered on the grouped frame?
# NOTE(review): `std_cols` is assigned by another cell, so this cell depends on
# that cell having been executed first (hidden kernel state).
acc_std_columns == std_cols

In [None]:
# Table 1 - Extractor Types
# Static parameters (author's notes; the actual selection is whatever
# get_table1_filters returns, which is not visible from this cell):
    # G = 4096
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024

# Variable parameters :
    # Extractors Type = {Parallel, Interleaved, Resnet}

# --- Selection ---------------------------------------------------------------
filters = get_table1_filters(experiments)
exp = experiments[filters]

# --- Table layout -------------------------------------------------------------
groupby_columns = ['extractor_type']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']  # optionally also 'folder_dated'

# De-duplication is intentionally disabled in this variant of the cell:
#exp = exp.sort_values('date', ascending=False).drop_duplicates(groupby_columns + ['random_seed', 'input_type'],keep='first')

# --- Mean / std per extractor type ---------------------------------------------
acc_std_columns = [acc_name + "_std" for acc_name in acc_columns]
columns = [*groupby_columns, *acc_columns, *extra_columns]
columns_to_show = groupby_columns + ['train_acc', 'best_val_acc', 'test_acc_std'] + extra_columns

exp_grouped = groupby_mean(
    exp, groupby_columns, acc_columns, columns,
    add_count_col=True, add_std_str=True, inplace_std_str=True,
)

# Best test accuracy first, then keep only the columns that go in the table
exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)
exp_grouped = exp_grouped[columns_to_show]

# Per-run view (enable when debugging):
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'input_type', 'train_time', 'nb_epoch_trained', 'nb_epoch_runned', 'date']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# --- Colored notebook rendering ---------------------------------------------------
styled_table = color_by_multi_attribute(
    exp_grouped,
    main_attribute="extractor_type",
    attributes=[],
    cmaps=['Blues'],
    format_dict=latex_format_dict,
)
display(styled_table)

# --- LaTeX export -----------------------------------------------------------------
latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=latex_format_dict)
latex = latex.replace("\\textasciitilde", "$\\approx$")
# Collapse the padding after each column separator
latex = re.sub(r'&\s*', '& ', latex)

print("\n", latex)

In [None]:
# Table 1 - Extractor Types
# Fixed hyper-parameters selected by the mask below:
    # G = 4096
    # J = 4
    # M = 128
    # C = 512
    # H = 1024
    # CoordConv : extractor = None ; stem / resblocks / classifier = Both
# Swept parameter :
    # Extractor type (no explicit extractor_type restriction in this cell,
    # apart from RGB inputs which must use a Resnet extractor)

# 1D inputs are only accepted with this exact STFT configuration.
input_1d_filter = (
    experiments['input_type'].str.contains('1D')
    & (experiments['n_fft'] == 512)
    & (experiments['keep_freq_point'] == 256)
    & (experiments['hop_length'] == 2048)
)

filters = (
    (experiments['date'] >= '2020-09-15')
    # -- Input parameters
    & experiments['resized_width'].isnull()
    & ~experiments['normalisation'].str.contains("imagenet_stats", na=False)
    # -- Text processing
    & (experiments['rnn_state_size'] == 4096)                # G
    # -- CoordConv
    & (experiments['extractor_spatial_location'] == 'None')
    & (experiments['stem_spatial_location'] == 'Both')
    & (experiments['resblock_spatial_location'] == 'Both')
    & (experiments['classifier_spatial_location'] == 'Both')
    # -- Resblocks
    & (experiments['nb_resblock'] == 4)                      # J
    & (experiments['resblocks_out_chan'] == 128)             # M
    # -- Classifier
    & (experiments['classifier_conv_out'] == 512)            # C
    & (experiments['classifier_projection_out'] == 1024)     # H
    # -- Input type & extractor: (RGB + Resnet) runs or the 1D runs above
    & ~experiments['RGB_colormap'].str.contains('Blues', na=False)
    & (
        (experiments['input_type'].str.contains('RGB') & experiments['extractor_type'].str.contains('Resnet'))
        | input_1d_filter
    )
    # -- Other
    & experiments['note'].str.contains('table_1_final_final|table_2_final_final')
)
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Table layout
groupby_columns = ['extractor_type']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']  # optionally also 'folder_dated'

# Same experiment may have been run several times: keep the most recent run
# for each (extractor type, seed, input type) combination.
exp = (
    exp
    .sort_values('date', ascending=False)
    .drop_duplicates(groupby_columns + ['random_seed', 'input_type'], keep='first')
)

# Mean / std per extractor type
acc_std_columns = [acc_name + "_std" for acc_name in acc_columns]
columns = [*groupby_columns, *acc_columns, *extra_columns]
columns_to_show = groupby_columns + ['train_acc', 'best_val_acc', 'test_acc_std'] + extra_columns

exp_grouped = groupby_mean(
    exp, groupby_columns, acc_columns, columns,
    add_count_col=True, add_std_str=True, inplace_std_str=True,
)

# Best test accuracy first, then keep only the columns that go in the table
exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)
exp_grouped = exp_grouped[columns_to_show]

# Per-run view (enable when debugging):
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'input_type', 'train_time', 'nb_epoch_trained', 'nb_epoch_runned', 'date']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Colored notebook rendering
styled_table = color_by_multi_attribute(
    exp_grouped,
    main_attribute="extractor_type",
    attributes=[],
    cmaps=['Blues'],
    format_dict=latex_format_dict,
)
display(styled_table)

# LaTeX export
latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=latex_format_dict)
latex = latex.replace("\\textasciitilde", "$\\approx$")
# Collapse the padding after each column separator
latex = re.sub(r'&\s*', '& ', latex)

print("\n", latex)

In [None]:
# Table X - Per question type analysis
# Fixed hyper-parameters selected by the mask below:
    # G = 4096
    # J = 4
    # M = 128
    # C = 512
    # H = 1024
    # CoordConv : extractor = None ; stem / resblocks / classifier = Both
# Swept parameter :
    # Extractor type in {Parallel, Interleaved, Resnet}

# 1D inputs are only accepted with this exact STFT configuration.
input_1d_filter = (
    experiments['input_type'].str.contains('1D')
    & (experiments['n_fft'] == 512)
    & (experiments['keep_freq_point'] == 256)
    & (experiments['hop_length'] == 2048)
)

filters = (
    (experiments['date'] >= '2020-09-15')
    # -- Input parameters
    & experiments['resized_width'].isnull()
    & ~experiments['normalisation'].str.contains("imagenet_stats", na=False)
    # -- Text processing
    & (experiments['rnn_state_size'] == 4096)                # G
    # -- CoordConv
    & (experiments['extractor_spatial_location'] == 'None')
    & (experiments['stem_spatial_location'] == 'Both')
    & (experiments['resblock_spatial_location'] == 'Both')
    & (experiments['classifier_spatial_location'] == 'Both')
    # -- Resblocks
    & (experiments['nb_resblock'] == 4)                      # J
    & (experiments['resblocks_out_chan'] == 128)             # M
    # -- Classifier
    & (experiments['classifier_conv_out'] == 512)            # C
    & (experiments['classifier_projection_out'] == 1024)     # H
    # -- Input type & extractor: (RGB + Resnet) runs or the 1D runs above
    & ~experiments['RGB_colormap'].str.contains('Blues', na=False)
    & (
        (experiments['input_type'].str.contains('RGB') & experiments['extractor_type'].str.contains('Resnet'))
        | input_1d_filter
    )
    & experiments['extractor_type'].str.contains('Parallel|Interleaved|Resnet')
    # -- Other
    & experiments['note'].str.contains('table_1_final_final|table_2_final_final')
)
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Table layout: global test accuracy followed by one column per question family
groupby_columns = ['extractor_type']
acc_columns = ['test_acc'] + list(family_test_acc_cols)
extra_columns = []  # e.g. 'nb_trainable_param_million', 'folder_dated'

# Same experiment may have been run several times: keep the most recent run
# for each (extractor type, seed) pair.
exp = (
    exp
    .sort_values('date', ascending=False)
    .drop_duplicates(groupby_columns + ['random_seed'], keep='first')
)

# Mean / std per extractor type
acc_std_columns = [acc_name + "_std" for acc_name in acc_columns]
columns = [*groupby_columns, *acc_columns, *extra_columns]
columns_to_show = groupby_columns + acc_columns + ['mean_std'] + extra_columns

exp_grouped = groupby_mean(
    exp, groupby_columns, acc_columns, columns,
    add_count_col=True, add_std_str=True, inplace_std_str=False,
)

# Row-wise averages: over every column whose name contains 'std', and over the
# accuracy columns. 'mean_acc' is computed but not part of columns_to_show.
std_cols = [col_name for col_name in exp_grouped.columns if 'std' in col_name]
exp_grouped['mean_std'] = exp_grouped[std_cols].mean(axis=1)
exp_grouped['mean_acc'] = exp_grouped[acc_columns].mean(axis=1)

# Best test accuracy first, then keep only the columns that go in the table
exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)
exp_grouped = exp_grouped[columns_to_show]

# All experiments (one row per run)
per_run_view = exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False)
display(color_by_multi_attribute(per_run_view, main_attribute='extractor_type', attributes=groupby_columns))

# Colored notebook rendering of the grouped table
styled_table = color_by_multi_attribute(
    exp_grouped,
    main_attribute="extractor_type",
    attributes=acc_columns,
    cmaps=['Blues'] + ['CMRmap'] * len(acc_columns),
    format_dict=latex_format_dict,
)
display(styled_table)

# LaTeX export
latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=latex_format_dict)
latex = latex.replace("\\textasciitilde", "$\\approx$")
# Collapse the padding after each column separator
latex = re.sub(r'&\s*', '& ', latex)

print("\n", latex)

In [None]:
# Table X - Per question analysis - COORDCONV
# Static parameters (author's notes; the actual selection is whatever
# get_table_coordconv_per_q_type_filters returns, not visible from this cell):
    # K = 3
    # N_0 = 8
    # P = 64
    # G = 1024
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024

filters = get_table_coordconv_per_q_type_filters(experiments)

# The four CoordConv placement columns define one table row each; they are
# replaced in the output by a hard-coded 'configuration' column.
# NOTE(review): the label order is passed positionally to show_table -- confirm
# it matches the row order show_table produces.
coordconv_table_kwargs = dict(
    df=experiments,
    filters=filters,
    groupby_columns=[
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
    ],
    acc_columns=['test_acc'] + list(family_test_acc_cols),
    extra_columns=[],
    attribute_by_color=dict.fromkeys(family_test_acc_cols, 'CMRmap'),
    display_all=False,
    format_dict=latex_format_dict,
    hardcoded_cols={
        'configuration': {
            'type': 'replace_groupby',
            'values': ["NAAQA", "Freq only", "None", "Time only"],
        },
    },
)
grouped_df = show_table(**coordconv_table_kwargs)

In [None]:
# Table X - Per question analysis - COORDCONV
# Static parameters (as enforced by the filters below):
    # K = 3
    # N_0 = 8                    (filter currently commented out)
    # P = 64
    # G = 1024
    # J = 3
    # M = 64
    # ClassifierTopology = FCN   (not filtered on in this cell)
    # C = 128                    <---- FIXME: earlier notes said C = 512; confirm
    # H = 512                    <---- FIXME: earlier notes said H = 1024; confirm

filters = (experiments['date'] >= '2020-11-20')
#filters = (experiments['date'] >= '2020-09-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Extractor
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P

# -- Resblocks
filters &= (experiments['nb_resblock'] == 3)                   # J       <---- FIXME: Make sure this is the good values
filters &= (experiments['resblocks_out_chan'] == 64)           # M       <---- FIXME: Make sure this is the good values

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (CoordConv): only four (stem, resblock, classifier)
# placements are kept, all without CoordConv in the extractor.
filters &= experiments['extractor_spatial_location'].str.contains('None')
specific_configs = ((experiments['stem_spatial_location'].str.contains('Both')) & (experiments['resblock_spatial_location'].str.contains('Both')) & (experiments['classifier_spatial_location'].str.contains('Both')))
specific_configs |= ((experiments['stem_spatial_location'].str.contains('None')) & (experiments['resblock_spatial_location'].str.contains('Time')) & (experiments['classifier_spatial_location'].str.contains('None')))
specific_configs |= ((experiments['stem_spatial_location'].str.contains('Freq')) & (experiments['resblock_spatial_location'].str.contains('None')) & (experiments['classifier_spatial_location'].str.contains('None')))
specific_configs |= ((experiments['stem_spatial_location'].str.contains('None')) & (experiments['resblock_spatial_location'].str.contains('None')) & (experiments['classifier_spatial_location'].str.contains('None')))

filters &= specific_configs

# -- Other
filters &= (experiments['note'].str.contains('table_7|table_5_final'))
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location']
acc_columns = ['test_acc', *family_test_acc_cols]
extra_columns = []  # e.g. 'nb_trainable_param_million', 'folder_dated'

# Drop duplicates (same experiment ran multiple times)
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns
columns_to_show = ['configuration', *acc_columns, 'mean_std', *extra_columns]

exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True, inplace_std_str=False)
std_cols = [c for c in exp_grouped.columns if 'std' in c]
exp_grouped['mean_std'] = exp_grouped[std_cols].mean(axis=1)

exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)

# Add configuration column.
# The label is derived from each row's own CoordConv placement rather than
# from its rank after sorting: a positional list silently mislabels rows as
# soon as the accuracy ordering changes, and raises when the number of groups
# is not exactly four. Unrecognised placements fall back to the raw
# "stem/resblock/classifier" values so they are never mislabelled.
coordconv_config_labels = {
    # (stem, resblock, classifier) -> display label
    ('Both', 'Both', 'Both'): "NAAQA",
    ('None', 'Time', 'None'): "Time only",
    ('Freq', 'None', 'None'): "Freq only",
    ('None', 'None', 'None'): "None",
}
exp_grouped['configuration'] = [
    coordconv_config_labels.get(placement, "/".join(map(str, placement)))
    for placement in zip(exp_grouped['stem_spatial_location'],
                         exp_grouped['resblock_spatial_location'],
                         exp_grouped['classifier_spatial_location'])
]

exp_grouped = exp_grouped[columns_to_show]

# All experiments
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display -- colormap list sized from acc_columns (same construction as
# the per-question-type extractor table) instead of a fixed-length literal
display(color_by_multi_attribute(exp_grouped, main_attribute=None,
                                 attributes=acc_columns,
                                 cmaps=['Blues'] + ['CMRmap'] * len(acc_columns),
                                 format_dict=latex_format_dict))

# Latex code
latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")
# Collapse the padding after each column separator
latex = re.sub(r'&\s*', '& ', latex)
print("\n",latex)

In [None]:
# Table X - Filter analysis
# Static parameters (as enforced by the filters below):
    # K = 3
    # N_0 = 8                    (filter currently commented out)
    # P = 64
    # G = 1024
    # J = 3
    # M = 64
    # ClassifierTopology = FCN   (not filtered on in this cell)
    # C = 128                    <---- FIXME: earlier notes said C = 512; confirm
    # H = 512                    <---- FIXME: earlier notes said H = 1024; confirm

filters = (experiments['date'] >= '2020-11-20')
#filters = (experiments['date'] >= '2020-09-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Extractor
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P

# -- Resblocks
filters &= (experiments['nb_resblock'] == 3)                   # J       <---- FIXME: Make sure this is the good values
filters &= (experiments['resblocks_out_chan'] == 64)           # M       <---- FIXME: Make sure this is the good values

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (CoordConv): a single placement is kept
# (extractor = None ; stem / resblocks / classifier = Both)
filters &= ((experiments['extractor_spatial_location'].str.contains('None')) & (experiments['stem_spatial_location'].str.contains('Both')) & (experiments['resblock_spatial_location'].str.contains('Both')) & (experiments['classifier_spatial_location'].str.contains('Both')))

# -- Other
filters &= (experiments['note'].str.contains('table_7|table_5_final'))
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location']
# Only the per-family accuracies are shown here (no global 'test_acc')
acc_columns = family_test_acc_cols
#acc_columns = global_test_acc_cols
extra_columns = []  # e.g. 'nb_trainable_param_million', 'folder_dated'

# Drop duplicates (same experiment ran multiple times)
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns
columns_to_show = ['configuration', *acc_columns]#, 'mean_std', *extra_columns]

exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True, inplace_std_str=False)
# Not used further in this cell; kept because it is a notebook-level name
std_cols = [c for c in exp_grouped.columns if 'std' in c]
#exp_grouped['mean_std'] = exp_grouped[std_cols].mean(axis=1)

# No sort here: 'test_acc' is not one of this cell's acc_columns
#exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)

# Add configuration column.
# Every selected run uses the same CoordConv placement, so the label is
# broadcast as a scalar; assigning a one-element list raises ValueError
# whenever the grouped frame does not have exactly one row.
exp_grouped['configuration'] = "NAAQA"

exp_grouped = exp_grouped[columns_to_show]

# All experiments
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display -- colormap list sized from acc_columns (same construction as
# the per-question-type extractor table) instead of a fixed-length literal
display(color_by_multi_attribute(exp_grouped, main_attribute=None,
                                 attributes=acc_columns,
                                 cmaps=['Blues'] + ['CMRmap'] * len(acc_columns),
                                 format_dict=latex_format_dict))

# Latex code
latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")
# Collapse the padding after each column separator
latex = re.sub(r'&\s*', '& ', latex)
print("\n",latex)

In [None]:
# Shade the grouped table with the reversed CMRmap colormap, applying the
# gradient with axis=1. Uses whatever `exp_grouped` the last executed table
# cell left behind.
row_gradient_styler = exp_grouped.style.background_gradient(cmap='CMRmap_r', axis=1)
row_gradient_styler

In [None]:
# Bar chart of the grouped table, with hand-written legend entries.
# NOTE(review): the labels are positional -- confirm they line up with the
# order of the columns plotted from `exp_grouped`.
family_legend_labels = [
    "Instrument", 'Note', 'Brightness', 'Loudness', 'Exist',
    'Abs Pos', 'Global Pos', 'Rel Pos',
    'Count', 'Count compare', 'Count diff',
]
ax = exp_grouped.plot.bar()
ax.legend(family_legend_labels, bbox_to_anchor=(0.75, 1), loc='upper left')

In [None]:
# Table 2 - GRU Reduction
# Fixed hyper-parameters selected by the mask below:
    # Extractor = Parallel
    # J = 4
    # M = 128
    # K = 3
    # N = [8, 16, 32]            (filter currently commented out)
    # P = 64                     <---- FIXME : Might want to try with 32
    # ClassifierTopology = FCN   (not filtered on in this cell)
    # C = 512, H = 1024          (author's notes; both filters are commented out)
    # CoordConv : extractor = None ; stem / resblocks / classifier = Both
# Swept parameter :
    # G = {4096, 2048, 1024, 512, 256}

filters = (
    (experiments['date'] >= '2020-11-20')
    # -- Input parameters
    & experiments['input_type'].str.contains('1D')
    & (experiments['n_fft'] == 512)
    & (experiments['keep_freq_point'] == 256)
    & (experiments['hop_length'] == 2048)
    & experiments['resized_width'].isnull()
    # -- CoordConv
    & (experiments['extractor_spatial_location'] == 'None')
    & (experiments['stem_spatial_location'] == 'Both')
    & (experiments['resblock_spatial_location'] == 'Both')
    & (experiments['classifier_spatial_location'] == 'Both')
    # -- Extractor
    & experiments['extractor_type'].str.contains('Parallel')
    & (experiments['extractor_nb_block'] == 3)                # K
    & (experiments['extractor_projection_size'] == 64)        # P
    # -- Resblocks
    & (experiments['nb_resblock'] == 4)                       # J
    & (experiments['resblocks_out_chan'] == 128)              # M
    # -- Text processing (swept GRU state sizes)
    & experiments['rnn_state_size'].isin([4096, 2048, 1024, 512, 256])  # G
    # -- Other
    & experiments['note'].str.contains('table_3|table_2_final_final')
)
# Filters left disabled by the author:
#filters &= (experiments['extractor_filters'] == [8, 16, 32])     # N
#filters &= (experiments['classifier_conv_out'] == 512)           # C
#filters &= (experiments['classifier_projection_out'] == 1024)    # H
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Table layout
groupby_columns = ['rnn_state_size']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million', 'train_time']

# Same experiment may have been run several times: keep the first row found
# for each (GRU size, seed) pair.
exp = exp.drop_duplicates(subset=groupby_columns + ['random_seed'], keep='first')

# Mean / std per GRU size
acc_std_columns = [acc_name + "_std" for acc_name in acc_columns]
columns = [*groupby_columns, *acc_columns, *extra_columns]

exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)

# Average std across table rows, scaled by 100, for train / val / test
# (acc_std_columns is ordered train, val, test)
train_std_mean, val_std_mean, test_std_mean = (
    exp_grouped[std_name].mean() * 100 for std_name in acc_std_columns
)
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)

# All experiments (one row per run)
per_run_view = exp[columns + ['random_seed', 'note']].sort_values(
    groupby_columns + ['random_seed', 'train_time'], ascending=False
)
display(color_by_multi_attribute(per_run_view, main_attribute='test_acc', attributes=groupby_columns))

# Colored notebook rendering of the grouped table
styled_table = color_by_multi_attribute(
    exp_grouped,
    main_attribute='test_acc',
    attributes=['rnn_state_size'],
    cmaps=['Blues'] + ['YlOrRd'] * 4,
    format_dict=latex_format_dict,
)
display(styled_table)

# LaTeX export
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict)
latex = latex.replace("\\textasciitilde", "$\\approx$")
print("\n", latex)

print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")


In [None]:
# Table 3 - Classifier Topologies
# Static Parameters :
    # Extractor = Parallel
    # J = 4
    # M = 128
    # K = 3
    # N = [8, 16, 32]
    # P = 64
# Variable Parameters :
    # Classifier Topology = {Fcn, Conv}
    # C = {512, 256, 128, None}
    # H = {1024, 512, 256, None}
#
# This cell selects the rows of `experiments` matching the parameters above, averages the
# accuracy columns per classifier configuration and outputs a colored table plus its LaTeX source.
# It depends on names defined outside this cell: `experiments`, `groupby_mean`,
# `color_by_multi_attribute` and `latex_format_dict`.
# NOTE(review): N is listed as static but its filter is commented out below, so it is not enforced.

# Boolean row mask over `experiments`; every `&=` below narrows it further.
filters = (experiments['date'] >= '2020-11-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Coordconv
filters &= (experiments['extractor_spatial_location'] == 'None')
filters &= (experiments['stem_spatial_location'] == 'Both')
filters &= (experiments['resblock_spatial_location'] == 'Both')
filters &= (experiments['classifier_spatial_location'] == 'Both')

# -- Extractor
filters &= experiments['extractor_type'].str.contains('Parallel')
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P      <---- FIXME : Might want to try with 32

# -- Resblocks
filters &= (experiments['nb_resblock'] == 4)                    # J
filters &= (experiments['resblocks_out_chan'] == 128)           # M

# Variable Parameters (Classifier Topologies)
# NOTE(review): whether the `None` entries below match missing values depends on how these
# columns are stored (NaN vs. the string 'None') — confirm against `get_experiments`.
filters &= (experiments['classifier_type'].str.contains('fcn|conv'))
filters &= (experiments['classifier_conv_out'].isin([512, 256, 128, None]))          # C
filters &= (experiments['classifier_projection_out'].isin([1024, 512, 256, None]))   # H

# Other
# `note` is matched as a regular expression: either tag selects the run.
filters &= (experiments['note'].str.contains('1_worker|table_3_table_3'))
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['classifier_type', 'classifier_conv_out', 'classifier_projection_out']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million', 'note']

# Drop duplicates (Same experiment ran multiple time)
# One row is kept per (configuration, seed): the first one in the current row order of `exp`.
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
# NOTE: `acc_std_columns` is not read anywhere else in this cell.
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns

# `groupby_mean` is a project helper; its result is expected to expose the `<acc>_std` columns read just below.
exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)
# Average std over all configurations; the `* 100` presumably converts fractions to percent — TODO confirm.
train_std_mean, val_std_mean, test_std_mean = exp_grouped['train_acc_std'].mean() * 100, exp_grouped['best_val_acc_std'].mean() * 100, exp_grouped['test_acc_std'].mean() * 100
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)#[columns]

# All experiments
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display
display(color_by_multi_attribute(exp_grouped, main_attribute='test_acc', 
                                 attributes=['classifier_type'],# 'classifier_conv_out', 'classifier_projection_out'],
                                 cmaps=['Blues'], 
                                 format_dict=latex_format_dict))

# Latex code
# Every `\textasciitilde` in the generated LaTeX is swapped for the math "approx" symbol.
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")

print("\n",latex)
print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")

In [None]:
# Table 4 - Resblocks
# Static parameters
    # Extractor = Parallel
    # K = 3
    # N = [8, 16, 32]
    # P = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024
# Variable Parameters
    # J = {4, 3, 2, 1}
    # M = {128, 64, 32}
#
# This cell selects the matching rows of `experiments`, averages the accuracy columns per
# (nb_resblock, resblocks_out_chan) pair and outputs a colored table plus its LaTeX source.
# It depends on names defined outside this cell: `experiments`, `groupby_mean`,
# `color_by_multi_attribute` and `latex_format_dict`.
# NOTE(review): the header lists C = 512 / H = 1024, but the filters below select
# classifier_conv_out == 128 / classifier_projection_out == 512 (both flagged FIXME), and no
# `classifier_type` filter is applied — confirm which values are intended.

# Boolean row mask over `experiments`; every `&=` below narrows it further.
filters = (experiments['date'] >= '2020-11-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Coordconv
filters &= (experiments['extractor_spatial_location'] == 'None')
filters &= (experiments['stem_spatial_location'] == 'Both')
filters &= (experiments['resblock_spatial_location'] == 'Both')
filters &= (experiments['classifier_spatial_location'] == 'Both')

# -- Extractor
filters &= experiments['extractor_type'].str.contains('Parallel')
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P      <---- FIXME : Might want to try with 32

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (Resblocks)
# `<= 4` has no lower bound, so it accepts any value up to 4, not only the {4, 3, 2, 1} listed in the header.
filters &= (experiments['nb_resblock'] <= 4)                    # J
filters &= (experiments['resblocks_out_chan'].isin([128, 64, 32]))           # M

# -- Other
# `note` is matched as a regular expression: either alternative selects the run.
filters &= (experiments['note'].str.contains('table_5_final|table_4.*1_worker'))    # Table 4 result for comparison
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -- Comparison
#filters |= ( (experiments['nb_resblock'] == 4) & (experiments['resblocks_out_chan'] == 128) & (experiments['note'].str.contains('table_4.*1_worker')) & experiments['extractor_type'].str.contains('Parallel') & (experiments['classifier_conv_out'] == 128) & (experiments['classifier_projection_out'] == 512) )

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['nb_resblock', 'resblocks_out_chan']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']

# Drop duplicates (Same experiment ran multiple time)
# One row is kept per (configuration, seed): the first one in the current row order of `exp`.
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
# NOTE: `acc_std_columns` is not read anywhere else in this cell.
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns

# `groupby_mean` is a project helper; its result is expected to expose the `<acc>_std` columns read just below.
exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)
# Average std over all configurations; the `* 100` presumably converts fractions to percent — TODO confirm.
train_std_mean, val_std_mean, test_std_mean = exp_grouped['train_acc_std'].mean() * 100, exp_grouped['best_val_acc_std'].mean() * 100, exp_grouped['test_acc_std'].mean() * 100
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)#[columns]

# All experiments
# Per-run table (one row per kept experiment), sorted in descending order of configuration and seed.
display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display
display(color_by_multi_attribute(exp_grouped, main_attribute='test_acc', 
                                 attributes=[],
                                 #attributes=['stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location', 'test_acc'],
                                 cmaps=['Blues', 'YlOrRd', 'YlOrRd', 'YlOrRd', 'YlOrRd'], 
                                 format_dict=latex_format_dict))

# Latex code
# Every `\textasciitilde` in the generated LaTeX is swapped for the math "approx" symbol.
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")

print("\n",latex)
print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")


In [None]:
# Table 4 - Resblocks
# Static parameters
    # Extractor = Parallel
    # K = 3
    # N = [8, 16, 32]
    # P = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024
# Variable Parameters
    # J = {4, 3, 2, 1}
    # M = {128, 64, 32}
#
# Variant of the resblocks table restricted to the runs whose `note` contains
# 'table_5_baseline_extractor'. It averages the accuracy columns per
# (nb_resblock, resblocks_out_chan) pair and outputs a colored table plus its LaTeX source.
# It depends on names defined outside this cell: `experiments`, `groupby_mean`,
# `color_by_multi_attribute` and `latex_format_dict`.
# NOTE(review): the header above still lists the Parallel extractor parameters (K, N, P), but
# every extractor filter is commented out in this cell; the header also lists C = 512 / H = 1024
# while the filters select 128 / 512 — confirm and update the header.

# Boolean row mask over `experiments`; every `&=` below narrows it further.
filters = (experiments['date'] >= '2020-11-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Coordconv
filters &= (experiments['extractor_spatial_location'] == 'None')
filters &= (experiments['stem_spatial_location'] == 'Both')
filters &= (experiments['resblock_spatial_location'] == 'Both')
filters &= (experiments['classifier_spatial_location'] == 'Both')

# -- Extractor
# All extractor constraints are disabled here: the extractor is not restricted by type, block count or projection size.
#filters &= experiments['extractor_type'].str.contains('Parallel')
#filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
#filters &= (experiments['extractor_projection_size'] == 64)     # P      <---- FIXME : Might want to try with 32

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (Resblocks)
# `<= 4` has no lower bound, so it accepts any value up to 4, not only the {4, 3, 2, 1} listed in the header.
filters &= (experiments['nb_resblock'] <= 4)                    # J
filters &= (experiments['resblocks_out_chan'].isin([128, 64, 32]))           # M

# -- Other
filters &= (experiments['note'].str.contains('table_5_baseline_extractor'))
#filters &= (experiments['note'].str.contains('table_5_final|table_4.*1_worker'))    # Table 4 result for comparison
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -- Comparison
#filters |= ( (experiments['nb_resblock'] == 4) & (experiments['resblocks_out_chan'] == 128) & (experiments['note'].str.contains('table_4.*1_worker')) & experiments['extractor_type'].str.contains('Parallel') & (experiments['classifier_conv_out'] == 128) & (experiments['classifier_projection_out'] == 512) )

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['nb_resblock', 'resblocks_out_chan']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']

# Drop duplicates (Same experiment ran multiple time)
# One row is kept per (configuration, seed): the first one in the current row order of `exp`.
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
# NOTE: `acc_std_columns` is not read anywhere else in this cell.
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns

# `groupby_mean` is a project helper; its result is expected to expose the `<acc>_std` columns read just below.
exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)
# Average std over all configurations; the `* 100` presumably converts fractions to percent — TODO confirm.
train_std_mean, val_std_mean, test_std_mean = exp_grouped['train_acc_std'].mean() * 100, exp_grouped['best_val_acc_std'].mean() * 100, exp_grouped['test_acc_std'].mean() * 100
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)#[columns]

# All experiments
# Per-run table (one row per kept experiment), sorted in descending order of configuration and seed.
display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display
display(color_by_multi_attribute(exp_grouped, main_attribute='test_acc', 
                                 attributes=[],
                                 #attributes=['stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location', 'test_acc'],
                                 cmaps=['Blues', 'YlOrRd', 'YlOrRd', 'YlOrRd', 'YlOrRd'], 
                                 format_dict=latex_format_dict))

# Latex code
# Every `\textasciitilde` in the generated LaTeX is swapped for the math "approx" symbol.
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")

print("\n",latex)
print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")


In [None]:
# Table 5 - Extractor filter reduction
# Static parameters :
    # G = 1024
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024
# Variable parameters :
    # k = {4,3,2,1}
    # P = {128, 64, 32, None}
#
# This cell selects the matching rows of `experiments`, averages the accuracy columns per
# (extractor_nb_block, extractor_projection_size) pair and outputs a colored table plus its LaTeX source.
# It depends on names defined outside this cell: `experiments`, `groupby_mean`,
# `color_by_multi_attribute` and `latex_format_dict`.
# NOTE(review): the header lists C = 512 / H = 1024, but the filters below select
# classifier_conv_out == 128 / classifier_projection_out == 512 (both flagged FIXME) — confirm.
# NOTE(review): unlike the neighbouring table cells, no `note` filter is applied here.

# Boolean row mask over `experiments`; every `&=` below narrows it further.
filters = (experiments['date'] >= '2020-11-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Coordconv
filters &= (experiments['extractor_spatial_location'] == 'None')
filters &= (experiments['stem_spatial_location'] == 'Both')
filters &= (experiments['resblock_spatial_location'] == 'Both')
filters &= (experiments['classifier_spatial_location'] == 'Both')

# -- Resblocks
filters &= (experiments['nb_resblock'] == 3)                   # J       <---- FIXME: Make sure this is the good values
filters &= (experiments['resblocks_out_chan'] == 64)           # M       <---- FIXME: Make sure this is the good values

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (Extractor configuration)
#filters &= experiments['extractor_type'].str.contains('Parallel|Interleaved')
filters &= experiments['extractor_type'].str.contains('Parallel')
# `<= 4` has no lower bound, so it accepts any value up to 4, not only the {4,3,2,1} listed in the header.
filters &= (experiments['extractor_nb_block'] <= 4)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
# NOTE(review): whether the `None` entry matches missing values depends on how the column is stored — confirm.
filters &= (experiments['extractor_projection_size'].isin([128, 64, 32, None]))     # P

# -- Other
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['extractor_nb_block', 'extractor_projection_size']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']

# Drop duplicates (Same experiment ran multiple time)
# One row is kept per (configuration, seed): the first one in the current row order of `exp`.
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
# NOTE: `acc_std_columns` is not read anywhere else in this cell.
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns

# `groupby_mean` is a project helper; its result is expected to expose the `<acc>_std` columns read just below.
exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)
# Average std over all configurations; the `* 100` presumably converts fractions to percent — TODO confirm.
train_std_mean, val_std_mean, test_std_mean = exp_grouped['train_acc_std'].mean() * 100, exp_grouped['best_val_acc_std'].mean() * 100, exp_grouped['test_acc_std'].mean() * 100
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)#[columns]

# All experiments
#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display
display(color_by_multi_attribute(exp_grouped, main_attribute='test_acc', 
                                 attributes=['extractor_nb_block', 'extractor_projection_size'],
                                 #attributes=['stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location', 'test_acc'],
                                 cmaps=['Blues'], 
                                 format_dict=latex_format_dict))

# Latex code
# Every `\textasciitilde` in the generated LaTeX is swapped for the math "approx" symbol.
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")

print("\n",latex)
print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")

In [None]:
# Table 7 - CoordConv
    # K = 3
    # N_0 = 8
    # P = 64
    # G = 1024
    # J = 3
    # M = 64
    # ClassifierTopology = FCN
    # C = 512
    # H = 1024
#
# This cell selects the matching rows of `experiments`, averages the accuracy columns per
# combination of the four `*_spatial_location` columns and outputs a colored table plus its LaTeX source.
# It depends on names defined outside this cell: `experiments`, `groupby_mean`,
# `color_by_multi_attribute` and `latex_format_dict`.
# NOTE(review): the header lists C = 512 / H = 1024, but the filters below select
# classifier_conv_out == 128 / classifier_projection_out == 512 (both flagged FIXME) — confirm.

# NOTE: this first `columns` value is never read; it is overwritten further down before any use.
columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location' , 'train_acc', 'best_val_acc', 'test_acc', 'best_val_loss', 'test_loss', 'nb_trainable_param_round', 'nb_epoch_trained']
#columns = ['extractor_filters', 'classifier_conv_out', 'classifier_projection_out',  'keep_freq_point', 'hop_length', 'extractor_type', 'nb_resblock', 'resblocks_out_chan', 'train_acc', 'best_val_acc', 'test_acc', 'best_val_loss', 'test_loss', 'nb_trainable_param_round', 'nb_epoch_trained', 'folder_dated']


# Boolean row mask over `experiments`; every `&=` below narrows it further.
filters = (experiments['date'] >= '2020-11-20')
#filters = (experiments['date'] >= '2020-09-20')

# -- Input parameters
filters &= (experiments['input_type'].str.contains('1D'))
filters &= (experiments['n_fft'] == 512)
filters &= (experiments['keep_freq_point'] == 256)
filters &= (experiments['hop_length'] == 2048)
filters &= (experiments['resized_width'].isnull())

# -- Text Processing
filters &= (experiments['rnn_state_size'] == 1024)              # G

# -- Extractor
filters &= (experiments['extractor_nb_block'] == 3)             # K
#filters &= (experiments['extractor_filters'] == [8, 16, 32])   # N
filters &= (experiments['extractor_projection_size'] == 64)     # P

# -- Resblocks
filters &= (experiments['nb_resblock'] == 3)                   # J       <---- FIXME: Make sure this is the good values
filters &= (experiments['resblocks_out_chan'] == 64)           # M       <---- FIXME: Make sure this is the good values

# -- Classifier
filters &= (experiments['classifier_conv_out'] == 128)          # C      <---- FIXME: Make sure this is the good values
filters &= (experiments['classifier_projection_out'] == 512)    # H      <---- FIXME: Make sure this is the good values

# Variable Parameters (CoordConv)
# Substring/regex match: any value containing one of the four listed placements is accepted.
filters &= (experiments['extractor_spatial_location'].str.contains('None|Time|Freq|Both'))
filters &= (experiments['stem_spatial_location'].str.contains('None|Time|Freq|Both'))
filters &= (experiments['resblock_spatial_location'].str.contains('None|Time|Freq|Both'))
filters &= (experiments['classifier_spatial_location'].str.contains('None|Time|Freq|Both'))

# -- Other
filters &= (experiments['note'].str.contains('table_7|table_5_final'))
#filters &= (experiments['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean

# -----------------------------------------------------------------------------------------------------
exp = experiments[filters]

# Display
groupby_columns = ['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location']
acc_columns = ['train_acc', 'best_val_acc', 'test_acc']
extra_columns = ['nb_trainable_param_million']#, 'folder_dated']

# Drop duplicates (Same experiment ran multiple time)
# One row is kept per (configuration, seed): the first one in the current row order of `exp`.
exp = exp.drop_duplicates(groupby_columns + ['random_seed'],keep='first')

# Grouping - Mean & Std calc
# NOTE: `acc_std_columns` is not read anywhere else in this cell.
acc_std_columns = [f"{c}_std" for c in acc_columns]
columns = groupby_columns + acc_columns + extra_columns

# `groupby_mean` is a project helper; its result is expected to expose the `<acc>_std` columns read just below.
exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=True, add_std_str=True)
# Average std over all configurations; the `* 100` presumably converts fractions to percent — TODO confirm.
train_std_mean, val_std_mean, test_std_mean = exp_grouped['train_acc_std'].mean() * 100, exp_grouped['best_val_acc_std'].mean() * 100, exp_grouped['test_acc_std'].mean() * 100
accuracy_std_mean = np.mean([train_std_mean, val_std_mean, test_std_mean])

# Unlike the other table cells, the result is restricted to `columns` here, which drops the `<acc>_std` columns.
exp_grouped = exp_grouped.sort_values('test_acc', ascending=False)[columns]

#display(color_by_multi_attribute(exp[columns + ['random_seed', 'note']].sort_values(groupby_columns + ['random_seed'], ascending=False), main_attribute='test_acc', attributes=groupby_columns))

# Color display
# NOTE(review): 'test_acc_std' is requested as an attribute although it is no longer a column of
# `exp_grouped`, and 6 attributes are given for 5 colormaps — verify how `color_by_multi_attribute` handles both.
display(color_by_multi_attribute(exp_grouped, main_attribute='test_acc', 
                                 attributes=['extractor_spatial_location', 'stem_spatial_location', 'resblock_spatial_location', 'classifier_spatial_location', 'test_acc', 'test_acc_std'],
                                 cmaps=['Blues', 'YlOrRd', 'YlOrRd', 'YlOrRd', 'YlOrRd'], 
                                 format_dict=latex_format_dict))

# Latex code
# Every `\textasciitilde` in the generated LaTeX is swapped for the math "approx" symbol.
latex = exp_grouped.to_latex(index=False, formatters=latex_format_dict).replace("\\textasciitilde", "$\\approx$")

print("\n",latex)
print(f"STD Means -- Global means : ± {accuracy_std_mean} Train : ± {train_std_mean} Val : ± {val_std_mean} - Test : ± {test_std_mean}")


In [None]:
def filter_outliers(df, groupby_columns, outlier_col='nb_epoch_trained'):
    """
    Remove, inside each group, the rows whose `outlier_col` value is abnormally low.

    For every group defined by `groupby_columns` the threshold
    ``int(mean(outlier_col) - std(outlier_col))`` is computed (population std, truncated by
    `int`) and only the rows with ``outlier_col > threshold`` are kept. Only the low side is
    filtered; high values are never removed.

    A group without any spread (a single row, or identical values) has no outlier by
    definition and is kept untouched. With the strict comparison above it would otherwise
    be dropped entirely, since its threshold equals its own value.

    :param df: DataFrame to filter. It is not modified.
    :param groupby_columns: Column name, or list/tuple of column names, defining the groups.
    :param outlier_col: Numeric column on which the outliers are detected.
    :return: The rows of `df` that are kept, in their original order. Rows whose group key
             is missing (NaN) belong to no group and are therefore dropped.
    """
    # Normalise *before* any use: unpacking a bare column name would split it into characters.
    if isinstance(groupby_columns, (list, tuple)):
        groupby_columns = list(groupby_columns)
    else:
        groupby_columns = [groupby_columns]

    # Positional mask, so that a non-unique index on `df` cannot cause mismatches.
    keep_mask = np.zeros(len(df), dtype=bool)
    outlier_values = df[outlier_col]

    # `indices` maps every group key to the integer positions of its rows (empty dict for an empty frame).
    for positions in df.groupby(groupby_columns).indices.values():
        group_values = outlier_values.iloc[positions]
        spread = np.std(group_values)

        if spread == 0:
            # No dispersion -> nothing can be an outlier.
            keep_mask[positions] = True
            continue

        threshold = int(np.mean(group_values) - spread)
        keep_mask[positions] = (group_values > threshold).to_numpy()

    return df[keep_mask]


def keep_x_best(filtered_df, groupby_columns, nb_to_keep, discriminative_attribute='test_acc'):
    """
    Keep, for every group, the `nb_to_keep` rows with the highest `discriminative_attribute`.

    :param filtered_df: DataFrame holding one row per run.
    :param groupby_columns: Columns defining the groups.
    :param nb_to_keep: Maximum number of rows kept per group.
    :param discriminative_attribute: Column used to rank the rows inside a group.
    :return: Result of `GroupBy.apply` (index layout is the one pandas builds for `as_index=False`).
    """
    def take_first_rows(group_rows):
        # Rows arrive already ranked in descending order, so the best ones come first.
        return group_rows.iloc[:nb_to_keep]

    sort_keys = list(groupby_columns) + [discriminative_attribute]
    ranked = filtered_df.sort_values(sort_keys, ascending=False)
    per_group = ranked.groupby(groupby_columns, as_index=False)

    return per_group.apply(take_first_rows)


def print_missing_seeds(df, groupby_cols, all_seeds):
    """
    Print, for every group of `groupby_cols`, the seeds of `all_seeds` without any run.

    Groups for which every expected seed is present in the 'random_seed' column print nothing.

    :param df: DataFrame holding one row per run, with a 'random_seed' column.
    :param groupby_cols: Column(s) defining the groups.
    :param all_seeds: Iterable of the seeds every group is expected to contain.
    """
    expected_seeds = all_seeds if isinstance(all_seeds, set) else set(all_seeds)

    def report_group(group_rows):
        # `group_rows.name` is the key of the group currently processed by `apply`.
        absent = expected_seeds.difference(set(group_rows['random_seed']))
        if absent:
            print(group_rows.name, "  Missing : ", absent)

    ordered = df.sort_values(groupby_cols, ascending=False)
    # `apply` is only used for its printing side effect; its return value is discarded.
    ordered.groupby(groupby_cols).apply(report_group)


def show_table2(df, filters, groupby_columns, acc_columns, extra_columns=None, format_dict=None, attribute_by_color=None, mean_std_col=False, display_all=False, hardcoded_cols=None, 
               show_count_col=False, inplace_std=False, remove_outliers=False, print_latex=True, nb_to_keep=None, all_seeds=None):
    """
    Filter the experiments, average the accuracies per group and display the resulting table.

    Pipeline: `df[filters]` -> optional missing-seed report -> de-duplication (most recent run
    kept) -> optional outlier removal -> optional "keep the N best" -> `groupby_mean` ->
    colored display -> optional LaTeX printout.

    :param df: DataFrame holding one row per experiment.
    :param filters: Boolean mask applied to `df`.
    :param groupby_columns: List of columns defining a configuration (must be a list, it is concatenated with `+`).
    :param acc_columns: List of accuracy columns to average. The result is sorted and colored on
                        'test_acc', so that column is presumably expected to be part of it.
    :param extra_columns: Additional columns to show (defaults to none).
    :param format_dict: Formatters forwarded to `color_by_multi_attribute` and `DataFrame.to_latex`.
    :param attribute_by_color: Dict {column name: colormap}. Keys are the colored attributes; the
                               non-None values are appended after 'Blues' in the colormap list.
    :param mean_std_col: Add a 'mean_std' column (row-wise mean of the `<acc>_std` columns),
                         placed right before the first extra column. Ignored when `inplace_std` is set.
    :param display_all: Also display the per-run table (before grouping).
    :param hardcoded_cols: Dict {column name: {'type': ..., 'values': ...}}. 'values' is assigned as a
                           new column of the grouped table and the column is shown first; with
                           type "replace_groupby" the group-by columns are hidden.
    :param show_count_col: Forwarded to `groupby_mean` as `add_count_col`; also shows the 'count' column.
    :param inplace_std: Forwarded to `groupby_mean` as `inplace_std_str`; the `<acc>_std` columns are
                        shown instead of the raw accuracy columns.
    :param remove_outliers: Apply `filter_outliers` (default outlier column) per configuration.
    :param print_latex: Print the LaTeX source of the table.
    :param nb_to_keep: If set, keep only the `nb_to_keep` best runs (on 'test_acc') per configuration.
    :param all_seeds: If set, print the seeds missing for each configuration.
    :return: The grouped DataFrame restricted to the displayed columns.
    """
    
    exp = df[filters]
    
    if extra_columns is None:
        extra_columns = []
    
    if attribute_by_color is None:
        attribute_by_color = {}
        
    # The seed report runs on the raw selection, i.e. before duplicates/outliers are removed.
    if all_seeds is not None:
        print_missing_seeds(exp, groupby_columns, all_seeds)

    # Drop duplicates (Same experiment ran multiple time)
    # Sorting by descending date first makes `keep='first'` retain the most recent run.
    exp = exp.sort_values('date', ascending=False).drop_duplicates([*groupby_columns, 'random_seed', 'input_type', 'config'],keep='first')
        
    if remove_outliers:
        exp = filter_outliers(exp, groupby_columns)
    
    if nb_to_keep is not None:
        exp = keep_x_best(exp, groupby_columns, nb_to_keep, 'test_acc')

    # Grouping - Mean & Std calc
    acc_std_columns = [f"{c}_std" for c in acc_columns]
    # `columns` is what `groupby_mean` receives; `columns_to_show` starts identical but is
    # progressively rewritten below according to the display options.
    columns = groupby_columns + acc_columns + extra_columns
    columns_to_show = [*groupby_columns, *acc_columns, *extra_columns]
    
    # All experiments
    if display_all:
        all_exp = exp.sort_values([*groupby_columns, 'random_seed'], ascending=False)[columns_to_show]
        display(color_by_multi_attribute(all_exp, 
                                         main_attribute='test_acc', 
                                         attributes=groupby_columns,
                                         format_dict=format_dict))

    # `groupby_mean` is a project helper; its result is expected to expose `<acc>_std` columns (read below).
    exp_grouped = groupby_mean(exp, groupby_columns, acc_columns, columns, add_count_col=show_count_col, add_std_str=True, inplace_std_str=inplace_std).sort_values('test_acc', ascending=False).reset_index(drop=True)
    
    if hardcoded_cols is not None:
        for col_name, col_conf in hardcoded_cols.items():
            
            if col_conf['type'] == "replace_groupby":
                columns_to_show = [c for c in columns_to_show if c not in groupby_columns]
            
            # Values are assigned positionally to the grouped table, which is sorted by descending 'test_acc'.
            exp_grouped[col_name] = col_conf['values']
            # Each hardcoded column is prepended, so the last one of the dict ends up first.
            columns_to_show = [col_name, *columns_to_show]
            
    if inplace_std:
        # The std is shown through the `<acc>_std` columns, a separate mean-std column is therefore disabled.
        mean_std_col = False
        columns_to_show = [c if c not in acc_columns else f'{c}_std' for c in columns_to_show]
        attribute_by_color = {(name if name not in acc_columns else f"{name}_std"):color for name, color in attribute_by_color.items()}
    
    if mean_std_col:
        exp_grouped['mean_std'] = exp_grouped[acc_std_columns].mean(axis=1)
        
        # Insert 'mean_std' right before the extra columns (or at the end when there are none).
        if len(extra_columns) == 0:
            up_to_extra_col = columns_to_show
        else:
            up_to_extra_col = columns_to_show[:columns_to_show.index(extra_columns[0])]
            
        columns_to_show = [*up_to_extra_col, 'mean_std', *extra_columns]

    if show_count_col:
        columns_to_show.append('count')
        
    exp_grouped = exp_grouped[columns_to_show]
    
    main_attribute_to_color = 'test_acc' if not inplace_std else 'test_acc_std'

    # Color display      
    display(color_by_multi_attribute(exp_grouped, main_attribute=main_attribute_to_color,
                                     attributes=list(attribute_by_color.keys()), 
                                     cmaps=['Blues', *list([v for v in attribute_by_color.values() if v is not None])],
                                     format_dict=format_dict))

    # Latex code
    if print_latex:
        # Post-processing of the LaTeX source: `\textasciitilde` becomes the math "approx" symbol,
        # the space after "±" is removed and the whitespace following each '&' is collapsed to one space.
        latex = exp_grouped[columns_to_show].to_latex(index=False, formatters=format_dict, escape=False).replace("\\textasciitilde", "$\\approx$").replace(" ± ", " ±")
        latex = re.sub(r'&\s*', '& ', latex)

        print("\n",latex)
    
    return exp_grouped


def get_table_extractor_type_filters(df):
    """
    Build the boolean row mask selecting the runs of the "extractor type" table.

    Every condition is evaluated on `df` itself, so the function can be applied to any
    experiments frame (the extractor-type condition used to read the notebook-level
    `experiments` variable instead of the argument).

    A row is selected when it satisfies all the static-parameter conditions below and is either
    an 'RGB' input with a 'Resnet' extractor or a '1D' input with n_fft=512, keep_freq_point=256
    and hop_length=2048. In addition, rows whose `note` contains 'fix_size' or whose `config`
    contains 'table_1_conv_2d_extractor_32_smaller_filters' are always selected: these two
    conditions are OR-ed last and therefore bypass every other condition, the date one included.

    :param df: DataFrame holding one row per experiment.
    :return: Boolean Series aligned on `df`.
    """
    filters = (df['date'] >= '2020-09-15')

    # -- Input parameters
    filters &= (df['resized_width'].isnull())
    filters &= (~df['normalisation'].str.contains("imagenet_stats", na=False))

    # -- Text Processing
    filters &= (df['rnn_state_size'] == 4096)              # G

    # -- Coordconv
    filters &= (df['extractor_spatial_location'] == 'None')
    filters &= (df['stem_spatial_location'] == 'Both')
    filters &= (df['resblock_spatial_location'] == 'Both')
    filters &= (df['classifier_spatial_location'] == 'Both')

    # -- Resblocks
    filters &= (df['nb_resblock'] == 4)                    # J
    filters &= (df['resblocks_out_chan'] == 128)           # M

    # -- Classifier
    filters &= (df['classifier_conv_out'] == 512)          # C
    filters &= (df['classifier_projection_out'] == 1024)    # H

    # Variable Parameters (Input_type & Extractor)
    filters &= (~df['RGB_colormap'].str.contains('Blues', na=False))

    # 1D inputs are only accepted with this exact spectrogram configuration.
    input_1d_filter = (df['input_type'].str.contains('1D'))
    input_1d_filter &= (df['n_fft'] == 512)
    input_1d_filter &= (df['keep_freq_point'] == 256) 
    input_1d_filter &= (df['hop_length'] == 2048)

    filters &= ((df['input_type'].str.contains('RGB') & (df['extractor_type'].str.contains('Resnet'))) | input_1d_filter )

    #filters &= (df['extractor_type'].str.contains('Resnet|Parallel') | (df['extractor_type'].str.startswith('Interleaved')))
    #filters &= (df['extractor_type'].str.contains('Baseline') | (df['extractor_type'].str.contains('Resnet')))
    
    #filters &= (df['extractor_type'].str.contains('Parallel|Interleaved|Resnet|Baseline'))
    # Use the frame received as argument, not the notebook-level `experiments` variable.
    filters &= (df['extractor_type'].str.contains('Parallel|Interleaved|Resnet'))

    # -- Other
    filters &= (df['note'].str.contains('table_1|table_2_final_final'))
    #filters &= (df['random_seed'] == 876944)   # FIXME : Add more seeds and do the mean
    
    # Unconditional inclusions: OR-ed after everything else, they override all the conditions above.
    filters |= (df['note'].str.contains('fix_size'))
    
    filters |= (df['config'].str.contains('table_1_conv_2d_extractor_32_smaller_filters'))
    
    return filters


def get_table_coordconv_per_q_type_filters(df):
    """
    Build the boolean row mask selecting four specific CoordConv placements.

    On top of the static input/extractor/resblock/classifier conditions, a row must have no
    extractor CoordConv ('None') and one of these (stem, resblock, classifier) placements:
    (Both, Both, Both), (None, Time, None), (Freq, None, None) or (None, None, None).
    Its `note` must also contain 'table_7' or 'table_5_final'.

    :param df: DataFrame holding one row per experiment.
    :return: Boolean Series aligned on `df`.
    """
    def placement(stem, resblock, classifier):
        # Rows matching one (stem, resblock, classifier) combination (substring match).
        return ((df['stem_spatial_location'].str.contains(stem)) & (df['resblock_spatial_location'].str.contains(resblock)) & (df['classifier_spatial_location'].str.contains(classifier)))

    # Variable Parameters (CoordConv) : union of the accepted placements
    specific_configs = placement('Both', 'Both', 'Both')
    for stem, resblock, classifier in [('None', 'Time', 'None'), ('Freq', 'None', 'None'), ('None', 'None', 'None')]:
        specific_configs |= placement(stem, resblock, classifier)

    conditions = [
        df['date'] >= '2020-11-20',
        # -- Input parameters
        df['input_type'].str.contains('1D'),
        df['n_fft'] == 512,
        df['keep_freq_point'] == 256,
        df['hop_length'] == 2048,
        df['resized_width'].isnull(),
        # -- Text Processing
        df['rnn_state_size'] == 1024,               # G
        # -- Extractor
        df['extractor_nb_block'] == 3,              # K
        df['extractor_projection_size'] == 64,      # P
        # -- Resblocks
        df['nb_resblock'] == 3,                     # J       <---- FIXME: Make sure this is the good values
        df['resblocks_out_chan'] == 64,             # M       <---- FIXME: Make sure this is the good values
        # -- Classifier
        df['classifier_conv_out'] == 128,           # C       <---- FIXME: Make sure this is the good values
        df['classifier_projection_out'] == 512,     # H       <---- FIXME: Make sure this is the good values
        # -- CoordConv
        df['extractor_spatial_location'].str.contains('None'),
        specific_configs,
        # -- Other
        df['note'].str.contains('table_7|table_5_final'),
    ]

    # Fold every condition into the first one with the same in-place AND as a `&=` chain.
    filters = conditions[0]
    for condition in conditions[1:]:
        filters &= condition

    return filters


def get_table_coordconv_placement_filters(df):
    """Build the boolean row mask used for the CoordConv placement table.

    A row is selected when it is dated '2020-11-20' or later, its input type contains '1D',
    it has no resized width, every fixed hyper-parameter below matches exactly, and each of
    its four ``*_spatial_location`` columns contains one of None / Time / Freq / Both.

    :param df: experiments DataFrame exposing every column referenced below
    :return: boolean Series aligned on ``df``'s index (True = keep the row)
    """
    # (column, required value) pairs, applied in order. The single-letter tags are carried
    # over from the original annotations; their meaning is not defined in this function.
    spectrogram_params = (
        ('n_fft', 512),
        ('keep_freq_point', 256),
        ('hop_length', 2048),
    )
    network_params = (
        ('rnn_state_size', 1024),             # G -- Text Processing
        ('extractor_nb_block', 3),            # K -- Extractor
        # N -- 'extractor_filters' == [8, 16, 32] is intentionally not filtered on (disabled)
        ('extractor_projection_size', 64),    # P -- Extractor
        ('nb_resblock', 3),                   # J -- Resblocks    FIXME: Make sure this is the good values
        ('resblocks_out_chan', 64),           # M -- Resblocks    FIXME: Make sure this is the good values
        ('classifier_conv_out', 128),         # C -- Classifier   FIXME: Make sure this is the good values
        ('classifier_projection_out', 512),   # H -- Classifier   FIXME: Make sure this is the good values
    )
    # Variable parameters (CoordConv): any of the known placements is accepted
    coordconv_columns = (
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
    )

    mask = (df['date'] >= '2020-11-20')

    # -- Input parameters
    mask &= df['input_type'].str.contains('1D')
    for column, required_value in spectrogram_params:
        mask &= (df[column] == required_value)
    mask &= df['resized_width'].isnull()

    # -- Text processing / extractor / resblocks / classifier
    for column, required_value in network_params:
        mask &= (df[column] == required_value)

    # -- CoordConv placement
    for column in coordconv_columns:
        mask &= df[column].str.contains('None|Time|Freq|Both')

    # No filtering on 'note' or 'random_seed' here (FIXME: add more seeds and do the mean)
    return mask


def get_table_coordconv_bigger_placement_filters(df):
    """Build the boolean row mask used for the "bigger" CoordConv placement table.

    Same criteria as the regular CoordConv placement table (date >= '2020-11-20', '1D' input,
    no resized width, fixed hyper-parameters, a known placement in each of the four
    ``*_spatial_location`` columns) plus one more: the ``note`` column must contain
    'table_7_bigger'.

    :param df: experiments DataFrame exposing every column referenced below
    :return: boolean Series aligned on ``df``'s index (True = keep the row)
    """
    # (column, required value) pairs, applied in order. The single-letter tags are carried
    # over from the original annotations; their meaning is not defined in this function.
    spectrogram_params = (
        ('n_fft', 512),
        ('keep_freq_point', 256),
        ('hop_length', 2048),
    )
    network_params = (
        ('rnn_state_size', 1024),             # G -- Text Processing
        ('extractor_nb_block', 3),            # K -- Extractor
        # N -- 'extractor_filters' == [8, 16, 32] is intentionally not filtered on (disabled)
        ('extractor_projection_size', 64),    # P -- Extractor
        ('nb_resblock', 3),                   # J -- Resblocks    FIXME: Make sure this is the good values
        ('resblocks_out_chan', 64),           # M -- Resblocks    FIXME: Make sure this is the good values
        ('classifier_conv_out', 128),         # C -- Classifier   FIXME: Make sure this is the good values
        ('classifier_projection_out', 512),   # H -- Classifier   FIXME: Make sure this is the good values
    )
    # Variable parameters (CoordConv): any of the known placements is accepted
    coordconv_columns = (
        'extractor_spatial_location',
        'stem_spatial_location',
        'resblock_spatial_location',
        'classifier_spatial_location',
    )

    mask = (df['date'] >= '2020-11-20')

    # -- Input parameters
    mask &= df['input_type'].str.contains('1D')
    for column, required_value in spectrogram_params:
        mask &= (df[column] == required_value)
    mask &= df['resized_width'].isnull()

    # -- Text processing / extractor / resblocks / classifier
    for column, required_value in network_params:
        mask &= (df[column] == required_value)

    # -- CoordConv placement
    for column in coordconv_columns:
        mask &= df[column].str.contains('None|Time|Freq|Both')

    # -- Other: only the runs tagged for the "bigger" table
    # (no filtering on 'random_seed' -- FIXME: add more seeds and do the mean)
    mask &= df['note'].str.contains('table_7_bigger')

    return mask


In [None]:
# Display the first element returned by `get_full_sync_experiment_from_drive_script`, called
# in dry-run mode (dryrun=True) for the experiments currently held in `exp`.
# NOTE(review): `exp` is not defined in this cell -- it is whatever the most recently executed
# cell assigned to it, so the output depends on execution order.
from utils.notebook.experiment_explorer import get_full_sync_experiment_from_drive_script

get_full_sync_experiment_from_drive_script(exp, root_output_path, dryrun=True)[0]

In [None]:
# Baseline model
    # RGB vs 1D
    # Resnet    
    # Mel Vs Spectrograms

# Our model
    # Baseline (4Resblock_128filters) + Parallel extractor
    # Baseline (4Resblock_128filters) + Interleaved Extractor
    # Our best configuration for parallel extractor
    # Our best configuration for Interleaved extractor
    
# Parameter reduction
    # Best extractor (Either parallel or interleaved)
        # GRU
        # Classifier
        # Resblocks

# Dataset Sizes

# One modality

# Batching

In [None]:
# Our model -- baseline resblock configuration (4 resblocks, 128 output channels)
#   Compares the parallel ('freq_time_separated') and interleaved ('freq_time_interlaced')
#   extractors; rows are ordered by decreasing test accuracy.

columns = [
    'extractor_filters', 'extractor_projection_size', 'test_acc', 'max_freq', 'n_fft',
    'keep_freq_point', 'extractor_type', 'nb_trainable_param_round', 'note', 'folder',
]

# Row selection
filters = (experiments['date'] >= '2020-09-01')                                                       # Recent experiments only
filters &= experiments['extractor_type'].isin(['freq_time_interlaced', 'freq_time_separated'])       # Parallel or interleaved extractor
filters &= (experiments['nb_resblock'] == 4) & (experiments['resblocks_out_chan'] == 128)            # Baseline resblocks config
filters &= experiments['n_mels'].isnull()                                                            # Rows where 'n_mels' is unset

exp = experiments.loc[filters].sort_values(by='test_acc', ascending=False)

# NOTE : Hardcoded index -> display name mapping; it depends on the order of the DataFrame.
#        Currently unused: the per-index naming loop is disabled
#        (`for index, name in names_by_index.items(): exp.loc[index, 'name'] = name`).
names_by_index = {
    500: '1D-ConvLearned',
    391: 'RGB-Resnet-Imagenet-Stats',
    248: 'RGB-ConvLearned',
    130: 'RGB-Resnet-ClearStats',
    392: 'RGB-Resnet-Imagenet-Renorm',
}

# Every selected row gets the same display name, shown as the first column
exp['name'] = exp.apply(lambda _row: '4ResBl-128F', axis=1)
columns = ['name', *columns]

color_by_multi_attribute(
    exp[columns],
    main_attribute="extractor_type",
    attributes=['n_fft'],
    format_dict=latex_format_dict,
)

# Plain (uncoloured) alternative:
#exp[columns].style.format(latex_format_dict)

In [None]:
# Parameter reduction -- GRU
#   Parallel/interleaved extractor runs using 3 resblocks with 64 output channels, listed with
#   their 'rnn_state_size'; rows are ordered by decreasing test accuracy.

columns = [
    'rnn_state_size', 'extractor_filters', 'extractor_projection_size', 'test_acc', 'max_freq',
    'n_fft', 'keep_freq_point', 'extractor_type', 'nb_trainable_param_round',
]  # 'note' and 'folder' are currently disabled

# Row selection
filters = (experiments['date'] >= '2020-09-01')                                                       # Recent experiments only
filters &= experiments['extractor_type'].isin(['freq_time_interlaced', 'freq_time_separated'])       # Parallel or interleaved extractor
filters &= (experiments['nb_resblock'] == 3) & (experiments['resblocks_out_chan'] == 64)             # Reduced resblocks config
filters &= experiments['n_mels'].isnull()                                                            # Rows where 'n_mels' is unset

exp = experiments.loc[filters].sort_values(by='test_acc', ascending=False)

# NOTE : Hardcoded index -> display name mapping; it depends on the order of the DataFrame.
#        Currently unused: the per-index naming loop is disabled
#        (`for index, name in names_by_index.items(): exp.loc[index, 'name'] = name`).
names_by_index = {
    500: '1D-ConvLearned',
    391: 'RGB-Resnet-Imagenet-Stats',
    248: 'RGB-ConvLearned',
    130: 'RGB-Resnet-ClearStats',
    392: 'RGB-Resnet-Imagenet-Renorm',
}

# Every selected row gets the same display name, shown as the first column
exp['name'] = exp.apply(lambda _row: '3ResBl-64F', axis=1)
columns = ['name', *columns]

color_by_multi_attribute(
    exp[columns],
    main_attribute="extractor_type",
    attributes=['n_fft'],
    format_dict=latex_format_dict,
)

# Plain (uncoloured) alternative:
#exp[columns].style.format(latex_format_dict)

In [None]:
# Parameter reduction -- Classifier
#   Parallel/interleaved extractor runs using 3 resblocks with 64 output channels, listed with
#   their classifier settings; rows are ordered by decreasing test accuracy.

columns = [
    'classifier_type', 'classifier_conv_out', 'classifier_projection_out', 'classifier_global_pool',
    'extractor_filters', 'extractor_projection_size', 'test_acc', 'max_freq', 'n_fft',
    'keep_freq_point', 'extractor_type', 'nb_trainable_param_round', 'note', 'folder',
]

# Row selection
filters = (experiments['date'] >= '2020-09-01')                                                       # Recent experiments only
filters &= experiments['extractor_type'].isin(['freq_time_interlaced', 'freq_time_separated'])       # Parallel or interleaved extractor
filters &= (experiments['nb_resblock'] == 3) & (experiments['resblocks_out_chan'] == 64)             # Reduced resblocks config
filters &= experiments['n_mels'].isnull()                                                            # Rows where 'n_mels' is unset

exp = experiments.loc[filters].sort_values(by='test_acc', ascending=False)

# NOTE : Hardcoded index -> display name mapping; it depends on the order of the DataFrame.
#        Currently unused: the per-index naming loop is disabled
#        (`for index, name in names_by_index.items(): exp.loc[index, 'name'] = name`).
names_by_index = {
    500: '1D-ConvLearned',
    391: 'RGB-Resnet-Imagenet-Stats',
    248: 'RGB-ConvLearned',
    130: 'RGB-Resnet-ClearStats',
    392: 'RGB-Resnet-Imagenet-Renorm',
}

# Every selected row gets the same display name, shown as the first column
exp['name'] = exp.apply(lambda _row: '3ResBl-64F', axis=1)
columns = ['name', *columns]

color_by_multi_attribute(
    exp[columns],
    main_attribute="extractor_type",
    attributes=['n_fft'],
    format_dict=latex_format_dict,
)

# Plain (uncoloured) alternative:
#exp[columns].style.format(latex_format_dict)

In [None]:
# Parameter reduction -- Resblocks
#   Lists every recent parallel/interleaved extractor run whatever its resblock configuration
#   (no filter on 'nb_resblock' / 'resblocks_out_chan' here), ordered by decreasing test accuracy.

columns = ['nb_resblock', 'resblocks_out_chan', 'extractor_filters', 'extractor_projection_size', 'test_acc', 'max_freq', 'n_fft', 'keep_freq_point', 'extractor_type', 'nb_trainable_param_round', 'note', 'folder']

# From recent experiments
filters = (experiments['date'] >= '2020-09-01')

# Parallel or Interleaved extractor
filters &= (experiments['extractor_type'].isin(['freq_time_interlaced', 'freq_time_separated']))

# Rows where 'n_mels' is unset
filters &= (experiments['n_mels'].isnull())


exp = experiments[filters].sort_values('test_acc', ascending=False)

# NOTE : Hardcoded index -> display name mapping; it depends on the order of the DataFrame.
#        Currently unused: the per-index naming loop below is disabled.
names_by_index = {
    500: '1D-ConvLearned',
    391: 'RGB-Resnet-Imagenet-Stats',
    248: 'RGB-ConvLearned',
    130: 'RGB-Resnet-ClearStats',
    392: 'RGB-Resnet-Imagenet-Renorm'
}

#for index, name in names_by_index.items():
#    exp.loc[index, 'name'] = name


def _resblock_config_name(nb_resblock, out_chan):
    """Return the '<nb>ResBl-<chan>F' label of one run from its own resblock settings.

    Values that cannot be converted to int (e.g. NaN) are inserted as-is so that the
    problematic rows remain visible in the table instead of raising.
    """
    try:
        return f"{int(nb_resblock)}ResBl-{int(out_chan)}F"
    except (TypeError, ValueError):
        return f"{nb_resblock}ResBl-{out_chan}F"


# The resblock configuration varies between rows in this cell, so the display name is derived
# from each row instead of the constant '3ResBl-64F' (which mislabelled every other config).
# Rows that really use 3 resblocks / 64 channels still get '3ResBl-64F'.
exp['name'] = [
    _resblock_config_name(nb_resblock, out_chan)
    for nb_resblock, out_chan in zip(exp['nb_resblock'], exp['resblocks_out_chan'])
]
columns = ['name'] + columns

color_by_multi_attribute(exp[columns], main_attribute="extractor_type", attributes=['n_fft'], format_dict=latex_format_dict)

#exp[columns].style.format(latex_format_dict)

In [None]:
# Display the full `exp` selection.
# NOTE(review): `exp` is whatever the most recently executed cell assigned to it, so the
# output depends on execution order.
exp

# Table Generation

In [None]:
## Network reduction -- GRU units
# Runs noted 'final_dropout' with the 'fcn' classifier whose config name contains 'reduction'
# but neither 'proj' nor 'conv', ordered by decreasing number of trainable parameters.
df_filter = (
    (experiments['note'] == 'final_dropout')
    & (experiments['config'].str.contains('reduction'))
    & (~experiments['config'].str.contains('proj|conv'))
    & (experiments['classifier_type'] == 'fcn')
)
columns = [
    'nb_trainable_param', 'rnn_state_size', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled
GRU_table = (
    experiments[df_filter]
    .sort_values('nb_trainable_param', ascending=False)[columns]
    .reset_index(drop=True)
)

# Placeholder row labels, picked by row position (the index was reset just above).
# Only 5 labels are available: a 6th selected row raises an IndexError.
text = ['hola', 'quetal', 'muybien', 'tu', 'fff']

# Add the labels directly as the first column
GRU_table.insert(0, 'text', [text[row_position] for row_position in GRU_table.index])

#print(GRU_table.to_latex(index=False, formatters=latex_format_dict))
GRU_table.style.format(latex_format_dict)

In [None]:
## Network reduction -- Classifier
# Runs noted 'final' whose config name contains 'reduction' but neither 'extractor' nor 'proj',
# limited to three RNN state sizes and ordered by decreasing number of trainable parameters.
df_filter = (experiments['note'] == 'final') & (experiments['config'].str.contains('reduction'))
df_filter &= ~experiments['config'].str.contains('extractor|proj')
df_filter &= experiments['rnn_state_size'].isin([4096, 1024, 256])

columns = [
    'nb_trainable_param', 'rnn_state_size', 'classifier_type', 'classifier_conv_out',
    'classifier_projection_out', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_trained',
]  # 'train_time' is currently disabled
classifier = experiments.loc[df_filter].sort_values(by='nb_trainable_param', ascending=False)[columns]

#print(classifier.to_latex(index=False, formatters=latex_format_dict))
classifier.style.format(latex_format_dict)

In [None]:
## Network reduction -- Reduction Filters/Nb Resblocks
# Two groups of runs noted 'final' are shown together, ordered by decreasing number of
# trainable parameters:
#   1. configs containing 'reduction' and 'extractor' but not 'proj', for three RNN state sizes
#   2. two explicitly named RNN-only reduction configs
df_filter = (experiments['note'] == 'final') & (experiments['config'].str.contains('reduction'))
df_filter &= (experiments['config'].str.contains('extractor')) & (~experiments['config'].str.contains('proj'))
df_filter &= experiments['rnn_state_size'].isin([4096, 1024, 256])

only_rnn_reduction_filter = (experiments['note'] == 'final') & (
    experiments['config'].isin([
        'reduction_original_rnn_1024_fcn_no_conv_hidden_256',
        "reduction_original_rnn_1024_fcn_conv_256_hidden_512",
    ])
)

df_filter |= only_rnn_reduction_filter

columns = [
    'nb_trainable_param', 'extractor_out_chan', 'stem_out_chan', 'nb_resblock', 'classifier_conv_out',
    'classifier_projection_out', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled

reduction_experiments = experiments.loc[df_filter].sort_values(by='nb_trainable_param', ascending=False)[columns]

#print(reduction_experiments.to_latex(index=False, formatters=latex_format_dict))
reduction_experiments.style.format(latex_format_dict)

In [None]:
## Feature Extractor -- Parallel Extractor
# Runs noted 'extractor' using the 'freq_time_separated' extractor type.
df_filter = (experiments['note'] == 'extractor') & (experiments['extractor_type'] == 'freq_time_separated')

columns = [
    'extractor_filters', 'extractor_projection_size', 'extractor_out_chan',
    'train_acc', 'best_val_acc', 'test_acc', 'nb_trainable_param',
]  # 'train_time' is currently disabled

# Average results grouped over ['config', 'nb_scene', 'nb_q_per_scene'], then order the
# table by decreasing number of trainable parameters.
# ('nb_epoch_runned' is not part of this table, hence no convert_cols_to_int step here.)
parallel_extractor = groupby_mean(
    experiments[df_filter],
    groupby_columns=['config', 'nb_scene', 'nb_q_per_scene'],
    mean_columns=['train_acc', 'best_val_acc', 'test_acc'],
    selected_columns=columns,
    add_count_col=False,
).sort_values('nb_trainable_param', ascending=False)

#print(parallel_extractor.to_latex(index=False, formatters=latex_format_dict))
parallel_extractor.style.format(latex_format_dict)

In [None]:
## Feature Extractor -- Interlaced Extractor -- Time First
# Runs noted 'extractor' using the 'freq_time_interlaced' extractor type whose config name
# contains 'timefirst'.
df_filter = (experiments['note'] == 'extractor') & (experiments['extractor_type'] == 'freq_time_interlaced')
df_filter &= experiments['config'].str.contains('timefirst')

columns = [
    'nb_trainable_param', 'extractor_nb_block', 'extractor_filters', 'extractor_projection_size',
    'extractor_out_chan', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled

# Average results grouped over ['config', 'nb_scene', 'nb_q_per_scene']
interleaved_extractor_timefirst = groupby_mean(
    experiments[df_filter],
    groupby_columns=['config', 'nb_scene', 'nb_q_per_scene'],
    mean_columns=['train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned'],
    selected_columns=columns,
    add_count_col=False,
)

# The averaged epoch count is converted back to an int column; rows are then ordered by
# decreasing test accuracy.
interleaved_extractor_timefirst = convert_cols_to_int(
    interleaved_extractor_timefirst, ['nb_epoch_runned']
).sort_values('test_acc', ascending=False)

#print(interleaved_extractor_timefirst.to_latex(index=False, formatters=latex_format_dict))
interleaved_extractor_timefirst.style.format(latex_format_dict)

In [None]:
## Feature Extractor -- Interlaced Extractor -- Freq First
# Runs noted 'extractor' using the 'freq_time_interlaced' extractor type whose config name
# does NOT contain 'timefirst'.
df_filter = (experiments['note'] == 'extractor') & (experiments['extractor_type'] == 'freq_time_interlaced')
df_filter &= ~experiments['config'].str.contains('timefirst')

columns = [
    'nb_trainable_param', 'extractor_nb_block', 'extractor_filters', 'extractor_projection_size',
    'extractor_out_chan', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled

# Average results grouped over ['config', 'nb_scene', 'nb_q_per_scene']
interleaved_extractor_freqfirst = groupby_mean(
    experiments[df_filter],
    groupby_columns=['config', 'nb_scene', 'nb_q_per_scene'],
    mean_columns=['train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned'],
    selected_columns=columns,
    add_count_col=False,
)

# The averaged epoch count is converted back to an int column; rows are then ordered by
# decreasing number of trainable parameters.
interleaved_extractor_freqfirst = convert_cols_to_int(
    interleaved_extractor_freqfirst, ['nb_epoch_runned']
).sort_values('nb_trainable_param', ascending=False)

#print(interleaved_extractor_freqfirst.to_latex(index=False, formatters=latex_format_dict))
interleaved_extractor_freqfirst.style.format(latex_format_dict)

In [None]:
## Dataset size comparison -- Mixed -- 100k, 200k, 400k samples
# Test accuracy against the number of questions per scene: one dotted line per total sample
# count (400k / 200k / 100k), with markers coloured by number of scenes.
# Also defines `grouped_by_sample_400k` / `_200k` / `_100k`, which the "Dataset size ... - Data"
# table cells read.
import matplotlib
import matplotlib.legend   # explicit: `matplotlib.legend.Legend` is used below
import matplotlib.ticker   # explicit: `matplotlib.ticker.ScalarFormatter` is used below
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np


fig_name = "dataset_size_all_samples.pdf"
df_filter = (experiments['note'] == 'dataset_size')
columns = ['nb_sample', 'nb_scene', 'nb_q_per_scene', 'test_acc']
dataset_size = experiments[df_filter].sort_values('nb_q_per_scene')


fig, ax = plt.subplots()
lines = []

grouped_by_sample_400k = dataset_size[dataset_size['nb_sample'] == 400000]

# Get colorlist : one colour per distinct number of scenes (taken from the 400k runs)
group_unique_keys = grouped_by_sample_400k['nb_scene'].unique()
colorlist = {key: colors.rgb2hex(matplotlib.cm.gist_rainbow_r(i)) for key, i in
             zip(group_unique_keys, np.linspace(0, 0.9, len(group_unique_keys)))}

# Plot 400k lines & markers (the only call that shows labels; marker edges reuse the line colour)
lines += ax.plot(grouped_by_sample_400k['nb_q_per_scene'], grouped_by_sample_400k['test_acc'], linewidth=1, linestyle=':', zorder=1)
grouped_scatter(grouped_by_sample_400k, 'nb_scene', 'nb_q_per_scene', 'test_acc', ax = ax, show_label=True, colorlist=colorlist, 
                label_modifier=lambda n: f"{int(n/1000)}k scenes  ", additional_params={"marker": ",", "zorder":2, "edgecolor":lines[0].get_markerfacecolor(), "linewidth":1})

# Plot 200k lines & markers
grouped_by_sample_200k = dataset_size[dataset_size['nb_sample'] == 200000]
lines += ax.plot(grouped_by_sample_200k['nb_q_per_scene'], grouped_by_sample_200k['test_acc'], linewidth=1, linestyle=':', zorder=1)
grouped_scatter(grouped_by_sample_200k, 'nb_scene', 'nb_q_per_scene', 'test_acc', ax = ax, show_label=False, colorlist=colorlist, additional_params={"marker": ",", "zorder":2, "edgecolor":lines[1].get_markerfacecolor(), "linewidth":1})

# Plot 100k lines & markers
grouped_by_sample_100k = dataset_size[dataset_size['nb_sample'] == 100000]
lines += ax.plot(grouped_by_sample_100k['nb_q_per_scene'], grouped_by_sample_100k['test_acc'], linewidth=1, linestyle=':', zorder=1)
grouped_scatter(grouped_by_sample_100k, 'nb_scene', 'nb_q_per_scene', 'test_acc', ax = ax, show_label=False, colorlist=colorlist, additional_params={"marker": ",", "zorder":2, "edgecolor":lines[2].get_markerfacecolor(), "linewidth":1})

# Remove marker border from legend
# `Legend.legendHandles` was renamed `legend_handles` in Matplotlib 3.7 and the old name was
# later removed: use the new attribute when available, the old one otherwise.
scene_legend = ax.get_legend()
scene_legend_handles = getattr(scene_legend, 'legend_handles', None)
if scene_legend_handles is None:
    scene_legend_handles = scene_legend.legendHandles
for legend_handle in scene_legend_handles:
    legend_handle.set_linewidths(0)

# Add Second legend (one entry per sample-count line)
ax.add_artist(matplotlib.legend.Legend(ax, lines, ['400k samples', '200k samples', '100k samples'], loc='center right'))

# Set axis infos : log-scaled x axis with plain-number ticks at the observed values
ax.set_xscale('log')
ax.set_xticks(dataset_size['nb_q_per_scene'].unique())
ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.set_xlim([0.9, 43])
ax.set_xlabel('Number of question per scene')
ax.set_ylabel('Accuracy')

#fig.savefig(f"stats/{fig_name}", bbox_inches='tight')

In [None]:
## Dataset size comparison -- Mixed -- 100k, 200k, 400k samples
# Grouped bar chart of the test accuracy: one group per number of scenes, one bar per total
# sample count. The number of questions per scene of the matching run is written over each
# bar (at y=0.96).
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import seaborn as sns

fig_name = "dataset_size_all_samples_giampi.pdf"
df_filter = (experiments['note'] == 'dataset_size')
columns = ['nb_sample', 'nb_scene', 'nb_q_per_scene', 'test_acc']
dataset_size = experiments[df_filter].sort_values('nb_q_per_scene')
#print(dataset_size)

fig, ax = plt.subplots()
sns.set_palette("colorblind")
g = sns.barplot(x='nb_scene', y='test_acc', hue='nb_sample', data=dataset_size)

# Plain arrays so that the lookups below are purely positional. `dataset_size` keeps the
# (filtered, re-ordered) index labels of `experiments`, so feeding a positional argmin into
# a label-based `Series[idx]` lookup would select the wrong run or raise a KeyError.
test_acc_values = dataset_size['test_acc'].values
nb_q_per_scene_values = dataset_size['nb_q_per_scene'].values

# the following is an ugly hack that only works if the accuracy values are all different!!
# (each bar is matched back to its run through the closest test accuracy)
for patch in ax.patches:
    if np.isnan(patch.get_height()):
        continue
    idx = int(np.argmin(np.abs(test_acc_values - patch.get_height())))
    #g.text(patch.get_x()+patch.get_width()/2, patch.get_height(), nb_q_per_scene_values[idx], color='black', ha='center')
    g.text(patch.get_x()+patch.get_width()/2, 0.96, nb_q_per_scene_values[idx], color='black', ha='center')
g.text(1.5, 1.01, '# questions per scene', color='black')
plt.xlabel('# scenes')
plt.ylabel('Accuracy')
h, l = ax.get_legend_handles_labels()
ax.legend(h, l, title='# examples', loc='lower left')

#fig.savefig(f"stats/{fig_name}", bbox_inches='tight')

In [None]:
# Display the full `dataset_size` selection (runs noted 'dataset_size', as assigned by the
# most recently executed dataset-size cell).
dataset_size

In [None]:
# Grouped bar chart of the test accuracy: one group per total sample count, one bar per
# number of scenes. The number of questions per scene of the matching run is written over
# each bar (at y=0.96).
fig_name = "dataset_size_all_samples_giampi2.pdf"
df_filter = (experiments['note'] == 'dataset_size')
columns = ['nb_sample', 'nb_scene', 'nb_q_per_scene', 'test_acc']
dataset_size = experiments[df_filter].sort_values('nb_q_per_scene')
#print(dataset_size)

fig, ax = plt.subplots()
sns.set_palette("colorblind")
g = sns.barplot(x='nb_sample', y='test_acc', hue='nb_scene', data=dataset_size)

# Plain arrays so that the lookups below are purely positional. `dataset_size` keeps the
# (filtered, re-ordered) index labels of `experiments`, so feeding a positional argmin into
# a label-based `Series[idx]` lookup would select the wrong run or raise a KeyError.
test_acc_values = dataset_size['test_acc'].values
nb_q_per_scene_values = dataset_size['nb_q_per_scene'].values

# the following is an ugly hack that only works if the accuracy values are all different!!
# (each bar is matched back to its run through the closest test accuracy)
for patch in ax.patches:
    if np.isnan(patch.get_height()):
        continue
    idx = int(np.argmin(np.abs(test_acc_values - patch.get_height())))
    #g.text(patch.get_x()+patch.get_width()/2, patch.get_height(), nb_q_per_scene_values[idx], color='black', ha='center')
    g.text(patch.get_x()+patch.get_width()/2, 0.96, nb_q_per_scene_values[idx], color='black', ha='center')
g.text(0.5, 1.01, '# questions per scene', color='black')
plt.xlabel('# examples')
plt.ylabel('Accuracy')
h, l = ax.get_legend_handles_labels()
ax.legend(h, l, title='# scenes', loc='lower right')

#fig.savefig(f"stats/{fig_name}", bbox_inches='tight')

In [None]:
# Three side-by-side bar charts (100k / 200k / 400k records) of the test accuracy per number
# of questions per scene, each with a dashed line at the mean accuracy of its subset.
# The figure is saved to stats/<fig_name>.
fig_name = "dataset_size_all_samples_giampi3.pdf"
df_filter = (experiments['note'] == 'dataset_size')
columns = ['nb_sample', 'nb_scene', 'nb_q_per_scene', 'test_acc']
dataset_size = experiments[df_filter].sort_values('nb_q_per_scene')
subset100k = dataset_size.nb_sample == 100000
subset200k = dataset_size.nb_sample == 200000
subset400k = dataset_size.nb_sample == 400000
#print(dataset_size)

fig, ax = plt.subplots(1,3, figsize=(6,4), sharey=True)
#sns.set_palette("colorblind")

# One entry per panel, left to right:
#   (row mask, title, x label, y label, hard-coded tick labels "(# scenes) x (# questions per scene)")
# Only the middle panel carries the x label and only the left one the y label.
panel_specs = (
    (subset100k, '100k records', '', 'Accuracy',
     ['100k x 1', '50k x 2', '20k x 5', '10k x 10']),
    (subset200k, '200k records', '(# scenes) x (# questions per scene)', '',
     ['200k x 1', '100k x 2', '50k x 4', '20k x 10', '10k x 20']),
    (subset400k, '400k records', '', '',
     ['400k x 1', '200k x 2', '100k x 4', '50k x 8', '20k x 20', '10k x 40']),
)

for panel_ax, (panel_mask, panel_title, panel_xlabel, panel_ylabel, panel_ticklabels) in zip(ax, panel_specs):
    g = sns.barplot(x='nb_q_per_scene', y='test_acc', data=dataset_size[panel_mask], color='dodgerblue', ax=panel_ax)
    panel_ax.set_xlabel(panel_xlabel)
    panel_ax.set_xticklabels(panel_ticklabels, rotation=90)
    panel_ax.set_ylabel(panel_ylabel)
    panel_ax.set_title(panel_title)
    # Dashed reference line at the mean test accuracy of this subset
    mean = dataset_size[panel_mask]['test_acc'].mean()
    panel_ax.axhline(y=mean, linestyle="--", color='orange')

plt.subplots_adjust(wspace=-0.5)
plt.tight_layout()
# No legend on this figure (disabled):
#h, l = ax.get_legend_handles_labels()
#ax.legend(h, l, title='# scenes', loc='lower right')

fig.savefig(f"stats/{fig_name}", bbox_inches='tight')

In [None]:
# Debug helper: print the attribute names of the third panel's axes that contain no underscore
print(list(filter(lambda attr_name: '_' not in attr_name, dir(ax[2]))))

In [None]:
# Debug helper: show the y-data of the 4th line (index 3) attached to `ax[2]`.
# NOTE(review): assumes `ax` is still the 1x3 axes array of the three-panel figure -- other
# cells rebind `ax` to a single Axes, so this depends on execution order.
ax[2].lines[3].get_ydata()

In [None]:
# Dataset size 400k - Data
# Styled table of the 400k-sample runs; `grouped_by_sample_400k` comes from the dataset-size
# line plot cell, which must have been executed first.

columns = [
    'nb_trainable_param', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled

#print(grouped_by_sample_400k[columns].to_latex(index=False, formatters=latex_format_dict))
grouped_by_sample_400k.loc[:, columns].style.format(latex_format_dict)

In [None]:
# Dataset size 200k - Data
# Styled table of the 200k-sample runs; `grouped_by_sample_200k` comes from the dataset-size
# line plot cell, which must have been executed first.

columns = [
    'nb_trainable_param', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled

#print(grouped_by_sample_200k[columns].to_latex(index=False, formatters=latex_format_dict))
grouped_by_sample_200k.loc[:, columns].style.format(latex_format_dict)

In [None]:
# Dataset size 100k - Data
# Styled table of the 100k-sample runs; `grouped_by_sample_100k` comes from the dataset-size
# line plot cell, which must have been executed first.

columns = [
    'nb_trainable_param', 'train_acc', 'best_val_acc', 'test_acc', 'nb_epoch_runned',
]  # 'train_time' is currently disabled

#print(grouped_by_sample_100k[columns].to_latex(index=False, formatters=latex_format_dict))
grouped_by_sample_100k.loc[:, columns].style.format(latex_format_dict)