In [None]:
def render_mpl_table(data, col_width=25.0, row_height=0.525, font_size=10,
                     header_color='#40466e', row_colors=['#f1f1f2', 'w'], edge_color='w',
                     bbox=[0, 0, 1, 1], header_columns=0,
                     ax=None, **kwargs):
    """Draw a DataFrame as a styled matplotlib table.

    Parameters
    ----------
    data : DataFrame-like
        Must expose ``.shape`` (rows, columns), ``.values`` and ``.columns``.
    col_width, row_height : float
        Figure width per column and figure height per row (one extra row is
        added for the header). Only used when a new figure is created.
    font_size : int
        Fixed font size of every cell; automatic font sizing is switched off.
    header_color : colour
        Face colour of the header row and of the first ``header_columns`` columns.
    row_colors : sequence of colours
        Face colours of the body cells, cycled by row number.
    edge_color : colour
        Edge colour of every cell.
    bbox : sequence
        Forwarded to ``ax.table`` as ``bbox``.
    header_columns : int
        Number of leading columns styled like the header row.
    ax : matplotlib Axes, optional
        Axes to draw into. When omitted a new figure/axes pair is created and
        its axis decorations are turned off.
    **kwargs
        Forwarded unchanged to ``ax.table``.

    Returns
    -------
    (figure, ax)
        The figure that owns ``ax`` and the axes holding the table.
    """
    if ax is None:
        n_rows, n_cols = data.shape
        # +1 row so the column-label header gets its own share of the height.
        fig, ax = plt.subplots(figsize=(n_cols * col_width, (n_rows + 1) * row_height))
        ax.axis('off')

    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)

    # get_celld() is the public accessor for the {(row, col): cell} mapping;
    # row 0 is the header row created from colLabels.
    for (row, col), cell in mpl_table.get_celld().items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
    return ax.get_figure(), ax

# Render the combined results table and write it to disk as a PNG.
# NOTE(review): in the visible cells `dfred` is only assigned further down the
# notebook, so on a fresh kernel that cell has to run before this one.
fig, ax = render_mpl_table(data=dfred, col_width=2.0, header_columns=0)
fig.savefig("table_mpl.png")

In [None]:
# Raw validation scores kept from every results table.
SCORE_COLUMNS = ['val_accuracy_score', 'val_balanced_accuracy_score', 'val_explained_variance_score']

# Validation score used as "the" metric of each prediction task.
TASK_METRIC = {
    'sex': 'val_balanced_accuracy_score',
    'mood_disorder': 'val_balanced_accuracy_score',
    'srt_right_ear_classification': 'val_accuracy_score',
    'alc_int_freq': 'val_accuracy_score',
    'mean_fa_fornix': 'val_explained_variance_score',
}


def _reduce_results(results, n_samples):
    """Keep the score columns of ``results`` and add the per-task metric.

    The score columns are renamed with a ``_<n_samples>`` suffix, and a
    ``val_metric_<n_samples>`` column is filled with the score that
    ``TASK_METRIC`` assigns to the row's task (NaN for tasks not listed).
    The returned frame is sorted by ``out``.
    """
    reduced = results[['model_unique_name', 'out'] + SCORE_COLUMNS].copy()
    reduced = reduced.rename(columns={name: f'{name}_{n_samples}' for name in SCORE_COLUMNS})

    metric_column = f'val_metric_{n_samples}'
    reduced[metric_column] = float('nan')
    for task, score in TASK_METRIC.items():
        is_task = reduced['out'] == task
        # Write through .loc on the frame itself: `frame[col].update(...)`
        # modifies a temporary Series and is lost under pandas Copy-on-Write.
        # The boolean mask keeps row order, so a positional array is safe.
        reduced.loc[is_task, metric_column] = reduced.loc[is_task, f'{score}_{n_samples}'].to_numpy()
    return reduced.sort_values('out')


df2000red = _reduce_results(df2000, 2000)
df250red = _reduce_results(df250, 250)

In [None]:
# Task metric at 2000 samples as a plain Python list, in the row order of df2000red.
val_metric_2000 = df2000red.loc[:, 'val_metric_2000'].to_list()

In [None]:
# Start from the 250-sample table and attach the 2000-sample scores column by
# column; each assignment is matched on the row index.
# (The 8000-sample scores are not merged in.)
columns_2000 = [
    'val_accuracy_score_2000',
    'val_balanced_accuracy_score_2000',
    'val_explained_variance_score_2000',
    'val_metric_2000',
]
dfred = df250red.copy()
for column in columns_2000:
    dfred[column] = df2000red[column]

In [None]:
# Distinct task and model names, in order of first appearance in dfred.
# Series.unique() is called directly: handing a plain list to pd.unique() is
# deprecated input in recent pandas.
out = dfred['out'].unique()
model_unique_name = dfred['model_unique_name'].unique()

# Accuracy of every row at both sample sizes, as plain Python lists.
val_accuracy_score_250 = dfred['val_accuracy_score_250'].tolist()
val_accuracy_score_2000 = dfred['val_accuracy_score_2000'].tolist()

In [None]:
def plot_learning_rate(data, label, metric):
    """Select one task's scores at both sample sizes.

    Parameters
    ----------
    data : DataFrame
        Must contain ``out``, ``model_unique_name``, ``<metric>_250`` and
        ``<metric>_2000`` columns.
    label : value compared against the ``out`` column to pick the task rows.
    metric : str
        Prefix of the two score columns to keep.

    Returns
    -------
    DataFrame
        Copy of the rows whose ``out`` equals ``label``, restricted to the
        model name and the two score columns. No plotting is done yet.
    """
    # Column selection must start from the filtered rows, not from `data`,
    # otherwise the label filter is thrown away.
    task_rows = data[data['out'] == label]
    return task_rows[['model_unique_name', f"{metric}_250", f"{metric}_2000"]].copy()
    

In [None]:
# Validation accuracy of every 'sex' run at both training-set sizes,
# drawn as one line per row of the results table.
label = 'sex'
metric = 'val_accuracy_score'
sample_size = [250, 2000]

# Cell-local names on purpose: other cells loop over the notebook-wide
# `model_unique_name` as the list of distinct model names, so it must not be
# overwritten with this per-row (repeating) selection.
task_results = dfred[dfred['out'] == label]
score_columns = [f'{metric}_{n}' for n in sample_size]

for model, scores in zip(task_results['model_unique_name'],
                         task_results[score_columns].to_numpy().tolist()):
    plt.errorbar(sample_size, scores, label=model)

plt.xlabel('training samples')
plt.ylabel(metric)
plt.title(label)
plt.legend()
plt.show()

In [None]:
# Rows of the 'sex' task, handed to seaborn as-is (no x / y columns are named).
is_sex = dfred['out'] == 'sex'
datasex = dfred[is_sex]
sns.relplot(kind='line', data=datasex)
plt.show()

In [None]:
# One line per model for the 'srt_right_ear_classification' task.
# NOTE(review): expects dfred to provide 'nsamples', 'val_metric' and
# 'TL_type' columns — confirm against the cell that builds dfred.
is_task = dfred['out'] == 'srt_right_ear_classification'
for model in model_unique_name:
    # One combined mask: indexing twice (df[mask_a][mask_b]) applies a
    # full-length mask to an already filtered frame, which pandas has to
    # re-align and warns about.
    sns.lineplot(
        data=dfred[is_task & (dfred['model_unique_name'] == model)],
        x='nsamples', y='val_metric', label=model,
        err_style="bars", style='TL_type', legend=False)
plt.ylim(0, 1)
plt.legend()
plt.show()

In [None]:
# Mean and standard deviation of the task metric over the repeated
# (cross-validation) runs of every task / model pair.
# Task and model names are read from dfred itself so the table size never
# depends on a hard-coded row count or on other cells' variables.
stat_columns = ['out', 'model_unique_name', 'mean_250', 'std_250', 'mean_2000', 'std_2000']
stat_models = dfred['model_unique_name'].unique()

stat_rows = []
for task in dfred['out'].unique():
    in_task = dfred['out'] == task
    for model in stat_models:
        # Filter on task AND model: filtering on the model alone would pool
        # the scores of all tasks and repeat the same numbers for every task.
        runs = dfred.loc[in_task & (dfred['model_unique_name'] == model)]
        stat_rows.append({
            'out': task,
            'model_unique_name': model,
            'mean_250': runs['val_metric_250'].mean(),
            'std_250': runs['val_metric_250'].std(),
            'mean_2000': runs['val_metric_2000'].mean(),
            'std_2000': runs['val_metric_2000'].std(),
        })

# Built once from the collected records; `columns` fixes the column order.
dfredstats = pd.DataFrame(stat_rows, columns=stat_columns)

In [None]:
def plot_metric_and_label(df, label, metric, n_samples):
    """Scatter-plot one validation score per model for a single task.

    Parameters
    ----------
    df : DataFrame
        Must contain ``out``, ``model_unique_name`` and the column
        ``<metric>_<n_samples>``.
    label : value compared against the ``out`` column; also used as the title.
    metric : str
        Prefix of the score column to plot.
    n_samples : str or int
        Suffix of the score column to plot (e.g. ``'250'``).

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Only the plotted column is selected, so any sample size present in
    # `df` can be requested, not just 250 and 2000.
    score_column = f'{metric}_{n_samples}'
    task_scores = df.loc[df['out'] == label, ['model_unique_name', score_column]]
    task_scores.plot(kind='scatter', x='model_unique_name', y=score_column,
                     color='blue', figsize=(22, 8), title=f'{label}')
    plt.show()
    
# Accuracy at 250 samples for the right-ear SRT classification task.
plot_metric_and_label(
    df=dfred,
    label='srt_right_ear_classification',
    metric='val_accuracy_score',
    n_samples='250',
)

In [None]:
# Balanced accuracy of every model on the 'sex' task against sample count.
# NOTE(review): expects dfred to provide 'nsamples' and
# 'val_balanced_accuracy_score' columns — confirm against the cell that
# builds dfred.
is_sex = dfred['out'] == 'sex'
for model in model_unique_name:
    # lineplot has no `kind` or `col` parameter (those belong to seaborn's
    # figure-level functions); unknown keywords are forwarded to matplotlib,
    # which rejects them. marker='o' presumably covers the intent of the
    # former kind='point' — adjust if a different look was wanted.
    sns.lineplot(
        data=dfred[is_sex & (dfred['model_unique_name'] == model)],
        x='nsamples', y='val_balanced_accuracy_score',
        marker='o', label=model)
plt.legend()
plt.show()