# Evaluate the Finetune Results

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import dict_dtype, utils

import importlib

importlib.reload(dict_dtype)
importlib.reload(utils)

<module 'utils' from '/scratch/ylee/ChemLlama/eval/utils.py'>

In [2]:
from pandas import factorize

In [3]:
pd.set_option('display.max_rows', None) # show all
pd.reset_option('display.max_rows') # undo

## Load csv

In [4]:
dir_csv_main = '../finetune/evaluations'
list_sub_dir = sorted([d for d in os.listdir(dir_csv_main) if os.path.isdir(os.path.join(dir_csv_main, d)) and "ft_version" in d])

print(list_sub_dir)
list_csv = sorted([f for f in os.listdir(f"{dir_csv_main}/{list_sub_dir[0]}") if f.endswith('.csv')])
list_csv

['ft_version_0', 'ft_version_1', 'ft_version_2', 'ft_version_3', 'ft_version_4']


['ChemBart_Medium_10m.csv',
 'ChemBart_Medium_20m.csv',
 'ChemBart_Medium_30m.csv',
 'ChemBart_Small_10m.csv',
 'ChemBart_Small_20m.csv',
 'ChemBart_Small_30m.csv',
 'ChemBerta_Medium_10m.csv',
 'ChemBerta_Medium_20m.csv',
 'ChemBerta_Medium_30m.csv',
 'ChemBerta_Small_10m.csv',
 'ChemBerta_Small_20m.csv',
 'ChemBerta_Small_30m.csv',
 'ChemLlama_Medium_10m.csv',
 'ChemLlama_Medium_20m.csv',
 'ChemLlama_Medium_30m.csv',
 'ChemLlama_Small_10m.csv',
 'ChemLlama_Small_20m.csv',
 'ChemLlama_Small_30m.csv']

In [5]:
list_df = list()
for ver, local_sub_dir in enumerate(list_sub_dir):
    for local_csv in list_csv:
        local_df = pd.read_csv(f"{dir_csv_main}/{local_sub_dir}/{local_csv}")
        local_df.insert(loc=0, column='ft_version', value=[ver]*local_df.shape[0])
        list_df.append(local_df)

list_df    

[     ft_version       model_mtr_name  model_mtr_ep         dataset_name  \
 0             0  ChemBart_Medium_10m           0.0  bace_classification   
 1             0  ChemBart_Medium_10m           0.0  bace_classification   
 2             0  ChemBart_Medium_10m           0.0  bace_classification   
 3             0  ChemBart_Medium_10m           0.0  bace_classification   
 4             0  ChemBart_Medium_10m           0.0  bace_classification   
 ..          ...                  ...           ...                  ...   
 289           0  ChemBart_Medium_10m           6.0         tox21_sr_p53   
 290           0  ChemBart_Medium_10m           6.0         tox21_sr_p53   
 291           0  ChemBart_Medium_10m           6.0         tox21_sr_p53   
 292           0  ChemBart_Medium_10m           6.0         tox21_sr_p53   
 293           0  ChemBart_Medium_10m           6.0         tox21_sr_p53   
 
        dataset_type  metric_1  metric_2  p_value_mantissa  p_value_exponent  \
 0    

In [6]:
df_all_prev = pd.concat(list_df, axis=0, ignore_index=True).astype(dict_dtype.dict_dtype)
df_all_prev['p_value_exponent'] = df_all_prev['p_value_exponent'].fillna(0)
df_all_prev

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking
0,0,ChemBart_Medium_10m,0,bace_classification,classification,0.648188,,0.001820,0.0,0,0.993232,6,6
1,0,ChemBart_Medium_10m,0,bace_classification,classification,0.760688,,1.253343,-8.0,1,0.717989,5,5
2,0,ChemBart_Medium_10m,0,bace_classification,classification,0.805435,,8.985626,-12.0,2,0.650228,0,3
3,0,ChemBart_Medium_10m,0,bace_classification,classification,0.800000,,2.386729,-11.0,3,0.651059,1,4
4,0,ChemBart_Medium_10m,0,bace_classification,classification,0.808333,,5.272629,-12.0,4,0.660707,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
26455,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.773162,,4.677256,-10.0,2,0.181836,5,3
26456,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.770786,,6.656599,-10.0,3,0.169517,4,4
26457,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.808014,,1.806867,-12.0,4,0.155549,0,2
26458,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.820493,,2.071732,-13.0,5,0.160060,1,1


In [7]:
# new_cols = df_all_prev['model_mtr_name'].str.split("_", expand=True).astype('category')
new_cols = df_all_prev['model_mtr_name'].str.split("_", expand=True)
list_new_cols = ['model_type', 'model_size', 'data_size']
new_cols.columns = list_new_cols
df_all = pd.concat([df_all_prev, new_cols], axis=1)
df_all

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size
0,0,ChemBart_Medium_10m,0,bace_classification,classification,0.648188,,0.001820,0.0,0,0.993232,6,6,ChemBart,Medium,10m
1,0,ChemBart_Medium_10m,0,bace_classification,classification,0.760688,,1.253343,-8.0,1,0.717989,5,5,ChemBart,Medium,10m
2,0,ChemBart_Medium_10m,0,bace_classification,classification,0.805435,,8.985626,-12.0,2,0.650228,0,3,ChemBart,Medium,10m
3,0,ChemBart_Medium_10m,0,bace_classification,classification,0.800000,,2.386729,-11.0,3,0.651059,1,4,ChemBart,Medium,10m
4,0,ChemBart_Medium_10m,0,bace_classification,classification,0.808333,,5.272629,-12.0,4,0.660707,2,1,ChemBart,Medium,10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26455,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.773162,,4.677256,-10.0,2,0.181836,5,3,ChemLlama,Small,30m
26456,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.770786,,6.656599,-10.0,3,0.169517,4,4,ChemLlama,Small,30m
26457,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.808014,,1.806867,-12.0,4,0.155549,0,2,ChemLlama,Small,30m
26458,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.820493,,2.071732,-13.0,5,0.160060,1,1,ChemLlama,Small,30m


In [8]:
# model type and size
df_all["mts"] = df_all['model_type'] + "_" + df_all["model_size"]
# model type and data size
df_all['mtds'] = df_all['model_type'] + "_" + df_all["data_size"]
# model size and data size
df_all['msds'] = df_all['model_size'] + "_" + df_all["data_size"]
df_all

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,0,bace_classification,classification,0.648188,,0.001820,0.0,0,0.993232,6,6,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,0,bace_classification,classification,0.760688,,1.253343,-8.0,1,0.717989,5,5,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,0,bace_classification,classification,0.805435,,8.985626,-12.0,2,0.650228,0,3,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_10m,0,bace_classification,classification,0.800000,,2.386729,-11.0,3,0.651059,1,4,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
4,0,ChemBart_Medium_10m,0,bace_classification,classification,0.808333,,5.272629,-12.0,4,0.660707,2,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26455,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.773162,,4.677256,-10.0,2,0.181836,5,3,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26456,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.770786,,6.656599,-10.0,3,0.169517,4,4,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26457,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.808014,,1.806867,-12.0,4,0.155549,0,2,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26458,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.820493,,2.071732,-13.0,5,0.160060,1,1,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


# Regression DF

In [9]:
df_reg = df_all[df_all['dataset_type'] == 'regression']
df_reg

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
7,0,ChemBart_Medium_10m,0,bace_regression,regression,0.878964,0.731820,2.095776,-28.0,0,0.375684,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
8,0,ChemBart_Medium_10m,0,bace_regression,regression,1.015678,0.823957,4.048009,-25.0,1,0.552969,6,6,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
9,0,ChemBart_Medium_10m,0,bace_regression,regression,0.970207,0.802336,2.573282,-24.0,2,0.518979,4,4,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
10,0,ChemBart_Medium_10m,0,bace_regression,regression,0.996983,0.756401,2.810645,-21.0,3,0.488689,3,5,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
11,0,ChemBart_Medium_10m,0,bace_regression,regression,0.889537,0.681805,4.754197,-27.0,4,0.534026,5,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26448,4,ChemLlama_Small_30m,6,lipo,regression,0.791097,0.623674,1.391235,-29.0,2,0.612362,4,5,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26449,4,ChemLlama_Small_30m,6,lipo,regression,0.780469,0.614752,2.166945,-35.0,3,0.596495,3,3,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26450,4,ChemLlama_Small_30m,6,lipo,regression,0.752072,0.583458,3.094517,-39.0,4,0.572883,2,2,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26451,4,ChemLlama_Small_30m,6,lipo,regression,0.738526,0.571243,5.030823,-41.0,5,0.561690,1,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


### Best Regression

In [10]:
df_reg_best = df_reg.groupby(by=['ft_version', 'model_mtr_name', 'dataset_name'])['loss'].min().reset_index()
df_reg_best = pd.merge(df_reg, df_reg_best, on=['ft_version', 'model_mtr_name', 'dataset_name', 'loss'])
df_reg_best

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,4,delaney,regression,0.456320,0.357732,7.292727,-37.0,3,0.371624,0,3,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,4,lipo,regression,0.721048,0.566975,2.102943,-43.0,6,0.534508,0,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,6,bace_regression,regression,0.858430,0.718267,1.841017,-23.0,0,0.373197,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_20m,0,delaney,regression,0.473180,0.379224,7.223428,-40.0,6,0.383850,0,0,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
4,0,ChemBart_Medium_20m,2,bace_regression,regression,0.815301,0.664588,3.910137,-24.0,3,0.374928,0,0,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,4,ChemLlama_Small_20m,1,bace_regression,regression,0.913419,0.755842,3.620652,-25.0,0,0.388239,0,1,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
266,4,ChemLlama_Small_20m,6,delaney,regression,0.468015,0.381498,1.597436,-39.0,6,0.392031,0,1,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
267,4,ChemLlama_Small_30m,1,bace_regression,regression,0.887489,0.721201,1.583451,-25.0,0,0.390893,0,3,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
268,4,ChemLlama_Small_30m,3,lipo,regression,0.746799,0.582347,1.576705,-40.0,5,0.544946,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


In [11]:
# df_reg_best.sort_values(by=['ft_version', 'model_mtr_name']).to_csv('./df_reg_best.csv', index=False)

In [12]:
df_reg_best.sort_values(by=['dataset_name', 'metric_1'], ascending=True)

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
246,4,ChemBerta_Small_20m,0,bace_regression,regression,0.714798,0.577076,2.897342,-29.0,4,0.363430,0,0,ChemBerta,Small,20m,ChemBerta_Small,ChemBerta_20m,Small_20m
64,1,ChemBart_Small_10m,1,bace_regression,regression,0.754425,0.623590,8.168752,-28.0,3,0.410206,0,0,ChemBart,Small,10m,ChemBart_Small,ChemBart_10m,Small_10m
147,2,ChemLlama_Medium_20m,1,bace_regression,regression,0.764033,0.637440,4.116205,-26.0,1,0.384491,0,0,ChemLlama,Medium,20m,ChemLlama_Medium,ChemLlama_20m,Medium_20m
114,2,ChemBart_Medium_30m,1,bace_regression,regression,0.765883,0.611007,1.272846,-28.0,3,0.389012,0,0,ChemBart,Medium,30m,ChemBart_Medium,ChemBart_30m,Medium_30m
126,2,ChemBerta_Medium_10m,0,bace_regression,regression,0.784334,0.626046,6.174128,-28.0,1,0.371112,0,0,ChemBerta,Medium,10m,ChemBerta_Medium,ChemBerta_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,2,ChemLlama_Small_10m,4,lipo,regression,0.764387,0.597764,2.794790,-38.0,6,0.518847,0,1,ChemLlama,Small,10m,ChemLlama_Small,ChemLlama_10m,Small_10m
158,2,ChemLlama_Small_20m,2,lipo,regression,0.766594,0.610190,5.638198,-34.0,5,0.537129,0,0,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
101,1,ChemLlama_Small_10m,4,lipo,regression,0.769646,0.598591,7.212917,-40.0,6,0.546262,0,2,ChemLlama,Small,10m,ChemLlama_Small,ChemLlama_10m,Small_10m
57,1,ChemBart_Medium_20m,2,lipo,regression,0.770784,0.611809,2.087991,-36.0,6,0.530276,0,1,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m


# Best Reg only by Model Type

In [13]:
utils.calculate_SEM(df_reg_best.groupby(by=['model_type', 'dataset_name'])['metric_1'])

Unnamed: 0,model_type,dataset_name,metric_1,standard_error
0,ChemBart,bace_regression,0.895847,0.029971
1,ChemBart,delaney,0.478322,0.012365
2,ChemBart,lipo,0.737041,0.004775
3,ChemBerta,bace_regression,0.922637,0.03772
4,ChemBerta,delaney,0.488503,0.018287
5,ChemBerta,lipo,0.735585,0.006088
6,ChemLlama,bace_regression,0.918288,0.029837
7,ChemLlama,delaney,0.491942,0.031576
8,ChemLlama,lipo,0.741595,0.005231


In [14]:
utils.calculate_SEM(df_reg_best.groupby(by=['model_mtr_name', 'dataset_name'])['metric_1'])

Unnamed: 0,model_mtr_name,dataset_name,metric_1,standard_error
0,ChemBart_Medium_10m,bace_regression,0.875031,0.008909
1,ChemBart_Medium_10m,delaney,0.471799,0.004381
2,ChemBart_Medium_10m,lipo,0.73069,0.002637
3,ChemBart_Medium_20m,bace_regression,0.85417,0.007054
4,ChemBart_Medium_20m,delaney,0.508952,0.008036
5,ChemBart_Medium_20m,lipo,0.736278,0.002765
6,ChemBart_Medium_30m,bace_regression,0.874283,0.013085
7,ChemBart_Medium_30m,delaney,0.474759,0.004323
8,ChemBart_Medium_30m,lipo,0.730323,0.002143
9,ChemBart_Small_10m,bace_regression,0.879805,0.014006


In [15]:
utils.calculate_SEM(df_reg_best.groupby(by=['mts', 'dataset_name'])['metric_1'])

Unnamed: 0,mts,dataset_name,metric_1,standard_error
0,ChemBart_Medium,bace_regression,0.867828,0.016217
1,ChemBart_Medium,delaney,0.48517,0.010234
2,ChemBart_Medium,lipo,0.73243,0.00411
3,ChemBart_Small,bace_regression,0.923866,0.023956
4,ChemBart_Small,delaney,0.471475,0.00693
5,ChemBart_Small,lipo,0.741651,0.002046
6,ChemBerta_Medium,bace_regression,0.929328,0.030752
7,ChemBerta_Medium,delaney,0.495274,0.013359
8,ChemBerta_Medium,lipo,0.731018,0.002995
9,ChemBerta_Small,bace_regression,0.915947,0.022859


In [16]:
utils.calculate_SEM(df_reg_best.groupby(by=['mtds', 'dataset_name'])['metric_1'])

Unnamed: 0,mtds,dataset_name,metric_1,standard_error
0,ChemBart_10m,bace_regression,0.877418,0.015657
1,ChemBart_10m,delaney,0.459106,0.005655
2,ChemBart_10m,lipo,0.735726,0.002937
3,ChemBart_20m,bace_regression,0.926905,0.020937
4,ChemBart_20m,delaney,0.495727,0.008492
5,ChemBart_20m,lipo,0.737878,0.003034
6,ChemBart_30m,bace_regression,0.883218,0.014868
7,ChemBart_30m,delaney,0.480133,0.005685
8,ChemBart_30m,lipo,0.737518,0.002559
9,ChemBerta_10m,bace_regression,0.902763,0.016915


In [17]:
utils.calculate_SEM(df_reg_best.groupby(by=['msds', 'dataset_name'])['metric_1'])

Unnamed: 0,msds,dataset_name,metric_1,standard_error
0,Medium_10m,bace_regression,0.879725,0.013959
1,Medium_10m,delaney,0.472565,0.010994
2,Medium_10m,lipo,0.731007,0.003424
3,Medium_20m,bace_regression,0.906984,0.032379
4,Medium_20m,delaney,0.48981,0.013153
5,Medium_20m,lipo,0.737211,0.00353
6,Medium_30m,bace_regression,0.904375,0.017003
7,Medium_30m,delaney,0.470251,0.007177
8,Medium_30m,lipo,0.730185,0.002829
9,Small_10m,bace_regression,0.922224,0.028567


In [18]:
df_reg_best

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,4,delaney,regression,0.456320,0.357732,7.292727,-37.0,3,0.371624,0,3,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,4,lipo,regression,0.721048,0.566975,2.102943,-43.0,6,0.534508,0,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,6,bace_regression,regression,0.858430,0.718267,1.841017,-23.0,0,0.373197,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_20m,0,delaney,regression,0.473180,0.379224,7.223428,-40.0,6,0.383850,0,0,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
4,0,ChemBart_Medium_20m,2,bace_regression,regression,0.815301,0.664588,3.910137,-24.0,3,0.374928,0,0,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,4,ChemLlama_Small_20m,1,bace_regression,regression,0.913419,0.755842,3.620652,-25.0,0,0.388239,0,1,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
266,4,ChemLlama_Small_20m,6,delaney,regression,0.468015,0.381498,1.597436,-39.0,6,0.392031,0,1,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
267,4,ChemLlama_Small_30m,1,bace_regression,regression,0.887489,0.721201,1.583451,-25.0,0,0.390893,0,3,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
268,4,ChemLlama_Small_30m,3,lipo,regression,0.746799,0.582347,1.576705,-40.0,5,0.544946,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


## Best Reg Avg By Epoch

In [19]:
# df_reg_best_ep
df_reg_best_ep = df_reg.groupby(by=['ft_version', 'model_mtr_name', 'model_mtr_ep','dataset_name'])['metric_1'].min().reset_index()
df_reg_best_ep = pd.merge(df_reg, df_reg_best_ep, on=['ft_version', 'model_mtr_name', 'model_mtr_ep', 'dataset_name', 'metric_1'])
df_reg_best_ep

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,0,bace_regression,regression,0.878964,0.731820,2.095776,-28.0,0,0.375684,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,0,delaney,regression,0.473740,0.385443,1.035222,-37.0,6,0.404661,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,0,lipo,regression,0.748903,0.579198,3.535616,-41.0,5,0.582682,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_10m,1,bace_regression,regression,0.851791,0.687378,1.711962,-24.0,3,0.443988,1,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
4,0,ChemBart_Medium_10m,1,delaney,regression,0.429409,0.357494,1.063178,-39.0,5,0.443409,2,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1885,4,ChemLlama_Small_30m,5,delaney,regression,0.435942,0.356790,1.598363,-38.0,4,0.417539,2,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1886,4,ChemLlama_Small_30m,5,lipo,regression,0.724260,0.564762,1.706134,-43.0,5,0.556957,1,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1887,4,ChemLlama_Small_30m,6,bace_regression,regression,0.829506,0.667297,3.489122,-26.0,0,0.420002,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1888,4,ChemLlama_Small_30m,6,delaney,regression,0.476766,0.395950,4.205481,-34.0,2,0.376626,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


In [20]:
df_reg_best_ep_mean = df_reg_best_ep.groupby(by=['model_mtr_name', 'dataset_name', 'model_mtr_ep'])['metric_1'].mean()
df_reg_best_ep_mean = pd.merge(
    df_reg_best_ep_mean,
    df_reg_best_ep[["model_mtr_name", "model_mtr_ep", "dataset_name", "metric_1", "model_type", "model_size", "data_size", "mts", "mtds", "msds"]],
    on=['model_mtr_name', 'dataset_name', 'model_mtr_ep']
)
df_reg_best_ep_mean = df_reg_best_ep_mean.rename(columns={
    'metric_1_x': 'metric_1_mean',
    'metric_1_y': 'metric_1'
})

df_reg_best_ep_mean = df_reg_best_ep_mean.drop_duplicates(subset=['model_mtr_name', 'dataset_name', 'model_mtr_ep'], keep='first')
df_reg_best_ep_mean.drop(columns=['metric_1'], inplace=True)
df_reg_best_ep_mean

Unnamed: 0,model_mtr_name,dataset_name,model_mtr_ep,metric_1_mean,model_type,model_size,data_size,mts,mtds,msds
0,ChemBart_Medium_10m,bace_regression,0,0.852204,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
5,ChemBart_Medium_10m,bace_regression,1,0.858871,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
10,ChemBart_Medium_10m,bace_regression,2,0.813708,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
15,ChemBart_Medium_10m,bace_regression,3,0.875578,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
20,ChemBart_Medium_10m,bace_regression,4,0.864862,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...
1865,ChemLlama_Small_30m,lipo,2,0.737494,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1870,ChemLlama_Small_30m,lipo,3,0.743630,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1875,ChemLlama_Small_30m,lipo,4,0.733452,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1880,ChemLlama_Small_30m,lipo,5,0.742287,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


# Classification DF

In [21]:
df_cls = df_all[df_all['dataset_type'] == 'classification']
df_cls

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,0,bace_classification,classification,0.648188,,0.001820,0.0,0,0.993232,6,6,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,0,bace_classification,classification,0.760688,,1.253343,-8.0,1,0.717989,5,5,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,0,bace_classification,classification,0.805435,,8.985626,-12.0,2,0.650228,0,3,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_10m,0,bace_classification,classification,0.800000,,2.386729,-11.0,3,0.651059,1,4,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
4,0,ChemBart_Medium_10m,0,bace_classification,classification,0.808333,,5.272629,-12.0,4,0.660707,2,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26455,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.773162,,4.677256,-10.0,2,0.181836,5,3,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26456,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.770786,,6.656599,-10.0,3,0.169517,4,4,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26457,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.808014,,1.806867,-12.0,4,0.155549,0,2,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
26458,4,ChemLlama_Small_30m,6,tox21_sr_p53,classification,0.820493,,2.071732,-13.0,5,0.160060,1,1,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


### Best Classification

In [22]:
df_cls_best = df_cls.groupby(by=['ft_version', 'model_mtr_name', 'dataset_name'])['loss'].min().reset_index()
df_cls_best = pd.merge(df_cls, df_cls_best, on=['ft_version', 'model_mtr_name', 'dataset_name', 'loss'])
df_cls_best

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,0,bace_classification,classification,0.805435,,8.985626,-12.0,2,0.650228,0,3,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,2,tox21_sr_p53,classification,0.757736,,4.367270,-9.0,4,0.160418,0,4,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,5,hiv,classification,0.772591,,1.482182,-26.0,6,0.077621,0,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_20m,1,bace_classification,classification,0.818297,,7.892324,-13.0,3,0.666523,0,1,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
4,0,ChemBart_Medium_20m,5,hiv,classification,0.777692,,1.645820,-27.0,6,0.077180,0,1,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,4,ChemLlama_Small_20m,6,hiv,classification,0.798432,,1.394515,-31.0,6,0.078940,0,0,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
266,4,ChemLlama_Small_20m,6,tox21_sr_p53,classification,0.808766,,1.590101,-12.0,4,0.156561,0,0,ChemLlama,Small,20m,ChemLlama_Small,ChemLlama_20m,Small_20m
267,4,ChemLlama_Small_30m,0,hiv,classification,0.789028,,1.068286,-29.0,6,0.077833,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
268,4,ChemLlama_Small_30m,3,bace_classification,classification,0.804891,,9.920823,-12.0,2,0.679348,0,1,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


In [23]:
# df_cls_best.sort_values(by=['ft_version', 'model_mtr_name']).to_csv('./df_cls_best.csv', index=False)

# Best CLS only by Model Type

In [24]:
utils.calculate_SEM(df_cls_best.groupby(by=['model_type', 'dataset_name'])['metric_1'])

Unnamed: 0,model_type,dataset_name,metric_1,standard_error
0,ChemBart,bace_classification,0.794559,0.008263
1,ChemBart,hiv,0.76109,0.00446
2,ChemBart,tox21_sr_p53,0.788581,0.007211
3,ChemBerta,bace_classification,0.802156,0.003802
4,ChemBerta,hiv,0.758133,0.007284
5,ChemBerta,tox21_sr_p53,0.788588,0.006989
6,ChemLlama,bace_classification,0.792156,0.005415
7,ChemLlama,hiv,0.769427,0.008741
8,ChemLlama,tox21_sr_p53,0.789914,0.00725


In [25]:
utils.calculate_SEM(df_cls_best.groupby(by=['model_mtr_name', 'dataset_name'])['metric_1'])

Unnamed: 0,model_mtr_name,dataset_name,metric_1,standard_error
0,ChemBart_Medium_10m,bace_classification,0.808514,0.001443
1,ChemBart_Medium_10m,hiv,0.752857,0.001727
2,ChemBart_Medium_10m,tox21_sr_p53,0.776235,0.001987
3,ChemBart_Medium_20m,bace_classification,0.793877,0.002222
4,ChemBart_Medium_20m,hiv,0.77159,0.001927
5,ChemBart_Medium_20m,tox21_sr_p53,0.786303,0.001284
6,ChemBart_Medium_30m,bace_classification,0.801957,0.000936
7,ChemBart_Medium_30m,hiv,0.760252,0.001048
8,ChemBart_Medium_30m,tox21_sr_p53,0.784487,0.003456
9,ChemBart_Small_10m,bace_classification,0.798659,0.003296


In [26]:
utils.calculate_SEM(df_cls_best.groupby(by=['mts', 'dataset_name'])['metric_1'])

Unnamed: 0,mts,dataset_name,metric_1,standard_error
0,ChemBart_Medium,bace_classification,0.801449,0.002984
1,ChemBart_Medium,hiv,0.761566,0.003196
2,ChemBart_Medium,tox21_sr_p53,0.782342,0.004022
3,ChemBart_Small,bace_classification,0.787669,0.007495
4,ChemBart_Small,hiv,0.760613,0.003218
5,ChemBart_Small,tox21_sr_p53,0.794821,0.005748
6,ChemBerta_Medium,bace_classification,0.802053,0.002103
7,ChemBerta_Medium,hiv,0.759565,0.005093
8,ChemBerta_Medium,tox21_sr_p53,0.78774,0.005578
9,ChemBerta_Small,bace_classification,0.802258,0.003248


In [27]:
utils.calculate_SEM(df_cls_best.groupby(by=['mtds', 'dataset_name'])['metric_1'])

Unnamed: 0,mtds,dataset_name,metric_1,standard_error
0,ChemBart_10m,bace_classification,0.803587,0.003536
1,ChemBart_10m,hiv,0.754529,0.003013
2,ChemBart_10m,tox21_sr_p53,0.785819,0.00352
3,ChemBart_20m,bace_classification,0.794094,0.004766
4,ChemBart_20m,hiv,0.764901,0.002499
5,ChemBart_20m,tox21_sr_p53,0.787563,0.00519
6,ChemBart_30m,bace_classification,0.785996,0.005634
7,ChemBart_30m,hiv,0.763838,0.001833
8,ChemBart_30m,tox21_sr_p53,0.792362,0.003947
9,ChemBerta_10m,bace_classification,0.799384,0.001983


In [28]:
utils.calculate_SEM(df_cls_best.groupby(by=['msds', 'dataset_name'])['metric_1'])

Unnamed: 0,msds,dataset_name,metric_1,standard_error
0,Medium_10m,bace_classification,0.798478,0.00401
1,Medium_10m,hiv,0.759402,0.004495
2,Medium_10m,tox21_sr_p53,0.78279,0.004555
3,Medium_20m,bace_classification,0.801208,0.003581
4,Medium_20m,hiv,0.759618,0.00462
5,Medium_20m,tox21_sr_p53,0.786207,0.004964
6,Medium_30m,bace_classification,0.799324,0.002281
7,Medium_30m,hiv,0.766282,0.004334
8,Medium_30m,tox21_sr_p53,0.79464,0.004559
9,Small_10m,bace_classification,0.795471,0.003714


## Best Class Avg By Epoch

In [29]:
# df_cls_best_ep
df_cls_best_ep = df_cls.groupby(by=['ft_version', 'model_mtr_name', 'model_mtr_ep','dataset_name'])['metric_1'].max().reset_index()
df_cls_best_ep = pd.merge(df_cls, df_cls_best_ep, on=['ft_version', 'model_mtr_name', 'model_mtr_ep', 'dataset_name', 'metric_1'])
df_cls_best_ep

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,0,bace_classification,classification,0.811413,,2.964394,-12.0,5,0.681843,4,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,0,hiv,classification,0.779646,,7.010327,-28.0,4,0.081900,2,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,0,tox21_sr_p53,classification,0.810510,,1.180567,-12.0,5,0.179241,3,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_10m,1,bace_classification,classification,0.811051,,3.173805,-12.0,3,0.685923,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
4,0,ChemBart_Medium_10m,1,hiv,classification,0.777790,,1.576771,-27.0,2,0.082176,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1900,4,ChemLlama_Small_30m,5,hiv,classification,0.782080,,2.401455,-28.0,5,0.084080,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1901,4,ChemLlama_Small_30m,5,tox21_sr_p53,classification,0.825786,,8.034399,-14.0,6,0.162883,0,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1902,4,ChemLlama_Small_30m,6,bace_classification,classification,0.816848,,1.047220,-12.0,1,0.736842,1,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1903,4,ChemLlama_Small_30m,6,hiv,classification,0.788864,,1.150852,-29.0,5,0.087338,3,0,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


In [30]:
df_cls_best_ep_mean = df_cls_best_ep.groupby(by=['model_mtr_name', 'dataset_name', 'model_mtr_ep'])['metric_1'].mean()
df_cls_best_ep_mean = pd.merge(
    df_cls_best_ep_mean,
    df_cls_best_ep[["model_mtr_name", "model_mtr_ep", "dataset_name", "metric_1", "model_type", "model_size", "data_size", "mts", "mtds", "msds"]],
    on=['model_mtr_name', 'dataset_name', 'model_mtr_ep']
)
df_cls_best_ep_mean = df_cls_best_ep_mean.rename(columns={
    'metric_1_x': 'metric_1_mean',
    'metric_1_y': 'metric_1'
})

df_cls_best_ep_mean = df_cls_best_ep_mean.drop_duplicates(subset=['model_mtr_name', 'dataset_name', 'model_mtr_ep'], keep='first')
df_cls_best_ep_mean.drop(columns=['metric_1'], inplace=True)
df_cls_best_ep_mean

Unnamed: 0,model_mtr_name,dataset_name,model_mtr_ep,metric_1_mean,model_type,model_size,data_size,mts,mtds,msds
0,ChemBart_Medium_10m,bace_classification,0,0.816775,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
5,ChemBart_Medium_10m,bace_classification,1,0.804197,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
11,ChemBart_Medium_10m,bace_classification,2,0.810688,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
16,ChemBart_Medium_10m,bace_classification,3,0.811449,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
21,ChemBart_Medium_10m,bace_classification,4,0.814384,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
...,...,...,...,...,...,...,...,...,...,...
1880,ChemLlama_Small_30m,tox21_sr_p53,2,0.807208,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1885,ChemLlama_Small_30m,tox21_sr_p53,3,0.800460,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1890,ChemLlama_Small_30m,tox21_sr_p53,4,0.805416,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m
1895,ChemLlama_Small_30m,tox21_sr_p53,5,0.806306,ChemLlama,Small,30m,ChemLlama_Small,ChemLlama_30m,Small_30m


# TES and STD

In [31]:
def TES_STD_calculator_low_level(well_ordered_df, is_reg, by:str):
    list_ds_name = sorted(well_ordered_df['dataset_name'].unique())

    ser_groupby = well_ordered_df.groupby(by=[by]+['dataset_name'])['metric_1']
    avg_of_best = ser_groupby.mean()
    df_avg_of_best = avg_of_best.reset_index()

    list_best_metric = list()
    for ds in list_ds_name:
        if is_reg:
            local_best = df_avg_of_best[df_avg_of_best['dataset_name'] == ds]['metric_1'].min()
        else:
            local_best = df_avg_of_best[df_avg_of_best['dataset_name'] == ds]['metric_1'].max()
        list_best_metric.append(local_best)
    # now it is an array
    list_best_metric = np.array(list_best_metric)

    list_error = list()
    list_mt = sorted(well_ordered_df[by].unique())
    for mt in list_mt:
        list_error_ds = list()
        for idx, ds in enumerate(list_ds_name):
            local_df = well_ordered_df[well_ordered_df[by] == mt]
            arr_local_metric = local_df[local_df['dataset_name'] == ds]['metric_1']
            # local_ae = np.abs(arr_local_metric - list_best_metric[idx])
            if is_reg:
                local_ae = arr_local_metric - list_best_metric[idx]
            else:
                local_ae = list_best_metric[idx] - arr_local_metric
            llist_error_ds.append(local_ae)
        list_error.append(list_error_ds)

    return np.array(list_error)
    
    

# Std from Low Level 

## overall TES and STD

## Model Type

In [32]:
utils.calculate_SEM(df_reg_best.groupby(by=['model_type', 'dataset_name'])['metric_1']).sort_values(by=['dataset_name', 'metric_1'])

Unnamed: 0,model_type,dataset_name,metric_1,standard_error
0,ChemBart,bace_regression,0.895847,0.029971
6,ChemLlama,bace_regression,0.918288,0.029837
3,ChemBerta,bace_regression,0.922637,0.03772
1,ChemBart,delaney,0.478322,0.012365
4,ChemBerta,delaney,0.488503,0.018287
7,ChemLlama,delaney,0.491942,0.031576
5,ChemBerta,lipo,0.735585,0.006088
2,ChemBart,lipo,0.737041,0.004775
8,ChemLlama,lipo,0.741595,0.005231


In [33]:
utils.calculate_SEM(df_cls_best.groupby(by=['model_type', 'dataset_name'])['metric_1']).sort_values(by=['dataset_name', 'metric_1'], ascending=[True, False])

Unnamed: 0,model_type,dataset_name,metric_1,standard_error
3,ChemBerta,bace_classification,0.802156,0.003802
0,ChemBart,bace_classification,0.794559,0.008263
6,ChemLlama,bace_classification,0.792156,0.005415
7,ChemLlama,hiv,0.769427,0.008741
1,ChemBart,hiv,0.76109,0.00446
4,ChemBerta,hiv,0.758133,0.007284
8,ChemLlama,tox21_sr_p53,0.789914,0.00725
5,ChemBerta,tox21_sr_p53,0.788588,0.006989
2,ChemBart,tox21_sr_p53,0.788581,0.007211


In [34]:
df_reg_best[df_reg_best['model_type'] == 'ChemBart']

Unnamed: 0,ft_version,model_mtr_name,model_mtr_ep,dataset_name,dataset_type,metric_1,metric_2,p_value_mantissa,p_value_exponent,epoch,loss,loss_ranking,metric_1_ranking,model_type,model_size,data_size,mts,mtds,msds
0,0,ChemBart_Medium_10m,4,delaney,regression,0.456320,0.357732,7.292727,-37.0,3,0.371624,0,3,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
1,0,ChemBart_Medium_10m,4,lipo,regression,0.721048,0.566975,2.102943,-43.0,6,0.534508,0,1,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
2,0,ChemBart_Medium_10m,6,bace_regression,regression,0.858430,0.718267,1.841017,-23.0,0,0.373197,0,0,ChemBart,Medium,10m,ChemBart_Medium,ChemBart_10m,Medium_10m
3,0,ChemBart_Medium_20m,0,delaney,regression,0.473180,0.379224,7.223428,-40.0,6,0.383850,0,0,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
4,0,ChemBart_Medium_20m,2,bace_regression,regression,0.815301,0.664588,3.910137,-24.0,3,0.374928,0,0,ChemBart,Medium,20m,ChemBart_Medium,ChemBart_20m,Medium_20m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,4,ChemBart_Small_20m,5,lipo,regression,0.730026,0.568839,1.231096,-43.0,5,0.541574,0,1,ChemBart,Small,20m,ChemBart_Small,ChemBart_20m,Small_20m
230,4,ChemBart_Small_20m,6,delaney,regression,0.465059,0.377089,5.311194,-37.0,6,0.389186,0,2,ChemBart,Small,20m,ChemBart_Small,ChemBart_20m,Small_20m
231,4,ChemBart_Small_30m,1,lipo,regression,0.747914,0.586111,5.925237,-41.0,6,0.546155,0,0,ChemBart,Small,30m,ChemBart_Small,ChemBart_30m,Small_30m
232,4,ChemBart_Small_30m,4,bace_regression,regression,0.858674,0.695968,1.096690,-27.0,1,0.386848,0,4,ChemBart,Small,30m,ChemBart_Small,ChemBart_30m,Small_30m


In [35]:
by = 'model_type'
arr_tes_reg_overall = TES_STD_calculator_low_level(well_ordered_df=df_reg_best, is_reg=True, by=by)
arr_tes_cls_overall = TES_STD_calculator_low_level(well_ordered_df=df_cls_best, is_reg=False, by=by)

arr_tes_overall = np.hstack((arr_tes_reg_overall, arr_tes_cls_overall))
tes_lv1 = np.mean(arr_tes_overall, axis=2)
border = arr_tes_overall.shape[1]//2

for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(np.sum(tes_lv1[idx]))
    # tes std
    print(np.std(arr_tes_overall[idx]))
    
print("@@@@@ LOW @@@@@@")
print("---- Regression ----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][:border]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][:border]):.6f}")

print()
print("---- Classification----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][border:]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][border:]):.6f}")

NameError: name 'llist_error_ds' is not defined

# MTDS

In [None]:
utils.calculate_SEM(df_reg_best.groupby(by=['mtds', 'dataset_name'])['metric_1']).sort_values(by=['dataset_name', 'metric_1'])

In [None]:
utils.calculate_SEM(df_cls_best.groupby(by=['mtds', 'dataset_name'])['metric_1']).sort_values(by=['dataset_name', 'metric_1'], ascending=[True, False])

In [None]:
by = 'mtds'
arr_tes_reg = TES_STD_calculator_low_level(well_ordered_df=df_reg_best, is_reg=True, by=by)
arr_tes_cls = TES_STD_calculator_low_level(well_ordered_df=df_cls_best, is_reg=False, by=by)

arr_tes_overall = np.hstack((arr_tes_reg, arr_tes_cls))
tes_lv1 = np.mean(arr_tes_overall, axis=2)
border = arr_tes_overall.shape[1]//2

for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(np.sum(tes_lv1[idx]))
    # tes std
    print(np.std(arr_tes_overall[idx]))
    
print("@@@@@ LOW @@@@@@")
print("---- Regression ----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][:border]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][:border]):.6f}")

print()
print("---- Classification----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][border:]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][border:]):.6f}")

## MTS

In [None]:
by = 'mts'
arr_tes_reg = TES_STD_calculator_low_level(well_ordered_df=df_reg_best, is_reg=True, by=by)
arr_tes_cls = TES_STD_calculator_low_level(well_ordered_df=df_cls_best, is_reg=False, by=by)

arr_tes_overall = np.hstack((arr_tes_reg, arr_tes_cls))
tes_lv1 = np.mean(arr_tes_overall, axis=2)
border = arr_tes_overall.shape[1]//2

for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(np.sum(tes_lv1[idx]))
    # tes std
    print(np.std(arr_tes_overall[idx]))
    
print("@@@@@ LOW @@@@@@")
print("---- Regression ----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][:border]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][:border]):.6f}")

print()
print("---- Classification----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][border:]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][border:]):.6f}")

## MSDS

In [None]:
utils.calculate_SEM(df_reg_best.groupby(by=['msds', 'dataset_name'])['metric_1']).sort_values(by=['dataset_name', 'metric_1'])

In [None]:
utils.calculate_SEM(df_cls_best.groupby(by=['msds', 'dataset_name'])['metric_1']).sort_values(by=['dataset_name', 'metric_1'], ascending=[True, False])

In [None]:
by = 'msds'
arr_tes_reg = TES_STD_calculator_low_level(well_ordered_df=df_reg_best, is_reg=True, by=by)
arr_tes_cls = TES_STD_calculator_low_level(well_ordered_df=df_cls_best, is_reg=False, by=by)

arr_tes_overall = np.hstack((arr_tes_reg, arr_tes_cls))
tes_lv1 = np.mean(arr_tes_overall, axis=2)
border = arr_tes_overall.shape[1]//2

for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(np.sum(tes_lv1[idx]))
    # tes std
    print(np.std(arr_tes_overall[idx]))
    
print("@@@@@ LOW @@@@@@")
print("---- Regression ----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][:border]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][:border]):.6f}")

print()
print("---- Classification----")
for idx, mt in enumerate(df_reg[by].unique()):
    print(f"------ {mt} -----")
    # tes
    print(f"{np.sum(tes_lv1[idx][border:]):.6f}")
    # tes std
    print(f"{np.std(arr_tes_overall[idx][border:]):.6f}")