In [3]:
import pandas as pd
import numpy as np
import os
from os.path import join, exists
import json
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.metrics import roc_curve, auc, precision_recall_curve


from sklearn.metrics import (
    matthews_corrcoef,
    balanced_accuracy_score,
    f1_score,
    average_precision_score,
)
from sklearn.metrics import precision_score, recall_score

In [4]:
# Name pattern with a single "{}" placeholder. It is not referenced by the
# cells visible in this notebook section.
# Earlier pattern, kept for reference: "embdim512_DRIAMS-{}_{}_sp{}_results"
template = "rand_DRIAMS-B_{}_metrics"

# Split strategies and dataset identifiers (one letter per dataset) that the
# loading cell iterates over.
splits = ["random", "partitioned"]
datasets = list("ABCD")


In [5]:
# Load every per-run test-metrics JSON into one table, one row per file.
# Files are looked up as
#   outputs/DrugMajorityClassifier_metrics/test_metrics_{split}_{seed}_{dataset}.json
# for seeds 0-9, and each row is tagged with its dataset and split.
metrics_dir = "outputs/DrugMajorityClassifier_metrics"
records = []
skipped_files = []  # (path, error) for every run that could not be loaded

for dset in datasets:
    for sp in splits:
        for i in range(10):
            path = join(metrics_dir, f"test_metrics_{sp}_{i}_{dset}.json")
            try:
                with open(path, "r") as f:
                    met = json.load(f)
            except (OSError, ValueError) as err:
                # Best-effort load: a missing or unreadable/malformed file is
                # skipped, but it is recorded instead of silently vanishing.
                # Anything else (e.g. an unexpected schema) is a real error
                # and is allowed to propagate.
                skipped_files.append((path, repr(err)))
                continue
            met["dataset"] = dset
            met["split"] = sp
            records.append(met)

if skipped_files:
    # Make gaps visible: the averages below are computed over fewer runs
    # for any (split, dataset) pair that has skipped files.
    print(f"Skipped {len(skipped_files)} metrics file(s); inspect `skipped_files` for details.")

metrics_df = pd.DataFrame(records)
# The seed is not a metric and must not be averaged, so it is never kept as a
# column. errors="ignore" keeps this working when the files carry no "seed"
# key or when no file was loaded at all (empty frame).
metrics_df = metrics_df.drop(columns=["seed"], errors="ignore")
metrics_df

Unnamed: 0,mcc,balanced_accuracy,f1,AUPRC,precision,recall,dataset,split
0,0.231590,0.549097,0.188759,-1,0.735099,0.108282,A,random
1,0.233713,0.549872,0.191289,-1,0.736245,0.109925,A,random
2,0.236636,0.549929,0.190788,-1,0.749384,0.109308,A,random
3,0.237528,0.550610,0.193243,-1,0.745774,0.111003,A,random
4,0.231067,0.548671,0.187197,-1,0.737549,0.107203,A,random
...,...,...,...,...,...,...,...,...
66,0.550699,0.730091,0.609832,-1,0.786369,0.498027,D,partitioned
67,0.556332,0.742043,0.623168,-1,0.756156,0.529962,D,partitioned
68,0.471498,0.683137,0.521893,-1,0.744603,0.401735,D,partitioned
69,0.535878,0.727702,0.602359,-1,0.757683,0.499883,D,partitioned


In [6]:
# Collapse the per-run rows into one row per (split, dataset) pair, holding
# the mean and the standard deviation of every metric column.
by_split_dataset = metrics_df.groupby(["split", "dataset"])
mean_df = by_split_dataset.mean().add_suffix("_average")
std_df = by_split_dataset.std().add_suffix("_std")

# Index-on-index merge: "<metric>_average" columns first, then the
# "<metric>_std" columns, aligned on the shared (split, dataset) index.
joined_df = pd.merge(mean_df, std_df, left_index=True, right_index=True)
joined_df

Unnamed: 0_level_0,Unnamed: 1_level_0,mcc_average,balanced_accuracy_average,f1_average,AUPRC_average,precision_average,recall_average,mcc_std,balanced_accuracy_std,f1_std,AUPRC_std,precision_std,recall_std
split,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
partitioned,A,0.177667,0.554055,0.218681,-1.0,0.445127,0.145215,0.015694,0.003909,0.009276,0.0,0.033344,0.00722
partitioned,B,0.274666,0.60134,0.347122,-1.0,0.539185,0.257811,0.038299,0.021121,0.046182,0.0,0.023316,0.047978
partitioned,C,0.294378,0.620608,0.405037,-1.0,0.540642,0.323957,0.02667,0.010814,0.019457,0.0,0.030074,0.015968
partitioned,D,0.531761,0.728509,0.598737,-1.0,0.742047,0.504265,0.03003,0.020828,0.035076,0.0,0.033368,0.04793
random,A,0.232559,0.549385,0.189655,-1.0,0.736386,0.108846,0.003623,0.000846,0.002536,0.0,0.008979,0.001569
random,B,0.349077,0.644255,0.42308,-1.0,0.543644,0.346369,0.01117,0.004627,0.009324,0.0,0.014158,0.008701
random,C,0.382517,0.649683,0.466956,-1.0,0.674917,0.357011,0.010934,0.004722,0.009347,0.0,0.012139,0.00867
random,D,0.595989,0.758505,0.63703,-1.0,0.762909,0.546877,0.006682,0.004424,0.006742,0.0,0.006193,0.009251


In [7]:
# Reorder the columns so each metric's average sits directly next to its
# standard deviation, following the order given in metrics_order.
metrics_order = ["mcc", "f1", "precision", "recall", "AUPRC", "balanced_accuracy"]
cols = [
    f"{name}_{stat}"
    for name in metrics_order
    for stat in ("average", "std")
]

joined_df = joined_df[cols]
joined_df

Unnamed: 0_level_0,Unnamed: 1_level_0,mcc_average,mcc_std,f1_average,f1_std,precision_average,precision_std,recall_average,recall_std,AUPRC_average,AUPRC_std,balanced_accuracy_average,balanced_accuracy_std
split,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
partitioned,A,0.177667,0.015694,0.218681,0.009276,0.445127,0.033344,0.145215,0.00722,-1.0,0.0,0.554055,0.003909
partitioned,B,0.274666,0.038299,0.347122,0.046182,0.539185,0.023316,0.257811,0.047978,-1.0,0.0,0.60134,0.021121
partitioned,C,0.294378,0.02667,0.405037,0.019457,0.540642,0.030074,0.323957,0.015968,-1.0,0.0,0.620608,0.010814
partitioned,D,0.531761,0.03003,0.598737,0.035076,0.742047,0.033368,0.504265,0.04793,-1.0,0.0,0.728509,0.020828
random,A,0.232559,0.003623,0.189655,0.002536,0.736386,0.008979,0.108846,0.001569,-1.0,0.0,0.549385,0.000846
random,B,0.349077,0.01117,0.42308,0.009324,0.543644,0.014158,0.346369,0.008701,-1.0,0.0,0.644255,0.004627
random,C,0.382517,0.010934,0.466956,0.009347,0.674917,0.012139,0.357011,0.00867,-1.0,0.0,0.649683,0.004722
random,D,0.595989,0.006682,0.63703,0.006742,0.762909,0.006193,0.546877,0.009251,-1.0,0.0,0.758505,0.004424


In [10]:
# Print one LaTeX table row per (split, dataset) group: the group key on its
# own line, then "avg (std)" cells for the selected metrics separated by
# " & " and terminated by a LaTeX line break.
# NOTE(review): AUPRC is -1 in every loaded row shown above, which looks like
# a placeholder rather than a score -- confirm before pasting these rows.
metrics = ["AUPRC", "balanced_accuracy", "mcc"]

for i, row in joined_df.iterrows():
    print(i)
    cells = [
        "{:.2f} ({:.2f}) ".format(row[f"{m}_average"], row[f"{m}_std"])
        for m in metrics
    ]
    # Joining puts separators between cells whatever the last metric is, and
    # "\\\\" is the explicit spelling of the two backslashes LaTeX needs
    # (the former "\\\ " relied on the invalid escape sequence "\ ").
    print(" & ".join(cells) + " \\\\ ")

('partitioned', 'A')
-1.00 (0.00)  & 0.55 (0.00)  & 0.18 (0.02)  \\ 
('partitioned', 'B')
-1.00 (0.00)  & 0.60 (0.02)  & 0.27 (0.04)  \\ 
('partitioned', 'C')
-1.00 (0.00)  & 0.62 (0.01)  & 0.29 (0.03)  \\ 
('partitioned', 'D')
-1.00 (0.00)  & 0.73 (0.02)  & 0.53 (0.03)  \\ 
('random', 'A')
-1.00 (0.00)  & 0.55 (0.00)  & 0.23 (0.00)  \\ 
('random', 'B')
-1.00 (0.00)  & 0.64 (0.00)  & 0.35 (0.01)  \\ 
('random', 'C')
-1.00 (0.00)  & 0.65 (0.00)  & 0.38 (0.01)  \\ 
('random', 'D')
-1.00 (0.00)  & 0.76 (0.00)  & 0.60 (0.01)  \\ 
