In [6]:
import pandas as pd
import os

from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss

import altair as alt

In [7]:
### making an assumption that "target actual is the binary value"

eval_results = {}

datasets = ["bace", "clintox", "deepchem_Lipophilicity", "HIV", "sol_del", "tox21"]

data_map={
    'HIV.csv': {'target':'HIV_active','structure':'smiles'},
    'bace.csv':{'target':'active','structure':'mol'},
    'tox21.csv':{'target':'NR-AhR','structure':'smiles'},
    'clintox.csv':{'target':'CT_TOX','structure':'smiles'},
    'sol_del.csv':{'target':'binned_sol','structure':'smiles'},
    'deepchem_Lipophilicity.csv':{'target':'drug_like','structure':'smiles'}   
}

pred_threshold = 0.5

for file in os.scandir(os.path.join("Complex_Models", "Predictions")):
    target = [data_map[dataset]['target'] for dataset in data_map if dataset[:-4] in file.name][0]
    dataset = [dataset for dataset in data_map if dataset[:-4] in file.name][0]
    df = pd.read_csv(file.path)
    y_actual = df["target_actual"].to_list()
    y_pred = df[target].apply(lambda x: 1 if x > pred_threshold else 0).to_list()

    eval_results[file.name] = {
        "dataset" : dataset,
        "Accuracy_Score": accuracy_score(y_actual, y_pred),
        "F1_Score": f1_score(y_actual, y_pred),
        "ROC_AUC_Score": roc_auc_score(y_actual, y_pred),
        "Log_Loss_Score": log_loss(y_actual, y_pred)
    }
    
df = pd.DataFrame(eval_results).T
df = df.reset_index()
df = df.rename(columns = {"index": "result_file"})
df['split_id'] = df['result_file'].apply(lambda x: "train" if "train" in x else "validate")
df['split_type'] = df['result_file'].apply(lambda x: "custer" if "cluster" in x else "random")
df  

Unnamed: 0,result_file,dataset,Accuracy_Score,F1_Score,ROC_AUC_Score,Log_Loss_Score,split_id,split_type
0,bace-cluster-train-pred.csv,bace.csv,0.931624,0.948417,0.919875,2.361658,train,custer
1,bace-cluster-validate-pred.csv,bace.csv,0.699115,0.802326,0.582665,10.392336,validate,custer
2,bace-random-train-pred.csv,bace.csv,0.987558,0.99073,0.981974,0.42973,train,random
3,bace-random-validate-pred.csv,bace.csv,0.845815,0.892966,0.791892,5.32545,validate,random
4,clintox-cluster-train-pred.csv,clintox.csv,0.980907,0.882353,0.924665,0.659457,train,custer
5,clintox-cluster-validate-pred.csv,clintox.csv,0.968326,0.0,0.5,1.093988,validate,custer
6,clintox-random-train-pred.csv,clintox.csv,0.984076,0.901961,0.938835,0.549986,train,random
7,clintox-random-validate-pred.csv,clintox.csv,0.932432,0.285714,0.664136,2.333737,validate,random
8,deepchem_Lipophilicity-cluster-train-pred.csv,deepchem_Lipophilicity.csv,0.996359,0.998117,0.954628,0.125774,train,custer
9,deepchem_Lipophilicity-cluster-validate-pred.csv,deepchem_Lipophilicity.csv,0.963492,0.981407,0.5,1.260969,validate,custer


In [8]:
pvt = pd.pivot(df, index = ["dataset", "split_type"], columns = ['split_id'], values = ["Accuracy_Score", "F1_Score", "ROC_AUC_Score", "Log_Loss_Score"])

pvt.columns.names = [None, None]

pvt

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy_Score,Accuracy_Score,F1_Score,F1_Score,ROC_AUC_Score,ROC_AUC_Score,Log_Loss_Score,Log_Loss_Score
Unnamed: 0_level_1,Unnamed: 1_level_1,train,validate,train,validate,train,validate,train,validate
dataset,split_type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
HIV.csv,custer,0.982865,0.967418,0.692978,0.377709,0.777729,0.629915,0.591818,1.125356
HIV.csv,random,0.995852,0.970827,0.938324,0.502762,0.954139,0.694375,0.143266,1.00762
bace.csv,custer,0.931624,0.699115,0.948417,0.802326,0.919875,0.582665,2.361658,10.392336
bace.csv,random,0.987558,0.845815,0.99073,0.892966,0.981974,0.791892,0.42973,5.32545
clintox.csv,custer,0.980907,0.968326,0.882353,0.0,0.924665,0.5,0.659457,1.093988
clintox.csv,random,0.984076,0.932432,0.901961,0.285714,0.938835,0.664136,0.549986,2.333737
deepchem_Lipophilicity.csv,custer,0.996359,0.963492,0.998117,0.981407,0.954628,0.5,0.125774,1.260969
deepchem_Lipophilicity.csv,random,0.998039,0.960317,0.998986,0.979691,0.971545,0.541343,0.067725,1.370612
sol_del.csv,custer,0.997914,0.846154,0.993151,0.59375,0.99596,0.720513,0.072032,5.313686
sol_del.csv,random,0.995825,0.876471,0.987013,0.588235,0.994869,0.719882,0.144215,4.266574


In [9]:
# need to come back and optimize the visualizations, but thinking something like multiple facets for each score type
# I also need to rotate text to avoid overlap

vis_df = df.melt(['result_file', 'dataset', 'split_id', 'split_type'], value_name = "score", var_name= "metric")

vis_df['dataset'] = vis_df['dataset'].str[:-4]
vis_df['dataset'] = vis_df['dataset'].str.replace('deepchem_', '')


vis_df.sample(5)


Unnamed: 0,result_file,dataset,split_id,split_type,metric,score
43,sol_del-random-validate-pred.csv,sol_del,validate,random,F1_Score,0.588235
57,deepchem_Lipophilicity-cluster-validate-pred.csv,Lipophilicity,validate,custer,ROC_AUC_Score,0.5
20,tox21-cluster-train-pred.csv,tox21,train,custer,Accuracy_Score,0.967307
0,bace-cluster-train-pred.csv,bace,train,custer,Accuracy_Score,0.931624
55,clintox-random-validate-pred.csv,clintox,validate,random,ROC_AUC_Score,0.664136


In [10]:
stack = None
chart = None

for i, metric in enumerate(vis_df['metric'].unique()):
    for j, split_type in enumerate(vis_df['split_type'].unique()):
        chart_title = None
        y_title = None
        x_labels = False

        if i == 0: 
            chart_title = "\n" + split_type + " split"

        if i ==0 and j == 0:
            chart_title = ["Summary of All Chemprop Model Results", "", chart_title]
        
        if i == len(vis_df['metric'].unique()) - 1:
            x_labels = True

        if j == 0: y_title = metric

        plot = alt.Chart(vis_df[(vis_df['metric'] == metric) & (vis_df['split_type'] == split_type)]
        ).mark_bar(opacity = 0.7).encode(
                y = alt.Y("score:Q", title = y_title),
                x = alt.X("split_id:N", title= None, axis = alt.Axis(labels = x_labels)),
                color = alt.Color("split_id:N", title = None),
                column =  alt.Row("dataset:N", title = chart_title, header=alt.Header(titleFontSize=14))
                ).properties(width = 30, height = 100)

    
        if not stack:
            stack = plot
        else:
            stack = alt.hconcat(
                stack,
                plot).resolve_scale(
                    y='shared')
            # stack = stack | plot
            # stack.resolve_scale(y = 'shared')

    if not chart:
            chart = stack
    else:
        chart = chart & stack
    stack = None

chart


In [12]:
### write out dataframe to csv for later comparison with simple model results
vis_df["Model"] = 'Chemprop'
vis_df.to_csv(os.path.join("Evaluation", "Chemprop_Results.csv"), index = False)