In [191]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style(style = "darkgrid")

import altair as alt

In [186]:
simple_df = pd.read_csv(r"C:\Users\phili\Python Projects\Chemistry-Capstone\evaluation\Scikit-learn_Evaluation_Results.csv")
complex_df = pd.read_csv(r"C:\Users\phili\Python Projects\Chemistry-Capstone\evaluation\Chemprop_Results.csv")


complex_df = complex_df.rename(columns = {"Model": "model"})

# ### split out time metric because this is not a float type
# time_df = df[df['metric'] == "fit_time"]
# df = df[df['metric'] != "fit_time"]
# df['score'] = df['score'].astype(float)

In [187]:
simple_df.sample(10)

Unnamed: 0,dataset,split_method,split_id,model,metric,score
67,bace,cluster,train,DummyClassifier,balanced_accuracy_score,0.5
100,clintox,cluster,validate,LogisticRegression,log_loss,0.1857443238515716
88,clintox,cluster,train,SVC,matthews_corrcoef,0.8824081428645786
208,lipophilicity,cluster,train,GradientBoostingClassifier,accuracy_score,0.9669467787114846
283,HIV,cluster,train,KNeighborsClassifier,matthews_corrcoef,0.5914685122835135
354,sol_del,cluster,validate,KNeighborsClassifier,balanced_accuracy_score,0.7051282051282052
159,lipophilicity,cluster,validate,SVC,balanced_accuracy_score,0.5528615428694219
296,HIV,cluster,train,GradientBoostingClassifier,matthews_corrcoef,0.1886254978512857
268,HIV,cluster,train,RandomForestClassifier,log_loss,0.0187977128087736
460,tox21,cluster,validate,DummyClassifier,f1_score,0.0


In [188]:
complex_df.sample(10)


Unnamed: 0,result_file,dataset,split_id,split_method,metric,score,model
25,tox21-random-validate-pred.csv,tox21,validate,random,accuracy_score,0.898271,Chemprop
90,deepchem_Lipophilicity-random-train-pred.csv,lipophilicity,train,random,roc_auc_score,0.971545,Chemprop
18,sol_del-cluster-train-pred.csv,sol_del,train,custer,accuracy_score,0.997914,Chemprop
122,sol_del-cluster-train-pred.csv,sol_del,train,custer,log_loss,0.072032,Chemprop
7,clintox-cluster-validate-pred.csv,clintox,validate,custer,accuracy_score,0.968326,Chemprop
98,sol_del-random-train-pred.csv,sol_del,train,random,roc_auc_score,0.994869,Chemprop
86,clintox-random-train-pred.csv,clintox,train,random,roc_auc_score,0.938835,Chemprop
89,deepchem_Lipophilicity-cluster-validate-pred.csv,lipophilicity,validate,custer,roc_auc_score,0.5,Chemprop
31,bace-random-validate-pred.csv,bace,validate,random,balanced_accuracy_score,0.791892,Chemprop
97,sol_del-cluster-validate-pred.csv,sol_del,validate,custer,roc_auc_score,0.720513,Chemprop


In [189]:
df = pd.concat([simple_df, complex_df])

### split out the time metric as this is not perfectly formatted
time_df = df[df["metric"] == "fit_time"]
df = df[df["metric"] != "fit_time"]
df['score'] = df['score'].astype(float)

df.sample(10)

Unnamed: 0,dataset,split_method,split_id,model,metric,score,result_file
235,HIV,cluster,validate,SVC,accuracy_score,0.96531,
101,tox21,custer,validate,Chemprop,roc_auc_score,0.648708,tox21-cluster-validate-pred.csv
348,sol_del,cluster,train,RandomForestClassifier,matthews_corrcoef,0.99598,
377,sol_del,cluster,train,DummyClassifier,accuracy_score,0.847758,
401,tox21,cluster,validate,SVC,matthews_corrcoef,0.312354,
96,sol_del,custer,train,Chemprop,roc_auc_score,0.99596,sol_del-cluster-train-pred.csv
453,tox21,cluster,validate,GradientBoostingClassifier,matthews_corrcoef,0.110581,
6,clintox,custer,train,Chemprop,accuracy_score,0.980907,clintox-cluster-train-pred.csv
312,sol_del,cluster,train,SVC,accuracy_score,0.996872,
53,bace,custer,train,Chemprop,f1_score,0.948417,bace-cluster-train-pred.csv


In [205]:
filtered_vis_data = df[(df['split_id'] == 'validate') & (df['split_method'] == 'cluster')]

score_title = "ROC-AUC Score"

title = f"Evaluation of Datasets and Models by {score_title}"
# title = f"Evaluation of Datasets and Models by Matthews Correlation Coefficient"

default_metric = {"metric": "roc_auc_score"}
right_model_default = {"model": "DummyClassifier"}
models = list(df['model'].unique())
metrics = list(df['metric'].unique())

metric_dropdown = alt.binding_select(options= [None] + metrics, name='Metric: ')
metric_selection = alt.selection_single(fields=['metric'], bind=metric_dropdown, init = default_metric)

left_model_dropdown = alt.binding_select(options= [None] + models, name='Left Chart - Model: ')
left_model_selection = alt.selection_single(fields=['model'], bind=left_model_dropdown)
right_model_dropdown = alt.binding_select(options= [None] + models, name='Right Chart - Model: ')
right_model_selection = alt.selection_single(fields=['model'], bind=right_model_dropdown, init = right_model_default)

left_color = alt.condition(left_model_selection, alt.Color('model:N'), alt.value('lightgray'))
left_opacity = alt.condition(left_model_selection, alt.value(1.0), alt.value(0.1))
right_color = alt.condition(right_model_selection, alt.Color('model:N'), alt.value('lightgray'))
right_opacity = alt.condition(right_model_selection, alt.value(1.0), alt.value(0.1))

resize = alt.selection_interval(bind='scales')


left_chart = alt.Chart(filtered_vis_data).mark_point(size = 50).encode(
    x = 'dataset:N',
    y = alt.Y('score:Q', title = score_title),
    color = left_color,
    shape = "model:N",
    opacity = left_opacity,
    tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
).add_selection(
    metric_selection, left_model_selection, resize
).transform_filter(
    (metric_selection)
).properties(height = 400, width = 550)



right_chart = alt.Chart(filtered_vis_data).mark_point(size = 50).encode(
    x = 'dataset:N',
    y = alt.Y('score:Q', title = score_title),
    color = right_color,
    shape = "model:N",
    opacity = right_opacity,
    tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
).add_selection(
    metric_selection, right_model_selection, resize
).transform_filter(
    (metric_selection)
).properties(height = 400, width = 550)


(left_chart | right_chart).properties(title= title).configure_title(fontSize=24)

In [193]:
# for metric in metrics:

#     left_chart = alt.Chart(filtered_vis_data).mark_point(size = 50).encode(
#     x = 'dataset:N',
#     y = 'score:Q',
#     color = left_color,
#     shape = "model:N",
#     opacity = left_opacity,
#     tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
# ).add_selection(
#     metric_selection, left_model_selection, resize
# ).transform_filter(
#     (metric_selection)
# ).properties(height = 400, width = 500)



# right_chart = alt.Chart(filtered_vis_data).mark_point(size = 50).encode(
#     x = 'dataset:N',
#     y = alt.Y('score:Q'),
#     color = "model:N",
#     shape = "model:N",
#     tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
# ).filter_transform(filtered_vis_df[filtered_vis_df] == "DummyClassifier").properties(height = 400, width = 500)


# chart = (left_chart | right_chart).properties(title= "Evaluate and Compare Datasets, Models, and Metrics\n").configure_title(fontSize=24)

# path = Path.cwd() / "Notebook_Visualizations" / (f"{metric}.png")
# chart.save(path)

In [157]:
# df.groupby(["split_method", "split_id", "model", "metric"]).agg({"score": ["mean", "std"]})

In [155]:
# metrics = df['metric'].unique()

# fig, axes = plt.subplots(len(metrics), figsize = (12,15))

# for i, metric in enumerate(metrics):
#     sns.boxplot(x="model", y="score", hue="split_id",
#         data=df[df['metric'] == metric], palette="Set3", ax= axes[i])
    
#     axes[i].set_ylabel(metric, fontsize = 14)
#     if i != len(metrics) -1:
#         axes[i].set_xlabel(None)
        
# plt.suptitle("Evaluation of Metric_Scores by Model\nfor all DataSets\n", fontsize = 24, fontweight = "bold")
# fig.tight_layout()

# fig.savefig("Boxplot_Model_Scores.png", facecolor = "w")



In [154]:
# fig, axes = plt.subplots(len(metrics), figsize = (12,15))

# for i, metric in enumerate(metrics):
#     sns.boxplot(x="dataset", y="score", hue="split_id",
#         data=df[df['metric'] == metric], palette="Set2", ax= axes[i])
    
#     axes[i].set_ylabel(metric, fontsize = 14)
#     if i != len(metrics) -1:
#         axes[i].set_xlabel(None)
        
# plt.suptitle("Evaluation of Metric_Scores by Dataset\nfor all Models\n", fontsize = 24, fontweight = "bold")
# fig.tight_layout()

# fig.savefig("Boxplot_Dataset_Scores.png", facecolor = "w")

In [9]:
df.sample(10)

Unnamed: 0,dataset,split_method,split_id,model,metric,score
137,clintox,random,validate,GradientBoostingClassifier,roc_auc_score,0.711741
36,bace,random,train,RandomForestClassifier,matthews_corrcoef,0.984288
327,sol_del,random,train,LogisticRegression,balanced_accuracy_score,0.965457
62,bace,random,train,GradientBoostingClassifier,matthews_corrcoef,0.582157
299,HIV,random,train,DummyClassifier,accuracy_score,0.9653
241,HIV,random,validate,SVC,roc_auc_score,0.774178
286,HIV,random,train,GradientBoostingClassifier,accuracy_score,0.966816
423,tox21,random,validate,RandomForestClassifier,roc_auc_score,0.89045
282,HIV,random,validate,KNeighborsClassifier,log_loss,0.526436
174,lipophilicity,random,validate,LogisticRegression,f1_score,0.982885


In [10]:
### load chemprop results and normalize to simple model results format
cp_df = pd.read_csv(r"C:\Users\phili\Python Projects\Chemistry-Capstone\evaluation\Chemprop_Results.csv")
cp_df.head()
cp_df.columns = [x.lower() for x in cp_df.columns]
cp_df = cp_df.iloc[:,1:]

tdf = pd.concat([cp_df, df])
tdf.head()

tdf.to_csv("Combined_Results.csv", index = False)


In [153]:
# ### Get results including Chemprop for Evaluation of Metric_Scores by Model for all DataSets

# split_style = "random"

# metrics = tdf['metric'].unique()

# fig, axes = plt.subplots(len(metrics), figsize = (12,15))

# for i, metric in enumerate(metrics):

#     sns.boxplot(x="model", y="score",
#         data=tdf[(tdf['metric'] == metric) & (tdf["split_method"] == split_style) & (tdf["split_id"] == "validate")],
#          palette="Set3", ax= axes[i], width = 0.5)
    
#     axes[i].set_ylabel(metric, fontsize = 14)
#     if i != len(metrics) -1:
#         axes[i].set_xlabel(None)
#     # axes[i].get_legend().remove()
        
# plt.suptitle("Evaluation of Metric_Scores by Model\nfor all DataSets\n", fontsize = 24, fontweight = "bold")
# fig.tight_layout()

# # fig.savefig("Boxplot_Model_Scores_wChemProp.png", facecolor = "w")

In [152]:
# ### Get results including Chemprop for Evaluation of Metric_Scores by Model for all DataSets

# split_style = "random"

# fig, axes = plt.subplots(len(metrics), figsize = (12,15))

# for i, metric in enumerate(metrics):
#     sns.boxplot(x="dataset", y="score",
#         data=tdf[(tdf['metric'] == metric) & (tdf["split_method"] == split_style) & (tdf["split_id"] == "validate")], palette="Set2", ax= axes[i])
    
#     axes[i].set_ylabel(metric, fontsize = 14)
#     if i != len(metrics) -1:
#         axes[i].set_xlabel(None)
        
# plt.suptitle("Evaluation of Metric_Scores by Dataset\nfor all Models\n", fontsize = 24, fontweight = "bold")
# fig.tight_layout()

# fig.savefig("Boxplot_Dataset_Scores_wChemProp.png", facecolor = "w")

In [12]:
tdf.groupby(['metric', "model", 'split_id']).agg({'score': ['mean', 'std']})[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
metric,model,split_id,Unnamed: 3_level_2,Unnamed: 4_level_2
accuracy_score,Chemprop,train,0.981043,0.020469
accuracy_score,Chemprop,validate,0.887536,0.086438
accuracy_score,DummyClassifier,train,0.872894,0.112444
accuracy_score,DummyClassifier,validate,0.878283,0.11219
accuracy_score,GradientBoostingClassifier,train,0.904811,0.058889
accuracy_score,GradientBoostingClassifier,validate,0.905916,0.068779
accuracy_score,KNeighborsClassifier,train,0.934345,0.034622
accuracy_score,KNeighborsClassifier,validate,0.911435,0.055492
accuracy_score,LogisticRegression,train,0.977704,0.013937
accuracy_score,LogisticRegression,validate,0.912069,0.049877


In [151]:

filtered_vis_data = tdf[(tdf['split_id'] == 'validate') & (tdf['split_method'] == 'random')]


default_metric = {"metric": "roc_auc_score"}
right_model_default = {"model": "DummyClassifier"}

metric_dropdown = alt.binding_select(options= [None] + list(tdf['metric'].unique()), name='Metric: ')
metric_selection = alt.selection_single(fields=['metric'], bind=metric_dropdown, init = default_metric)

left_model_dropdown = alt.binding_select(options= [None] + list(tdf['model'].unique()), name='Left Chart - Model: ')
left_model_selection = alt.selection_single(fields=['model'], bind=left_model_dropdown)

left_color = alt.condition(left_model_selection,
                    alt.Color('model:N'),
                    alt.value('lightgray'))

left_opacity = alt.condition(left_model_selection, alt.value(1.0), alt.value(0.1))

resize = alt.selection_interval(bind='scales')


left_chart = alt.Chart(filtered_vis_data).mark_point(size = 50).encode(
    x = 'dataset:N',
    y = 'score:Q',
    color = left_color,
    shape = "model:N",
    opacity = left_opacity,
    tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
).add_selection(
    metric_selection, left_model_selection, resize
).transform_filter(
    (metric_selection)
).properties(height = 400, width = 500)

right_model_dropdown = alt.binding_select(options= [None] + list(tdf['model'].unique()), name='Right Chart - Model: ')
right_model_selection = alt.selection_single(fields=['model'], bind=right_model_dropdown, init = right_model_default)

right_color = alt.condition(right_model_selection,
                    alt.Color('model:N'),
                    alt.value('lightgray'))

right_opacity = alt.condition(right_model_selection, alt.value(1.0), alt.value(0.1))

right_chart = alt.Chart(filtered_vis_data).mark_point(size = 50).encode(
    x = 'dataset:N',
    y = alt.Y('score:Q'),
    color = right_color,
    shape = "model:N",
    opacity = right_opacity,
    tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
).add_selection(
    metric_selection, right_model_selection, resize
).transform_filter(
    (metric_selection)
).properties(height = 400, width = 500)


(left_chart | right_chart).properties(title= "Evaluate and Compare Datasets, Models, and Metrics\n").configure_title(fontSize=24)

In [80]:
# from vega_datasets import data

# cars = data.cars.url

# input_dropdown = alt.binding_select(options=['Europe','Japan','USA'], name='Country')
# selection = alt.selection_single(fields=['Origin'], bind=input_dropdown)

# alt.Chart(cars).mark_point().encode(
#     x='Horsepower:Q',
#     y='Miles_per_Gallon:Q',
#     color='Origin:N',
#     tooltip='Name:N'
# ).add_selection(
#     selection
# ).transform_filter(
#     selection
# )

In [81]:
# # from vega_datasets import data

# # cars = data.cars.url

# metrics = df['metric'].unique()

# input_dropdown = alt.binding_select(options=metrics, name='Country')
# selection = alt.selection_single(fields=['metric'], bind=input_dropdown)

# alt.Chart(df).mark_point().encode(
#     x='score:Q',
#     y='split_id:N',
#     color='split_id:N',
#     row = "model:N",
#     tooltip='dataset:N'
# ).add_selection(
#     selection
# ).transform_filter(
#     selection
# )