In [8]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Select seaborn's "darkgrid" plot style for the session.
sns.set_style(style = "darkgrid")

import altair as alt

In [9]:
# Location of the combined evaluation results, relative to the notebook's working directory.
result_path = Path.cwd().parent / "evaluation" / "Combined_Results.csv"

raw_results = pd.read_csv(result_path)

### split out the time metric as this is not perfectly formatted
is_fit_time = raw_results["metric"] == "fit_time"
time_df = raw_results[is_fit_time].copy()

# Build the score frame as a new object (via assign) rather than writing into a
# filtered slice, which would raise pandas' SettingWithCopyWarning.
df = raw_results[~is_fit_time].assign(score=lambda frame: frame["score"].astype(float))

# Seeded so the preview rows are the same on every Restart & Run All.
df.sample(10, random_state=0)

Unnamed: 0,result_file,dataset,split_id,split_method,metric,score,model
438,lipophilicity-random-train-pred.csv,lipophilicity,train,random,accuracy_score,0.994118,LogisticRegression
705,tox21-cluster-validate-pred.csv,tox21,validate,cluster,roc_auc_score,0.748857,KNeighborsClassifier
274,clintox-random-validate-pred.csv,clintox,validate,random,log_loss,0.449885,KNeighborsClassifier
202,clintox-cluster-validate-pred.csv,clintox,validate,cluster,log_loss,0.988404,KNeighborsClassifier
71,bace-cluster-validate-pred.csv,bace,validate,cluster,matthews_corrcoef,0.0,DummyClassifier
83,bace-random-train-pred.csv,bace,train,random,matthews_corrcoef,0.940455,LogisticRegression
847,clintox-random-validate-pred.csv,clintox,validate,random,f1_score,0.285714,Chemprop
131,bace-random-validate-pred.csv,bace,validate,random,matthews_corrcoef,0.703999,KNeighborsClassifier
207,clintox-cluster-validate-pred.csv,clintox,validate,cluster,roc_auc_score,0.50267,GradientBoostingClassifier
816,bace-cluster-train-pred.csv,bace,train,cluster,balanced_accuracy_score,0.919875,Chemprop


# Interactive diagram to explore and compare results

This chart was used, with some title modifications, to generate the static charts in the final report.

In [34]:
def _model_highlight_chart(data, metric_selection, model_selection, zoom_selection, show_y_title = True):
    """Build one scatter panel of score per dataset with a single model highlighted.

    Parameters
    ----------
    data : DataFrame plotted by the chart (columns dataset, model, split_method,
        metric and score are referenced by the encodings and tooltip).
    metric_selection : Altair selection used to filter the rows shown.
    model_selection : Altair selection deciding which points keep their colour
        and full opacity; all other points are drawn light gray at 0.1 opacity.
    zoom_selection : interval selection bound to the scales (pan / zoom).
    show_y_title : when False the y-axis title is suppressed (title = None).

    Returns
    -------
    The configured Altair chart (400 x 450).
    """
    highlight_color = alt.condition(model_selection, alt.Color('model:N'), alt.value('lightgray'))
    highlight_opacity = alt.condition(model_selection, alt.value(1.0), alt.value(0.1))
    # Leaving `title` unset keeps Altair's default axis title; None removes it.
    y_axis = alt.Y('score:Q') if show_y_title else alt.Y('score:Q', title = None)

    return alt.Chart(data).mark_point(size = 50).encode(
        x = alt.X('dataset:N', title = None),
        y = y_axis,
        color = highlight_color,
        shape = "model:N",
        opacity = highlight_opacity,
        tooltip = ["dataset", 'model', 'split_method', 'metric', 'score']
    ).add_selection(
        metric_selection, model_selection, zoom_selection
    ).transform_filter(
        metric_selection
    ).properties(height = 400, width = 450)


# Only validation-split rows produced with the cluster split method are charted.
filtered_vis_data = df[(df['split_id'] == 'validate') & (df['split_method'] == 'cluster')]

title = "Interactive Evaluation of Chemical Datasets, Models and Metric Combinations"

### used only for generation of singleton static charts
# score_title = "ROC-AUC Score"
# title = f"Evaluation of Datasets and Models by {score_title}"


default_metric = {"metric": "roc_auc_score"}
right_model_default = {"model": "DummyClassifier"}
models = list(df['model'].unique())
metrics = list(df['metric'].unique())

# Dropdown-bound selections; the leading None gives each dropdown a blank entry.
metric_dropdown = alt.binding_select(options= [None] + metrics, name='Metric: ')
metric_selection = alt.selection_single(fields=['metric'], bind=metric_dropdown, init = default_metric)

left_model_dropdown = alt.binding_select(options= [None] + models, name='Left Chart - Model: ')
left_model_selection = alt.selection_single(fields=['model'], bind=left_model_dropdown)
right_model_dropdown = alt.binding_select(options= [None] + models, name='Right Chart - Model: ')
right_model_selection = alt.selection_single(fields=['model'], bind=right_model_dropdown, init = right_model_default)

resize = alt.selection_interval(bind='scales')

# Both panels share the metric filter and zoom selection; each has its own model
# highlight. The right panel omits the y-axis title.
left_chart = _model_highlight_chart(filtered_vis_data, metric_selection, left_model_selection, resize)
right_chart = _model_highlight_chart(
    filtered_vis_data, metric_selection, right_model_selection, resize, show_y_title = False
)


(left_chart | right_chart).properties(title= title).configure_title(fontSize=24).configure_axis(
    labelFontSize=14,
    titleFontSize=18,
    labelFontWeight = "bold"
).configure_legend(labelFontSize=12, labelFontWeight = "bold")



# Look at where models might still be under-performing due to lack of hyperparameter tuning

In [42]:
# ROC-AUC rows for the cluster split method, excluding DummyClassifier.
roc_auc_cluster_scores = df[
    (df['metric'] == "roc_auc_score")
    & (df['model'] != "DummyClassifier")
    & (df['split_method'] == "cluster")
]

# One box per split_id, faceted into one panel per model.
# NOTE(review): the titles below say "Test", but the sampled rows shown earlier in
# this notebook only contain 'train' and 'validate' split ids — confirm whether
# "Train" was intended.
box = alt.Chart(roc_auc_cluster_scores).mark_boxplot(size = 50).encode(
    x = alt.X("split_id:N", title = None),
    y = alt.Y("score:Q", scale = alt.Scale(domain = [0.3,1.1]), title = "ROC-AUC Score"),
    color = alt.Color("split_id:N", scale=alt.Scale(scheme = "set2"))
).properties(title = "Test", width = 100, height = 300).facet(
    facet=alt.Facet('model:N', header=alt.Header(labelFontSize=14, labelFontWeight= "bold"), title = None),
    columns=6
).properties(title = "Comparison of Model Test and Validate Prediction Results").configure_title(fontSize=24).configure_axis(
    labelFontSize=12,
    titleFontSize=16,
    labelFontWeight = "bold"
).configure_legend(labelFontSize=12, labelFontWeight = "bold")

box
