In [8]:
import os
import pandas as pd
import json

def get_split(replica_file):
    """Return the short class name of the data split recorded in a replica file.

    The replica file is a JSON document; the value stored under
    ``["py/state"]["prepSettings"]["split"]["py/object"]`` is a dotted path,
    and only its last component is returned.

    Args:
        replica_file: Path to the replica JSON file.

    Returns:
        The text after the final ``.`` of the stored ``py/object`` value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If any of the expected nested keys is missing.
    """
    # The context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(replica_file, "r", encoding="utf-8") as handle:
        replica = json.load(handle)
    split_class_path = replica["py/state"]["prepSettings"]["split"]["py/object"]
    return split_class_path.split(".")[-1]

def get_results(data_folder, prefix):
    """Collect all result tables found under ``data_folder`` into one frame.

    Every sub-directory of ``data_folder`` whose name starts with ``prefix``
    is scanned for files whose name starts with ``results_``. Each such file
    is read with ``pd.read_table`` and gains a ``Split`` column obtained by
    passing its ``ReplicaFile`` values to ``get_split``.

    Args:
        data_folder: Directory that contains the per-run result directories.
        prefix: Name prefix selecting which result directories to read.

    Returns:
        A single DataFrame with the rows of all result files concatenated.
        The original per-file row index is kept.

    Raises:
        ValueError: If no ``results_*`` file was found.
    """
    results = []
    # Sorted so the row order of the combined frame does not depend on the
    # arbitrary order in which os.listdir returns entries.
    for dir_name in sorted(os.listdir(data_folder)):
        res_dir = os.path.join(data_folder, dir_name)
        # A plain file that happens to share the prefix must be skipped,
        # otherwise os.listdir(res_dir) raises NotADirectoryError.
        if not dir_name.startswith(prefix) or not os.path.isdir(res_dir):
            continue
        for file_name in sorted(os.listdir(res_dir)):
            if not file_name.startswith("results_"):
                continue
            res_df = pd.read_table(os.path.join(res_dir, file_name))
            # Several rows usually share one replica file, so parse each
            # distinct file once and map the result back onto the rows.
            split_by_file = {
                path: get_split(path) for path in res_df.ReplicaFile.unique()
            }
            res_df["Split"] = res_df.ReplicaFile.map(split_by_file)
            results.append(res_df)
    if not results:
        # pd.concat([]) would raise an uninformative ValueError here.
        raise ValueError(
            f"No 'results_*' files found in directories starting with "
            f"{prefix!r} under {data_folder!r}"
        )
    return pd.concat(results)

In [2]:
# regression

In [3]:
# Gather every results table from the "./data/" directories whose names start
# with "regression", then preview the first rows.
df_reg_results = get_results("./data/", "regression")
df_reg_results.head()

Unnamed: 0,Assessor,ScoreFunc,Score,TargetProperty,TargetTask,ModelFile,Algorithm,AlgorithmParams,ReplicaID,DataSet,ReplicaFile,Split
0,TestSetAssessor,r2_score,-1.68265,measured log solubility in mols per litre,REGRESSION,/home/martin/projects/qsp-bench-dev/data/regre...,STFullyConnected,,regression_dnn_107420369,delaney-processed_MorganFP_gpu,/home/martin/projects/qsp-bench-dev/data/regre...,RandomSplit
1,TestSetAssessor,root_mean_squared_error,3.308069,measured log solubility in mols per litre,REGRESSION,/home/martin/projects/qsp-bench-dev/data/regre...,STFullyConnected,,regression_dnn_107420369,delaney-processed_MorganFP_gpu,/home/martin/projects/qsp-bench-dev/data/regre...,RandomSplit
2,TestSetAssessor,r2_score,-1.538419,measured log solubility in mols per litre,REGRESSION,/home/martin/projects/qsp-bench-dev/data/regre...,STFullyConnected,,regression_dnn_3184935163,delaney-processed_MorganFP_gpu,/home/martin/projects/qsp-bench-dev/data/regre...,ClusterSplit
3,TestSetAssessor,root_mean_squared_error,3.11312,measured log solubility in mols per litre,REGRESSION,/home/martin/projects/qsp-bench-dev/data/regre...,STFullyConnected,,regression_dnn_3184935163,delaney-processed_MorganFP_gpu,/home/martin/projects/qsp-bench-dev/data/regre...,ClusterSplit
4,TestSetAssessor,r2_score,-1.312194,measured log solubility in mols per litre,REGRESSION,/home/martin/projects/qsp-bench-dev/data/regre...,STFullyConnected,,regression_dnn_958682846,delaney-processed_MorganFP_gpu,/home/martin/projects/qsp-bench-dev/data/regre...,RandomSplit


In [4]:
# classification

In [5]:
# Gather every results table from the "./data/" directories whose names start
# with "classification", then preview the first rows.
df_cls_results = get_results("./data/", "classification")
df_cls_results.head()

Unnamed: 0,Assessor,ScoreFunc,Score,TargetProperty,TargetTask,ModelFile,Algorithm,AlgorithmParams,ReplicaID,DataSet,ReplicaFile,Split
0,TestSetAssessor,roc_auc_score,0.873296,Class,SINGLECLASS,/home/martin/projects/qsp-bench-dev/data/class...,XGBClassifier,"{""n_jobs"": 1, ""random_state"": 2746317213}",classification_2746317213,bace_MorganFP,/home/martin/projects/qsp-bench-dev/data/class...,RandomSplit
1,TestSetAssessor,matthews_corrcoef,0.639343,Class,SINGLECLASS,/home/martin/projects/qsp-bench-dev/data/class...,XGBClassifier,"{""n_jobs"": 1, ""random_state"": 2746317213}",classification_2746317213,bace_MorganFP,/home/martin/projects/qsp-bench-dev/data/class...,RandomSplit
2,TestSetAssessor,recall_score,0.821705,Class,SINGLECLASS,/home/martin/projects/qsp-bench-dev/data/class...,XGBClassifier,"{""n_jobs"": 1, ""random_state"": 2746317213}",classification_2746317213,bace_MorganFP,/home/martin/projects/qsp-bench-dev/data/class...,RandomSplit
3,TestSetAssessor,precision_score,0.773723,Class,SINGLECLASS,/home/martin/projects/qsp-bench-dev/data/class...,XGBClassifier,"{""n_jobs"": 1, ""random_state"": 2746317213}",classification_2746317213,bace_MorganFP,/home/martin/projects/qsp-bench-dev/data/class...,RandomSplit
4,TestSetAssessor,f1_score,0.796992,Class,SINGLECLASS,/home/martin/projects/qsp-bench-dev/data/class...,XGBClassifier,"{""n_jobs"": 1, ""random_state"": 2746317213}",classification_2746317213,bace_MorganFP,/home/martin/projects/qsp-bench-dev/data/class...,RandomSplit


In [6]:
# df_cls_results.ModelName  = df_cls_results.ModelName.apply(lambda x: x.split("_")[-1])

In [9]:
from utils.plotting import make_box_plot

def make_plot(df, prefix, ylim=(-0.5, 1), exclude_algorithms=("STFullyConnected",)):
    """Draw one box plot of scores per split type.

    A ``DataSetName`` column (the part of ``DataSet`` before the first ``_``)
    is derived on a copy of ``df``; the caller's frame is left unchanged.
    Rows whose ``Algorithm`` is listed in ``exclude_algorithms`` are dropped,
    the rest is sorted by data set and algorithm, and ``make_box_plot`` is
    called once for every distinct value of ``Split``.

    Args:
        df: Results frame with ``DataSet``, ``Algorithm``, ``Score`` and
            ``Split`` columns.
        prefix: Leading part of each plot name; the split name is appended.
        ylim: Y-axis limits forwarded to ``make_box_plot``.
        exclude_algorithms: Algorithm names to leave out of the plots.

    Returns:
        None.
    """
    # NOTE(review): rows for every ScoreFunc end up in the same plot; confirm
    # that make_box_plot separates the metrics or that mixing is intended.
    plot_df = (
        # assign() returns a new frame, so the caller's data is not mutated.
        df.assign(DataSetName=df.DataSet.str.split("_").str[0])
        .loc[lambda frame: ~frame.Algorithm.isin(exclude_algorithms)]
        .sort_values(by=["DataSetName", "Algorithm"])
    )
    for split_name, split_df in plot_df.groupby("Split"):
        make_box_plot(
            split_df,
            x="DataSetName",
            y="Score",
            hue="Algorithm",
            plot_name=f"{prefix}_{split_name}",
            ylim=ylim,
        )
# Plot both result sets; the second argument becomes the leading part of each
# plot name.
make_plot(df_reg_results, "regression")
make_plot(df_cls_results, "classification")