In [22]:
from utils import * 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import itertools
from pprint import pprint
import joblib
import statistics

#regression matrics
from sklearn.metrics import mean_absolute_error , mean_squared_error, r2_score

#classification metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score, matthews_corrcoef


## Test Set Results

This notebook evalute all the models (60 base models and 12 hierarchical models) on test set.

## Test lables

In [23]:
test_labels = pd.read_csv('../data/processed/test_labels.csv', index_col = 'CASRN')

In [24]:
test_labels.head(1)

Unnamed: 0_level_0,SMILES,logLD50_mmolkg,verytoxic,toxic,EPA_category,GHS_category
CASRN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
130209-82-4,CC(C)OC(=O)CCCC=CCC1C(O)CC(O)C1CCC(O)CCc1ccccc1,,1.0,1.0,1.0,1.0


## Functions

In [25]:
def report_reg_scores(labels, predictions):
    '''
    two dataframe: label, and predictions
    
    report four scores: RMSE, R2, MAE, MSE
    '''
    
    # get the labeled data
    labeled = labels[~labels['logLD50_mmolkg'].isnull()]
    
    labeled_preds = predictions.loc[labeled.index].values.astype('float32')
    labeled_Y = labeled['logLD50_mmolkg'].values
    
    score_rmse = rmse(labeled_Y, labeled_preds)
    score_r2 = r2_score(labeled_Y, labeled_preds)
    score_mae = mean_absolute_error(labeled_Y, labeled_preds)
    score_mse = mean_squared_error(labeled_Y, labeled_preds)

    print('RMSE:', score_rmse)
    print('R2:', score_r2) 
    print('MAE:', score_mae)
    print('MSE:', score_mse)
    
    return [score_rmse, score_r2, score_mae, score_mse]

def prob_to_pred(probs):
    classes = probs.argmax(axis=-1)
    return classes

def report_clf_scores(labels, predictions, target, encoder):
    '''
    two dataframe: label, and predictions
    target: ['toxic', 'EPA_category']
    encoder: label encoder
    
    report four scores: Accuracy, Balance Accuracy, MCC, f1_weight, AUROC (Only for binary model)
    '''
    
    # get the labeled data
    labeled = labels[~labels[target].isnull()]
    
    labeled_probs = predictions.loc[labeled.index].values.astype('float32')
    # predicted probabilities to predicted class
    labeled_preds = prob_to_pred(labeled_probs)
    
    # label encoding
    labeled_Y = encoder.transform(labeled[target].values)
    
    accuracy = accuracy_score(labeled_Y, labeled_preds)
    balance_acc = balanced_accuracy_score(labeled_Y, labeled_preds)
    f1= f1_score(labeled_Y, labeled_preds, average='weighted')
    mcc = matthews_corrcoef(labeled_Y, labeled_preds)
    
    score_list = [accuracy, balance_acc, f1, mcc]
    
    print('Accuracy:', accuracy)
    print('Balance Accuracy:', balance_acc) 
    print('F1_score:', f1)
    print('MCC:', mcc)
    
    if target == 'toxic':
        auroc = roc_auc_score(labeled_Y, labeled_preds)
        print('AUROC:', auroc)
        score_list.append(auroc)
        
    return score_list

## Base Models

In [26]:
encoder_epa = joblib.load('../encoder_models/encoder_epa.joblib')
encoder_toxic = joblib.load('../encoder_models/encoder_toxic.joblib')

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['ecfp6bits', 'ecfp6counts', 'maccs', 'rdkit2d', 'mordred']
algorithms = ['knn', 'svm', 'RF', 'xgboost']

reg_cols = ['name', 'RMSE', 'R2', 'MAE', 'MSE']
binary_cols = ['name', 'Accuracy', 'Balance Accuracy', 'F1_score', 'MCC', 'AUROC']
multiclass_cols = ['name', 'Accuracy', 'Balance Accuracy', 'F1_score', 'MCC']

base_reg_scores = pd.DataFrame(columns=reg_cols)
base_toxic_scores = pd.DataFrame(columns=binary_cols)
base_epa_scores = pd.DataFrame(columns=multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Evaluating {name}')
            df_preds = pd.read_csv(f'../data/Hmodel_features_test/{name}.csv', index_col='CASRN')
            
            if e == 'LD50':
                scores = report_reg_scores(test_labels, df_preds)
                # Replace append with concat
                new_row = pd.DataFrame([[name] + scores], columns=reg_cols)
                base_reg_scores = pd.concat([base_reg_scores, new_row], ignore_index=True)
                
            elif e == 'EPA':
                scores = report_clf_scores(test_labels, df_preds, target='EPA_category', encoder=encoder_epa)
                new_row = pd.DataFrame([[name] + scores], columns=multiclass_cols)
                base_epa_scores = pd.concat([base_epa_scores, new_row], ignore_index=True)
                
            elif e == 'Toxic':
                scores = report_clf_scores(test_labels, df_preds, target='toxic', encoder=encoder_toxic)
                new_row = pd.DataFrame([[name] + scores], columns=binary_cols)
                base_toxic_scores = pd.concat([base_toxic_scores, new_row], ignore_index=True)
            
            print('\n')

Evaluating Toxic_knn_ecfp6bits
Accuracy: 0.7450773558368495
Balance Accuracy: 0.7325302564606078
F1_score: 0.742894115481333
MCC: 0.4731110515854856
AUROC: 0.7325302564606078


Evaluating Toxic_svm_ecfp6bits
Accuracy: 0.7468354430379747
Balance Accuracy: 0.7352543880914579
F1_score: 0.7450336459712092
MCC: 0.4772638733127907
AUROC: 0.7352543880914579


Evaluating Toxic_RF_ecfp6bits
Accuracy: 0.7584388185654009
Balance Accuracy: 0.7396755031609037
F1_score: 0.7533245721190068
MCC: 0.5002594036115744
AUROC: 0.7396755031609037


Evaluating Toxic_xgboost_ecfp6bits
Accuracy: 0.7478902953586498
Balance Accuracy: 0.7348649372247716
F1_score: 0.7455068747359669
MCC: 0.47871704448538055
AUROC: 0.7348649372247718


Evaluating Toxic_knn_ecfp6counts
Accuracy: 0.7566807313642757
Balance Accuracy: 0.7440242446449874
F1_score: 0.7544460781543839
MCC: 0.4971161475799576
AUROC: 0.7440242446449873


Evaluating Toxic_svm_ecfp6counts
Accuracy: 0.7422644163150492
Balance Accuracy: 0.7271487995853956
F1_sco

  base_toxic_scores = pd.concat([base_toxic_scores, new_row], ignore_index=True)


Accuracy: 0.780239099859353
Balance Accuracy: 0.7669912174907217
F1_score: 0.7777937547918627
MCC: 0.5460361363974763
AUROC: 0.7669912174907217


Evaluating Toxic_xgboost_rdkit2d
Accuracy: 0.7841068917018285
Balance Accuracy: 0.7727449175270098
F1_score: 0.7823516388454043
MCC: 0.5544486157485587
AUROC: 0.7727449175270098


Evaluating Toxic_knn_mordred
Accuracy: 0.770745428973277
Balance Accuracy: 0.7607030663625289
F1_score: 0.7694448285135678
MCC: 0.5273617959900211
AUROC: 0.7607030663625289


Evaluating Toxic_svm_mordred
Accuracy: 0.7819971870604782
Balance Accuracy: 0.7717828043397834
F1_score: 0.7806229515382522
MCC: 0.5504702211024075
AUROC: 0.7717828043397834


Evaluating Toxic_RF_mordred
Accuracy: 0.7805907172995781
Balance Accuracy: 0.7678407214279966
F1_score: 0.7783354194186535
MCC: 0.5468379579957108
AUROC: 0.7678407214279966


Evaluating Toxic_xgboost_mordred
Accuracy: 0.7795358649789029
Balance Accuracy: 0.767033224536771
F1_score: 0.7773714085953174
MCC: 0.54468324862609

  base_epa_scores = pd.concat([base_epa_scores, new_row], ignore_index=True)


Accuracy: 0.6470588235294118
Balance Accuracy: 0.5597057437921018
F1_score: 0.6301190773624209
MCC: 0.43732883637849024


Evaluating EPA_xgboost_maccs
Accuracy: 0.6353649893692417
Balance Accuracy: 0.547024068090494
F1_score: 0.6177012400120111
MCC: 0.4165245681921688


Evaluating EPA_knn_rdkit2d
Accuracy: 0.6417434443656981
Balance Accuracy: 0.5795151917141061
F1_score: 0.6327220755213979
MCC: 0.4362653294796405


Evaluating EPA_svm_rdkit2d
Accuracy: 0.6364280652019844
Balance Accuracy: 0.5479582350025856
F1_score: 0.6186975949005431
MCC: 0.41821252051214264


Evaluating EPA_RF_rdkit2d
Accuracy: 0.6587526576895819
Balance Accuracy: 0.5706526673937231
F1_score: 0.6430205339838978
MCC: 0.4567736350803515


Evaluating EPA_xgboost_rdkit2d
Accuracy: 0.6566265060240963
Balance Accuracy: 0.5882570108914287
F1_score: 0.6467700434385517
MCC: 0.45733902931080767


Evaluating EPA_knn_mordred
Accuracy: 0.6350106307583274
Balance Accuracy: 0.576779027754237
F1_score: 0.6259009552595995
MCC: 0.4259

  base_reg_scores = pd.concat([base_reg_scores, new_row], ignore_index=True)


In [27]:
base_reg_scores

Unnamed: 0,name,RMSE,R2,MAE,MSE
0,LD50_knn_ecfp6bits,0.616599,0.528507,0.445068,0.380195
1,LD50_svm_ecfp6bits,0.627607,0.511522,0.459033,0.39389
2,LD50_RF_ecfp6bits,0.631907,0.504805,0.473092,0.399307
3,LD50_xgboost_ecfp6bits,0.583979,0.577074,0.434834,0.341032
4,LD50_knn_ecfp6counts,0.601272,0.551656,0.434642,0.361528
5,LD50_svm_ecfp6counts,0.604804,0.546372,0.444925,0.365788
6,LD50_RF_ecfp6counts,0.617901,0.526513,0.458276,0.381802
7,LD50_xgboost_ecfp6counts,0.592436,0.564736,0.437826,0.35098
8,LD50_knn_maccs,0.601186,0.551784,0.430886,0.361424
9,LD50_svm_maccs,0.575471,0.589308,0.418651,0.331166


In [28]:
base_toxic_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC,AUROC
0,Toxic_knn_ecfp6bits,0.745077,0.73253,0.742894,0.473111,0.73253
1,Toxic_svm_ecfp6bits,0.746835,0.735254,0.745034,0.477264,0.735254
2,Toxic_RF_ecfp6bits,0.758439,0.739676,0.753325,0.500259,0.739676
3,Toxic_xgboost_ecfp6bits,0.74789,0.734865,0.745507,0.478717,0.734865
4,Toxic_knn_ecfp6counts,0.756681,0.744024,0.754446,0.497116,0.744024
5,Toxic_svm_ecfp6counts,0.742264,0.727149,0.738941,0.466212,0.727149
6,Toxic_RF_ecfp6counts,0.765471,0.746111,0.760094,0.515536,0.746111
7,Toxic_xgboost_ecfp6counts,0.767229,0.753296,0.764482,0.518734,0.753296
8,Toxic_knn_maccs,0.770042,0.757263,0.767763,0.524851,0.757263
9,Toxic_svm_maccs,0.779184,0.76651,0.776955,0.543918,0.76651


In [29]:
base_epa_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC
0,EPA_knn_ecfp6bits,0.626152,0.557011,0.615868,0.409073
1,EPA_svm_ecfp6bits,0.621545,0.516311,0.593787,0.389499
2,EPA_RF_ecfp6bits,0.63005,0.506912,0.592427,0.40693
3,EPA_xgboost_ecfp6bits,0.622254,0.528022,0.60025,0.392334
4,EPA_knn_ecfp6counts,0.631113,0.564531,0.620869,0.416371
5,EPA_svm_ecfp6counts,0.626506,0.514918,0.600149,0.397211
6,EPA_RF_ecfp6counts,0.636074,0.515629,0.604298,0.415817
7,EPA_xgboost_ecfp6counts,0.631821,0.55244,0.61937,0.414872
8,EPA_knn_maccs,0.628278,0.566542,0.617903,0.413732
9,EPA_svm_maccs,0.62438,0.528073,0.603115,0.397303


In [30]:
# save the results
base_reg_scores.to_csv('../results/model_evaluation/base_reg_scores.csv', index =False)
base_toxic_scores.to_csv('../results/model_evaluation/base_toxic_scores.csv', index =False)
base_epa_scores.to_csv('../results/model_evaluation/base_epa_scores.csv', index =False)

## Hierarchial Models

Get the predictions on test set

In [31]:
encoder_epa = joblib.load('../encoder_models/encoder_epa.joblib')
encoder_toxic = joblib.load('../encoder_models/encoder_toxic.joblib')

result_path = '../results/Hierarchical_testset_preds/'

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['Hmodel']
algorithms = ['knn', 'SVM', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'R2', 'MAE', 'MSE']
binary_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC', 'AUROC']
multiclass_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC']

H_reg_scores = pd.DataFrame(columns=reg_cols)
H_toxic_scores = pd.DataFrame(columns=binary_cols)
H_epa_scores = pd.DataFrame(columns=multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Evaluating {name}')
            df_preds = pd.read_csv(f'{result_path}{name}.csv', index_col='CASRN')
            if e == 'LD50':
                scores = report_reg_scores(test_labels, df_preds)
                new_row = pd.DataFrame([[name] + scores], columns=reg_cols)
                H_reg_scores = pd.concat([H_reg_scores, new_row], ignore_index=True)
            elif e == 'EPA':
                scores = report_clf_scores(test_labels, df_preds, target='EPA_category', encoder=encoder_epa)
                new_row = pd.DataFrame([[name] + scores], columns=multiclass_cols)
                H_epa_scores = pd.concat([H_epa_scores, new_row], ignore_index=True)
            elif e == 'Toxic':
                scores = report_clf_scores(test_labels, df_preds, target='toxic', encoder=encoder_toxic)
                new_row = pd.DataFrame([[name] + scores], columns=binary_cols)
                H_toxic_scores = pd.concat([H_toxic_scores, new_row], ignore_index=True)
            print('\n')

Evaluating Toxic_knn_Hmodel
Accuracy: 0.79957805907173
Balance Accuracy: 0.7912983669381266
F1_score: 0.7987414427823886
MCC: 0.5874837713275923
AUROC: 0.7912983669381266


Evaluating Toxic_SVM_Hmodel
Accuracy: 0.7946554149085795
Balance Accuracy: 0.7854988640383752
F1_score: 0.7935851790250765
MCC: 0.5769859652549326
AUROC: 0.7854988640383751


Evaluating Toxic_RF_Hmodel
Accuracy: 0.8090717299578059
Balance Accuracy: 0.8010685479075177
F1_score: 0.8082884998795127
MCC: 0.6071004132632591
AUROC: 0.8010685479075176


Evaluating Toxic_xgboost_Hmodel
Accuracy: 0.8016877637130801
Balance Accuracy: 0.7942191219110271
F1_score: 0.80105506382791
MCC: 0.5922063345595926
AUROC: 0.7942191219110271


Evaluating EPA_knn_Hmodel
Accuracy: 0.6757618710134656
Balance Accuracy: 0.6040378813409405
F1_score: 0.6635967726057506
MCC: 0.48722175519466593


Evaluating EPA_SVM_Hmodel
Accuracy: 0.6771793054571226
Balance Accuracy: 0.6060163578985972
F1_score: 0.6657911142543218
MCC: 0.490029648998071


Evaluat

  H_toxic_scores = pd.concat([H_toxic_scores, new_row], ignore_index=True)
  H_epa_scores = pd.concat([H_epa_scores, new_row], ignore_index=True)
  H_reg_scores = pd.concat([H_reg_scores, new_row], ignore_index=True)


In [32]:
H_reg_scores

Unnamed: 0,name,RMSE,R2,MAE,MSE
0,LD50_knn_Hmodel,0.537764,0.641365,0.383375,0.28919
1,LD50_SVM_Hmodel,0.527598,0.654796,0.376434,0.278359
2,LD50_RF_Hmodel,0.529732,0.651998,0.377788,0.280616
3,LD50_xgboost_Hmodel,0.53451,0.645692,0.382348,0.285701


In [33]:
H_toxic_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC,AUROC
0,Toxic_knn_Hmodel,0.799578,0.791298,0.798741,0.587484,0.791298
1,Toxic_SVM_Hmodel,0.794655,0.785499,0.793585,0.576986,0.785499
2,Toxic_RF_Hmodel,0.809072,0.801069,0.808288,0.6071,0.801069
3,Toxic_xgboost_Hmodel,0.801688,0.794219,0.801055,0.592206,0.794219


In [34]:
H_epa_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC
0,EPA_knn_Hmodel,0.675762,0.604038,0.663597,0.487222
1,EPA_SVM_Hmodel,0.677179,0.606016,0.665791,0.49003
2,EPA_RF_Hmodel,0.678951,0.621083,0.669755,0.495334
3,EPA_xgboost_Hmodel,0.675762,0.613531,0.666057,0.48934


In [35]:
# save the results
H_reg_scores.to_csv('../results/model_evaluation/H_reg_scores.csv', index =False)
H_toxic_scores.to_csv('../results/model_evaluation/H_toxic_scores.csv', index =False)
H_epa_scores.to_csv('../results/model_evaluation/H_epa_scores.csv', index =False)

In [36]:
reg_scores = pd.concat([H_reg_scores, base_reg_scores])
toxic_scores = pd.concat([H_toxic_scores, base_toxic_scores])
epa_scores = pd.concat([H_epa_scores, base_epa_scores])

In [37]:
# save the results
reg_scores.to_csv('../results/model_evaluation/reg_scores.csv', index =False)
toxic_scores.to_csv('../results/model_evaluation/toxic_scores.csv', index =False)
epa_scores.to_csv('../results/model_evaluation/epa_scores.csv', index =False)

## Averge predictions

Average predictions of hierarchial models

In [38]:
result_path = '../results/Hierarchical_testset_preds/'

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['Hmodel']
algorithms = ['knn', 'SVM', 'RF', 'xgboost']

reg_array_sum = np.zeros((2849, 1)) #shape of predictions (number of samples, predictions)
binary_array_sum = np.zeros((2849, 2))
multiclass_array_sum = np.zeros((2849, 4))

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Eavluating {name}')
            df_preds = pd.read_csv(f'{result_path}{name}.csv',index_col = 'CASRN')
            
            if e == 'LD50':
                reg_array_sum += df_preds.values
            if e == 'EPA':
                multiclass_array_sum += df_preds.values
            if e == 'Toxic':                
                binary_array_sum += df_preds.values

reg_array_avg = reg_array_sum/4 # len(descriptors) * len(algorithms)
binary_array_avg = binary_array_sum/4 # len(descriptors) * len(algorithms)
multiclass_array_avg = multiclass_array_sum/4 # len(descriptors) * len(algorithms)

Eavluating Toxic_knn_Hmodel
Eavluating Toxic_SVM_Hmodel
Eavluating Toxic_RF_Hmodel
Eavluating Toxic_xgboost_Hmodel
Eavluating EPA_knn_Hmodel
Eavluating EPA_SVM_Hmodel


Eavluating EPA_RF_Hmodel
Eavluating EPA_xgboost_Hmodel
Eavluating LD50_knn_Hmodel
Eavluating LD50_SVM_Hmodel
Eavluating LD50_RF_Hmodel
Eavluating LD50_xgboost_Hmodel


In [39]:
Hmodel_reg_avgp = pd.DataFrame(reg_array_avg, index = test_labels.index, columns= ['LD50_avg'])
Hmodel_toxic_avgp = pd.DataFrame(binary_array_avg, index = test_labels.index, columns= ['Toxic_avg-0', 'Toxic_avg-1'])
Hmodel_epa_avgp = pd.DataFrame(multiclass_array_avg, index = test_labels.index, 
                               columns= ['EPA_avg-1', 'EPA_avg-2', 'EPA_avg-3','EPA_avg-4'])

In [40]:
Hmodel_reg_avgp.to_csv('../results/avg_predictions/Hmodel_reg_avgp.csv')
Hmodel_toxic_avgp.to_csv('../results/avg_predictions/Hmodel_toxic_avgp.csv')
Hmodel_epa_avgp.to_csv('../results/avg_predictions/Hmodel_epa_avgp.csv')

In [41]:
Hmodel_avgp = pd.concat([Hmodel_reg_avgp, Hmodel_toxic_avgp, Hmodel_epa_avgp],axis=1)

In [42]:
Hmodel_avgp.to_csv('../results/avg_predictions/Hmodel_avgp.csv')

In [43]:
list(Hmodel_avgp)

['LD50_avg',
 'Toxic_avg-0',
 'Toxic_avg-1',
 'EPA_avg-1',
 'EPA_avg-2',
 'EPA_avg-3',
 'EPA_avg-4']

Average predictions of base models

In [44]:
result_path = '../data/Hmodel_features_test/'

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['ecfp6bits', 'ecfp6counts', 'maccs', 'rdkit2d', 'mordred']
algorithms = ['knn', 'svm', 'RF', 'xgboost']

reg_array_sum = np.zeros((2849, 1)) #shape of predictions (number of samples, predictions)
binary_array_sum = np.zeros((2849, 2))
multiclass_array_sum = np.zeros((2849, 4))

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Eavluating {name}')
            df_preds = pd.read_csv(f'{result_path}{name}.csv',index_col = 'CASRN')
            
            if e == 'LD50':
                reg_array_sum += df_preds.values
            if e == 'EPA':
                multiclass_array_sum += df_preds.values
            if e == 'Toxic':                
                binary_array_sum += df_preds.values

reg_array_avg = reg_array_sum/20 # len(descriptors) * len(algorithms)
binary_array_avg = binary_array_sum/20 # len(descriptors) * len(algorithms)
multiclass_array_avg = multiclass_array_sum/20 # len(descriptors) * len(algorithms)

Eavluating Toxic_knn_ecfp6bits
Eavluating Toxic_svm_ecfp6bits
Eavluating Toxic_RF_ecfp6bits
Eavluating Toxic_xgboost_ecfp6bits
Eavluating Toxic_knn_ecfp6counts
Eavluating Toxic_svm_ecfp6counts
Eavluating Toxic_RF_ecfp6counts


Eavluating Toxic_xgboost_ecfp6counts
Eavluating Toxic_knn_maccs
Eavluating Toxic_svm_maccs
Eavluating Toxic_RF_maccs
Eavluating Toxic_xgboost_maccs
Eavluating Toxic_knn_rdkit2d
Eavluating Toxic_svm_rdkit2d
Eavluating Toxic_RF_rdkit2d
Eavluating Toxic_xgboost_rdkit2d
Eavluating Toxic_knn_mordred
Eavluating Toxic_svm_mordred
Eavluating Toxic_RF_mordred
Eavluating Toxic_xgboost_mordred
Eavluating EPA_knn_ecfp6bits
Eavluating EPA_svm_ecfp6bits
Eavluating EPA_RF_ecfp6bits
Eavluating EPA_xgboost_ecfp6bits
Eavluating EPA_knn_ecfp6counts
Eavluating EPA_svm_ecfp6counts
Eavluating EPA_RF_ecfp6counts
Eavluating EPA_xgboost_ecfp6counts
Eavluating EPA_knn_maccs
Eavluating EPA_svm_maccs
Eavluating EPA_RF_maccs
Eavluating EPA_xgboost_maccs
Eavluating EPA_knn_rdkit2d
Eavluating EPA_svm_rdkit2d
Eavluating EPA_RF_rdkit2d
Eavluating EPA_xgboost_rdkit2d
Eavluating EPA_knn_mordred
Eavluating EPA_svm_mordred
Eavluating EPA_RF_mordred
Eavluating EPA_xgboost_mordred
Eavluating LD50_knn_ecfp6bits
Eavluating LD

In [45]:
Bmodel_reg_avgp = pd.DataFrame(reg_array_avg, index = test_labels.index, columns= ['LD50_avg'])
Bmodel_toxic_avgp = pd.DataFrame(binary_array_avg, index = test_labels.index, columns= ['Toxic_avg-0', 'Toxic_avg-1'])
Bmodel_epa_avgp = pd.DataFrame(multiclass_array_avg, index = test_labels.index, 
                               columns= ['EPA_avg-1', 'EPA_avg-2', 'EPA_avg-3','EPA_avg-4'])

In [46]:
Bmodel_reg_avgp.to_csv('../results/avg_predictions/Bmodel_reg_avgp.csv')
Bmodel_toxic_avgp.to_csv('../results/avg_predictions/Bmodel_toxic_avgp.csv')
Bmodel_epa_avgp.to_csv('../results/avg_predictions/Bmodel_epa_avgp.csv')

In [47]:
Bmodel_avgp = pd.concat([Bmodel_reg_avgp, Bmodel_toxic_avgp, Bmodel_epa_avgp],axis=1)
Bmodel_avgp.to_csv('../results/avg_predictions/Bmodel_avgp.csv')

In [48]:
Bmodel_avgp.head(1)

Unnamed: 0_level_0,LD50_avg,Toxic_avg-0,Toxic_avg-1,EPA_avg-1,EPA_avg-2,EPA_avg-3,EPA_avg-4
CASRN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
130209-82-4,0.429453,0.610803,0.389197,0.0544,0.169503,0.301574,0.474523


### Ealuation of the avergage predictons
and merge with other results

In [49]:
encoder_epa = joblib.load('../encoder_models/encoder_epa.joblib')
encoder_toxic = joblib.load('../encoder_models/encoder_toxic.joblib')

In [50]:
reg_scores = pd.read_csv('../results/model_evaluation/reg_scores.csv')
toxic_scores = pd.read_csv('../results/model_evaluation/toxic_scores.csv')
epa_scores = pd.read_csv('../results/model_evaluation/epa_scores.csv')

In [51]:
reg_cols = ['name','RMSE', 'R2', 'MAE', 'MSE']
binary_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC', 'AUROC']
multiclass_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC']


Base model

In [53]:
scores = report_reg_scores(test_labels, Bmodel_reg_avgp)
new_row = pd.DataFrame([['Bmodel_avgp'] + scores], columns=reg_cols)
reg_scores = pd.concat([reg_scores, new_row], ignore_index=True)

RMSE: 0.5535855151916359
R2: 0.619951386507464
MAE: 0.401204056281817
MSE: 0.3064569226299889


In [55]:
scores = report_clf_scores(test_labels, Bmodel_epa_avgp, target='EPA_category', encoder=encoder_epa)
new_row = pd.DataFrame([['Bmodel_avgp'] + scores], columns=multiclass_cols)
epa_scores = pd.concat([epa_scores, new_row], ignore_index=True)

Accuracy: 0.6704464918497519
Balance Accuracy: 0.5799565097969118
F1_score: 0.6529950785896049
MCC: 0.4767465165154781


In [57]:
scores = report_clf_scores(test_labels, Bmodel_toxic_avgp, target='toxic', encoder=encoder_toxic)
new_row = pd.DataFrame([['Bmodel_avgp'] + scores], columns=binary_cols)
toxic_scores = pd.concat([toxic_scores, new_row], ignore_index=True)

Accuracy: 0.7883263009845288
Balance Accuracy: 0.774995584199075
F1_score: 0.7858690403982773
MCC: 0.5629524480088984
AUROC: 0.774995584199075


Hierarchical Models

In [59]:
scores = report_reg_scores(test_labels, Hmodel_reg_avgp)
new_row = pd.DataFrame([['Hmodel_avgp'] + scores], columns=reg_cols)
reg_scores = pd.concat([reg_scores, new_row], ignore_index=True)

RMSE: 0.5276270666519393
R2: 0.654757821183973
MAE: 0.37556543849935525
MSE: 0.27839032146373


In [61]:
scores = report_clf_scores(test_labels, Hmodel_epa_avgp, target='EPA_category', encoder=encoder_epa)
new_row = pd.DataFrame([['Hmodel_avgp'] + scores], columns=multiclass_cols)
epa_scores = pd.concat([epa_scores, new_row], ignore_index=True)

Accuracy: 0.6814316087880935
Balance Accuracy: 0.6150245696490275
F1_score: 0.6708732997602733
MCC: 0.4976144073936262


In [63]:
scores = report_clf_scores(test_labels, Hmodel_toxic_avgp, target='toxic', encoder=encoder_toxic)
new_row = pd.DataFrame([['Hmodel_avgp'] + scores], columns=binary_cols)
toxic_scores = pd.concat([toxic_scores, new_row], ignore_index=True)

Accuracy: 0.8013361462728551
Balance Accuracy: 0.7930431776761399
F1_score: 0.800492499825607
MCC: 0.5910883021388699
AUROC: 0.7930431776761399


In [64]:
# save the results
reg_scores.to_csv('../results/model_evaluation/reg_scores.csv', index =False)
toxic_scores.to_csv('../results/model_evaluation/toxic_scores.csv', index =False)
epa_scores.to_csv('../results/model_evaluation/epa_scores.csv', index =False)

## Cross-validation Resluts

In [67]:
def report_reg_scores(score, decimal = 3):
    '''
    score: cross-validation score
    
    report four scores: RMSE, R2, MAE, MSE
    '''
    
    score_rmse = round(statistics.mean(score['test_RMSE']), decimal)
    std_rmse = round(statistics.stdev(score['test_RMSE']), decimal)
    score_r2 = round(statistics.mean(score['test_R2']), decimal)
    std_r2 = round(statistics.stdev(score['test_R2']), decimal)
    score_mae = round(statistics.mean(score['test_MAE']), decimal)
    std_mae = round(statistics.stdev(score['test_MAE']), decimal)
    score_mse = round(statistics.mean(score['test_MSE']), decimal)
    std_mse = round(statistics.stdev(score['test_MSE']), decimal)
    
    return [score_rmse,std_rmse, score_r2, std_r2, score_mae,std_mae, score_mse,std_mse]

def report_clf_scores(score, decimal = 3):
    '''
    score: cross-validation score
    
    report four scores: Accuracy, Balance Accuracy, MCC, f1_weight, AUROC
    '''
    
    accuracy = round(statistics.mean(score['test_Accuracy']), decimal)
    std_accuracy = round(statistics.stdev(score['test_Accuracy']), decimal)
    balance_acc = round(statistics.mean(score['test_Balance Accuracy']), decimal)
    std_balance_acc = round(statistics.stdev(score['test_Balance Accuracy']), decimal)
    mcc = round(statistics.mean(score['test_matthews_corrcoef']), decimal)
    std_mcc = round(statistics.stdev(score['test_matthews_corrcoef']), decimal)
    f1= round(statistics.mean(score['test_f1_score']), decimal)
    std_f1 = round(statistics.stdev(score['test_f1_score']), decimal)
    auroc = round(statistics.mean(score['test_AUROC']), decimal)
    std_auroc = round(statistics.stdev(score['test_AUROC']), decimal)

    return [accuracy, std_accuracy,balance_acc, std_balance_acc, mcc, std_mcc, f1, std_f1, auroc, std_auroc]

Get the cv scores from all the Base Models

In [69]:
endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['ecfp6bits', 'ecfp6counts', 'maccs', 'rdkit2d', 'mordred']
algorithms = ['knn', 'svm', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'RMSE (std)', 'R2','R2 (std)', 'MAE', 'MAE (std)', 'MSE', 'MSE (std)']
binary_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']
multiclass_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']

base_reg_cv_scores = pd.DataFrame(columns = reg_cols)
base_toxic_cv_scores = pd.DataFrame(columns = binary_cols)
base_epa_cv_scores = pd.DataFrame(columns = multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            cv_score = joblib.load(f'../results/Base_models/{name}_CVScore')
            if e == 'LD50':
                scores = report_reg_scores(cv_score)
                row = pd.Series([name] + scores, index=reg_cols).to_frame().T
                base_reg_cv_scores = pd.concat([base_reg_cv_scores, row], ignore_index=True)
            if e == 'EPA':
                scores = report_clf_scores(cv_score)
                row = pd.Series([name] + scores, index=multiclass_cols).to_frame().T
                base_epa_cv_scores = pd.concat([base_epa_cv_scores, row], ignore_index=True)
            if e == 'Toxic':
                scores = report_clf_scores(cv_score)
                row = pd.Series([name] + scores, index=binary_cols).to_frame().T
                base_toxic_cv_scores = pd.concat([base_toxic_cv_scores, row], ignore_index=True)    

In [70]:
base_reg_cv_scores.head(1)

Unnamed: 0,name,RMSE,RMSE (std),R2,R2 (std),MAE,MAE (std),MSE,MSE (std)
0,LD50_knn_ecfp6bits,0.646,0.017,0.488,0.026,0.476,0.014,0.418,0.022


Get the cv scores from all the Hierarchical Models

In [72]:
endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['Hmodel']
algorithms = ['knn', 'SVM', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'RMSE (std)', 'R2','R2 (std)', 'MAE', 'MAE (std)', 'MSE', 'MSE (std)']
binary_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']
multiclass_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']

H_reg_cv_scores = pd.DataFrame(columns = reg_cols)
H_toxic_cv_scores = pd.DataFrame(columns = binary_cols)
H_epa_cv_scores = pd.DataFrame(columns = multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            cv_score = joblib.load(f'../results/Hierarchical_models/{name}_CVScore')
            if e == 'LD50':
                scores = report_reg_scores(cv_score)
                new_row = pd.DataFrame([[name] + scores], columns=reg_cols)
                H_reg_cv_scores = pd.concat([H_reg_cv_scores, new_row], ignore_index=True)
            if e == 'EPA':
                scores = report_clf_scores(cv_score)
                new_row = pd.DataFrame([[name] + scores], columns=multiclass_cols)
                H_epa_cv_scores = pd.concat([H_epa_cv_scores, new_row], ignore_index=True)
            if e == 'Toxic':
                scores = report_clf_scores(cv_score)
                new_row = pd.DataFrame([[name] + scores], columns=binary_cols)
                H_toxic_cv_scores = pd.concat([H_toxic_cv_scores, new_row], ignore_index=True)

  H_toxic_cv_scores = pd.concat([H_toxic_cv_scores, new_row], ignore_index=True)
  H_epa_cv_scores = pd.concat([H_epa_cv_scores, new_row], ignore_index=True)
  H_reg_cv_scores = pd.concat([H_reg_cv_scores, new_row], ignore_index=True)


In [73]:
reg_cv_scores = pd.concat([H_reg_cv_scores, base_reg_cv_scores])
toxic_cv_scores = pd.concat([H_toxic_cv_scores, base_toxic_cv_scores])
epa_cv_scores = pd.concat([H_epa_cv_scores, base_epa_cv_scores])

In [74]:
# save the results
reg_cv_scores.to_csv('../results/model_evaluation/reg_cv_scores.csv', index =False)
toxic_cv_scores.to_csv('../results/model_evaluation/toxic_cv_scores.csv', index =False)
epa_cv_scores.to_csv('../results/model_evaluation/epa_cv_scores.csv', index =False)