In [45]:
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score
from rdkit import Chem
import chemprop

def evaluate_metrics(y_true, y_pred, threshold=0.5):
    """Compute AUC, accuracy, precision and recall for single-task predictions.

    Parameters
    ----------
    y_true : sequence
        Ground-truth binary labels, one per sample.
    y_pred : sequence of sequences
        One entry per sample; only element 0 of each entry is used as the
        predicted score.
    threshold : float, optional
        Scores strictly greater than this value are labelled 1, all others 0.
        Defaults to 0.5.

    Returns
    -------
    dict
        Keys 'AUC', 'Accuracy', 'Precision' and 'Recall', each mapped to the
        metric formatted as a string with three decimals.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same length.
    """
    import warnings

    if len(y_true) != len(y_pred):
        raise ValueError(
            "y_true and y_pred must have the same length "
            "(got {} and {})".format(len(y_true), len(y_pred))
        )

    # chemprop_pred requests return_invalid_smiles=True, so entries for
    # unparsable SMILES may be non-numeric placeholders (chemprop 1.x appears
    # to use the string 'Invalid SMILES' -- confirm for the installed version).
    # Comparing such a placeholder with the threshold would raise TypeError,
    # so those samples are dropped together with their labels.
    kept_true = []
    kept_score = []
    for label, pred in zip(y_true, y_pred):
        score = None if pred is None else pred[0]
        if score is None or isinstance(score, str):
            continue
        kept_true.append(label)
        kept_score.append(score)

    n_skipped = len(y_pred) - len(kept_score)
    if n_skipped:
        warnings.warn(
            "evaluate_metrics: skipped {} sample(s) without a numeric "
            "prediction".format(n_skipped)
        )

    pred_label = [1 if score > threshold else 0 for score in kept_score]

    # AUC is computed on the raw scores; the other metrics on thresholded labels.
    auc = roc_auc_score(kept_true, kept_score)
    accuracy = accuracy_score(kept_true, pred_label)
    precision = precision_score(kept_true, pred_label)
    recall = recall_score(kept_true, pred_label)

    return {
        'AUC': "{:.3f}".format(auc),
        'Accuracy': "{:.3f}".format(accuracy),
        'Precision': "{:.3f}".format(precision),
        'Recall': "{:.3f}".format(recall)
    }

def chemprop_pred(
    smiles_list,
    checkpoint_dir='/home/jovyan/my_code/uncertainty/checkpoints/hERGDB/hERGDB_model',
):
    """Run a trained chemprop model on a list of SMILES strings.

    Parameters
    ----------
    smiles_list : iterable of str
        SMILES strings to predict; each one is wrapped in its own
        single-element list before being handed to chemprop.
    checkpoint_dir : str or path-like, optional
        Directory passed to chemprop as ``--checkpoint_dir``. Defaults to the
        hERGDB model directory this notebook was written against.

    Returns
    -------
    The value returned by ``chemprop.train.make_predictions``. Because
    ``return_invalid_smiles=True`` is passed, invalid SMILES are reported in
    the result rather than skipped (presumably keeping it aligned with
    ``smiles_list`` -- confirm against the installed chemprop version).
    """
    # SMILES are supplied in memory, so both file paths are pointed at
    # /dev/null: nothing is read from or kept on disk.
    arguments = [
        '--test_path', '/dev/null',
        '--preds_path', '/dev/null',
        '--checkpoint_dir', str(checkpoint_dir)
    ]

    args = chemprop.args.PredictArgs().parse_args(arguments)
    # NOTE(review): the model is reloaded on every call; load once and reuse
    # model_objects if this is called many times.
    model_objects = chemprop.train.load_model(args=args)
    smiles = [[s] for s in smiles_list]

    preds = chemprop.train.make_predictions(
        args=args,
        smiles=smiles,
        model_objects=model_objects,
        return_invalid_smiles=True
    )

    return preds

In [46]:
def _smiles_and_labels(frame):
    """Return the 'smiles' and 'class' columns of ``frame`` as two plain lists."""
    return frame['smiles'].tolist(), frame['class'].tolist()


# Read the validation split and the four weekly splits.
df_val = pd.read_csv('/home/jovyan/my_code/uncertainty/data/hERGDB/hERGDB_cls_valid_data.csv')
df_week1 = pd.read_csv('/home/jovyan/my_code/uncertainty/data/hERGDB/hERGDB_cls_week1_1201.csv')
df_week2 = pd.read_csv('/home/jovyan/my_code/uncertainty/data/hERGDB/hERGDB_cls_week2_1201.csv')
df_week3 = pd.read_csv('/home/jovyan/my_code/uncertainty/data/hERGDB/hERGDB_cls_week3_1201.csv')
df_week4 = pd.read_csv('/home/jovyan/my_code/uncertainty/data/hERGDB/hERGDB_cls_week4_1201.csv')

# For every split: the SMILES inputs and the matching class labels.
val_smi, val_true = _smiles_and_labels(df_val)
week1_smi, week1_true = _smiles_and_labels(df_week1)
week2_smi, week2_true = _smiles_and_labels(df_week2)
week3_smi, week3_true = _smiles_and_labels(df_week3)
week4_smi, week4_true = _smiles_and_labels(df_week4)

In [47]:
# Score the validation split first ...
val_prob = chemprop_pred(smiles_list=val_smi)

# ... then each weekly split, in chronological order.
week1_prob = chemprop_pred(smiles_list=week1_smi)
week2_prob = chemprop_pred(smiles_list=week2_smi)
week3_prob = chemprop_pred(smiles_list=week3_smi)
week4_prob = chemprop_pred(smiles_list=week4_smi)

Loading training args


  vars(torch.load(path, map_location=lambda storage, loc: storage)["args"]),
  state = torch.load(path, map_location=lambda storage, loc: storage)
  state = torch.load(path, map_location=lambda storage, loc: storage)


Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Setting molecule featurization parameters to default.
Loading data
Validating SMILES
Test size = 1,422


100%|██████████| 1/1 [00:04<00:00,  4.97s/it]


Saving predictions to /dev/null
Elapsed time = 0:00:05
Loading training args
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Setting molecule featurization parameters to default.
Loading data
Validating SMILES
Test size = 843


100%|██████████| 1/1 [00:02<00:00,  2.92s/it]


Saving predictions to /dev/null
Elapsed time = 0:00:03
Loading training args
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Setting molecule featurization parameters to default.
Loading data
Validating SMILES
Test size = 646


100%|██████████| 1/1 [00:02<00:00,  2.43s/it]


Saving predictions to /dev/null
Elapsed time = 0:00:03
Loading training args
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Setting molecule featurization parameters to default.
Loading data
Validating SMILES
Test size = 1,052


100%|██████████| 1/1 [00:03<00:00,  3.68s/it]


Saving predictions to /dev/null
Elapsed time = 0:00:04
Loading training args
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "readout.1.weight".
Loading pretrained parameter "readout.1.bias".
Loading pretrained parameter "readout.4.weight".
Loading pretrained parameter "readout.4.bias".
Setting molecule featurization parameters to default.
Loading data
Validating SMILES
Test size = 775


100%|██████████| 1/1 [00:02<00:00,  2.63s/it]

Saving predictions to /dev/null
Elapsed time = 0:00:03





In [48]:
# Metrics of the model on the validation split.
val_metrics = evaluate_metrics(val_true, val_prob)
print('val set', val_metrics)

{'AUC': '0.877', 'Accuracy': '0.793', 'Precision': '0.794', 'Recall': '0.779'}


In [49]:
# Report each weekly split under its own label. Previously every line was
# printed as 'val set', which made the weekly results indistinguishable from
# one another and from the validation result.
for split_name, split_true, split_prob in (
    ('week1 set', week1_true, week1_prob),
    ('week2 set', week2_true, week2_prob),
    ('week3 set', week3_true, week3_prob),
    ('week4 set', week4_true, week4_prob),
):
    print(split_name, evaluate_metrics(split_true, split_prob))

val set {'AUC': '0.725', 'Accuracy': '0.690', 'Precision': '0.584', 'Recall': '0.651'}
val set {'AUC': '0.686', 'Accuracy': '0.622', 'Precision': '0.635', 'Recall': '0.722'}
val set {'AUC': '0.761', 'Accuracy': '0.723', 'Precision': '0.704', 'Recall': '0.806'}
val set {'AUC': '0.811', 'Accuracy': '0.739', 'Precision': '0.804', 'Recall': '0.741'}
