In [1]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
import chemprop

In [2]:
import sys
sys.path.append('../../../../code')

from metrics import get_hi_metrics

def chemprop_prepare_df(original_data):
    result = pd.DataFrame({
        'smiles': original_data['smiles'],
        'targets': original_data['value'].astype(float)
    })
    return result

def chemprop_process_folder(input_path, output_path):
    files = ['train_1.csv', 'train_2.csv', 'train_3.csv', 'test_1.csv', 'test_2.csv', 'test_3.csv']
    for file in files:
        input_data = pd.read_csv(input_path + file)
        output_data = chemprop_prepare_df(input_data)
        output_data.to_csv(output_path + file, index=False)

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch_geometric'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. cannot import name 'DMPNN' from 'deepchem.models.torch_models' (/home/simon/miniconda3/envs/chemprop/lib/python3.8/site-packages/deepchem/models/torch_models/__init__.py)
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'pytorch_lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


In [3]:
train = pd.read_csv('../../../../data/hi/sol/train_1.csv')
test = pd.read_csv('../../../../data/hi/sol/test_1.csv')

train

Unnamed: 0.1,Unnamed: 0,smiles,value
0,2089,O=C(CN1C(=O)CCC1=O)Nc1ccc(F)c(F)c1F,False
1,2099,COc1ccc(-c2nnc(NC(=O)c3ccc4ccccc4c3)o2)cc1OC,True
2,61,O=C(Nc1ccc2ccccc2n1)c1ccc(N2CCOC2=O)cc1,True
3,89,Cc1c[nH]c(=O)n1-c1ccc(C(=O)Nc2ccc3ccccc3n2)cc1,True
4,97,O=C(C1CCC(O)CC1)N1CCCN(c2ccc(F)cc2)CC1,False
...,...,...,...
1437,2143,N#CCCn1nc(C(F)(F)F)cc1O,False
1438,2147,Cc1ccccc1N1C(=O)c2ccc(C(=O)NC(C)(C)C)cc2C1=O,False
1439,2160,CC(C)(C)C(=O)Nc1sc2c(c1C#N)CCC2,True
1440,2165,C[C@@H](c1ccc(F)cc1)n1nnc2cnc3ccc(-c4ccc5ocnc5...,True


In [4]:
import os
import os
import shutil

temp_datapath = '/tmp/chemprop/'
if os.path.exists(temp_datapath) and os.path.isdir(temp_datapath):
    shutil.rmtree(temp_datapath)
os.mkdir(temp_datapath)

input_paths = [
    '/home/simon/papers/lohi/data/hi/sol/'
]

output_paths = [
    temp_datapath
]

for i in range(len(input_paths)):
    chemprop_process_folder(input_paths[i], output_paths[i])

In [5]:
base_arguments = [
    '--dataset_type', 'classification', 
    '--data_path', temp_datapath + 'train_1.csv',
    '--separate_val_path', temp_datapath + 'test_1.csv',
    '--separate_test_path', temp_datapath + 'test_1.csv',
    '--save_preds',
    '--metric', 'prc-auc',
    '--epochs', '100',
    '--gpu', '0',
    '--save_dir', temp_datapath + 'checkpoint',
    '--features_generator', 'rdkit_2d_normalized',
    '--no_features_scaling'
]

In [6]:
param_dict = {
    '--depth': ['3', '4', '5', '6'],
    '--dropout': ['0.0', '0.2', '0.3', '0.5', '0.7'],
    '--ffn_hidden_size': ['600', '1200', '2400', '3600'],
    '--ffn_num_layers': ['1', '2', '3'],
    '--hidden_size': ['600', '1200', '2400', '3600']
}

In [7]:
from copy import copy
from tqdm import tqdm
from sklearn.model_selection import ParameterSampler

hyperparam_list = list(ParameterSampler(param_dict, n_iter=20))

best_score = 0.0
best_hyperparams = None

for i, hyperparam in tqdm(enumerate(hyperparam_list)):
    print('HYPERPARAMETER', i)
    print(hyperparam)
    result_arguments = copy(base_arguments)
    for key, value in hyperparam.items():
        result_arguments.append(key)
        result_arguments.append(value)
    # Train
    args = chemprop.args.TrainArgs().parse_args(result_arguments)
    mean_score, std_score = chemprop.train.cross_validate(args=args, train_func=chemprop.train.run_training)

    test_preds = pd.read_csv(temp_datapath + 'checkpoint/test_preds.csv')
    metrics = get_hi_metrics(test, test_preds['targets'].to_list())

    score = metrics['prc_auc']
    if score > best_score:
        print('New best PRC AUC:', score)
        best_score = score
        best_hyperparams = hyperparam

0it [00:00, ?it/s]Command line
python /home/simon/miniconda3/envs/chemprop/lib/python3.8/site-packages/ipykernel_launcher.py --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"0fbbc264-b232-4363-ab26-cbd03fc47663" --shell=9002 --transport="tcp" --iopub=9004 --f=/home/simon/.local/share/jupyter/runtime/kernel-v2-283513zdsaee6PI6kh.json
Args
{'activation': 'ReLU',
 'adding_bond_types': True,
 'adding_h': False,
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_constraints': [],
 'atom_descriptor_scaling': True,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'atom_targets': [],
 'batch_size': 50,
 'bias': False,
 'bias_solvent': False,
 'bond_constraints': [],
 'bond_descriptor_scaling': True,
 'bond_descriptors': None,
 'bond_descriptors_path': None,
 'bond_descriptors_size': 0,
 'bond_features_size': 0,
 'bond_targets': [],
 'cach

HYPERPARAMETER 0
{'--hidden_size': '600', '--ffn_num_layers': '2', '--ffn_hidden_size': '3600', '--dropout': '0.2', '--depth': '3'}


1442it [00:00, 322621.56it/s]
100%|██████████| 1442/1442 [00:33<00:00, 43.44it/s]
100%|██████████| 1442/1442 [00:00<00:00, 264366.92it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 439165.43it/s]
100%|██████████| 721/721 [00:16<00:00, 44.90it/s]
100%|██████████| 721/721 [00:00<00:00, 257918.40it/s]
721it [00:00, 471042.55it/s]
100%|██████████| 721/721 [00:15<00:00, 45.54it/s]
100%|██████████| 721/721 [00:00<00:00, 268164.69it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=600, bias=False)
        (W_h): Linear(in_features=600, out_features=600, bias=False)
        (W_o): Linear(in_features=733, out_features=600, bias=True)
      )
    )
  )
  (

New best PRC AUC: 0.6605853929566027
HYPERPARAMETER 1
{'--hidden_size': '1200', '--ffn_num_layers': '1', '--ffn_hidden_size': '1200', '--dropout': '0.2', '--depth': '4'}


1442it [00:00, 429436.69it/s]
100%|██████████| 1442/1442 [00:29<00:00, 48.87it/s]
100%|██████████| 1442/1442 [00:00<00:00, 252639.36it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 423958.11it/s]
100%|██████████| 721/721 [00:14<00:00, 49.57it/s]
100%|██████████| 721/721 [00:00<00:00, 277109.24it/s]
721it [00:00, 431766.59it/s]
100%|██████████| 721/721 [00:14<00:00, 49.95it/s]
100%|██████████| 721/721 [00:00<00:00, 277975.29it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=1200, bias=False)
        (W_h): Linear(in_features=1200, out_features=1200, bias=False)
        (W_o): Linear(in_features=1333, out_features=1200, bias=True)
      )
    )
  

New best PRC AUC: 0.6840081722945363
HYPERPARAMETER 2
{'--hidden_size': '3600', '--ffn_num_layers': '2', '--ffn_hidden_size': '2400', '--dropout': '0.3', '--depth': '3'}


1442it [00:00, 431982.46it/s]
100%|██████████| 1442/1442 [00:30<00:00, 47.98it/s]
100%|██████████| 1442/1442 [00:00<00:00, 268891.94it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 519692.93it/s]
100%|██████████| 721/721 [00:14<00:00, 49.52it/s]
100%|██████████| 721/721 [00:00<00:00, 274917.56it/s]
721it [00:00, 518267.90it/s]
100%|██████████| 721/721 [00:14<00:00, 48.70it/s]
100%|██████████| 721/721 [00:00<00:00, 280788.60it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.3, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=3600, bias=False)
        (W_h): Linear(in_features=3600, out_features=3600, bias=False)
        (W_o): Linear(in_features=3733, out_features=3600, bias=True)
      )
    )
  

HYPERPARAMETER 3
{'--hidden_size': '600', '--ffn_num_layers': '3', '--ffn_hidden_size': '600', '--dropout': '0.3', '--depth': '5'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 174314.39it/s]
100%|██████████| 1442/1442 [01:03<00:00, 22.62it/s]
100%|██████████| 1442/1442 [00:00<00:00, 160442.11it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 278077.53it/s]
100%|██████████| 721/721 [00:21<00:00, 33.18it/s]
100%|██████████| 721/721 [00:00<00:00, 252977.51it/s]
721it [00:00, 368252.94it/s]
100%|██████████| 721/721 [00:14<00:00, 48.09it/s]
100%|██████████| 721/721 [00:00<00:00, 283553.04it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.3, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=600, bias=False)
        (W_h): Linear(in_features=600, out_features=600, bias=False)
        (W_o): Linear(

HYPERPARAMETER 4
{'--hidden_size': '2400', '--ffn_num_layers': '2', '--ffn_hidden_size': '600', '--dropout': '0.3', '--depth': '6'}


1442it [00:00, 442507.05it/s]
100%|██████████| 1442/1442 [00:32<00:00, 44.57it/s]
100%|██████████| 1442/1442 [00:00<00:00, 267169.64it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 512905.90it/s]
100%|██████████| 721/721 [00:15<00:00, 46.53it/s]
100%|██████████| 721/721 [00:00<00:00, 254982.56it/s]
721it [00:00, 477362.78it/s]
100%|██████████| 721/721 [00:15<00:00, 47.75it/s]
100%|██████████| 721/721 [00:00<00:00, 280346.08it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.3, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=2400, bias=False)
        (W_h): Linear(in_features=2400, out_features=2400, bias=False)
        (W_o): Linear(in_features=2533, out_features=2400, bias=True)
      )
    )
  

HYPERPARAMETER 5
{'--hidden_size': '3600', '--ffn_num_layers': '1', '--ffn_hidden_size': '1200', '--dropout': '0.7', '--depth': '4'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 209200.18it/s]
100%|██████████| 1442/1442 [01:01<00:00, 23.42it/s]
100%|██████████| 1442/1442 [00:00<00:00, 158516.22it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 270879.00it/s]
100%|██████████| 721/721 [00:24<00:00, 29.85it/s]
100%|██████████| 721/721 [00:00<00:00, 252154.86it/s]
721it [00:00, 517292.71it/s]
100%|██████████| 721/721 [00:15<00:00, 46.51it/s]
100%|██████████| 721/721 [00:00<00:00, 277567.07it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.7, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=3600, bias=False)
        (W_h): Linear(in_features=3600, out_features=3600, bias=False)
        (W_o): Line

HYPERPARAMETER 6
{'--hidden_size': '2400', '--ffn_num_layers': '3', '--ffn_hidden_size': '1200', '--dropout': '0.7', '--depth': '3'}


1442it [00:00, 356320.63it/s]
100%|██████████| 1442/1442 [00:34<00:00, 42.18it/s]
100%|██████████| 1442/1442 [00:00<00:00, 188194.24it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 259980.50it/s]
100%|██████████| 721/721 [00:27<00:00, 26.66it/s]
100%|██████████| 721/721 [00:00<00:00, 61455.32it/s]
721it [00:00, 69564.16it/s]
100%|██████████| 721/721 [00:18<00:00, 38.84it/s]
100%|██████████| 721/721 [00:00<00:00, 263675.40it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.7, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=2400, bias=False)
        (W_h): Linear(in_features=2400, out_features=2400, bias=False)
        (W_o): Linear(in_features=2533, out_features=2400, bias=True)
      )
    )
  )


HYPERPARAMETER 7
{'--hidden_size': '3600', '--ffn_num_layers': '3', '--ffn_hidden_size': '1200', '--dropout': '0.7', '--depth': '6'}


{'activation': 'ReLU',
 'adding_bond_types': True,
 'adding_h': False,
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_constraints': [],
 'atom_descriptor_scaling': True,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'atom_targets': [],
 'batch_size': 50,
 'bias': False,
 'bias_solvent': False,
 'bond_constraints': [],
 'bond_descriptor_scaling': True,
 'bond_descriptors': None,
 'bond_descriptors_path': None,
 'bond_descriptors_size': 0,
 'bond_features_size': 0,
 'bond_targets': [],
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_frzn': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': None,
 'constraints_path': None,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/tmp/chemprop/train_1.csv',
 'data_weights_path': None,
 'dataset_type': 'classificati

HYPERPARAMETER 8
{'--hidden_size': '2400', '--ffn_num_layers': '3', '--ffn_hidden_size': '2400', '--dropout': '0.7', '--depth': '4'}


1442it [00:00, 413268.63it/s]
100%|██████████| 1442/1442 [00:45<00:00, 32.01it/s]
100%|██████████| 1442/1442 [00:00<00:00, 207991.55it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 357626.91it/s]
100%|██████████| 721/721 [00:18<00:00, 38.83it/s]
100%|██████████| 721/721 [00:00<00:00, 256018.73it/s]
721it [00:00, 470602.74it/s]
100%|██████████| 721/721 [00:16<00:00, 44.96it/s]
100%|██████████| 721/721 [00:00<00:00, 275343.09it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.7, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=2400, bias=False)
        (W_h): Linear(in_features=2400, out_features=2400, bias=False)
        (W_o): Linear(in_features=2533, out_features=2400, bias=True)
      )
    )
  

HYPERPARAMETER 9
{'--hidden_size': '3600', '--ffn_num_layers': '3', '--ffn_hidden_size': '1200', '--dropout': '0.2', '--depth': '5'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 210532.80it/s]
100%|██████████| 1442/1442 [01:02<00:00, 23.14it/s]
100%|██████████| 1442/1442 [00:00<00:00, 159250.81it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 276248.58it/s]
100%|██████████| 721/721 [00:15<00:00, 46.37it/s]
100%|██████████| 721/721 [00:00<00:00, 270418.78it/s]
721it [00:00, 508678.42it/s]
100%|██████████| 721/721 [00:15<00:00, 46.08it/s]
100%|██████████| 721/721 [00:00<00:00, 264020.71it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=3600, bias=False)
        (W_h): Linear(in_features=3600, out_features=3600, bias=False)
        (W_o): Line

HYPERPARAMETER 10
{'--hidden_size': '3600', '--ffn_num_layers': '3', '--ffn_hidden_size': '2400', '--dropout': '0.2', '--depth': '6'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 224856.36it/s]
100%|██████████| 1442/1442 [01:02<00:00, 23.05it/s]
100%|██████████| 1442/1442 [00:00<00:00, 161014.47it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 269527.02it/s]
100%|██████████| 721/721 [00:21<00:00, 34.02it/s]
100%|██████████| 721/721 [00:00<00:00, 268069.60it/s]
721it [00:00, 533632.11it/s]
100%|██████████| 721/721 [00:14<00:00, 48.49it/s]
100%|██████████| 721/721 [00:00<00:00, 277058.47it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=3600, bias=False)
        (W_h): Linear(in_features=3600, out_features=3600, bias=False)
        (W_o): Line

HYPERPARAMETER 11
{'--hidden_size': '600', '--ffn_num_layers': '1', '--ffn_hidden_size': '600', '--dropout': '0.3', '--depth': '5'}


Command line
python /home/simon/miniconda3/envs/chemprop/lib/python3.8/site-packages/ipykernel_launcher.py --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"0fbbc264-b232-4363-ab26-cbd03fc47663" --shell=9002 --transport="tcp" --iopub=9004 --f=/home/simon/.local/share/jupyter/runtime/kernel-v2-283513zdsaee6PI6kh.json
Args
{'activation': 'ReLU',
 'adding_bond_types': True,
 'adding_h': False,
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_constraints': [],
 'atom_descriptor_scaling': True,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'atom_targets': [],
 'batch_size': 50,
 'bias': False,
 'bias_solvent': False,
 'bond_constraints': [],
 'bond_descriptor_scaling': True,
 'bond_descriptors': None,
 'bond_descriptors_path': None,
 'bond_descriptors_size': 0,
 'bond_features_size': 0,
 'bond_targets': [],
 'cache_cutoff': 10000,


New best PRC AUC: 0.6926138745668395
HYPERPARAMETER 12
{'--hidden_size': '2400', '--ffn_num_layers': '1', '--ffn_hidden_size': '1200', '--dropout': '0.7', '--depth': '3'}


1442it [00:00, 153803.95it/s]
100%|██████████| 1442/1442 [00:31<00:00, 45.55it/s]
100%|██████████| 1442/1442 [00:00<00:00, 273809.88it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 469506.78it/s]
100%|██████████| 721/721 [00:15<00:00, 47.72it/s]
100%|██████████| 721/721 [00:00<00:00, 263721.39it/s]
721it [00:00, 489811.01it/s]
100%|██████████| 721/721 [00:15<00:00, 47.79it/s]
100%|██████████| 721/721 [00:00<00:00, 266698.40it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.7, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=2400, bias=False)
        (W_h): Linear(in_features=2400, out_features=2400, bias=False)
        (W_o): Linear(in_features=2533, out_features=2400, bias=True)
      )
    )
  

HYPERPARAMETER 13
{'--hidden_size': '2400', '--ffn_num_layers': '3', '--ffn_hidden_size': '600', '--dropout': '0.0', '--depth': '6'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 85016.89it/s]
100%|██████████| 1442/1442 [00:44<00:00, 32.71it/s]
100%|██████████| 1442/1442 [00:00<00:00, 265411.02it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 505278.73it/s]
100%|██████████| 721/721 [00:15<00:00, 47.32it/s]
100%|██████████| 721/721 [00:00<00:00, 256888.65it/s]
721it [00:00, 494698.71it/s]
100%|██████████| 721/721 [00:15<00:00, 47.65it/s]
100%|██████████| 721/721 [00:00<00:00, 281546.71it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=2400, bias=False)
        (W_h): Linear(in_features=2400, out_features=2400, bias=False)
        (W_o): Linea

HYPERPARAMETER 14
{'--hidden_size': '600', '--ffn_num_layers': '1', '--ffn_hidden_size': '1200', '--dropout': '0.2', '--depth': '6'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 146512.59it/s]
100%|██████████| 1442/1442 [01:02<00:00, 23.24it/s]
100%|██████████| 1442/1442 [00:00<00:00, 20110.08it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 30032.21it/s]
100%|██████████| 721/721 [00:19<00:00, 37.64it/s]
100%|██████████| 721/721 [00:00<00:00, 256192.24it/s]
721it [00:00, 499602.38it/s]
100%|██████████| 721/721 [00:15<00:00, 45.57it/s]
100%|██████████| 721/721 [00:00<00:00, 257435.36it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=600, bias=False)
        (W_h): Linear(in_features=600, out_features=600, bias=False)
        (W_o): Linear(in

HYPERPARAMETER 15
{'--hidden_size': '600', '--ffn_num_layers': '2', '--ffn_hidden_size': '2400', '--dropout': '0.7', '--depth': '6'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 217952.66it/s]
100%|██████████| 1442/1442 [00:43<00:00, 32.90it/s]
100%|██████████| 1442/1442 [00:00<00:00, 263079.01it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 511604.33it/s]
100%|██████████| 721/721 [00:15<00:00, 47.26it/s]
100%|██████████| 721/721 [00:00<00:00, 277873.12it/s]
721it [00:00, 519692.93it/s]
100%|██████████| 721/721 [00:14<00:00, 48.40it/s]
100%|██████████| 721/721 [00:00<00:00, 267784.75it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.7, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=600, bias=False)
        (W_h): Linear(in_features=600, out_features=600, bias=False)
        (W_o): Linear(

HYPERPARAMETER 16
{'--hidden_size': '3600', '--ffn_num_layers': '3', '--ffn_hidden_size': '2400', '--dropout': '0.5', '--depth': '4'}


1442it [00:00, 369603.18it/s]
100%|██████████| 1442/1442 [00:33<00:00, 43.19it/s]
100%|██████████| 1442/1442 [00:00<00:00, 247643.06it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 379244.19it/s]
100%|██████████| 721/721 [00:16<00:00, 43.93it/s]
100%|██████████| 721/721 [00:00<00:00, 149426.48it/s]
721it [00:00, 363254.44it/s]
100%|██████████| 721/721 [00:16<00:00, 44.73it/s]
100%|██████████| 721/721 [00:00<00:00, 254832.16it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.5, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=3600, bias=False)
        (W_h): Linear(in_features=3600, out_features=3600, bias=False)
        (W_o): Linear(in_features=3733, out_features=3600, bias=True)
      )
    )
  

HYPERPARAMETER 17
{'--hidden_size': '600', '--ffn_num_layers': '3', '--ffn_hidden_size': '1200', '--dropout': '0.5', '--depth': '5'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 304955.70it/s]
100%|██████████| 1442/1442 [00:42<00:00, 33.85it/s]
100%|██████████| 1442/1442 [00:00<00:00, 217984.08it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 369332.34it/s]
100%|██████████| 721/721 [00:19<00:00, 36.57it/s]
100%|██████████| 721/721 [00:00<00:00, 230829.19it/s]
721it [00:00, 225157.71it/s]
100%|██████████| 721/721 [00:19<00:00, 37.88it/s]
100%|██████████| 721/721 [00:00<00:00, 223559.78it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.5, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=600, bias=False)
        (W_h): Linear(in_features=600, out_features=600, bias=False)
        (W_o): Linear(

HYPERPARAMETER 18
{'--hidden_size': '600', '--ffn_num_layers': '3', '--ffn_hidden_size': '1200', '--dropout': '0.2', '--depth': '3'}


1442it [00:00, 322157.58it/s]
100%|██████████| 1442/1442 [00:40<00:00, 35.84it/s]
100%|██████████| 1442/1442 [00:00<00:00, 217795.69it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 395771.91it/s]
100%|██████████| 721/721 [00:19<00:00, 36.71it/s]
100%|██████████| 721/721 [00:00<00:00, 226591.73it/s]
721it [00:00, 422418.38it/s]
100%|██████████| 721/721 [00:19<00:00, 37.07it/s]
100%|██████████| 721/721 [00:00<00:00, 181040.06it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=600, bias=False)
        (W_h): Linear(in_features=600, out_features=600, bias=False)
        (W_o): Linear(in_features=733, out_features=600, bias=True)
      )
    )
  )
  (

HYPERPARAMETER 19
{'--hidden_size': '2400', '--ffn_num_layers': '3', '--ffn_hidden_size': '2400', '--dropout': '0.2', '--depth': '4'}


Setting molecule featurization parameters to default.
Loading data
1442it [00:00, 317773.68it/s]
100%|██████████| 1442/1442 [00:39<00:00, 36.94it/s]
100%|██████████| 1442/1442 [00:00<00:00, 225939.94it/s]
Number of tasks = 1
Fold 0
Splitting data with seed 0
721it [00:00, 294803.39it/s]
100%|██████████| 721/721 [00:19<00:00, 36.84it/s]
100%|██████████| 721/721 [00:00<00:00, 221010.98it/s]
721it [00:00, 409325.01it/s]
100%|██████████| 721/721 [00:19<00:00, 36.86it/s]
100%|██████████| 721/721 [00:00<00:00, 234589.50it/s]
Class sizes
targets 0: 78.64%, 1: 21.36%
Total size = 1,442 | train size = 1,442 | val size = 721 | test size = 721
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=2400, bias=False)
        (W_h): Linear(in_features=2400, out_features=2400, bias=False)
        (W_o): Line

In [9]:
print(best_hyperparams)

{'--hidden_size': '600', '--ffn_num_layers': '1', '--ffn_hidden_size': '600', '--dropout': '0.3', '--depth': '5'}


In [10]:
print(best_score)

0.6926138745668395


In [11]:
from copy import copy

result_arguments = copy(base_arguments)
result_arguments.extend([
    '--hidden_size', '600',
    '--ffn_num_layers', '1',
    '--ffn_hidden_size', '600',
    '--dropout', '0.3',
    '--depth', '5'
])

args = chemprop.args.TrainArgs().parse_args(result_arguments)
mean_score, std_score = chemprop.train.cross_validate(args=args, train_func=chemprop.train.run_training)


Command line
python /home/simon/miniconda3/envs/chemprop/lib/python3.8/site-packages/ipykernel_launcher.py --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"0fbbc264-b232-4363-ab26-cbd03fc47663" --shell=9002 --transport="tcp" --iopub=9004 --f=/home/simon/.local/share/jupyter/runtime/kernel-v2-283513zdsaee6PI6kh.json
Args
{'activation': 'ReLU',
 'adding_bond_types': True,
 'adding_h': False,
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_constraints': [],
 'atom_descriptor_scaling': True,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'atom_targets': [],
 'batch_size': 50,
 'bias': False,
 'bias_solvent': False,
 'bond_constraints': [],
 'bond_descriptor_scaling': True,
 'bond_descriptors': None,
 'bond_descriptors_path': None,
 'bond_descriptors_size': 0,
 'bond_features_size': 0,
 'bond_targets': [],
 'cache_cutoff': 10000,


In [9]:
max_epoch = 27