In [1]:
import os
from visar.model_training_utils import (
    ST_model_hyperparam_screen, 
    ST_model_training,
    RobustMT_model_training,
    RobustMT_model_hyperparam_screen
)
from visar.VISAR_model_utils import (
    generate_RUNKEY_dataframe_baseline,
    generate_RUNKEY_dataframe_RobustMT,
    generate_RUNKEY_dataframe_ST,
    generate_performance_plot_ST,
    generate_performance_plot_RobustMT
)

import pandas as pd
import seaborn as sns
from collections import OrderedDict
# Restrict CUDA to the device with index 1 for this kernel process.
# NOTE(review): this runs after the visar imports above; it only has an effect
# if nothing has initialized CUDA by this point -- confirm, or move it ahead of
# the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Initialize the run parameters shared by the cells below.

# Target labels and their task IDs.
# NOTE(review): presumably aligned by position (one task ID per target) -- confirm.
protein_targets = ['5HT-1b', '5HT-2b', '5HT-2c']
task_names = ['T106', 'T227', 'T108']

# Input data file and fingerprint type handed to the screening/training helpers.
MT_dat_name = './data/MT_data_clean_June28.csv'
FP_type = 'Circular_2048'

# Directory passed to the helpers as `log_path`.
log_path = './logs/Demo_GPCRs'

add_features = None
# NOTE(review): presumably column names in MT_dat_name -- verify against the CSV.
smiles_field = 'salt_removed_smi'
id_field = 'molregno'

# Derived from log_path so the two cannot drift apart when the log directory is
# changed (previously the directory was repeated as a second literal).
# NOTE(review): the recorded screening log reports loading '<log_path>/temp.csv',
# not 'tmp.csv' -- confirm which file name downstream code expects.
dataset_file = log_path + '/tmp.csv'

# NOTE(review): presumably the fingerprint length implied by FP_type
# ('Circular_2048'); update both together when switching fingerprint types.
n_features = 2048

## Model training and evaluation

### baseline model and single-task models

In [5]:
# Hyperparameter search space: every key maps to the list of candidate values
# to screen. Only `dropouts` (2 values) and `layer_sizes` (8 values) hold more
# than one candidate; all other entries are fixed to a single value.
params_dict = OrderedDict([
    ("n_tasks", [1]),
    ("n_features", [2048]),  # need modification given FP types
    ("activation", ["relu"]),
    ("momentum", [0.9]),
    ("batch_size", [128]),
    ("init", ["glorot_uniform"]),
    ("learning_rate", [0.01]),
    ("decay", [1e-6]),
    ("nb_epoch", [30]),
    ("dropouts", [0.2, 0.4]),
    ("nb_layers", [1]),
    ("batchnorm", [False]),
    (
        "layer_sizes",
        [
            # two-layer candidates
            (1024, 512),
            (1024, 128),
            (512, 128),
            (512, 64),
            (128, 64),
            (64, 32),
            # three-layer candidates
            (512, 128, 64),
            (128, 64, 32),
        ],
    ),
    ("penalty", [0.1]),
])

In [6]:
# Screen the hyperparameter grid in `params_dict` (DeepChem-backed) and keep
# whatever the helper returns in `log_output`.
log_output = ST_model_hyperparam_screen(
    MT_dat_name,
    task_names,
    FP_type,
    params_dict,
    log_path=log_path,
)

----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (3202, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
TIMING: featurizing shard 0 took 11.672 s
TIMING: dataset construction took 11.821 s
Loading dataset from disk.
Preparing dataset for T51 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.268 s
Loading dataset from disk.
TIMING: dataset construction took 0.146 s
Loading dataset from disk.
TIMING: dataset construction took 0.148 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metri

TIMING: dataset construction took 0.141 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.5366203871363865]
Model 1/16, Metric r2_score, Validation set 0: 0.536620
	best_validation_score so far: 0.536620
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.635799801229309]
Model 2/16, Metric r2_score, Validation set 1: 0.635800
	best_validation_score so far: 0.635800
Fitting model 3/16
h

computed_metrics: [0.6279527824450148]
Model 2/16, Metric r2_score, Validation set 1: 0.627953
	best_validation_score so far: 0.627953
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.5615058492113072]
Model 3/16, Metric r2_score, Validation set 2: 0.561506
	best_validation_score so far: 0.627953
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.5180466042070273]
Model 4/16, Metric r2_score, Validation set 3: 0.518047
	best_validation_score s

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (808, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.016 s
TIMING: dataset construction took 3.081 s
Loading dataset from disk.
Preparing dataset for T106 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.087 s
Loading dataset from disk.
TIMING: dataset construction took 0.028 s
Loading dataset from disk.
TIMING: dataset construction took 0.025 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.20210516011655777]
Model 1/16, Metric r2_score, Validation set 0: 0

computed_metrics: [0.18087361224022636]
Model 1/16, Metric r2_score, Validation set 0: 0.180874
	best_validation_score so far: 0.180874
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.7152301466118673]
Model 2/16, Metric r2_score, Validation set 1: 0.715230
	best_validation_score so far: 0.715230
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.724276165488216]
Model 3/16, Metric r2_score, Validation set 2: 0.724276
	best_validation_score

computed_metrics: [0.5701382177378543]
Model 3/16, Metric r2_score, Validation set 2: 0.570138
	best_validation_score so far: 0.647248
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.675092239780181]
Model 4/16, Metric r2_score, Validation set 3: 0.675092
	best_validation_score so far: 0.675092
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.4656264200546356]
Model 5/16, Metric r2_score, Validation set 4: 0.465626
	best_validation_score so 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (883, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.301 s
TIMING: dataset construction took 3.362 s
Loading dataset from disk.
Preparing dataset for T105 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.075 s
Loading dataset from disk.
TIMING: dataset construction took 0.029 s
Loading dataset from disk.
TIMING: dataset construction took 0.027 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.2979667033953348]
Model 1/16, Metric r2_score, Validation set 0: 0.

computed_metrics: [0.4944069903207954]
Model 1/16, Metric r2_score, Validation set 0: 0.494407
	best_validation_score so far: 0.494407
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.8083238752299299]
Model 2/16, Metric r2_score, Validation set 1: 0.808324
	best_validation_score so far: 0.808324
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.8129010143943809]
Model 3/16, Metric r2_score, Validation set 2: 0.812901
	best_validation_score

computed_metrics: [0.6695600867397516]
Model 3/16, Metric r2_score, Validation set 2: 0.669560
	best_validation_score so far: 0.676835
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.7202188951194824]
Model 4/16, Metric r2_score, Validation set 3: 0.720219
	best_validation_score so far: 0.720219
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.7693965116279908]
Model 5/16, Metric r2_score, Validation set 4: 0.769397
	best_validation_score so

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (109, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.508 s
TIMING: dataset construction took 0.527 s
Loading dataset from disk.
Preparing dataset for T10618 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
TIMING: dataset construction took 0.010 s
Loading dataset from disk.
TIMING: dataset construction took 0.010 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.10412746805128958]
Model 1/16, Metric r2_score, Validation set 0:

computed_metrics: [-0.4177477720384051]
Model 1/16, Metric r2_score, Validation set 0: -0.417748
	best_validation_score so far: -0.417748
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.020009588646332843]
Model 2/16, Metric r2_score, Validation set 1: 0.020010
	best_validation_score so far: 0.020010
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.28839138154189103]
Model 3/16, Metric r2_score, Validation set 2: 0.288391
	best_validation

computed_metrics: [0.8520117716601716]
Model 3/16, Metric r2_score, Validation set 2: 0.852012
	best_validation_score so far: 0.852012
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.7926954968334659]
Model 4/16, Metric r2_score, Validation set 3: 0.792695
	best_validation_score so far: 0.852012
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.896966446268291]
Model 5/16, Metric r2_score, Validation set 4: 0.896966
	best_validation_score so 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2951, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 10.764 s
TIMING: dataset construction took 10.911 s
Loading dataset from disk.
Preparing dataset for T107 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.190 s
Loading dataset from disk.
TIMING: dataset construction took 0.086 s
Loading dataset from disk.
TIMING: dataset construction took 0.087 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.5846689383942829

computed_metrics: [0.6079278827170447]
Model 1/16, Metric r2_score, Validation set 0: 0.607928
	best_validation_score so far: 0.607928
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.6474587507512617]
Model 2/16, Metric r2_score, Validation set 1: 0.647459
	best_validation_score so far: 0.647459
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.684983770730619]
Model 3/16, Metric r2_score, Validation set 2: 0.684984
	best_validation_score 

computed_metrics: [0.6057152271011159]
Model 3/16, Metric r2_score, Validation set 2: 0.605715
	best_validation_score so far: 0.605715
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.5776764663359106]
Model 4/16, Metric r2_score, Validation set 3: 0.577676
	best_validation_score so far: 0.605715
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.5975348240295342]
Model 5/16, Metric r2_score, Validation set 4: 0.597535
	best_validation_score so

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1064, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 3.928 s
TIMING: dataset construction took 3.989 s
Loading dataset from disk.
Preparing dataset for T227 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.072 s
Loading dataset from disk.
TIMING: dataset construction took 0.035 s
Loading dataset from disk.
TIMING: dataset construction took 0.033 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [-0.059923781488611993]
Model 1/16, Metric r2

computed_metrics: [-0.07380744019748242]
Model 1/16, Metric r2_score, Validation set 0: -0.073807
	best_validation_score so far: -0.073807
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.5302292014931449]
Model 2/16, Metric r2_score, Validation set 1: 0.530229
	best_validation_score so far: 0.530229
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.5331568622031471]
Model 3/16, Metric r2_score, Validation set 2: 0.533157
	best_validation_s

computed_metrics: [0.5023964598985657]
Model 3/16, Metric r2_score, Validation set 2: 0.502396
	best_validation_score so far: 0.502396
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.453031005962405]
Model 4/16, Metric r2_score, Validation set 3: 0.453031
	best_validation_score so far: 0.502396
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.43346660591608266]
Model 5/16, Metric r2_score, Validation set 4: 0.433467
	best_validation_score so

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2063, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 7.567 s
TIMING: dataset construction took 7.670 s
Loading dataset from disk.
Preparing dataset for T108 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.145 s
Loading dataset from disk.
TIMING: dataset construction took 0.064 s
Loading dataset from disk.
TIMING: dataset construction took 0.063 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.49882042230989143]

Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.15312708708117306]
Model 1/16, Metric r2_score, Validation set 0: 0.153127
	best_validation_score so far: 0.153127
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.6049332856819382]
Model 2/16, Metric r2_score, Validation set 1: 0.604933
	best_validation_score so far: 0.604933
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'r

computed_metrics: [0.556454422991473]
Model 2/16, Metric r2_score, Validation set 1: 0.556454
	best_validation_score so far: 0.556454
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.569353142291254]
Model 3/16, Metric r2_score, Validation set 2: 0.569353
	best_validation_score so far: 0.569353
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.6239245380243572]
Model 4/16, Metric r2_score, Validation set 3: 0.623925
	best_validation_score so 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (396, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 1.495 s
TIMING: dataset construction took 1.519 s
Loading dataset from disk.
Preparing dataset for T168 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.029 s
Loading dataset from disk.
TIMING: dataset construction took 0.015 s
Loading dataset from disk.
TIMING: dataset construction took 0.015 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.4383437293255522]
Model 1/16, Metric r2_score, Validation set 0: 0.

computed_metrics: [0.09939870318865673]
Model 1/16, Metric r2_score, Validation set 0: 0.099399
	best_validation_score so far: 0.099399
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.46548785534750137]
Model 2/16, Metric r2_score, Validation set 1: 0.465488
	best_validation_score so far: 0.465488
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.4821598254727407]
Model 3/16, Metric r2_score, Validation set 2: 0.482160
	best_validation_sco

computed_metrics: [0.7567177539543714]
Model 3/16, Metric r2_score, Validation set 2: 0.756718
	best_validation_score so far: 0.756718
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.6634802816517671]
Model 4/16, Metric r2_score, Validation set 3: 0.663480
	best_validation_score so far: 0.756718
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.6823208664942335]
Model 5/16, Metric r2_score, Validation set 4: 0.682321
	best_validation_score so

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (313, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 1.132 s
TIMING: dataset construction took 1.155 s
Loading dataset from disk.
Preparing dataset for T10624 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.025 s
Loading dataset from disk.
TIMING: dataset construction took 0.013 s
Loading dataset from disk.
TIMING: dataset construction took 0.013 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [-0.18521640869719902]
Model 1/16, Metric r2_score, Validation set 0

computed_metrics: [0.19340183216287188]
Model 1/16, Metric r2_score, Validation set 0: 0.193402
	best_validation_score so far: 0.193402
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.6309328781826044]
Model 2/16, Metric r2_score, Validation set 1: 0.630933
	best_validation_score so far: 0.630933
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.6552158138313842]
Model 3/16, Metric r2_score, Validation set 2: 0.655216
	best_validation_scor

computed_metrics: [0.48342057046681564]
Model 3/16, Metric r2_score, Validation set 2: 0.483421
	best_validation_score so far: 0.511671
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.6676912657703538]
Model 4/16, Metric r2_score, Validation set 3: 0.667691
	best_validation_score so far: 0.667691
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.5781245607091803]
Model 5/16, Metric r2_score, Validation set 4: 0.578125
	best_validation_score s

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2523, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 9.932 s
TIMING: dataset construction took 10.057 s
Loading dataset from disk.
Preparing dataset for T10627 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.184 s
Loading dataset from disk.
TIMING: dataset construction took 0.079 s
Loading dataset from disk.
TIMING: dataset construction took 0.077 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.577193818651988

computed_metrics: [0.5623360923030809]
Model 1/16, Metric r2_score, Validation set 0: 0.562336
	best_validation_score so far: 0.562336
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.678379636126528]
Model 2/16, Metric r2_score, Validation set 1: 0.678380
	best_validation_score so far: 0.678380
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.6566411596001266]
Model 3/16, Metric r2_score, Validation set 2: 0.656641
	best_validation_score 

computed_metrics: [0.5787741763571148]
Model 3/16, Metric r2_score, Validation set 2: 0.578774
	best_validation_score so far: 0.594057
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.5300107028779746]
Model 4/16, Metric r2_score, Validation set 3: 0.530011
	best_validation_score so far: 0.594057
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.5310966744413625]
Model 5/16, Metric r2_score, Validation set 4: 0.531097
	best_validation_score so

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1560, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 5.893 s
TIMING: dataset construction took 5.992 s
Loading dataset from disk.
Preparing dataset for T10209 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.138 s
Loading dataset from disk.
TIMING: dataset construction took 0.049 s
Loading dataset from disk.
TIMING: dataset construction took 0.048 s
Loading dataset from disk.
Hyperprameter screening ...
Fitting model 1/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 512), 'penalty': 0.1}
computed_metrics: [0.29783545448139614]
Model 1/16, Metric r2

computed_metrics: [0.39884784602783807]
Model 1/16, Metric r2_score, Validation set 0: 0.398848
	best_validation_score so far: 0.398848
Fitting model 2/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (1024, 128), 'penalty': 0.1}
computed_metrics: [0.5718735203434344]
Model 2/16, Metric r2_score, Validation set 1: 0.571874
	best_validation_score so far: 0.571874
Fitting model 3/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 128), 'penalty': 0.1}
computed_metrics: [0.5675624251476663]
Model 3/16, Metric r2_score, Validation set 2: 0.567562
	best_validation_scor

computed_metrics: [0.533973247581708]
Model 3/16, Metric r2_score, Validation set 2: 0.533973
	best_validation_score so far: 0.579921
Fitting model 4/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (512, 64), 'penalty': 0.1}
computed_metrics: [0.5850845260787016]
Model 4/16, Metric r2_score, Validation set 3: 0.585085
	best_validation_score so far: 0.585085
Fitting model 5/16
hyperparameters: {'n_tasks': 1, 'n_features': 2048, 'activation': 'relu', 'momentum': 0.9, 'batch_size': 128, 'init': 'glorot_uniform', 'learning_rate': 0.01, 'decay': 1e-06, 'nb_epoch': 30, 'dropouts': 0.2, 'nb_layers': 1, 'batchnorm': False, 'layer_sizes': (128, 64), 'penalty': 0.1}
computed_metrics: [0.5777371593058599]
Model 5/16, Metric r2_score, Validation set 4: 0.577737
	best_validation_score so 

In [7]:
# option1: automatic hyperparameter selection
# The screening log has no header row; columns are named here.
hyper_param_df = pd.read_csv(log_path + '/hyperparam_log.txt', header = None, sep = '\t')
hyper_param_df.columns = ['rep_label', 'task_name', 'param', 'r2_score']
hyper_param_df = hyper_param_df.sort_values(by = ['task_name', 'param', 'rep_label'], axis = 0)

# task -> [(layer_size_1, layer_size_2, 1), dropout]
best_hyperparams = {}
for task in task_names:
    # r2 statistics of every hyperparameter setting logged for this task
    task_rows = hyper_param_df.loc[hyper_param_df['task_name'] == task]
    hyper_stat = task_rows.groupby('param').agg({'r2_score': ['mean','max','std']})

    # filter out ones without reasonable generalization power
    # (a setting with a single logged run has a NaN std and is dropped as well)
    valid_mask = hyper_stat['r2_score']['std'] < 0.15
    hyper_stat = hyper_stat.loc[valid_mask]
    if hyper_stat.shape[0] < 1:
        # the original printed the undefined name `task_name` here and raised NameError
        print(task + ' with training variance too high.')
        continue

    # rank once; .iloc[0] is positional (plain [0] on a string-indexed Series is a label lookup)
    ranked_r2 = hyper_stat['r2_score']['max'].sort_values(ascending=False)
    select_param = ranked_r2.index[0]
    select_r2 = ranked_r2.iloc[0]

    # flatten the logged setting string so that it can be split into fields
    select_param = select_param.replace('(', '').replace(')', '')
    fields = select_param.split(', ')

    # NOTE(review): fields 9, 12 and 13 are presumably dropout and the first two
    # layer sizes (key order of params_dict) - verify against the log format.
    # Only two layer sizes are read, so a third size in a setting is ignored.
    tmp_layer1 = int(fields[12])
    tmp_layer2 = int(fields[13])
    tmp_drop = float(fields[9])

    best_hyperparams[task] = [(tmp_layer1, tmp_layer2, 1), tmp_drop]
    print(task + ': ' + str(hyper_stat.shape[0]) + ', ' + str(select_r2))

T51: 16, 0.6669503600998903
T106: 16, 0.7608567071512521
T105: 16, 0.8142968318069032
T10618: 1, 0.4366532947372249
T107: 16, 0.6875597998996672
T227: 16, 0.6156729427614069
T108: 15, 0.6416264896673209
T168: 12, 0.7567177539543714
T10624: 13, 0.7958433410936154
T10627: 16, 0.7017270221559397
T10209: 16, 0.7117134434736652


In [8]:
# Display the selection per task: {task: [(layer_size_1, layer_size_2, 1), dropout]}
best_hyperparams

{'T51': [(512, 64, 1), 0.2],
 'T106': [(512, 128, 1), 0.4],
 'T105': [(512, 128, 1), 0.4],
 'T10618': [(1024, 128, 1), 0.4],
 'T107': [(512, 128, 1), 0.2],
 'T227': [(512, 128, 1), 0.4],
 'T108': [(1024, 128, 1), 0.4],
 'T168': [(512, 128, 1), 0.2],
 'T10624': [(128, 64, 1), 0.4],
 'T10627': [(1024, 128, 1), 0.4],
 'T10209': [(512, 128, 1), 0.2]}

In [None]:
# option2: manual selection, one entry per task laid out as
# [(layer_size_1, layer_size_2, 1), dropout]
best_hyperparams = {
    'T106': [(512, 128, 1), 0.4],
    'T227': [(512, 128, 1), 0.4],
    'T108': [(1024, 128, 1), 0.4],
}

In [None]:
# model training
# Trains the single-task models with the per-task settings held in best_hyperparams;
# log_path is handed over as the result directory.
# NOTE(review): the evaluation cell reads 'logs/Demo_GPCRs/performance_metrics.csv',
# presumably written by this call - confirm it follows result_path.
output_df = ST_model_training(MT_dat_name, FP_type, 
                              best_hyperparams, result_path = log_path)

----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (3202, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
TIMING: featurizing shard 0 took 11.996 s
TIMING: dataset construction took 12.155 s
Loading dataset from disk.
Preparing dataset for T51 of rep 0...
Computing train/valid/test indices
TIMING: dataset construction took 0.218 s
Loading dataset from disk.
TIMING: dataset construction took 0.114 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T51 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.246 s
Loading dataset from disk.
TIMING: dataset construction took 0.107 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T51 of rep 2...
Computing train/valid/test indices
TIMING: dataset construction took 0.258 s
Loading dataset from disk.
TIMING: dataset construction took 0.114 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Generate performace report ...
----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (808, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 2.981 s
TIMING: dataset construction took 3.028 s
Loading dataset from disk.
Preparing dataset for T106 of rep 0...
Computing train/valid/test indices
TIMING: dataset construction took 0.057 s
Loading dataset from disk.
TIMING: dataset construction took 0.030 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T106 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.138 s
Loading dataset from disk.
TIMING: dataset construction took 0.029 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T106 of rep 2...
Computing train/valid/test indices
TIMING: dataset construction took 0.098 s
Loading dataset from disk.
TIMING: dataset construction took 0.033 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Generate performace report ...
----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (883, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.247 s
TIMING: dataset construction took 3.293 s
Loading dataset from disk.
Preparing dataset for T105 of rep 0...
Computing train/valid/test indices
TIMING: dataset construction took 0.057 s
Loading dataset from disk.
TIMING: dataset construction took 0.031 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T105 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.118 s
Loading dataset from disk.
TIMING: dataset construction took 0.035 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T105 of rep 2...
Computing train/valid/test indices
TIMING: dataset construction took 0.144 s
Loading dataset from disk.
TIMING: dataset construction took 0.036 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Generate performace report ...
----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (109, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.393 s
TIMING: dataset construction took 0.405 s
Loading dataset from disk.
Preparing dataset for T10618 of rep 0...
Computing train/valid/test indices
TIMING: dataset construction took 0.012 s
Loading dataset from disk.
TIMING: dataset construction took 0.007 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...
Saving metrics ...


  y = column_or_1d(y, warn=True)


Preparing dataset for T10618 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.022 s
Loading dataset from disk.
TIMING: dataset construction took 0.009 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...
Saving metrics ...


  y = column_or_1d(y, warn=True)


Preparing dataset for T10618 of rep 2...
Computing train/valid/test indices
TIMING: dataset construction took 0.014 s
Loading dataset from disk.
TIMING: dataset construction took 0.009 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...
Saving metrics ...


  y = column_or_1d(y, warn=True)


Generate performace report ...
----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2951, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 10.864 s
TIMING: dataset construction took 11.004 s
Loading dataset from disk.
Preparing dataset for T107 of rep 0...
Computing train/valid/test indices
TIMING: dataset construction took 0.185 s
Loading dataset from disk.
TIMING: dataset construction took 0.101 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T107 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.233 s
Loading dataset from disk.
TIMING: dataset construction took 0.102 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T107 of rep 2...
Computing train/valid/test indices
TIMING: dataset construction took 0.236 s
Loading dataset from disk.
TIMING: dataset construction took 0.103 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Generate performace report ...
----------------------------------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1064, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/temp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 3.993 s
TIMING: dataset construction took 4.050 s
Loading dataset from disk.
Preparing dataset for T227 of rep 0...
Computing train/valid/test indices
TIMING: dataset construction took 0.075 s
Loading dataset from disk.
TIMING: dataset construction took 0.042 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T227 of rep 1...
Computing train/valid/test indices
TIMING: dataset construction took 0.124 s
Loading dataset from disk.
TIMING: dataset construction took 0.040 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training baseline models ...


  y = column_or_1d(y, warn=True)


Saving metrics ...
Preparing dataset for T227 of rep 2...
Computing train/valid/test indices
TIMING: dataset construction took 0.124 s
Loading dataset from disk.
TIMING: dataset construction took 0.040 s
Loading dataset from disk.
Model training ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# evaluation
# Load the saved single-task metrics and draw bar charts of 'value' per task,
# coloured by method, with one panel per ('performance', 'tt') combination.
metrics_csv = 'logs/Demo_GPCRs/performance_metrics.csv'
plot_df = generate_performance_plot_ST(metrics_csv)

catplot_layout = dict(x = 'task', y = 'value', hue = 'method',
                      col = 'tt', row = 'performance', kind = 'bar')
g = sns.catplot(data = plot_df, **catplot_layout)

### Bypass multitask models

In [None]:
# set parameters
# Screening grid for the bypass multitask model: every key holds the list of
# candidate values to try for that hyperparameter.
params_dict = OrderedDict([
    ('n_tasks', [len(task_names)]),
    ('n_features', [2048]),  ## need modification given FP types
    ('activation', ['relu']),
    ('momentum', [.9]),
    ('batch_size', [128]),
    ('init', ['glorot_uniform']),
    ('learning_rate', [0.0001]),
    ('decay', [1e-6]),
    ('nb_epoch', [30]),
    ('dropouts', [.2, .4]),
    ('nb_layers', [1]),
    ('batchnorm', [False]),
    ('layer_sizes', [(1024, 512), (1024, 128), (512, 128), (512, 64)]),
    ('bypass_dropouts', [0.5]),
    ('bypass_layer_sizes', [[128], [64]]),
])

In [None]:
# Hyperparameter screening for the bypass multitask model over the grid in params_dict.
# NOTE(review): the selection cell that follows reads log_path + '/hyperparam_log.txt' with
# three columns, while the single-task selection reads the same path with four -
# presumably the log must not contain both kinds of rows; confirm before re-running.
log_output = RobustMT_model_hyperparam_screen(MT_dat_name, task_names, FP_type, params_dict, log_path, smiles_field, id_field)

In [None]:
# option1: automatic hyperparameter selection
# The screening log has no header row; columns are named here.
hyper_param_df = pd.read_csv(log_path + '/hyperparam_log.txt', header = None, sep = '\t')
hyper_param_df.columns = ['rep_label', 'param', 'r2_score']
hyper_param_df = hyper_param_df.sort_values(by = ['param', 'rep_label'], axis = 0)

# becomes [layer_sizes, bypass_layer_sizes, dropout] when a setting is selected
best_hyperparams = {}
hyper_stat = hyper_param_df.groupby('param').agg({'r2_score': ['mean','max','std']})
# filter out ones without reasonable generalization power
# (a setting with a single logged run has a NaN std and is dropped as well)
valid_mask = hyper_stat['r2_score']['std'] < 0.15
hyper_stat = hyper_stat.loc[valid_mask]
if hyper_stat.shape[0] >= 1:
    # rank once; .iloc[0] is positional (plain [0] on a string-indexed Series is a label lookup)
    ranked_r2 = hyper_stat['r2_score']['max'].sort_values(ascending=False)
    select_param = ranked_r2.index[0]
    select_r2 = ranked_r2.iloc[0]

    # flatten the logged setting string so that it can be split into fields
    select_param = select_param.replace('(', '').replace(')', '')
    fields = select_param.split(', ')

    # NOTE(review): fields 9, 12, 13 and 15 are presumably dropout, the two layer
    # sizes and the bypass layer size (key order of params_dict) - verify against
    # the log format.
    tmp_layer1 = int(fields[12])
    tmp_layer2 = int(fields[13])
    bypass_layer = int(fields[15].strip('[').strip(']'))
    tmp_drop = float(fields[9])

    best_hyperparams = [(tmp_layer1, tmp_layer2), [bypass_layer], tmp_drop]
    print(str(hyper_stat.shape[0]) + ', ' + str(select_r2))
else:
    # the original printed the undefined name `task_name` here and raised NameError;
    # this cell screens all tasks jointly, so there is no single task to name
    print('Multitask model with training variance too high.')

In [None]:
# option2: manual selection, in the order the training cell reads it
best_hyperparams = [
    (1024, 128),  # layer_sizes
    [128],        # bypass_layer_sizes
    0.2,          # dropout
]

In [None]:
# train
# selected architecture, stored as [layer_sizes, bypass_layer_sizes, dropout]
layer_sizes, bypass_layer_sizes, dropout = (
    best_hyperparams[0], best_hyperparams[1], best_hyperparams[2]
)
# fixed training settings
lr, bypass_dropouts, n_features = 0.0001, 0.5, 2048

RobustMT_model_training(
    MT_dat_name, FP_type, task_names, log_path,
    n_features, layer_sizes, bypass_layer_sizes, bypass_dropouts, dropout, lr,
    N_test = 500.0,
    add_features = None,
    n_epoch = 250,
    epoch_num = 10,
    id_field = id_field,
    smiles_field = smiles_field,
)

In [None]:
# evaluation
import matplotlib.pyplot as plt

# Combine the multitask train/test logs, then plot 'R2' against 'step':
# one panel per 'tt' value, one colour per entry of 'tasks'.
train_log_csv = 'logs/Demo_GPCRs/model_train_log.csv'
test_log_csv = 'logs/Demo_GPCRs/model_test_log.csv'
plot_df = generate_performance_plot_RobustMT(train_log_csv, test_log_csv)

g = sns.FacetGrid(plot_df, col = 'tt', hue = 'tasks')
g = g.map(plt.plot, 'step', 'R2', marker = '.')
g = g.add_legend()

## Process trained models for visualization

In [None]:
# baseline models - RidgeCV
dataset_file = './logs/Demo_GPCRs/tmp.csv'

for task in task_names:  # can only be 1 task per call
    output_prefix = './logs/Demo_GPCRs/RidgeCV_' + task + '_'
    generate_RUNKEY_dataframe_baseline(
        output_prefix, task, dataset_file, FP_type,
        add_features = None,
        mode = 'RidgeCV',
        MT_dat_name = MT_dat_name,
        smiles_field = smiles_field,
        id_field = id_field,
    )

In [None]:
# baseline models - SVR
dataset_file = './logs/Demo_GPCRs/tmp.csv'

for task in task_names:  # can only be 1 task per call
    output_prefix = './logs/Demo_GPCRs/SVR_' + task + '_'
    generate_RUNKEY_dataframe_baseline(
        output_prefix, task, dataset_file, FP_type,
        add_features = None,
        mode = 'SVR',
        MT_dat_name = MT_dat_name,
        smiles_field = smiles_field,
        id_field = id_field,
    )

In [None]:
# single task models
# One call per task: the saved model file of that task goes in, outputs are
# written under a task-specific prefix.
for task in task_names:
    task_list = [task]
    prev_model = 'logs/Demo_GPCRs/' + task + '_rep0_50.hdf5'
    output_prefix = 'logs/Demo_GPCRs/ST_' + task + '_'
    generate_RUNKEY_dataframe_ST(
        prev_model, output_prefix, task_list, dataset_file, FP_type,
        add_features = None,
        n_layer = 1,
    )

## Analyse compounds of interest by loading a custom file

In [3]:
# multitask models
# custom compound file and the columns to read from it
custom_file = './data/custom_file.txt'
sep_custom_file = '\t'
custom_id_field = 'id'
custom_smiles_field = 'SMILES'
custom_task_field = 'dummy_value'

# saved multitask model and the architecture settings passed along with it
output_prefix = './logs/Demo_GPCRs/RobustMT2_'
prev_model = './logs/Demo_GPCRs/model-2250'
model_flag = 'MT'
layer_sizes = [1024, 128]
bypass_layer_sizes = [128]
n_layer = 1
n_bypass = 2
dropout = 0.2

# NOTE(review): learning_rate is 0.001 here while the training cell uses lr = 0.0001;
# presumably irrelevant when only restoring a trained model - confirm.
generate_RUNKEY_dataframe_RobustMT(
    prev_model, output_prefix, task_names, dataset_file, FP_type, add_features,
    n_features, layer_sizes, bypass_layer_sizes, model_flag, n_bypass,
    MT_dat_name,
    model_test_log = './logs/Demo_GPCRs/model_test_log.csv',
    smiles_field = smiles_field,
    id_field = id_field,
    bypass_dropouts = [.5],
    dropout = dropout,
    learning_rate = 0.001,
    n_layer = n_layer,
    custom_file = custom_file,
    custom_id_field = custom_id_field,
    custom_task_field = custom_task_field,
    custom_smiles_field = custom_smiles_field,
    sep_custom_file = sep_custom_file,
    K = 5,
    valid_cutoff = None,
)

------------- Loading dataset --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2939, 5)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 10.879 s
TIMING: dataset construction took 11.040 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.018 s
Loading dataset from disk.
------------- Loading previous trained models ------------------
INFO:tensorflow:Restoring parameters from ./logs/Demo_GPCRs/model-2250
------------- Prepare information for chemicals ------------------
INFO:tensorflow:Restoring parameters from ./logs/Demo_GPCRs/model-2250
------------- Prepare 

In [None]:
# baseline models -- RidgeCV
# custom compound file and the columns to read from it
custom_file = './data/custom_file.txt'
sep_custom_file = '\t'
custom_id_field = 'id'
custom_smiles_field = 'SMILES'
custom_task_field = 'dummy_value'
model_flag = 'MT'

# keyword arguments that are the same for every task
custom_kwargs = dict(
    custom_file = custom_file,
    custom_id_field = custom_id_field,
    custom_task_field = custom_task_field,
    custom_smiles_field = custom_smiles_field,
    sep_custom_file = sep_custom_file,
)

for task in task_names:
    output_prefix = './logs/Demo_GPCRs/RidgeCV_' + task + '_new_'
    generate_RUNKEY_dataframe_baseline(
        output_prefix, task, dataset_file, FP_type, add_features,
        mode = 'RidgeCV',
        MT_dat_name = MT_dat_name,
        smiles_field = smiles_field,
        id_field = id_field,
        **custom_kwargs
    )

In [None]:
# baseline models -- SVR
# custom compound file and the columns to read from it
custom_file = './data/custom_file.txt'
sep_custom_file = '\t'
custom_id_field = 'id'
custom_smiles_field = 'SMILES'
custom_task_field = 'dummy_value'
model_flag = 'MT'

# keyword arguments that are the same for every task
custom_kwargs = dict(
    custom_file = custom_file,
    custom_id_field = custom_id_field,
    custom_task_field = custom_task_field,
    custom_smiles_field = custom_smiles_field,
    sep_custom_file = sep_custom_file,
)

for task in task_names:
    output_prefix = './logs/Demo_GPCRs/SVR_' + task + '_new_'
    generate_RUNKEY_dataframe_baseline(
        output_prefix, task, dataset_file, FP_type, add_features,
        mode = 'SVR',
        MT_dat_name = MT_dat_name,
        smiles_field = smiles_field,
        id_field = id_field,
        **custom_kwargs
    )

In [None]:
# single task models
# custom compound file and the columns to read from it
custom_file = './data/custom_file.txt'
sep_custom_file = '\t'
custom_id_field = 'id'
custom_smiles_field = 'SMILES'
custom_task_field = 'dummy_value'
model_flag = 'MT'

# keyword arguments that are the same for every task
custom_kwargs = dict(
    custom_file = custom_file,
    custom_id_field = custom_id_field,
    custom_task_field = custom_task_field,
    custom_smiles_field = custom_smiles_field,
    sep_custom_file = sep_custom_file,
)

for task in task_names:
    output_prefix = './logs/Demo_GPCRs/ST_' + task + '_new_'
    prev_model = './logs/Demo_GPCRs/' + task + '_rep0_50.hdf5'
    generate_RUNKEY_dataframe_ST(
        prev_model, output_prefix, [task], dataset_file, FP_type, add_features,
        mode = 'ST',
        MT_dat_name = MT_dat_name,
        n_layer = 1,
        smiles_field = smiles_field,
        id_field = id_field,
        **custom_kwargs
    )

## Supplementary: all SVR predictions and SAR plots

In [2]:
# initialize parameters
# input table and the columns holding structures and compound ids
MT_dat_name = './data/MT_data_clean_June28.csv'
smiles_field = 'salt_removed_smi'
id_field = 'molregno'

# featurization (n_features presumably has to match FP_type - verify when changing it)
FP_type = 'Circular_2048'
n_features = 2048
add_features = None

# all tasks covered by the supplementary run
task_names = [
    'T51',
    'T106',
    'T105',
    'T10618',
    'T107',
    'T227',
    'T108',
    'T168',
    'T10624',
    'T10627',
    'T10209',
]

# output locations
log_path = './logs/Demo_GPCRs'
dataset_file = './logs/Demo_GPCRs/tmp.csv'

In [3]:
# baseline models -- SVR
# custom compound file and the columns to read from it
custom_file = './data/custom_file.txt'
sep_custom_file = '\t'
custom_id_field = 'id'
custom_smiles_field = 'SMILES'
custom_task_field = 'dummy_value'
model_flag = 'MT'

# keyword arguments that are the same for every task
custom_kwargs = dict(
    custom_file = custom_file,
    custom_id_field = custom_id_field,
    custom_task_field = custom_task_field,
    custom_smiles_field = custom_smiles_field,
    sep_custom_file = sep_custom_file,
)

for task in task_names:
    output_prefix = './logs/Demo_GPCRs/SVR_' + task + '_new_'
    generate_RUNKEY_dataframe_baseline(
        output_prefix, task, dataset_file, FP_type, add_features,
        mode = 'SVR',
        MT_dat_name = MT_dat_name,
        smiles_field = smiles_field,
        id_field = id_field,
        **custom_kwargs
    )

------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (3202, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
TIMING: featurizing shard 0 took 11.793 s
TIMING: dataset construction took 11.952 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.213 s
Loading dataset from disk.
TIMING: dataset construction took 0.097 s
Loading dataset from disk.
TIMING: dataset construction took 0.093 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (808, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.710 s
TIMING: dataset construction took 3.763 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.013 s
TIMING: dataset construction took 0.020 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.058 s
Loading dataset from disk.
TIMING: dataset construction took 0.030 s
Loading dataset from disk.
TIMING: dataset construction took 0.028 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (883, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.574 s
TIMING: dataset construction took 3.627 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.013 s
TIMING: dataset construction took 0.021 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.068 s
Loading dataset from disk.
TIMING: dataset construction took 0.034 s
Loading dataset from disk.
TIMING: dataset construction took 0.032 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (109, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.396 s
TIMING: dataset construction took 0.408 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.012 s
Loading dataset from disk.
TIMING: dataset construction took 0.007 s
Loading dataset from disk.
TIMING: dataset construction took 0.007 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------


  y = column_or_1d(y, warn=True)


------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2951, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 12.300 s
TIMING: dataset construction took 12.469 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.020 s
TIMING: dataset construction took 0.030 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.364 s
Loading dataset from disk.
TIMING: dataset construction took 0.168 s
Loading dataset from disk.
TIMING: dataset construction took 0.128 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1064, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 3.989 s
TIMING: dataset construction took 4.046 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.029 s
TIMING: dataset construction took 0.045 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.115 s
Loading dataset from disk.
TIMING: dataset construction took 0.051 s
Loading dataset from disk.
TIMING: dataset construction took 0.050 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2063, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 8.156 s
TIMING: dataset construction took 8.255 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.134 s
Loading dataset from disk.
TIMING: dataset construction took 0.063 s
Loading dataset from disk.
TIMING: dataset construction took 0.062 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (396, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 1.598 s
TIMING: dataset construction took 1.624 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.030 s
Loading dataset from disk.
TIMING: dataset construction took 0.016 s
Loading dataset from disk.
TIMING: dataset construction took 0.016 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------


  y = column_or_1d(y, warn=True)


------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (313, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 1.186 s
TIMING: dataset construction took 1.210 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.014 s
TIMING: dataset construction took 0.022 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.029 s
Loading dataset from disk.
TIMING: dataset construction took 0.014 s
Loading dataset from disk.
TIMING: dataset construction took 0.014 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2523, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 10.935 s
TIMING: dataset construction took 11.073 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.013 s
TIMING: dataset construction took 0.021 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.220 s
Loading dataset from disk.
TIMING: dataset construction took 0.107 s
Loading dataset from disk.
TIMING: dataset construction took 0.111 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------
------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1560, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 6.312 s
TIMING: dataset construction took 6.394 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.114 s
Loading dataset from disk.
TIMING: dataset construction took 0.049 s
Loading dataset from disk.
TIMING: dataset construction took 0.047 s
Loading dataset from disk.


  y = column_or_1d(y, warn=True)


------------- Prepare information for chemicals ------------------
------------- Prepare information for minibatches ------------------
------------- Saving datasets --------------


In [4]:
# Baseline models -- RidgeCV
# Description of the extra compound file handed to the baseline builder
# alongside the training data (path, column names and field separator).
sep_custom_file = '\t'
custom_file = './data/custom_file.txt'
custom_smiles_field = 'SMILES'
custom_id_field = 'id'
custom_task_field = 'dummy_value'
model_flag = 'MT'  # not read in this cell; kept in case other cells rely on it

# Keyword arguments that are identical for every task; only the output
# prefix and the task name change between calls.
ridgecv_kwargs = dict(
    mode = 'RidgeCV',
    MT_dat_name = MT_dat_name,
    smiles_field = smiles_field,
    id_field = id_field,
    custom_file = custom_file,
    custom_id_field = custom_id_field,
    custom_task_field = custom_task_field,
    custom_smiles_field = custom_smiles_field,
    sep_custom_file = sep_custom_file,
)

# One baseline run per task; the task name is embedded in the output prefix
# so the runs do not share a prefix.
for task in task_names:
    output_prefix = './logs/Demo_GPCRs/RidgeCV_' + task + '_new_'
    generate_RUNKEY_dataframe_baseline(output_prefix, task, dataset_file, FP_type,
                                       add_features, **ridgecv_kwargs)

------------- Loading dataset and train baseline model --------------------


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (3202, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
TIMING: featurizing shard 0 took 11.767 s
TIMING: dataset construction took 11.925 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.272 s
Loading dataset from disk.
TIMING: dataset construction took 0.151 s
Loading dataset from disk.
TIMING: dataset construction took 0.151 s
Loading dataset from disk.
------------- Prepare informat

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (808, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.005 s
TIMING: dataset construction took 3.048 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.055 s
Loading dataset from disk.
TIMING: dataset construction took 0.029 s
Loading dataset from disk.
TIMING: dataset construction took 0.027 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
------------- Prepare information for 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (883, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.538 s
TIMING: dataset construction took 3.587 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.063 s
Loading dataset from disk.
TIMING: dataset construction took 0.031 s
Loading dataset from disk.
TIMING: dataset construction took 0.029 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
------------- Prepare information for 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (109, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.596 s
TIMING: dataset construction took 0.612 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.018 s
TIMING: dataset construction took 0.028 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.016 s
Loading dataset from disk.
TIMING: dataset construction took 0.010 s
Loading dataset from disk.
TIMING: dataset construction took 0.011 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
------------- Prepare information for 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2951, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 11.033 s
TIMING: dataset construction took 11.175 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.026 s
TIMING: dataset construction took 0.038 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.329 s
Loading dataset from disk.
TIMING: dataset construction took 0.170 s
Loading dataset from disk.
TIMING: dataset construction took 0.137 s
Loading dataset from disk.
------------- Prepare information for chemicals ------

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1064, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 4.101 s
TIMING: dataset construction took 4.158 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.031 s
TIMING: dataset construction took 0.046 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.113 s
Loading dataset from disk.
TIMING: dataset construction took 0.052 s
Loading dataset from disk.
TIMING: dataset construction took 0.051 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
-------------

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2063, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 7.791 s
TIMING: dataset construction took 7.892 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.159 s
Loading dataset from disk.
TIMING: dataset construction took 0.084 s
Loading dataset from disk.
TIMING: dataset construction took 0.081 s
Loading dataset from disk.
------------- Prepare information for chemicals --------

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (396, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 1.662 s
TIMING: dataset construction took 1.699 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.018 s
TIMING: dataset construction took 0.028 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.041 s
Loading dataset from disk.
TIMING: dataset construction took 0.016 s
Loading dataset from disk.
TIMING: dataset construction took 0.015 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
------------- Prepare information for 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (313, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 1.135 s
TIMING: dataset construction took 1.156 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.024 s
Loading dataset from disk.
TIMING: dataset construction took 0.013 s
Loading dataset from disk.
TIMING: dataset construction took 0.012 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
------------- Prepare information for 

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (2523, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
TIMING: featurizing shard 0 took 9.287 s
TIMING: dataset construction took 9.406 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.020 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.160 s
Loading dataset from disk.
TIMING: dataset construction took 0.078 s
Loading dataset from disk.
TIMING: dataset construction took 0.077 s
Loading dataset from disk.
------------- Prepare information for chemicals --------

  exec(code_obj, self.user_global_ns, self.user_ns)


Extracted dataset shape: (1560, 3)
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
TIMING: featurizing shard 0 took 5.703 s
TIMING: dataset construction took 5.779 s
Loading dataset from disk.
------------- Loading custom file --------------------
0
Read in 3 compounds; 3 valid compounds.
Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./logs/Demo_GPCRs/tmp.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 0.012 s
TIMING: dataset construction took 0.019 s
Loading dataset from disk.
Computing train/valid/test indices
TIMING: dataset construction took 0.101 s
Loading dataset from disk.
TIMING: dataset construction took 0.050 s
Loading dataset from disk.
TIMING: dataset construction took 0.048 s
Loading dataset from disk.
------------- Prepare information for chemicals ------------------
-------------