In [None]:
import tensorflow as tf
print(tf.version.VERSION)
from deepchem.models import GraphConvModel

from loaders.Loaders import CSVLoader
from compoundFeaturization import ConvMolFeat
from splitters.splitters import RandomSplitter
from metrics.Metrics import Metric
from metrics.metricsFunctions import r2_score, mean_absolute_error, mean_squared_error, median_absolute_error
from models import DeepchemMode
from parameterOptimization.HyperparameterOpt import HyperparamOpt_CV

In [None]:
# Load Dataset
# Point the CSV loader at the PC-3 file; 'smiles' is passed as the molecule field and
# 'pIC50' as the label field.
loader = CSVLoader(
    dataset_path='data/PC-3.csv',
    mols_field='smiles',
    labels_fields='pIC50',
)
# Materialize the dataset object that every later cell works on.
dataset = loader.create_dataset()
dataset.get_shape()

In [None]:
# SMILES standardization
# NOTE(review): ChEMBLStandardizer is not imported in any cell visible here, so this cell
# raises NameError on a fresh kernel unless it is imported elsewhere -- confirm and add the import.
# NOTE(review): the result is bound to `standardizer`, not `dataset`, and later cells keep
# using `dataset`; this step only affects them if standardize() modifies the dataset
# in place -- TODO confirm.
chembl_standardizer = ChEMBLStandardizer()
standardizer = chembl_standardizer.standardize(dataset)

In [None]:
# Featurization
# Rebind `dataset` to the output of the ConvMolFeat featurizer, then show its shape.
featurizer = ConvMolFeat()
dataset = featurizer.featurize(dataset)
dataset.get_shape()

In [None]:
# Data Split
# Random train/test split with frac_train=0.7; the seed is fixed so the split is repeatable.
splitter = RandomSplitter()
split_options = {'frac_train': 0.7, 'seed': 123}
train_dataset, test_dataset = splitter.train_test_split(dataset, **split_options)

# Report the shape of each partition.
train_dataset.get_shape()
test_dataset.get_shape()

In [None]:
# Model build function
def graphconv_builder(graph_conv_layers, dense_layer_size, dropout, learning_rate, batch_size=256, epochs=5):
    """Build a single-task regression GraphConvModel and wrap it in a DeepChemModel.

    Args:
        graph_conv_layers: forwarded to GraphConvModel as ``graph_conv_layers``.
        dense_layer_size: forwarded to GraphConvModel as ``dense_layer_size``.
        dropout: forwarded to GraphConvModel as ``dropout``.
        learning_rate: forwarded to GraphConvModel as ``learning_rate``.
        batch_size: forwarded to GraphConvModel as ``batch_size`` (default 256).
        epochs: forwarded to the DeepChemModel wrapper as ``epochs`` (default 5).

    Returns:
        The DeepChemModel wrapper around the freshly constructed GraphConvModel.
    """
    gcn = GraphConvModel(
        n_tasks=1,
        graph_conv_layers=graph_conv_layers,
        dense_layer_size=dense_layer_size,
        dropout=dropout,
        batch_size=batch_size,
        learning_rate=learning_rate,
        mode='regression',
    )
    # NOTE(review): the import cell brings in `DeepchemMode` from `models`, but the name used
    # here is `DeepChemModel`; unless that name is defined elsewhere this raises NameError
    # when the builder is called -- confirm the intended class name and fix the import.
    return DeepChemModel(gcn, epochs=epochs, use_weights=False, model_dir=None)
# Original author's note: DeepChem uses the Adam optimizer by default, and L2Loss() by
# default for regression (equivalent to an MSE loss).

In [None]:
# Scoring metrics
# Wrap each imported scoring function in a Metric object.
score_functions = (mean_absolute_error, mean_squared_error, median_absolute_error, r2_score)
metrics = [Metric(score_fn) for score_fn in score_functions]

In [None]:
# Hyperparameter search space. The keys match the parameter names of graphconv_builder;
# each list holds the candidate values for that parameter.
params_dict = {
    "graph_conv_layers": [
        [32, 32], [64, 64], [128, 128],
        [32, 64], [64, 128],
        [32, 32, 32], [64, 64, 64], [128, 128, 128],
        [32, 64, 128],
    ],
    "dense_layer_size": [2048, 1024, 512, 256, 128],
    "dropout": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    "learning_rate": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
}

# The optimizer is given the builder function itself and constructs models from it.
optimizer = HyperparamOpt_CV(graphconv_builder, mode='regression')

# Search over the training split only; the test split is kept for the final evaluation.
# NOTE(review): n_iter_search=2 presumably limits the search to two sampled
# configurations -- confirm and raise it for a real run.
search_outcome = optimizer.hyperparam_search(
    'deepchem',
    params_dict,
    train_dataset,
    'neg_mean_squared_error',
    cv=5,
    n_iter_search=2,
    n_jobs=1,
)
best_model, best_hyperparams, all_results = search_outcome

print('#################')
# Original author's note: the printed best_model shows several args = None, but the
# hyperparameters were confirmed to reach the underlying DeepChem classes.
for report_item in (best_hyperparams, best_model, all_results):
    print(report_item)

In [None]:
# Evaluate model
# Score the model returned by the hyperparameter search on the held-out test split,
# using the list of Metric objects built in the scoring-metrics cell.
# (best_model has already been fit, per the original author's note; no fit call is made here)
best_model.evaluate(test_dataset, metrics)