In [None]:
# DeepChem project home page: https://deepchem.io/
import numpy as np
import deepchem as dc  # dataset loader, featurizer, model and metric used below
from rdkit import Chem  # used below to parse SMILES strings into molecules

# Seed NumPy's global random number generator so NumPy-driven randomness is
# repeatable between runs.
# NOTE(review): only NumPy is seeded here; whether the model's weight
# initialisation / training randomness is covered depends on the DeepChem
# backend -- confirm if fully reproducible training is required.
np.random.seed(123)

In [None]:
# Load the Delaney dataset through MoleculeNet with the 'GraphConv' featurizer.
# The loader returns three things: the task names, the dataset splits, and the
# transformers (which the evaluation cell later passes to model.evaluate).
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney(
    featurizer='GraphConv',
)

# Unpack the splits into train / validation / test.
(train_dataset, valid_dataset, test_dataset) = delaney_datasets

In [None]:
# Build and train the graph-convolution regression model.

# Squared Pearson correlation; used to score the model in the evaluation cell.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

model = dc.models.GraphConvModel(
    # One output per task reported by the loader instead of a hard-coded 1.
    n_tasks=len(delaney_tasks),
    mode='regression',
    # GraphConvModel's keyword is `dropout` (a single float applies the same
    # rate to every layer). The former `dropouts=[.25]` is not a parameter of
    # this model: it was swallowed by **kwargs, so no dropout was applied.
    dropout=0.25,
    batch_size=50)

# Train the model on the training split.
model.fit(train_dataset, nb_epoch=10)

In [None]:
# Performance: score the fitted model on the training and validation splits.
# The transformers are handed to evaluate() together with the metric list.
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

# Report both results, training first, one line each.
for label, scores in (("Train r2 scores", train_scores),
                      ("Validation r2 scores", valid_scores)):
    print(label, scores)

In [None]:
# Predict solubility for a handful of molecules given as SMILES strings.
smiles = ['ClCC(Cl)(Cl)Cl',
          'CC(Cl)(Cl)Cl',
          'ClC(Cl)C(Cl)Cl',
          'ClCC(Cl)Cl',
          'CCOC(C)OCC',
          'Clc1ccc(Cl)c(Cl)c1Cl',
          'C1CCc2ccccc2C1',
          'Clc1cc(Cl)c(Cl)c(Cl)c1',
          'Clc1cccc(Cl)c1Cl',
          'Cc1cccc(C)c1C']

# Parse every SMILES string. RDKit returns None for a string it cannot parse,
# so fail early with a clear message instead of crashing inside the featurizer.
mols = [Chem.MolFromSmiles(s) for s in smiles]
invalid_smiles = [s for s, mol in zip(smiles, mols) if mol is None]
if invalid_smiles:
    raise ValueError("Could not parse SMILES: {}".format(invalid_smiles))

# Featurize with the graph-convolution featurizer.
featurizer = dc.feat.ConvMolFeaturizer()
x = featurizer.featurize(mols)

# Pass the loader's transformers so the predictions are mapped back to the
# original label units, the same way model.evaluate() is given them above.
# Without this the returned values stay in the transformed space the model
# was trained in and are not directly interpretable as solubilities.
predicted_solubility = model.predict_on_batch(x, transformers=transformers)
predicted_solubility