In [1]:
# Train a neural network to predict the solubility of molecules.  First load the data.
import deepchem as dc
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = datasets

In [2]:
# Create and train the model.

model = dc.models.GraphConvModel(n_tasks=1, mode='regression', dropout=0.2)
model.fit(train_dataset, nb_epoch=100)
model.save_checkpoint(model_dir="./ckpt-model")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [3]:
model.restore(model_dir="./ckpt-model")

In [4]:
# Evaluate it.

metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
print("Training set score")
print(model.evaluate(train_dataset, [metric], transformers))
print("Test set score")
print(model.evaluate(test_dataset, [metric], transformers))

# Use it to predict the solubility of some molecules.

smiles = ['COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C',
          'CCOC(=O)CC',
          'CSc1nc(NC(C)C)nc(NC(C)C)n1',
          'CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1',
          'Cc1cc2ccccc2cc1C']
from rdkit import Chem
mols = [Chem.MolFromSmiles(s) for s in smiles]
featurizer = dc.feat.ConvMolFeaturizer()
x = featurizer.featurize(mols)
predicted_solubility = model.predict_on_batch(x)
for m,s in zip(smiles, predicted_solubility):
    print()
    print('Molecule:', m)
    print('Predicted solubility:', s)

Training set score
{'pearson_r2_score': 0.9193097602421073}
Test set score
{'pearson_r2_score': 0.6520786464744863}

Molecule: COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C
Predicted solubility: [-0.57873654]

Molecule: CCOC(=O)CC
Predicted solubility: [1.8087548]

Molecule: CSc1nc(NC(C)C)nc(NC(C)C)n1
Predicted solubility: [0.13224794]

Molecule: CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1
Predicted solubility: [-0.01401384]

Molecule: Cc1cc2ccccc2cc1C
Predicted solubility: [-0.73792726]


In [5]:
x[0].get_atom_features()

array([[1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [6]:
x[2].get_atom_features().shape

(16, 75)

In [15]:
x[4].get_atom_features().shape

(12, 75)

In [8]:
x[0].get_adjacency_list()

[[8],
 [21],
 [21],
 [17],
 [18],
 [19],
 [20],
 [20],
 [0, 21],
 [21, 10],
 [9, 11],
 [10, 17],
 [17, 13],
 [12, 14],
 [13, 18],
 [18, 19],
 [19, 20],
 [11, 3, 12],
 [14, 4, 15],
 [15, 5, 16],
 [16, 6, 7],
 [8, 1, 2, 9]]

In [9]:
from alibi.explainers import AnchorText, IntegratedGradients

In [10]:
def predict_fn(smiles):
    mols = [Chem.MolFromSmiles(s) for s in smiles]
    featurizer = dc.feat.ConvMolFeaturizer()
    x = featurizer.featurize(mols)
    predicted_solubility = model.predict_on_batch(x)
    return predicted_solubility
    
explainer = AnchorText(model, predict_fn)

AttributeError: 'GraphConvModel' object has no attribute 'vocab'

In [30]:
ig  = IntegratedGradients(model,
                          layer=None,
                          method="gausslegendre",
                          n_steps=50,
                          internal_batch_size=100)

AttributeError: 'GraphConvModel' object has no attribute 'input'

In [29]:
?? dc.models.GraphConvModel