In [1]:
import os
# Quiet TensorFlow's native logging. Presumably this is set before `import deepchem`
# so the value is already in the environment when TensorFlow gets loaded — TODO confirm.
# NOTE(review): the documented levels are 0-3; '4' is outside that range — confirm it
# behaves like '3' on the installed TensorFlow version.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '4' 

import deepchem as dc
import pandas as pd
import pickle
import numpy as np
import warnings

# NOTE(review): wildcard import. The cells below call psuedoScramble, partition and
# rmsd without defining them, so they presumably come from here — verify against
# utils.py and consider importing those names explicitly.
from utils import *
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

In [34]:
# FreeSolve database: load the consolidated per-molecule records.
# NOTE(security): pickle.load can execute arbitrary code while unpickling; only
# load a 'dicts/consol.pickle' that comes from a trusted source.
with open('dicts/consol.pickle', 'rb') as handle:  # the context manager closes the file
    freeSolve = pickle.load(handle)

# One list per quantity, all in freeSolve's iteration order, so position k
# refers to the same entry in every list.
records = list(freeSolve.values())
expt = [rec['expt'] for rec in records]
smiles = [rec['smiles'] for rec in records]
tip = [rec['calc'] for rec in records]
gbn = [rec['gbnsr6'] for rec in records]
igb = [rec['igb5'] for rec in records]
asc = [rec['asc'] for rec in records]
zap = [rec['zap9'] for rec in records]
cha = [rec['cha'] for rec in records]
bestgb = [rec['bestgb'] for rec in records]
nul = [0] * len(records)  # all-zero baseline, exposed as the 'null' feature

# Lookup table from feature name to its per-molecule value list.
feats = {'tip3p': tip, 'gbnsr6': gbn, 'igb5': igb, 'asc': asc,
         'null': nul, 'zap9': zap, 'cha': cha, 'bestgb': bestgb}

In [35]:
# Build the train/validation split and apply it to smiles, expt and the chosen feature.
# NOTE(review): `params` is read below but is assigned in the cell that follows this
# one in the notebook (In [42]); on a fresh kernel that cell has to run first,
# otherwise the lookup raises NameError (unless utils happens to export `params`).
# NOTE(review): this cell rebinds `smiles` and `expt` to their partitioned form, so
# it is presumably not safe to re-run without re-running the loading cell first.
np.random.seed(10)  # fixed seed; presumably psuedoScramble draws from np.random — TODO confirm
b = list(psuedoScramble(expt, bins=len(expt) // 10))

# Hold out every 8th entry of the scrambled list (positions 0, 8, 16, ...) as the
# validation part; everything else stays in `b` as the training part. This is the
# same selection as popping index i*7 on each of len(b)//8 passes.
n_total = len(b)
n_val = n_total // 8
val = b[:8 * n_val:8]
del b[:8 * n_val:8]
assert len(b) + len(val) == n_total, "split must account for every entry"

# Select the baseline feature named in the config and split all three series
# with the same (train, validation) partition.
feat = feats[params['feat']]
part = (b, val)
smiles = partition(smiles, part)
expt = partition(expt, part)
feat = partition(feat, part)

In [42]:
# Run configuration read by the split, training and reload cells.
# NOTE(review): 'kfold' is not read by any cell visible in this notebook.
params = {
    'epochs': 500,
    'dropout': 0.4,
    'batch_normalize': False,
    'batch_size': 1000,
    'feat': 'tip3p',
    'kfold': -1,
    'dense_layer_size': 27,
    'graph_conv_layers': [32, 32],
}

In [19]:
%%time
featurizer = dc.feat.ConvMolFeaturizer(per_atom_fragmentation=False)
train = dc.data.NumpyDataset(X=featurizer.featurize(smiles[0]), 
                             y=np.array(np.array(expt[0])-np.array(feat[0])).transpose())
# model_dir = tempfile.mkdtemp()
model = dc.models.GraphConvModel(n_tasks=1, graph_conv_layers=params['graph_conv_layers'],
                                     mode='regression', dropout=params['dropout'], 
                                     batch_normalize=params['batch_normalize'], 
                                     batch_size=params['batch_size'], 
                                     dense_layer_size=params['dense_layer_size'],
                                     model_dir = './model'
                                )
model.fit(train, nb_epoch=params['epochs'])
p = ()
final = ()
for i in range(len(part)):
    p += (np.array(model.predict_on_batch(featurizer.featurize(smiles[i])).flatten()),)
    final+=(list(p[i]+feat[i]),)
print(rmsd(expt[0],final[0]),rmsd(expt[1],final[1]))

0.6433284438561003 1.0289539201929117
CPU times: user 33.4 s, sys: 5.45 s, total: 38.9 s
Wall time: 15.1 s


In [43]:
warnings.filterwarnings('ignore')
# Rebuild the network with the same hyper-parameters used for training and load
# the weights from the checkpoint directory written by the training cell.
# `nb_epoch` is an argument of fit(), not of the constructor, so it is not passed here.
featurizer = dc.feat.ConvMolFeaturizer(per_atom_fragmentation=False)
reloaded_model = dc.models.GraphConvModel(n_tasks=1, graph_conv_layers=params['graph_conv_layers'],
                                     mode='regression', dropout=params['dropout'],
                                     batch_normalize=params['batch_normalize'],
                                     batch_size=params['batch_size'],
                                     dense_layer_size=params['dense_layer_size'],
                                     model_dir='./model'
                                )
# Load the checkpoint from model_dir into the freshly built model.
reloaded_model.restore()

In [44]:
# Repeat the evaluation with the restored model.
# p[k]: predicted corrections for part k; final[k]: baseline feature + correction.
corrections, corrected = [], []
for idx in range(len(part)):
    graphs = featurizer.featurize(smiles[idx])
    delta = np.array(reloaded_model.predict_on_batch(graphs).flatten())
    corrections.append(delta)
    corrected.append(list(delta + feat[idx]))
p = tuple(corrections)
final = tuple(corrected)
print(rmsd(expt[0], final[0]), rmsd(expt[1], final[1]))

0.6433284444610164 1.0289539201929117
