In [None]:
from deepchem.utils.data_utils import load_from_disk
import deepchem as dc
import tensorflow as tf
from tensorflow.keras import models, layers
import train_Delaney

# Path of the Delaney solubility CSV, resolved relative to the working directory.
dataset_file = "delaney-processed.csv"

# Featurize every SMILES string as a 1024-bit circular fingerprint and use the
# measured log-solubility column as the single regression task.
# (The CSV is read once, by the loader; a separate raw load is not needed.)
# NOTE(review): newer DeepChem releases prefer `feature_field` over
# `smiles_field` -- confirm against the installed version.
featurizer = dc.feat.CircularFingerprint(size=1024)
loader = dc.data.CSVLoader(
    tasks=["measured log solubility in mols per litre"],
    smiles_field="smiles",
    featurizer=featurizer,
)
dataset = loader.featurize(dataset_file)

# Architecture settings forwarded to train_Delaney.train.
# Presumably the number of hidden layers and the width of each -- confirm in train_Delaney.
Num_layer = 2
dim = [32, 64]

##### Data splitting into test:train = 2:8 (the validation split is left empty)
# A fixed seed makes the random split identical across kernel restarts.
# Training inside train_Delaney may still be stochastic.
SPLIT_SEED = 0
splitter = dc.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
    dataset, frac_train=0.8, frac_valid=0, frac_test=0.2, seed=SPLIT_SEED
)

##### Data Normalization (zero mean, unit variance)
# The statistics are fitted on the training split only and then applied to both
# splits, so no information from the test labels leaks into the scaling.
transformers = [dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]

# transform() returns a new dataset rather than modifying its argument, so the
# result has to be bound back to train_dataset / test_dataset. Rebinding a loop
# variable would silently leave both splits un-normalized.
for transformer in transformers:
    train_dataset = transformer.transform(train_dataset)
    test_dataset = transformer.transform(test_dataset)

# Train the network; the test split is passed alongside the training split.
model, history = train_Delaney.train((train_dataset, test_dataset), Num_layer, dim)


In [None]:
# Score the trained model on the held-out test split.
# The three-way unpacking assumes the model was compiled with MAE and MSE
# metrics, in that order -- TODO confirm in train_Delaney.
loss, mae, mse = model.evaluate(test_dataset.X, test_dataset.y, verbose=2)
# The label says RMSE, so report the square root of the MSE; r^2 still uses the MSE itself.
rmse = mse ** 0.5
r2 = 1 - (mse / test_dataset.y.var())
print("best RMSE: %f\nbest r2value: %f" % (rmse, r2))

In [None]:
# Display the summary of the model returned by train_Delaney.train.
model.summary()

In [None]:
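##### Training (optimization) curve of the FCNN with the best set of parameters, and the best MSE value.
##### Model_8 (2 hidden layers, 256 neurons each) seems to be the best model.
##### NOTE(review): the training cell of this notebook uses a [32, 64] layer configuration (`dim`), not 256 neurons per layer -- confirm which model is meant.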
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Tabulate the metrics recorded during training (one column per metric) and
# append the epoch index as an extra 'epoch' column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)

def plot_history(history):
  """Plot training and validation mean squared error against the epoch.

  Args:
    history: object exposing a `history` mapping with 'mse' and 'val_mse'
      series and an `epoch` sequence (the second value returned by
      train_Delaney.train in this notebook).
  """
  # Build the table from the argument so the function plots what it is given
  # instead of depending on a module-level variable.
  metrics = pd.DataFrame(history.history)
  metrics['epoch'] = history.epoch

  # A single 8x6 axes: the same panel size as before, without the empty upper half.
  fig, ax = plt.subplots(figsize=(8,6))
  ax.set_xlabel('Epoch')
  ax.set_ylabel('Mean Square Error')
  ax.plot(metrics['epoch'], metrics['mse'],
          label='Train Error')
  ax.plot(metrics['epoch'], metrics['val_mse'],
          label = 'Val Error')
  ax.set_ylim([0,2])
  ax.legend()
  plt.show()

# Draw the learning curves, then score the trained model on the test split.
plot_history(history)
# The three-way unpacking assumes the model was compiled with MAE and MSE
# metrics, in that order -- TODO confirm in train_Delaney.
loss, mae, mse = model.evaluate(test_dataset.X, test_dataset.y, verbose=2)
# The label says RMSE, so print the square root of the MSE; r^2 still uses the MSE itself.
print("best RMSE: %f\nbest r2value: %f" % (mse ** 0.5, 1 - (mse / test_dataset.y.var())))