### Keras and TensorFlow Neural Network (NN)

In [1]:
import pandas as pd
from processing_utils import *

# Read the raw table from the CSV file in the local dataset directory.
raw_dataset = pd.read_csv('./dataset/data.csv')

# Prune with the project helper; the flags below are forwarded as-is
# (see processing_utils for their exact row/column semantics).
pruned_dataset = prune_dataset_lines(
    raw_dataset,
    remove_nan_cols=True,
    remove_nan_lines=False,
)
print(
    "Raw dataset shape =", raw_dataset.shape,
    " Pruned dataset shape =", pruned_dataset.shape,
)

# Encode the SMILES column; change 'count_encoding' to one_hot_encoding here
# to try the alternative encoding.
encoded_pruned_data = encode_smiles_column_of(
    pruned_dataset,
    'count_encoding',
)
print("Encoded dataset shape =", encoded_pruned_data.shape)

# Obtain the four arrays used for fitting and validation. The two listed
# columns are presumably the regression targets -- confirm against
# return_required_data in processing_utils.
X_train, y_train, X_test, y_test = return_required_data(
    encoded_pruned_data,
    ['Energy_(kcal/mol)', 'Energy DG:kcal/mol)'],
    normalize=True,
)

Raw dataset shape = (39926, 24)  Pruned dataset shape = (12865, 6)
Encoded dataset shape = (12865, 35)


In [None]:
from tensorflow import keras

# Derive the input width from the training matrix instead of hard-coding it,
# so the model still builds when the encoding (and therefore the number of
# feature columns) changes.
num_features = X_train.shape[1]

# Small baseline network: one hidden ELU layer of 64 units followed by a
# linear 2-unit output (presumably one unit per target column -- confirm
# against y_train.shape, which is printed below).
tiny_model = keras.Sequential([
    keras.layers.Dense(64, activation='elu', input_dim=num_features),
    keras.layers.Dense(2)
])

# Print the array shapes as a quick sanity check before training.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Mean squared error is both the optimised loss and the reported metric.
tiny_model.compile(loss='mse', optimizer='sgd', metrics=['mse'])


In [None]:
# Training histories keyed by a display label; the plotting cells read this
# dictionary to draw one curve set per model.
histories = {}
histories['Tiny'] = tiny_model.fit(
    X_train,
    y_train,
    batch_size=500,
    epochs=150,
    validation_data=(X_test, y_test),
)

In [None]:
import tensorflow_docs as tfdocs
import tensorflow_docs.plots

# Plot the 'mse' metric for every training history collected so far.
plotter = tfdocs.plots.HistoryPlotter(metric='mse')
plotter.plot(histories)


In [None]:

# Larger network for comparison with the tiny model: four hidden ReLU layers
# of 512 units each and a linear 2-unit output.
# Only the first layer needs an explicit input size; it is taken from
# num_features (X_train.shape[1]) rather than a hard-coded constant so the
# model follows the chosen encoding. Later layers infer their input size from
# the preceding layer, so no input_dim is passed to them.
fat_model = keras.Sequential([
    keras.layers.Dense(512, activation='relu', input_dim=num_features),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(2)
])

# Print the array shapes as a quick sanity check before training.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Same loss, optimiser and metric as the tiny model so the curves are comparable.
fat_model.compile(loss='mse', optimizer='sgd', metrics=['mse'])


In [None]:
# Train the large model and record its history next to the earlier entries
# so both can be drawn on the same plot.
histories['Fat'] = fat_model.fit(
    X_train,
    y_train,
    batch_size=500,
    epochs=1000,
    validation_data=(X_test, y_test),
)

In [None]:
import matplotlib.pyplot as plt

# Switch the y-axis to a logarithmic scale before drawing, then plot the
# 'mse' metric for every recorded training history.
plt.yscale("log")
plotter = tfdocs.plots.HistoryPlotter(metric='mse')
plotter.plot(histories)

# Spot check: error of the large model on the first ten test rows only.
# NOTE(review): mean_squared_error is not imported in this cell; it presumably
# comes from the star import of processing_utils -- confirm.
print(
    mean_squared_error(
        y_test[:10, :],
        fat_model.predict(X_test[:10, :]),
    )
)