In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
seed = 42
tf.random.set_seed(seed)

In [3]:
data = pd.read_csv('..\\Data\\ze41_mol_desc_db_red.csv', header=0, sep=';', decimal=',')

In [4]:
col_names = data.columns
X = data[col_names[3:]]
y = data[col_names[2]]

In [5]:
X_scaled = MinMaxScaler(feature_range=(0,1)).fit_transform(X)
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.1, random_state=seed)

In [6]:
model = keras.models.Sequential([
    keras.layers.GaussianNoise(stddev=0.1, input_shape=(X_train.shape[1],)),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1)
])

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gaussian_noise (GaussianNois (None, 1260)              0         
_________________________________________________________________
dense (Dense)                (None, 100)               126100    
_________________________________________________________________
dense_1 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                510       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 11        
Total params: 131,671
Trainable params: 131,671
Non-trainable params: 0
_________________________________________________________________


In [8]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.005),
    loss='mean_squared_error')

In [9]:
history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [10]:
y_valid

0     0.328
5     0.896
36    0.817
45    0.765
13    0.893
54    0.733
Name: LinIE ZE41, dtype: float64

In [11]:
y_pred = model.predict(X_valid)
for i in range(y_pred.shape[0]):
    print('true: {}, predicted: {:.3f}'.format(y_valid.iloc[i], y_pred[i, 0]))

true: 0.328, predicted: 0.592
true: 0.896, predicted: 0.811
true: 0.817, predicted: 0.846
true: 0.765, predicted: 0.728
true: 0.893, predicted: 0.978
true: 0.733, predicted: 0.751


In [12]:
for i in range(y_pred.shape[0]):
    print('true: {:.0f}, predicted: {:.0f}'.format(y_valid.iloc[i]*345-270, y_pred[i, 0]*345-270))

true: -157, predicted: -66
true: 39, predicted: 10
true: 12, predicted: 22
true: -6, predicted: -19
true: 38, predicted: 67
true: -17, predicted: -11
