In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
seed = 42
tf.random.set_seed(seed)
rng = np.random.default_rng(seed=seed)

In [3]:
data = pd.read_csv('..\\Data\\ze41_mol_desc_db_red.csv', header=0, sep=';', decimal=',')

In [4]:
col_names = data.columns
x_cols = ['P_VSA_MR_5', 'Mor04m', 'E1p', 'Mor22s', 'LUMO / eV']
#x_cols = ['VE2_G/D', 'Eig14_EA(dm)', 'Mor31m', 'TDB04u', 'HATS1e']
X_full = data[col_names[3:]]
y = data[col_names[2]]

In [5]:
X_scaled = MinMaxScaler(feature_range=(0,1)).fit_transform(X_full)

In [6]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.1, random_state=seed)

In [7]:
def get_model():
    model = keras.models.Sequential([
        keras.layers.Dense(50, activation='relu', input_shape=(len(x_cols),)),
        keras.layers.Dense(20, activation='relu'),
        keras.layers.Dense(10, activation='relu'),
        keras.layers.Dense(1)
        ])
    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=0.01),
        loss='mean_squared_error')
    return model

In [8]:
model = get_model()

In [9]:
weights = model.get_weights()

In [10]:
results = []

In [11]:
x_col_idxs = [col_names.tolist().index(x) for x in x_cols]
x_col_idxs

[370, 657, 791, 758, 1258]

In [12]:
X_train_sel = X_train[:, x_col_idxs]
X_val_sel = X_valid[:, x_col_idxs]
history = model.fit(X_train_sel, y_train, validation_data=(X_val_sel, y_valid), verbose=0, epochs=25)
results.append({'idxs': x_col_idxs, 'loss': history.history['loss'][-1], 'val loss': history.history['val_loss'][-1]})

In [13]:
for i in range(100):
    if i%10 == 9:
        print(i+1)
    model = get_model()
    model.set_weights(weights)
    idxs = rng.choice(len(col_names)-3, size=len(x_cols), replace=False)
    X_train_sel = X_train[:, idxs]
    X_val_sel = X_valid[:, idxs]
    history = model.fit(X_train_sel, y_train, validation_data=(X_val_sel, y_valid), verbose=0, epochs=25)
    results.append({'idxs': idxs, 'loss': history.history['loss'][-1], 'val loss': history.history['val_loss'][-1]})

10
20
30
40
50
60
70
80
90
100


In [14]:
#results

In [15]:
y_valid

0     0.328
5     0.896
36    0.817
45    0.765
13    0.893
54    0.733
Name: LinIE ZE41, dtype: float64

In [16]:
val_losses = [r['val loss'] for r in results]

In [17]:
val_losses = sorted(val_losses)

In [18]:
val_losses.index(results[0]['val loss'])

62

In [19]:
len(val_losses)

101

In [20]:
#val_losses

In [21]:
np.mean(val_losses)

0.03288356870375943

In [22]:
results[0]['val loss']

0.0349186472594738

In [23]:
np.min(val_losses)

0.007308712229132652