In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
seed = 42
tf.random.set_seed(seed)
rng = np.random.default_rng(seed=seed)

In [3]:
data = pd.read_csv('..\\Data\\ze41_mol_desc_db_red.csv', header=0, sep=';', decimal=',')

In [4]:
col_names = data.columns
x_cols = ['P_VSA_MR_5', 'Mor04m', 'E1p', 'CATS3D_02_AP', 'LUMO / eV']
X_full = data[col_names[3:]]
y = data[col_names[2]]

In [5]:
X_scaled = MinMaxScaler(feature_range=(0,1)).fit_transform(X_full)

In [6]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.2, random_state=seed)

In [7]:
model = keras.models.Sequential([
    keras.layers.Dense(50, activation='relu', input_shape=(len(x_cols),)),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1)
])

In [8]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.005),
    loss='mean_squared_error')

In [9]:
weights = model.get_weights()

In [10]:
results = []

In [11]:
x_col_idxs = [col_names.tolist().index(x) for x in x_cols]
x_col_idxs

[370, 657, 791, 1236, 1258]

In [12]:
X_train_sel = X_train[:, x_col_idxs]
X_val_sel = X_valid[:, x_col_idxs]
history = model.fit(X_train_sel, y_train, validation_data=(X_val_sel, y_valid), verbose=0, epochs=100)
results.append({'idxs': x_col_idxs, 'loss': history.history['loss'][-1], 'val loss': history.history['val_loss'][-1]})

In [13]:
for _ in range(25):
    model.set_weights(weights)
    idxs = rng.choice(len(col_names)-3, size=len(x_cols), replace=False)
    X_train_sel = X_train[:, idxs]
    X_val_sel = X_valid[:, idxs]
    history = model.fit(X_train_sel, y_train, validation_data=(X_val_sel, y_valid), verbose=0, epochs=100)
    results.append({'idxs': idxs, 'loss': history.history['loss'][-1], 'val loss': history.history['val_loss'][-1]})

In [14]:
results

[{'idxs': [370, 657, 791, 1236, 1258],
  'loss': 0.035716813057661057,
  'val loss': 0.038644127547740936},
 {'idxs': array([972, 552, 823, 112, 545], dtype=int64),
  'loss': 0.04792053997516632,
  'val loss': 0.045133695006370544},
 {'idxs': array([1227,  118,  661,  959,  926], dtype=int64),
  'loss': 0.04735381901264191,
  'val loss': 0.047686923295259476},
 {'idxs': array([ 629, 1054,  566,  466,  230], dtype=int64),
  'loss': 0.04228797182440758,
  'val loss': 0.06019053980708122},
 {'idxs': array([ 685,  286,  567,  557, 1033], dtype=int64),
  'loss': 0.0398203507065773,
  'val loss': 0.054395515471696854},
 {'idxs': array([ 208, 1078, 1040,  348,  795], dtype=int64),
  'loss': 0.0408223420381546,
  'val loss': 0.08544125407934189},
 {'idxs': array([1123,  980,  560, 1219,  853], dtype=int64),
  'loss': 0.052374254912137985,
  'val loss': 0.06363093852996826},
 {'idxs': array([ 55, 625, 687, 936, 194], dtype=int64),
  'loss': 0.02891695313155651,
  'val loss': 0.08723362535238266