In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
seed = 42
tf.random.set_seed(seed)
rng = np.random.default_rng(seed=seed)

In [3]:
data = pd.read_csv('..\\Data\\ze41_mol_desc_db_red.csv', header=0, sep=';', decimal=',')

In [4]:
col_names = data.columns
x_cols = ['MW', 'AMW', 'Mv', 'Mi', 'nTA', 'RBF', 'nDB']
X_full = data[col_names[3:]]
y = data[col_names[2]]

In [5]:
X_scaled = MinMaxScaler(feature_range=(0,1)).fit_transform(X_full)

In [6]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.2, random_state=seed)

In [7]:
model = keras.models.Sequential([
    keras.layers.Dense(50, activation='relu', input_shape=(len(x_cols),)),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1)
])

In [8]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.005),
    loss='mean_squared_error')

In [9]:
weights = model.get_weights()

In [10]:
results = []

In [12]:
x_col_idxs = [col_names.tolist().index(x) for x in x_cols]
x_col_idxs

[3, 4, 5, 6, 7, 8, 9]

In [18]:
for _ in range(25):
    model.set_weights(weights)
    idxs = rng.choice(len(col_names)-3, size=len(x_cols), replace=False)
    X_train_sel = X_train[:, idxs]
    X_val_sel = X_valid[:, idxs]
    history = model.fit(X_train_sel, y_train, validation_data=(X_val_sel, y_valid), verbose=0, epochs=25)
    results.append({'idxs': idxs, 'loss': history.history['loss'][-1], 'val loss': history.history['val_loss'][-1]})

[{'idxs': array([ 797, 1140,  365,  254,  782,  648, 1195], dtype=int64),
  'loss': 0.15129490196704865,
  'val loss': 0.1431145966053009},
 {'idxs': array([ 546,  374, 1123, 1218,  190, 1248,   56], dtype=int64),
  'loss': 0.06314419955015182,
  'val loss': 0.06967666745185852},
 {'idxs': array([971, 422, 145, 397, 651, 589,  27], dtype=int64),
  'loss': 0.15063151717185974,
  'val loss': 0.13067598640918732},
 {'idxs': array([ 476, 1178,  329,    6, 1193,  303,  936], dtype=int64),
  'loss': 0.1608635038137436,
  'val loss': 0.14364568889141083},
 {'idxs': array([ 905,  936,  461,  753, 1101,   56,  224], dtype=int64),
  'loss': 0.06778114289045334,
  'val loss': 0.07215940207242966},
 {'idxs': array([ 628,  181,  545, 1218, 1176,   17,  626], dtype=int64),
  'loss': 0.2004784196615219,
  'val loss': 0.1954578012228012},
 {'idxs': array([ 152,  965,  874,  636, 1215,  372, 1166], dtype=int64),
  'loss': 0.09722306579351425,
  'val loss': 0.08160001784563065},
 {'idxs': array([ 155,  