In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
seed = 13
tf.random.set_seed(seed)

In [3]:
data = pd.read_csv('..\\Data\\ze41_mol_desc_db_red.csv', header=0, sep=';', decimal=',')

In [4]:
col_names = data.columns
X = data[col_names[3:]].astype('float32')
y = data[col_names[2]].astype('float32')

In [5]:
X_scaled = MinMaxScaler(feature_range=(0,1)).fit_transform(X)

In [6]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.1, random_state=seed)
X_train = pd.DataFrame(X_train)
X_train.columns = X.columns

In [33]:
class Autoencoder(keras.models.Model):
    def __init__(self, latent_dim=5):
        super(Autoencoder, self).__init__()
        self.latent_dim = latent_dim   
        self.encoder = tf.keras.Sequential([
            keras.layers.Dense(150, activation='linear'),
            keras.layers.LeakyReLU(),
            keras.layers.Dense(latent_dim, activation='linear'),
            keras.layers.LeakyReLU(),
        ])
        self.decoder = tf.keras.Sequential([
            keras.layers.Dense(150, activation='linear'),
            keras.layers.LeakyReLU(),
            keras.layers.Dense(len(X_train.columns), activation='linear')
        ])
    
    def call(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [34]:
autoencoder = Autoencoder(latent_dim=63)

In [35]:
autoencoder.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss='mean_squared_error')

In [36]:
history = autoencoder.fit(X_train, X_train, validation_data=(X_valid, X_valid), epochs=75)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [37]:
encoded = autoencoder.encoder(X_train.to_numpy()).numpy()

In [38]:
np.min(encoded, axis=0), np.mean(encoded, axis=0), np.max(encoded, axis=0)

(array([-1.1270185 , -0.6010805 , -0.61468315, -1.0542951 , -0.59032327,
        -0.4601769 ,  0.30387294,  0.03831662, -0.67637944, -0.29935572,
        -0.12478977, -1.0921764 , -0.671218  , -1.3110518 , -0.25543138,
        -0.20615743, -0.3036142 , -1.0204569 , -0.18818992, -0.62748957,
        -0.95204514, -0.39997703, -0.22898522, -0.13860826, -0.06297753,
        -1.0264585 , -0.800119  , -0.75881326,  2.0023322 , -0.76050377,
        -0.1914937 , -0.94602823, -0.8972123 , -0.21145995, -0.9521343 ,
        -0.8701328 , -0.15694396, -0.16717398, -0.447404  , -0.7705862 ,
         0.19623733, -0.16675119, -1.041593  , -0.29204786,  0.19773118,
        -0.4143301 , -0.22566482, -0.38851857, -0.23464409, -0.7208832 ,
        -0.4243733 , -0.15748309, -0.49535978, -0.19403328,  0.15137514,
        -0.27652723, -0.2279736 , -0.6726384 , -0.36468208, -0.7506586 ,
        -0.02690163, -1.1793337 , -0.6645397 ], dtype=float32),
 array([-0.8039892 , -0.1679984 ,  0.7191011 , -0.24162932, 

In [31]:
np.where(np.all(np.isclose(encoded, 0), axis=0))[0]

array([], dtype=int64)

In [32]:
encoded[3, :]

array([ 0.4169174 , -0.18855493, -0.64100254, -0.229885  , -0.33093712,
       -0.1857523 ,  0.36452618, -0.26486614,  1.0547218 ,  0.96784663,
       -0.33118272,  2.5293264 ,  0.8696953 , -0.01665194, -0.3071468 ,
        1.5978403 , -0.86523074,  0.8250886 ,  0.83584434,  0.09430908,
        0.7158391 , -0.14679645,  1.2907906 ,  0.58988464, -0.9638031 ,
        1.4717908 , -0.21043524,  1.0412288 , -0.33622587,  1.4674118 ,
       -0.95475924,  0.22502956,  1.2308071 , -0.32858747,  1.2273735 ,
       -0.52595603,  0.8984458 , -0.16824324, -0.54150295, -0.5780772 ,
        0.4244134 , -0.05069023,  0.06642459, -0.04441881, -0.10388943,
       -0.6034974 , -0.67719936, -0.05721005,  1.4160762 , -0.46924374,
       -0.51322293, -0.7776545 ,  2.9653423 ,  1.3085799 , -0.48950812,
       -0.22778118, -0.29817116,  1.146699  , -0.9479503 , -0.24659275,
       -0.64653736,  2.7394383 ,  2.3352356 ], dtype=float32)

In [15]:
encoded.shape

(54, 10)