### Autoencoder model on CTG dataset
- Load Data
- Preprocess Data
- Define Autoencoder Model
- Train Model
- Generate predictions for Train and Test Sets

- Evaluate generated data on classification models

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns

# Load Data
# The raw CTG file is read with ';' as the field separator.
ctg_data = pd.read_csv(
    filepath_or_buffer='../CTGRawData.csv',
    sep=';',
)

In [2]:
# Preprocess Data
from sklearn.model_selection import train_test_split

# Separate features from labels: 'NSP' is the label column; after dropping it,
# the first 21 remaining columns are kept as features.
y = ctg_data['NSP']
X = ctg_data.drop('NSP', axis=1).iloc[:, :21]

# Split into train and test sets (80% / 20%). The fixed seed makes the split,
# and everything derived from it, reproducible across notebook runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [3]:
# Normalization of data sets
# Min-max scaling: the scaler is fitted on the training split only and the
# same fitted scaler is then applied to the test split.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# NOTE: wrapping the scaled arrays in a fresh DataFrame gives them default
# integer column labels and a default integer index, so the rows no longer
# carry the index labels of X_train / X_test (or of y_train / y_test).
X_train_norm = pd.DataFrame(scaler.fit_transform(X_train))
X_test_norm = pd.DataFrame(scaler.transform(X_test))


In [4]:
# Check Shape of data
# Show the first rows of the scaled training frame, then its shape, each on
# its own line.
print(X_train_norm.head(), X_train_norm.shape, sep='\n')

# 21 features that we are going to try to reduce.

         0         1         2         3         4         5         6   \
0  0.592593  0.000000  0.000000  0.086957  0.360000  0.088235  0.109890   
1  0.685185  0.000000  0.000000  0.130435  0.733333  0.014706  0.065934   
2  0.759259  0.192308  0.000000  0.173913  0.320000  0.147059  0.000000   
3  0.851852  0.000000  0.000000  0.086957  0.600000  0.044118  0.472527   
4  0.407407  0.038462  0.003546  0.043478  0.026667  0.367647  0.000000   

         7      8    9   ...        11        12        13      14   15  \
0  0.179487  0.000  0.0  ...  0.112994  0.743119  0.275862  0.1250  0.0   
1  0.189349  0.000  0.0  ...  0.118644  0.770642  0.310345  0.0625  0.0   
2  0.094675  0.000  0.0  ...  0.220339  0.752294  0.448276  0.1250  0.0   
3  0.171598  0.000  0.0  ...  0.214689  0.669725  0.362069  0.2500  0.0   
4  0.000000  0.125  0.0  ...  0.485876  0.155963  0.293103  0.2500  0.0   

         16        17        18        19   20  
0  0.650794  0.623853  0.605505  0.003717  0.5  


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from typing import List

def loss(x: np.ndarray, x_bar: np.ndarray) -> "tf.Tensor":
    """Return the mean squared error between ``x`` and its reconstruction.

    Args:
        x: Reference values (passed as the first, "true", argument).
        x_bar: Reconstructed values (passed as the second, "predicted",
            argument).

    Returns:
        The tensor produced by ``tf.losses.mean_squared_error``. It is not a
        Python float: the squared error is averaged over the last axis, so
        the result keeps the leading dimensions of the inputs.
    """
    return tf.losses.mean_squared_error(x, x_bar)

def grad(model: "keras.Model", inputs: np.ndarray):
    """Run one forward pass under a gradient tape and differentiate the loss.

    Args:
        model: Callable model that returns a ``(reconstruction, inputs)``
            pair when called on a batch.
        inputs: Batch of samples fed to the model.

    Returns:
        A 4-tuple ``(loss_value, gradients, inputs_reshaped, reconstruction)``
        where ``gradients`` are the gradients of ``loss_value`` with respect
        to ``model.trainable_variables`` (same order), and the last two items
        are the two values returned by the model, swapped into
        (inputs, reconstruction) order.
    """
    # The annotation is quoted so that defining this function does not require
    # a bare ``Model`` name to be in scope.
    with tf.GradientTape() as tape:
        reconstruction, inputs_reshaped = model(inputs)
        loss_value = loss(inputs_reshaped, reconstruction)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients, inputs_reshaped, reconstruction

def train_model(model: "keras.Model", data=None, num_epochs: int = 10,
                batch_size: int = 400) -> None:
    """Train ``model`` with Adam on mini-batches of reconstruction loss.

    Args:
        model: Model to train; it is passed to ``grad`` together with each
            batch.
        data: Training samples, sliceable along the first axis (array or
            DataFrame; converted with ``np.asarray``). When omitted, the
            module-level ``x_train`` is used, which keeps the original
            ``train_model(model)`` call working.
            NOTE(review): ``x_train`` is not defined in the visible part of
            this notebook (it defines ``X_train`` / ``X_train_norm``), so
            passing ``data`` explicitly is the safer call.
        num_epochs: Number of passes over the data.
        batch_size: Maximum number of samples per gradient step.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or there are no
            training samples.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    samples = x_train if data is None else np.asarray(data)
    n_samples = len(samples)
    if n_samples == 0:
        raise ValueError("train_model needs at least one training sample")

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    for epoch in range(num_epochs):
        print("Epoch: ", epoch)
        for start in range(0, n_samples, batch_size):
            batch = samples[start:start + batch_size]
            loss_value, grads, _, _ = grad(model, batch)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Report the mean loss of the last batch of the epoch. reduce_mean
        # normalises by the real number of elements, so a short final batch
        # is not under-reported the way dividing by batch_size would.
        print("Step: {},Loss: {}".format(epoch, tf.reduce_mean(loss_value).numpy()))

In [None]:
# Define Autoencoder
class ComposeAutoEncoder(keras.models.Model):
    """Dense autoencoder composed of an ``encoder`` and a ``decoder`` model.

    The encoder flattens the input and applies Dense(64) -> Dense(32) ->
    Dense(encoding_dim). The decoder applies Dense(32) -> Dense(64) ->
    Dense(prod(input_shape), sigmoid) and reshapes back to ``input_shape``.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
            Defaults to ``(28, 28)``; use e.g. ``(21,)`` for rows with 21
            features.
        encoding_dim: Size of the bottleneck (encoded) representation.
    """

    def __init__(self, input_shape=(28, 28), encoding_dim=32):
        super().__init__()

        sample_shape = tuple(input_shape)
        # Number of values in one flattened sample (784 for the default
        # 28x28 shape); the decoder must emit exactly this many units.
        flat_units = int(np.prod(sample_shape))

        # this is our input placeholder
        input_img = layers.Input(shape=sample_shape)

        self.flatten_input = layers.Flatten()(input_img)
        # "Encoder layers"
        self.encoder1 = layers.Dense(64, activation='relu')(self.flatten_input)
        self.encoder2 = layers.Dense(32, activation='relu')(self.encoder1)
        # "Encoder output" - "bottleneck"
        self.bottleneck = layers.Dense(encoding_dim, activation='relu')(self.encoder2)
        # this model maps an input to its encoded representation
        self.encoder = keras.models.Model(input_img, self.bottleneck, name='encoder')

        # "Decoder layers"
        # create a placeholder for an encoded (encoding_dim-dimensional) input
        self.encoded_input = layers.Input(shape=(encoding_dim,))
        self.decoder1 = layers.Dense(32, activation='relu')(self.encoded_input)
        self.decoder2 = layers.Dense(64, activation='relu')(self.decoder1)
        # "decoded" is the lossy reconstruction of the input. It is fed from
        # decoder2 so that the 64-unit layer is actually part of the decoder.
        self.decoded_flatten = layers.Dense(flat_units, activation='sigmoid')(self.decoder2)
        self.decoded = layers.Reshape(sample_shape)(self.decoded_flatten)
        # create the decoder model
        self.decoder = keras.models.Model(self.encoded_input, self.decoded, name='decoder')

    def call(self, inp):
        """Encode then decode ``inp``.

        Returns:
            A ``(reconstruction, inp)`` pair: the decoder output and the
            unchanged input.
        """
        # full autoencoder for training
        encoded = self.encoder(inp)
        reconstruction = self.decoder(encoded)

        return reconstruction, inp