In [178]:
import numpy as np
import pandas as pd

from tensorflow import keras
import keras_tuner
from keras.models import Model
from keras import layers
from keras import Input
from keras.layers import Dense, LeakyReLU, ReLU, Conv1D
from tensorflow.keras.utils import plot_model 
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
rng = np.random.default_rng(12345)

In [168]:
def prep_signs():
    """
    reads train_signs and turns it into time series data with 30 columns and 24 hours for each person
    """
    
    df = pd.read_csv('train/train_signs.csv')

    df['charttime'] = pd.to_datetime(df['charttime'])

    #Creates new column 'firsttime' which is the time of first row for each patient
    first_time_row = df.groupby('patient_id')['charttime'].first()
    df['firsttime'] = df['patient_id'].map(first_time_row)
    # Sets the index as the time from the first reading so all patients start at 0 and go toward 24 hours
    df = df.set_index(df['charttime'] - df['firsttime'])
    df = df.drop(['charttime','firsttime'],axis=1)
    # Resamples data so all patients have exactly 24 hours
    df = df.groupby('patient_id').resample('h').mean()
    df = df.reindex(pd.MultiIndex.from_product([df.index.levels[0],pd.timedelta_range(start='00:00:00', end='23:00:00', freq='1h')]))
    df = df.groupby(level=['patient_id']).ffill().bfill()
    # Fills NA with zero, na here means a patient that never had a certain measurment taken
    df[df.isna()] = 0
    
    num_patients = len(df.index.levels[0])
    num_cols = len(df.columns)
    d = np.array(df)
    # patients * cols (30) * hours (24)
    d=np.reshape(d,(num_patients,num_cols,-1))
    d = np.swapaxes(d,1,2)
    
    X_train,X_test = np.split(rng.permutation(d),[int(d.shape[0]*0.8)],axis=0)

    scaler = StandardScaler()
    timesteps = X_train.shape[1]
    dim = X_train.shape[2]
    X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0]*X_train.shape[1],-1))
    X_test = scaler.transform(X_test.reshape(-1,dim))


    X_train = X_train.reshape(-1,timesteps,dim)
    X_test = X_test.reshape(-1,timesteps,dim)
    return X_train, X_test

In [184]:
def RNN_DAE_dimensionality_reducer(X_train,X_test,dim=30):
    """
        Reduces the dimension of X to whatever dim is given using Denoising Auto Encoder, this can lead to worse performance if dim is choosen poorly
    """
    # Specify how much noise to add
    level_of_noise=0.5

    # Add random noise based on sampling from Gaussian distribution
    X_train_noisy = X_train + level_of_noise * np.random.normal(loc=0.0, scale=1.0, size=X_train.shape)

    timesteps = X_train.shape[1]
    input_dim = X_train.shape[2] 
    latent_dim = dim

    inputs = keras.Input(shape=(timesteps, input_dim))
    encoded = layers.LSTM(latent_dim)(inputs)
    middle = layers.Dense(latent_dim)(encoded)

    decoded = layers.RepeatVector(timesteps)(middle)
    decoded = layers.LSTM(input_dim, return_sequences=True)(decoded)

    sequence_autoencoder = keras.Model(inputs, decoded,name='RNN-DAE')
    encoder = keras.Model(inputs, middle,name="Encoder")
    
    opt = keras.optimizers.Adam(learning_rate=0.001)
    sequence_autoencoder.compile(optimizer=opt, loss='mse')

    history = sequence_autoencoder.fit(X_train, X_train, epochs=1000, batch_size=128, verbose=1, validation_split=0.2)
    
    # Plot a loss chart
    display(sequence_autoencoder.summary())
    fig, ax = plt.subplots(figsize=(16,9), dpi=300)
    plt.title(label='DAE Model loss by Epoch', loc='center')
    ax.plot(history.history['loss'], label='Training Data', color='black')
    ax.plot(history.history['val_loss'], label='Test Data', color='red')
    ax.set(xlabel='Epoch', ylabel='Loss')
    plt.xticks(ticks=np.arange(len(history.history['loss'])), labels=np.arange(1, len(history.history['loss'])+1))
    plt.legend()

    plt.show()
    

    return encoder.predict(X_train), encoder.predict(X_test)

In [182]:
X_train,X_test = prep_signs()

In [187]:
X_train_latent, X_test_latent = RNN_DAE_dimensionality_reducer(X_train,X_test,30)

Epoch 1/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - loss: 0.8546 - val_loss: 1.1819
Epoch 2/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.9101 - val_loss: 1.1386
Epoch 3/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.8768 - val_loss: 1.1183
Epoch 4/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.8437 - val_loss: 1.1050
Epoch 5/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.8434 - val_loss: 1.0953
Epoch 6/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.7682 - val_loss: 1.0891
Epoch 7/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.9226 - val_loss: 1.0831
Epoch 8/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.7681 - val_loss: 1.0789
Epoch 9/1000
[1m69/69[0m [32m

KeyboardInterrupt: 

In [166]:
X_train_latent

NameError: name 'X_train_latent' is not defined