In [None]:
# import libraries
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler, scale
from sklearn.model_selection import train_test_split

import seaborn as sns
sns.set(color_codes=True)
import matplotlib.pyplot as plt
%matplotlib inline
from keras.layers import Lambda, Input, Dense
from sklearn.decomposition import PCA


from numpy.random import seed
import tensorflow as tf
tf.random.set_seed(10)
import keras
from keras.layers import Input, Dropout, Dense, GRU, LSTM, TimeDistributed, RepeatVector
from keras.models import Model, Sequential
from keras import regularizers
from keras.losses import MeanSquaredLogarithmicError
from keras import backend as K

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Merge every normal-batch file in data_dir into a single DataFrame.
data_dir = '/content/drive/MyDrive/Colab Notebooks/PFP/PFP/Normal'

# Sort the listing so the merge order is identical on every run (os.listdir
# returns entries in arbitrary order) and skip anything that is not a regular
# file, e.g. checkpoint folders, which pd.read_csv cannot open.
batch_frames = []
for filename in sorted(os.listdir(data_dir)):
    batch_path = os.path.join(data_dir, filename)
    if not os.path.isfile(batch_path):
        continue
    # Missing values are replaced with 0, exactly as before.
    dataset = pd.read_csv(batch_path).fillna(0)
    batch_frames.append(dataset)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0, and
# re-copying the accumulated frame on every iteration is quadratic in the
# number of files. Row labels are kept as read (no ignore_index), which is
# what the previous append loop produced. An empty directory still yields an
# empty DataFrame.
normal_batch = pd.concat(batch_frames) if batch_frames else pd.DataFrame()

In [None]:
# Draw 96695 rows in random order from the merged normal data.
# random_state pins the draw so a fresh-kernel re-run selects the same rows
# in the same order (10 matches the seed passed to tf.random.set_seed).
# NOTE(review): n=96695 is presumably the total number of rows in the normal
# batches; DataFrame.sample raises ValueError if the frame has fewer rows.
# NOTE(review): rows are shuffled here, before create_sequences slices them
# into windows, so each window holds randomly ordered rows rather than
# consecutive time steps -- confirm this is intended.
normal_batch_shuffled = normal_batch.sample(n=96695, random_state=10)

In [None]:
# Columns removed from the shuffled data before it is scaled
# (the labels are matched exactly, including the leading space in one of them).
col_drop = [
    'Time (h)',
    'Agitator RPM(RPM:RPM)',
    'Ammonia shots(NH3_shots:kgs)',
    'Fault reference(Fault_ref:Fault ref)',
    '0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)',
    '1- No Raman spec',
    ' 1-Raman spec recorded',
    'Batch reference(Batch_ref:Batch ref)',
    '2-PAT control(PAT_ref:PAT ref)',
    'Batch ID',
    'Fault flag',
]

# drop() returns a new frame; normal_batch_shuffled itself is left untouched.
normal_batch1 = normal_batch_shuffled.drop(columns=col_drop)

In [None]:
# Standardise every remaining column (zero mean, unit variance).
# The fitted scaler is kept in a variable instead of being thrown away, so
# data processed later can be transformed with the statistics learned here
# via normal_scaler.transform(...).
normal_scaler = StandardScaler()
normal_batch2 = normal_scaler.fit_transform(normal_batch1)

# fit_transform returns a plain ndarray, so the rebuilt frame has positional
# integer column labels rather than the original column names.
normal_batch3 = pd.DataFrame(normal_batch2)


In [None]:
def create_sequences(X, time_steps=5, stride=2):
    """Slice X into overlapping windows of `time_steps` consecutive rows.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        Row-major data; each row is one observation.
    time_steps : int, default 5
        Number of consecutive rows in every window.
    stride : int, default 2
        Distance between the start rows of successive windows.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_windows, time_steps, ...) where the trailing
        dimensions are those of a single row. With no windows the result is
        an empty array that still has this shape.

    Raises
    ------
    ValueError
        If `time_steps` or `stride` is smaller than 1.

    Notes
    -----
    Window starts are range(0, len(X) - time_steps, stride); the stop bound is
    exclusive, so a window starting exactly at len(X) - time_steps is never
    produced. This is kept as-is so existing output shapes do not change.
    """
    if time_steps < 1 or stride < 1:
        raise ValueError("time_steps and stride must be positive integers")

    # Convert once and slice the ndarray; this avoids one DataFrame.iloc call
    # per window and also lets plain arrays be passed in.
    values = np.asarray(X)
    starts = range(0, len(values) - time_steps, stride)

    if len(starts) == 0:
        # Keep the window/feature dimensions so callers can still read .shape[1:].
        return np.empty((0, time_steps) + values.shape[1:], dtype=values.dtype)

    return np.stack([values[i:i + time_steps] for i in starts])

In [None]:
# Build the training windows from the scaled normal data; the window length is
# spelled out here (5 is also create_sequences' default).
X_train = create_sequences(normal_batch3, time_steps=5)
X_train.shape


(48345, 5, 28)

In [None]:
# Final GRU autoencoder (author's note: 90% Avg FDR || model = GRU_AE_test1.h5)
# NOTE(review): the training cell saves this model as GRU_AE_for_XGBoost.h5,
# not GRU_AE_test1.h5 -- confirm which file name is current.

def _encoder_gru(units, return_sequences):
    """Return one encoder GRU layer; only width and output mode vary.

    All encoder layers share the same activations, initialisers and L2
    penalties (0.001) on the input and recurrent kernels.
    """
    return GRU(units,
               activation='tanh',
               recurrent_activation="sigmoid",
               return_sequences=return_sequences,
               kernel_initializer="glorot_uniform",
               recurrent_initializer='orthogonal',
               recurrent_regularizer=regularizers.l2(0.001),
               kernel_regularizer=regularizers.l2(0.001))


# One sample is a window of X_train.shape[1] rows by X_train.shape[2] columns.
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))

# Encoder: 32 -> 16 -> 8 units. The last layer has return_sequences=False, so
# it emits a single 8-dimensional vector per window.
eL0 = _encoder_gru(32, return_sequences=True)(inputs)
eL1 = _encoder_gru(16, return_sequences=True)(eL0)
eL2 = _encoder_gru(8, return_sequences=False)(eL1)

# Repeat the encoded vector once per time step so the decoder receives a sequence.
h = RepeatVector(X_train.shape[1])(eL2)

# Decoder: 16 -> 32 units, without regularisers. The 8-unit decoder layer
# below is disabled in the original and left that way.
#dL2 = GRU(8, activation='tanh', return_sequences=True)(h)
dL3 = GRU(16, activation='tanh', return_sequences=True)(h)
dL4 = GRU(32, activation='tanh', return_sequences=True)(dL3)

# Project every time step back to the input feature count.
output = TimeDistributed(Dense(X_train.shape[2]))(dL4)

model = Model(inputs=inputs, outputs=output)

#plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
model.summary()

In [None]:
# Fit the autoencoder to reconstruct its own input, save it, and plot the loss.
loss_name = 'log_cosh'   # single source for both compile() and the plot label
nb_epochs = 300
batch_size = 512

# Callbacks and optimizer come from the same `keras` package that built the
# model, rather than mixing in tf.keras.
# NOTE(review): monitor='loss' watches the training loss even though 30% of
# the data is held out for validation; 'val_loss' is the usual choice. Left
# unchanged so training behaviour stays the same.
callback = keras.callbacks.EarlyStopping(monitor='loss', patience=3)
opt = keras.optimizers.Adam(learning_rate=0.001)

# NOTE(review): 'accuracy' is not an informative metric for reconstructing
# continuous values; it is kept so the keys of `history` do not change.
model.compile(optimizer=opt, loss=loss_name, metrics=['accuracy'])

# Input and target are both X_train (autoencoder). Only the per-epoch metrics
# dict is kept from the returned History object.
history = model.fit(X_train, X_train,
                    epochs=nb_epochs,
                    batch_size=batch_size,
                    validation_split=0.3,
                    callbacks=[callback]
                    ).history
model.save('/content/drive/MyDrive/Colab Notebooks/PFP/PFP/Saved Model/GRU_AE_for_XGBoost.h5')


# Plot the training and validation losses.
fig, ax = plt.subplots(figsize=(6, 4), dpi=80)
ax.plot(history['loss'], 'b', label='Train', linewidth=2)
ax.plot(history['val_loss'], 'r', label='Validation', linewidth=2)
ax.set_title('Model loss', fontsize=16)
# The label previously said "mse" although the compiled loss is log_cosh.
ax.set_ylabel(f'Loss ({loss_name})')
ax.set_xlabel('Epoch')
ax.legend(loc='upper right')
#plt.savefig('/content/drive/MyDrive/Colab Notebooks/PFP/Figures/GRU_AE_50K.pdf')
plt.show()