# Imports


In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

# from sklearn.model_selection import GroupShuffleSplit
# from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import seaborn as sn

# Load Data


In [None]:
# Folder that holds the normalized Turbofan CSV files read by the loading cell.
# NOTE(review): this is a hard-coded absolute path (looks like a Google Colab
# Drive mount) — it has to be changed to run the notebook anywhere else.
path = "/content/drive/MyDrive/Thesis/Datasets/Turbofan_Dataset/final_datasets_normalized/"

In [None]:
# Read the full training and test tables, then remove the columns that are
# not used for modelling.

# Columns removed from both tables before any further processing.
drop_cols = ["cycle", "setting3", "s1", "s5", "s10", "s16", "s18", "s19", "RUL"]

# Column-name lists kept alongside the data; they are not used in this cell.
corr_cols = [
    "s11", "s4", "s15", "s17", "s2", "s3", "s8",
    "s13", "s9", "s14", "s12", "s7", "s20",
]
feature_cols = [
    "cycle_norm", "setting1", "setting2",
    "s2", "s3", "s4", "s6", "s7", "s8", "s9",
    "s11", "s12", "s13", "s14", "s15", "s17", "s20", "s21",
]
prediction_col = "fail_30"

df_train = pd.read_csv(path + "TRAINING_SET_FULL.csv")
df_test = pd.read_csv(path + "TEST_SET_FULL.csv")

# drop(columns=...) returns new frames; df_train / df_test stay untouched.
train_set = df_train.drop(columns=drop_cols)
test_set = df_test.drop(columns=drop_cols)

In [None]:
# Put "cycle_norm" at position 0 in both tables (in place) so it is the
# left-most column.

for frame in (train_set, test_set):
    # pop() removes the column and hands it back; insert() re-adds it at index 0.
    frame.insert(0, "cycle_norm", frame.pop("cycle_norm"))

In [None]:
# Shuffle rows.
# A fixed random_state makes the permutation — and therefore the train/validation
# split taken from it — identical on every Restart & Run All.
# NOTE(review): rows are shuffled individually; if several rows belong to the same
# engine they can land on both sides of the split — confirm this is intended.
train_set = train_set.sample(frac=1, random_state=42)

In [None]:
# Row position that separates the training part (first 80% of the rows)
# from the validation part (remaining 20%).

valid_split = round(0.8 * len(train_set))
valid_split

16505

In [None]:
# Build the NumPy feature matrices and label vectors.
# Columns are selected by NAME (feature_cols / prediction_col) instead of by
# position, so an unexpected extra column cannot slip into the features and the
# label does not depend on being the last column. A missing column now fails
# loudly with a KeyError rather than silently training on the wrong data.
train_part = train_set.iloc[:valid_split]   # first 80% of the (shuffled) rows
valid_part = train_set.iloc[valid_split:]   # remaining 20%

X_train = train_part[feature_cols].to_numpy()
y_train = train_part[prediction_col].to_numpy()

X_valid = valid_part[feature_cols].to_numpy()
y_valid = valid_part[prediction_col].to_numpy()

X_test = test_set[feature_cols].to_numpy()
y_test = test_set[prediction_col].to_numpy()

# Define Model

In [None]:
# Feed-forward (fully connected) binary classifier:
# two 64-unit ReLU layers, each followed by batch normalization, then a 10%
# dropout and a single sigmoid output unit.

model = Sequential([
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dropout(0.1),
    Dense(1, activation='sigmoid'),
])


# Training

In [None]:
# Compile for binary classification: cross-entropy loss, Adam optimizer.
# `metrics` is given as a list — the documented form; newer Keras releases
# reject a bare string. The history keys stay 'binary_accuracy' /
# 'val_binary_accuracy', which the plotting cell relies on.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])

# Fit on the training part and evaluate on the validation part after every epoch.
history = model.fit(X_train, y_train,
                    validation_data=(X_valid, y_valid),
                    epochs=25,
                    batch_size=32)

NameError: ignored

# Evaluation


In [None]:
def plot_loss(fit_history):
    """Plot training and validation loss per epoch.

    Parameters
    ----------
    fit_history
        Object with a ``history`` mapping that contains the ``'loss'`` and
        ``'val_loss'`` sequences (one value per epoch).
    """
    curves = fit_history.history
    plt.figure(figsize=(5,5))
    # Epochs are numbered from 1, so each series is drawn against 1..len(series).
    for key, legend_name in (('loss', 'train'), ('val_loss', 'validate')):
        values = curves[key]
        plt.plot(range(1, len(values)+1), values, label=legend_name)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Draw the loss curves recorded during training.
plot_loss(fit_history=history)

In [None]:
def plot_binary_accuracy(fit_history):
    """Plot training and validation binary accuracy per epoch.

    Parameters
    ----------
    fit_history
        Object with a ``history`` mapping that contains the
        ``'binary_accuracy'`` and ``'val_binary_accuracy'`` sequences
        (one value per epoch).
    """
    curves = fit_history.history
    plt.figure(figsize=(5,5))
    # Epochs are numbered from 1, so each series is drawn against 1..len(series).
    for key, legend_name in (('binary_accuracy', 'train'),
                             ('val_binary_accuracy', 'validate')):
        values = curves[key]
        plt.plot(range(1, len(values)+1), values, label=legend_name)
    plt.xlabel('Epochs')
    plt.ylabel('binary_accuracy')
    plt.legend()
    plt.show()

# Draw the accuracy curves recorded during training.
plot_binary_accuracy(fit_history=history)

In [None]:
# Display names used in the classification reports, listed in label order.
# Presumably label 0 -> 'healthy' and label 1 -> 'worn_out' — confirm against the data.
target_names = [
    'healthy',
    'worn_out',
]

In [None]:
# Initial
# Train Score
y_hat_train = model.predict(X_train)
# Flatten the predictions to 1-D and round them to hard integer class labels.
y_hat_int_train = y_hat_train.reshape(y_hat_train.shape[0]).round().astype(int)
# Ground-truth labels as a flat integer vector, matching the prediction dtype.
label_array = y_train.reshape(y_train.shape[0]).astype(int)

# classification_report takes (y_true, y_pred) in that order; passing them the
# other way round swaps precision with recall and reports the wrong support.
print(classification_report(label_array, y_hat_int_train, target_names=target_names))

In [None]:
# Initial
# Test set score

y_hat_test = model.predict(X_test)
# Flatten the predictions to 1-D and round them to hard integer class labels.
y_hat_int_test = y_hat_test.reshape(y_hat_test.shape[0]).round().astype(int)
# Ground-truth labels as integers, matching the prediction dtype.
test_label_int = y_test.astype(int)

# classification_report takes (y_true, y_pred) in that order; passing them the
# other way round swaps precision with recall and reports the wrong support.
print(classification_report(test_label_int, y_hat_int_test, target_names=target_names))

In [None]:
# Confusion matrices for the test set: raw counts, and a second one normalized
# with normalize='true'.
mtx = confusion_matrix(y_true=test_label_int, y_pred=y_hat_int_test)
mtx_norm = confusion_matrix(
    y_true=test_label_int,
    y_pred=y_hat_int_test,
    normalize='true',
)

In [None]:
# Annotated heatmap of the raw confusion-matrix counts.
hm = sn.heatmap(data=mtx, annot=True, fmt='g')
hm.set_xlabel('Predicted Value')
hm.set_ylabel('Actual Value')
hm

In [None]:
# Annotated heatmap of the normalized confusion matrix.
hm_norm = sn.heatmap(data=mtx_norm, annot=True, fmt='g')
hm_norm.set_xlabel('Predicted Value')
hm_norm.set_ylabel('Actual Value')
hm_norm