In [None]:
# Load libraries
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, LSTM, concatenate, Dropout
from keras.regularizers import l1, l2
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Round number; it is interpolated into the input file name `Round{n_rounds}.csv`.
n_rounds= 3

In [None]:
# Relative path of the CSV for the selected round.
file_path= f'Round{n_rounds}.csv'

In [None]:
# header=None: the first row is read as data and the columns get integer labels.
data = pd.read_csv(file_path, header= None)

In [None]:
# Preview the loaded frame.
data

In [None]:
# Class counts of the last column (used as the label) before any resampling.
data.iloc[:,-1].value_counts()

In [None]:
# Features are every column except the last; the last column is the label.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Randomly undersample so the classes are balanced; the fixed seed makes the
# selection repeatable.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

In [None]:
# Class counts after undersampling.
y_resampled.value_counts()

In [None]:
# Swap the resampled pandas objects for plain NumPy arrays, which the encoding
# and Keras steps further down index positionally.
X_resampled, y_resampled = X_resampled.to_numpy(), y_resampled.to_numpy()

In [None]:
# Shape of the resampled feature array.
X_resampled.shape

In [None]:
# One-hot encoding: allocate an all-zero int8 tensor that keeps the first two
# dimensions of the input and adds a 256-wide axis, one slot per possible value.
one_hot_shape = X_resampled.shape[:2] + (256,)
X_ohc = np.zeros(one_hot_shape, dtype=np.int8)

In [None]:
# Shape of the one-hot tensor; the last axis has length 256.
X_ohc.shape

In [None]:
# Inspect the first sample; each of its values is used as an index into the
# 256-wide one-hot axis by the encoding loop.
X_resampled[0]

In [None]:
# Fill the one-hot tensor: for every (sample, position) pair, set the slot whose
# index equals the value stored at that position to 1.
# A single integer-array indexed assignment replaces the nested Python loops; the
# two index grids broadcast against X_resampled, so every (i, j, X_resampled[i, j])
# cell is written exactly once.
sample_idx = np.arange(X_ohc.shape[0])[:, np.newaxis]
position_idx = np.arange(X_ohc.shape[1])[np.newaxis, :]
X_ohc[sample_idx, position_idx, X_resampled] = 1

In [None]:
# NOTE: this rebinds X_resampled from the 2-D integer array to the 3-D one-hot
# tensor, so the encoding cells above are not safe to re-run after this one.
X_resampled= X_ohc

In [None]:
# First cut: 70 % of the samples for training, 30 % set aside.
X_train, X_rest, Y_train, Y_rest = train_test_split(
    X_resampled, y_resampled, test_size=0.30, random_state=10
)

# Second cut of the set-aside part: 60 % becomes X_test/Y_test (passed to fit() as
# validation data), 40 % becomes X_val/Y_val (scored only after training).
X_test, X_val, Y_test, Y_val = train_test_split(
    X_rest, Y_rest, test_size=0.40, random_state=10
)

# Report the feature/label shapes of each split.
for split_features, split_labels in ((X_train, Y_train), (X_test, Y_test), (X_val, Y_val)):
    print(split_features.shape, split_labels.shape)

In [None]:
# Seed Python, NumPy and the Keras backend before any layer is created, so weight
# initialisation and batch shuffling repeat across runs, in line with the fixed
# seeds already used for resampling and splitting. (Some GPU kernels may still be
# non-deterministic.)
keras.utils.set_random_seed(42)

# Input: one sequence per sample; every position is a 256-wide one-hot vector.
encoder_input= Input(shape = (len(X_train[0]), 256))
# return_state=True makes the LSTM return its output plus the final hidden and
# cell states.
encoder= LSTM(256, return_state= True)
d= encoder(encoder_input)
# Classify from the final hidden state (d[1]) and cell state (d[2]), concatenated.
d= Dense(256, activation='relu')(concatenate([d[1], d[2]], axis=-1))
# Optional: insert Dropout(0.2) here if the model overfits.
d= Dense(128, activation='relu')(d)
# Optional: a further Dense(64, activation='relu') layer can be added here.
# Single sigmoid unit: the output is a probability for the binary label.
pred_class= Dense(1, activation='sigmoid')(d)

In [None]:
from keras.callbacks import EarlyStopping
from keras.losses import SparseCategoricalCrossentropy
# Wrap the input/output tensors in a trainable model and configure it for
# binary classification.
model = Model(encoder_input, pred_class)
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

# Stop training once validation accuracy has gone 5 epochs without improving,
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0,
    patience=5,
    verbose=0,
    baseline=None,
    restore_best_weights=True,
    start_from_epoch=0,
)

# X_test/Y_test act as the validation set during training.
history = model.fit(
    X_train,
    Y_train,
    batch_size=150,
    epochs=120,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping],
)

In [None]:
# Sigmoid outputs for the held-out X_val split and for the training split.
# X_test is not scored here; it was used as validation data in fit().
preds_val = model.predict(X_val)
preds_train= model.predict(X_train)

In [None]:
# Turn the sigmoid probabilities into hard 0/1 labels with a 0.5 threshold.
# A vectorised comparison replaces the per-element Python loops; casting back to
# the original dtype keeps the arrays' dtype and shape unchanged for the metric
# calls that follow.
preds_val = (preds_val >= 0.5).astype(preds_val.dtype)
preds_train = (preds_train >= 0.5).astype(preds_train.dtype)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Accuracy on the training split, then on the held-out X_val split. The printed
# text mirrors the call expression followed by the repr of the score.
train_accuracy = accuracy_score(Y_train, preds_train)
print(f"accuracy_score(Y_train, preds_train) = {train_accuracy!r}")
val_accuracy = accuracy_score(Y_val, preds_val)
print(f"accuracy_score(Y_val, preds_val) = {val_accuracy!r}")

In [None]:
from sklearn import metrics
# Confusion matrix for the held-out split. scikit-learn puts the true classes on
# the rows and the predicted classes on the columns.
val_labels = Y_val[:len(preds_val)]
matrix = metrics.confusion_matrix(val_labels, preds_val)
print(matrix)


In [None]:
# scikit-learn's confusion_matrix puts true classes on the rows and predicted
# classes on the columns, with labels in sorted order, so for 0/1 labels it is
#     [[TN, FP],
#      [FN, TP]]
# Assumes label 1 is the positive class (the class the thresholded sigmoid
# output maps to 1).
TN=matrix[0][0]
FP=matrix[0][1]
FN=matrix[1][0]
TP=matrix[1][1]
# True positive rate: share of actual positives that were predicted positive.
TPR=TP/(TP+FN)
print(f'TPR={TPR}')
# True negative rate: share of actual negatives that were predicted negative.
TNR=TN/(TN+FP)
print(f'TNR={TNR}')

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Training vs. validation accuracy per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
for series_key in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[series_key])
ax.set_title('Model Accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
for series_key in ('loss', 'val_loss'):
    ax.plot(history.history[series_key])
ax.set_title('Model Loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Confusion matrix on the held-out X_val split (rows: true class, columns: predicted class).
print(confusion_matrix(Y_val, preds_val))