In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, KFold
from imblearn.over_sampling import ADASYN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
import math

# Function to create LSTM model
def get_lstm(timesteps=None, n_features=1, n_classes=5):
    """Build, compile and return the LSTM classifier.

    Architecture: LSTM(64) -> Dense(128, relu) -> Dropout(0.3) ->
    Dense(n_classes, softmax), compiled with the Adam optimizer and
    categorical cross-entropy (so targets must be one-hot encoded).

    Args:
        timesteps: Length of every input sequence, or ``None`` (default) to
            leave the time axis unconstrained. The previous hard-coded value
            of 360 did not match the 187-step arrays this script trains on.
        n_features: Number of values per time step.
        n_classes: Width of the softmax output layer.

    Returns:
        The compiled ``Sequential`` model. The model summary is printed as a
        side effect.
    """
    # Local import keeps this block self-contained; declaring the input with
    # an Input layer avoids passing `input_shape` to the LSTM layer itself.
    from tensorflow.keras.layers import Input

    lstm_model = Sequential()
    lstm_model.add(Input(shape=(timesteps, n_features)))
    lstm_model.add(LSTM(64))
    lstm_model.add(Dense(128, activation='relu'))
    lstm_model.add(Dropout(0.3))
    lstm_model.add(Dense(n_classes, activation='softmax'))

    lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    lstm_model.summary()
    return lstm_model


# Load data: read both CSV splits (no header row) and stack them row-wise
csv_paths = (
    'C:/Users/OJO ABAYOMI MOSES/Documents/ECG-detection-main/mitbih_train.csv',
    'C:/Users/OJO ABAYOMI MOSES/Documents/ECG-detection-main/mitbih_test.csv',
)
df_train, df_test = (pd.read_csv(path, header=None) for path in csv_paths)
df = pd.concat([df_train, df_test], axis=0)

# The last column is the target; every column before it is a feature
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Stratified, shuffled 70/30 split of the pooled data
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    shuffle=True,
    stratify=y,
)

# Handle class imbalance: oversample the training portion only
asy = ADASYN(random_state=42)
X_res, y_res = asy.fit_resample(X_train, y_train)

# Append a trailing axis to the features and one-hot encode the labels
X_train = np.array(X_res)[..., np.newaxis]
X_test = np.array(X_test)[..., np.newaxis]
y_train = to_categorical(np.array(y_res))
y_test = to_categorical(np.array(y_test))

# Print shapes
for label, arr in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(label, arr.shape)

# Initialize LSTM model
lstm = get_lstm()

# Learning rate scheduler
def exp_decay(epoch):
    """Return the learning rate for ``epoch`` under exponential decay.

    Computes ``initial_lrate * exp(-k * epoch)``. The previous version
    ignored ``epoch`` and derived the decay step from the size of the global
    ``X_train`` array, which made the result the same for every epoch.

    Args:
        epoch: Index of the epoch about to start; 0 yields ``initial_lrate``.

    Returns:
        The learning rate as a Python float.
    """
    initial_lrate = 3e-5
    k = 0.75
    return initial_lrate * math.exp(-k * epoch)

lrate = LearningRateScheduler(exp_decay)

# Cross-validate
# NOTE(review): X_train was oversampled with ADASYN before this split, so
# synthetic rows in a training fold may be derived from real rows that land in
# the validation fold; fold scores are likely optimistic. Resampling inside
# each fold would need the pre-ADASYN arrays, which are overwritten above.
n_splits = 5
n_epochs = 10
kf = KFold(n_splits, shuffle=True, random_state=42)
oos_y = []
oos_pred = []

lstm_acc_per_fold = []
lstm_loss_per_fold = []
lstm_rmse_per_fold = []

for fold, (train, test) in enumerate(kf.split(X_train, y_train), start=1):
    print(f"Fold #{fold}")

    x_train, y_train_fold = X_train[train], y_train[train]
    x_test, y_test_fold = X_train[test], y_train[test]

    # Build a fresh, untrained network for every fold. Re-fitting one shared
    # model would carry weights across folds, so later folds would be scored
    # on samples the model had already been trained on.
    fold_model = get_lstm()
    history = fold_model.fit(
        x_train,
        y_train_fold,
        validation_data=(x_test, y_test_fold),
        verbose=0,
        epochs=n_epochs,
        callbacks=[lrate],
    )

    pred = fold_model.predict(x_test)
    oos_y.append(y_test_fold)
    oos_pred.append(pred)

    # RMSE between one-hot targets and predicted probabilities on the held-out fold
    score = np.sqrt(metrics.mean_squared_error(y_test_fold, pred))
    lstm_rmse_per_fold.append(score)
    print(f"Fold score (RMSE): {score}")

    # Loss / accuracy on the held-out fold
    scores = fold_model.evaluate(x_test, y_test_fold, verbose=0)
    print(f'Score for fold {fold}: {fold_model.metrics_names[0]} of {scores[0]}; {fold_model.metrics_names[1]} of {scores[1]*100}%')
    lstm_acc_per_fold.append(scores[1] * 100)
    lstm_loss_per_fold.append(scores[0])

# Average scores (divide by the number of folds actually run, not a literal 5)
lstm_average_acc = sum(lstm_acc_per_fold) / len(lstm_acc_per_fold)
lstm_average_loss = sum(lstm_loss_per_fold) / len(lstm_loss_per_fold)
lstm_average_rmse = sum(lstm_rmse_per_fold) / len(lstm_rmse_per_fold)
print(f"Average Accuracy: {lstm_average_acc}")
print(f"Average Loss: {lstm_average_loss}")
print(f"Average RMSE: {lstm_average_rmse}")

# Train the final model once on the whole training set; the per-fold models
# above exist only to estimate generalisation and are not reused.
lstm.fit(X_train, y_train, verbose=0, epochs=n_epochs, callbacks=[lrate])

# Evaluate on test data
score = lstm.evaluate(X_test, y_test)
print('Test Loss:', score[0])
print('Test accuracy:', score[1])

# Predict on test data once and reuse the result for every report below
y_pred = lstm.predict(X_test, batch_size=1000)
prediction_proba = y_pred
y_true = y_test.argmax(axis=1)
prediction = prediction_proba.argmax(axis=1)
print(classification_report(y_true, prediction))

# Confusion matrix and heatmap (each cell shown as a share of all test samples)
model_cf_matrix = confusion_matrix(y_true, prediction)
sns.heatmap(model_cf_matrix / np.sum(model_cf_matrix), annot=True, fmt='.3%', cmap='Blues')
plt.show()

# RMSE Score, computed on the integer class indices
score = np.sqrt(metrics.mean_squared_error(y_true, prediction))
print(f"RMSE Score: {score}")

# Save the model
lstm.save(r'C:\Users\OJO ABAYOMI MOSES\3D Objects\lstm_model.h5')
print('Saved the model to disk')


X_train (317133, 187, 1)
y_train (317133, 5)
X_test (32834, 187, 1)
y_test (32834, 5)


  super().__init__(**kwargs)


Fold #1
