<a href="https://colab.research.google.com/github/AbdullahiOlapojoye/Machine-Learning-based-Infants-Sucking-Patterns-Classification/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Libraries
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import SimpleRNN, Bidirectional, LSTM, Dense, Dropout, BatchNormalization, Conv1D, MaxPooling1D, Flatten, GlobalAveragePooling1D
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# For the Visuals
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import cm
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.ticker import MaxNLocator
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.patches import Rectangle
from IPython.display import display_html
plt.rcParams.update({'font.size': 16})
import plotly.graph_objects as go
#performance
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, confusion_matrix, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score,auc,f1_score
from sklearn.metrics import precision_recall_curve,roc_curve

# **DATA PREPARATION**

In [None]:
# Function to read and process CSV files
def read_and_process_csv(file_path):
    """Load one CSV file and return its ``F1`` and ``F2`` columns.

    Args:
        file_path: Path (or file-like object) handed straight to ``pd.read_csv``.

    Returns:
        A 2-D NumPy array with one row per CSV row and two columns
        (``F1`` first, ``F2`` second).
    """
    frame = pd.read_csv(file_path)
    selected = frame.loc[:, ['F1', 'F2']]
    return selected.to_numpy()

# Function to read labels from the "targets.csv" file
def read_labels_from_targets(targets_file_path):
    """Read the targets CSV and return all of its cells as a NumPy array.

    Args:
        targets_file_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        A 2-D array shaped ``(rows, columns)`` of the CSV, header excluded.
    """
    return pd.read_csv(targets_file_path).to_numpy()

# Function to generate data and labels from a list of CSV files
def generate_data_from_csv(file_list, targets_file_path):
    """Assemble the feature array and the label array for a set of CSV files.

    Args:
        file_list: Iterable of feature-file paths; each is loaded with
            ``read_and_process_csv`` in the order given.
        targets_file_path: Path of the targets CSV, loaded with
            ``read_labels_from_targets``.

    Returns:
        ``(data, labels)`` as NumPy arrays. ``data`` has one leading entry per
        file (a regular 3-D array only if every file yields the same number of
        rows -- not checked here); ``labels`` is a copy of the targets array.
    """
    # Every feature file is read before the targets file is touched
    per_file_series = [read_and_process_csv(csv_path) for csv_path in file_list]
    target_values = read_labels_from_targets(targets_file_path)
    return np.array(per_file_series), np.array(target_values)

# Locations of the per-subject feature files (prefix only) and of the targets file
path1 = "/content/drive/MyDrive/BMEN 6367/SENSOR_DATA_DL_m/Features/Subject_"
targets_file_path = "/content/drive/MyDrive/BMEN 6367/SENSOR_DATA_DL_V2/Target/Target.csv"

# One feature file per subject, numbered 1 through 40; echo each path for a quick visual check
csv_files = [f"{path1}{subject_id}.csv" for subject_id in range(1, 41)]
for csv_path in csv_files:
    print(csv_path)

# Load every feature file plus the targets into NumPy arrays
X, y = generate_data_from_csv(csv_files, targets_file_path)

# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# **MODELLING WITH LSTM**

In [None]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Epochs each fold trains for; kept as a module-level name so later cells can reuse it
num_epochs = 10


def build_lstm_model(input_shape):
    """Create and compile a fresh, untrained stacked-LSTM binary classifier.

    Args:
        input_shape: Shape of one sample without the batch axis, forwarded to
            the first LSTM layer's ``input_shape`` argument.

    Returns:
        A compiled ``Sequential`` model (optimizer ``'adam'``, binary
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # return_sequences=True is required here: lstm_2 is itself recurrent and
    # needs the full 3-D sequence output, not only lstm_1's last hidden state.
    model.add(LSTM(64, input_shape=input_shape, return_sequences=True, name="lstm_1"))
    model.add(LSTM(units=32, name="lstm_2"))
    model.add(Dense(1, activation='sigmoid', name="output"))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Define 5-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# A single checkpoint shared by all folds: its running "best" value survives
# between fit() calls, so the file ends up holding the best epoch seen in any
# fold instead of being overwritten by whatever was best in the last fold.
# NOTE(review): Keras 3.x rejects checkpoint paths that do not end in
# '.keras' or '.h5'. The name is kept because load_model() elsewhere in this
# file reads the same path -- rename both together if upgrading.
chk = ModelCheckpoint('best_model_LSTM.pkl', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

# Perform cross-validation
fold_accuracies = []
for train_index, val_index in kfold.split(X_train, y_train):
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

    # Start every fold from untrained weights. Re-using one model across folds
    # would let it validate on samples it was already trained on in earlier
    # folds, which inflates the reported fold accuracy.
    model_LSTM = build_lstm_model((X_train.shape[1], X_train.shape[2]))

    # Train the model on the current fold
    history = model_LSTM.fit(
        X_train_fold,
        y_train_fold,
        epochs=num_epochs,
        batch_size=32,
        callbacks=[chk],
        validation_data=(X_val_fold, y_val_fold),
    )

    # Evaluate the model on the validation set of the current fold
    results = model_LSTM.evaluate(X_val_fold, y_val_fold)
    fold_accuracies.append(results[1])
    print(f'Fold Accuracy: {results[1]}')

print(f'Mean CV Accuracy: {np.mean(fold_accuracies)}')

# Evaluate on the held-out test set. model_LSTM is the model trained in the
# final fold; `history` likewise holds the final fold's training curves.
test_results = model_LSTM.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_results[1]}')

In [None]:
#plot loss and accuracy history
# Take the epoch axis from the recorded history itself rather than from a
# separately maintained epoch counter, so the x values always match the
# number of points actually logged by fit().
epochs_run = len(history.history["loss"])
x = np.arange(1, epochs_run + 1)

# One figure per metric: validation curve in red, training curve in blue
for metric_key, axis_title in (("loss", "Loss"), ("accuracy", "Accuracy")):
    plt.plot(x, history.history[f"val_{metric_key}"], c='red', label=f'val_{metric_key}')
    plt.plot(x, history.history[metric_key], c='blue', label=f'train_{metric_key}')
    plt.xlabel("Epochs")
    plt.ylabel(axis_title)
    plt.title(f"{axis_title} vs epochs")
    plt.legend()
    plt.show()

# **PERFORMANCE EVALUATION**

In [None]:
# Load the checkpointed model here so this cell works when the notebook is run
# top to bottom (it does not rely on a later cell having created the variable)
model_LSTM_Best = load_model('best_model_LSTM.pkl')

# predict() returns the sigmoid output per sample; threshold at 0.5 for class labels
Train_acc = model_LSTM_Best.predict(X_train)
y_pred_train = (Train_acc > 0.5).astype(int)
accuracy_train = accuracy_score(y_train, y_pred_train)
print("Training Accuracy:", accuracy_train)

In [None]:
# Score the checkpointed model on the held-out test set
model_LSTM_Best = load_model('best_model_LSTM.pkl')
y_pred = model_LSTM_Best.predict(X_test)
# Threshold the predicted scores at 0.5 to obtain hard class labels
y_pred_LSTM_Best = (y_pred > 0.5).astype(int)

# Text summary: the full report first, then the individual headline scores
print(classification_report(y_test, y_pred_LSTM_Best))
for score_label, score_fn in (
    ("Recall score  =", recall_score),
    ("Precision score =", precision_score),
    ("Accuracy score  =", accuracy_score),
):
    print(score_label, score_fn(y_test, y_pred_LSTM_Best))
# The F score is the only one computed with weighted averaging
print("F score  =", f1_score(y_test, y_pred_LSTM_Best, average='weighted'))

# Confusion matrix, drawn as an annotated seaborn heatmap
cm = confusion_matrix(y_test, y_pred_LSTM_Best)
class_names = ['Healthy', 'Unhealthy']
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Set1',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix (LSTM)')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# ROC curve and its area, computed from the un-thresholded predicted scores
fpr, tpr, _ = roc_curve(y_test, y_pred)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve together with the diagonal reference line
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Render a diagram of the loaded model's layers to a PNG file
import keras
from keras import models, layers

# Rendering options, gathered in one place for readability
architecture_plot_options = {
    "to_file": "Best_lstm_model.png",
    "show_shapes": True,
    "show_dtype": False,
    "dpi": 55,
    "show_layer_activations": True,
}
keras.utils.plot_model(model_LSTM_Best, **architecture_plot_options)

In [None]:
# Write the test-set targets next to the values returned by predict() on the test set
output_csv = 'LSTM_model_predictions_0.csv'
prediction_columns = {'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()}
predictions_df = pd.DataFrame(prediction_columns)
predictions_df.to_csv(output_csv, index=False)

# Download the CSV file to the local machine (requires the Google Colab runtime)
from google.colab import files
files.download(output_csv)