In [None]:
# Install TSFEL; it is imported as `tsfel` in the imports cell and used for feature extraction.
!pip install tsfel

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tsfel
import warnings
warnings.filterwarnings("ignore")
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Conv1D, MaxPooling1D, Flatten, Dense, Input, Dropout, LSTM, Bidirectional, Activation, RepeatVector, Permute, Multiply, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
from tensorflow.keras.callbacks import EarlyStopping

import time

In [None]:
# Mount Google Drive so the /content/drive paths used in the configuration are reachable (Colab only).
from google.colab import drive
drive.mount('/content/drive')

## Configuration

`FILE_PATH`: file containing the data. \
`FS`: sampling frequency (in Hz) of the recordings in the file. \
`GROUP`: The ID of the group to run. \
`OVERLAP`: overlap value used for segmentation. Between 0 and 1. \
`FLAG_SPLIT`: 0 - intra-subject, 1 - inter-subject, 2 - inter-session. \
`LR`: Learning rate to use for models. \
`BATCH_SIZE`: Batch size to use for models. \
`EPOCHS`: Epochs to use for models.



In [None]:
# Input recording and its sampling frequency in Hz (FS is overwritten later by the downsampling cell).
FILE_PATH = '/content/drive/UiS4ADL-100hz.csv'
FS = 100

# ADL group to analyse (0, 1 or 2), window overlap fraction (0..1) used for segmentation,
# and split strategy (0 = intra-subject, 1 = inter-subject, 2 = inter-session).
GROUP = 0
OVERLAP = 0.5
FLAG_SPLIT = 2

# Training hyper-parameters passed to every model function.
LR = 0.00001
BATCH_SIZE = 8
EPOCHS = 200

## Load the Data

In [None]:
# Read the raw recordings and discard the timestamp column, which is not used downstream.
data = pd.read_csv(FILE_PATH).drop(columns='timestamp')

### Data to drop
Drop the data of subjects whose recordings were captured incorrectly. \
**Change the variable `subjects_to_drop` according to your goal.**

In [None]:
# Subjects whose recordings were captured incorrectly; all of their rows are removed.
subjects_to_drop = [1727,1826,2097]
to_drop = data.loc[data['subject'].isin(subjects_to_drop)]
data = data.drop(index=to_drop.index).reset_index(drop=True)
data

### Check for missing data and drop it if present

In [None]:
# Rows with at least one missing value; report which subjects, sessions and ADLs they belong to.
rows_with_nan = data.isna().any(axis=1)
to_drop = data.loc[rows_with_nan]
print(to_drop['subject'].unique(), to_drop['session'].unique(), to_drop['adl'].unique())

In [None]:
# Remove the incomplete rows found in the previous cell and renumber the index.
data = data.drop(index=to_drop.index).reset_index(drop=True)
data

## Choose group

**Change the `GROUP` variable according to the analysis you want to run.**

In [None]:
# Each group bundles a set of ADL ids with the window length used to segment them.
# `w` is expressed in seconds: the feature-extraction cell multiplies it by FS to get samples.
if GROUP == 0:
    adls_in_group = [1, 3, 4, 7, 10, 11, 12, 13, 14, 16, 19, 20]
    w = 2 # window length in seconds
elif GROUP == 1:
    adls_in_group = [6, 8, 9, 18, 21, 24]
    w = 5 # window length in seconds
elif GROUP == 2:
    adls_in_group = [2, 5, 15, 17, 22, 23]
    w = 10 # window length in seconds
else:
    # Fail here instead of with a NameError on `adls_in_group` / `w` further down.
    raise ValueError(f"Unknown GROUP {GROUP!r}; expected 0, 1 or 2.")

# Keep only the recordings of the selected ADLs.
data = data[data['adl'].isin(adls_in_group)].reset_index(drop=True)

data

## Downsample to 32 Hz

In [None]:
# NOTE(review): the configuration cell sets FS = 100 for the input file; keeping every 3rd row
# of a 100 Hz signal gives ~33.3 Hz rather than exactly 32 Hz. FS is still set to 32 here and is
# used as the sampling rate by the feature-extraction cell — confirm this approximation is intended.
FS = 32
# Define a function for downsampling
def downsample_group(group):
    # Keep rows 0, 3, 6, ... of the group (plain decimation, no filtering beforehand).
    return group.iloc[::3]

# Apply the downsampling function separately for each 'fileID'
# Re-running this cell decimates the already-downsampled `data` again, because `data` is overwritten.
data = data.groupby('fileID').apply(downsample_group)

# Reset index if needed
data.reset_index(drop=True, inplace=True)
data

## Feature extraction

**Change the variables `directory` and `filename` to point to where the segmented feature file is stored / should be saved.**

In [None]:
# Location of the cached feature file; the name encodes the window length and the overlap in percent.
directory = "/content/drive/My Drive/MASTER/SEGMENT_2/"
filename = f"UiS4ADL_seg_w_{w}ov_{int(OVERLAP*100)}.csv"

In [None]:
def save_dataframe_to_csv(directory, filename, df):
    """Append ``df`` as CSV rows to ``directory/filename``.

    The column header is written only when the file does not exist yet, so
    repeated calls build up a single CSV with one header line.
    """
    target = os.path.join(directory, filename)
    # Decide about the header before writing: afterwards the file always exists.
    write_header = not os.path.exists(target)
    df.to_csv(target, mode='a', header=write_header, index=False)
    print(f"DataFrame saved as {filename} in {directory}")

file_path = os.path.join(directory, filename)

# The extracted features are cached on disk: extraction only runs when no cache file exists yet.
# NOTE(review): an interrupted run leaves a partial file that is treated as a complete cache
# on the next run — delete the file manually in that case.
if not os.path.exists(file_path):
  #Use statistical features
  cfg_file = tsfel.get_features_by_domain('statistical')

  for window in [w]:
    for o in [OVERLAP]:
      # Window length in samples (window is given in seconds).
      windows_size = int(window * FS)
      for fileID in data.fileID.unique():
        # Select the rows of this file once; the last four columns are adl, session, subject, fileID.
        file_rows = data[data.fileID==fileID]
        tmp = file_rows.iloc[0:,0:-4]

        # Recordings shorter than one window cannot be segmented and are skipped.
        if tmp.shape[0] < windows_size:
          print('FileID --> ' + str(fileID) + ' it to short. Only '+ str(tmp.shape[0]) + ' rows (aka. ' + str(tmp.shape[0]/FS) +' seconds).')
          continue

        #Extract features.
        tmp_features = tsfel.time_series_features_extractor(cfg_file, tmp, fs = FS, window_size=windows_size, overlap=o, header_names = tmp.columns.values, verbose=False, n_jobs = -1)

        #Add ADL, Session, Subject and fileID to dataframe.
        tmp_features['adl'] = file_rows.iloc[0,-4]
        tmp_features['session'] = file_rows.iloc[0,-3]
        tmp_features['subject'] = file_rows.iloc[0,-2]
        tmp_features['fileID'] = fileID

        #Save file to csv
        print('------------------------------------------------------------------------------------------------------')
        print('Extracted Features from Series ' + str(fileID) + ' and window ' + str(window))
        print('------------------------------------------------------------------------------------------------------')
        save_dataframe_to_csv(directory, filename, tmp_features)
        print('------------------------------------------------------------------------------------------------------')

  if not os.path.exists(file_path):
    # Nothing was written, i.e. every recording was shorter than one window.
    raise RuntimeError('No features were extracted: every recording is shorter than the window of ' + str(w) + ' seconds.')

# Always load through the same path that was used for writing (directory + filename only
# matches it when `directory` ends with a path separator).
data = pd.read_csv(file_path)



In [None]:
# Show the feature table that was just extracted or loaded from the cache file.
display(data)

## Overview of the dataset

In [None]:
# Feature rows with at least one missing value; report the subjects, sessions and ADLs affected.
rows_with_nan = data.isna().any(axis=1)
to_drop = data.loc[rows_with_nan]
print(to_drop['subject'].unique(), to_drop['session'].unique(), to_drop['adl'].unique())

In [None]:
# Remove the incomplete feature rows found in the previous cell and renumber the index.
data = data.drop(index=to_drop.index).reset_index(drop=True)
data

In [None]:
# First rows and overall shape of the feature table.
print("Dataset Overview:")
display(data.head())
print(data.shape)
print()

# Total number of missing cells across the whole table.
print("Missing Values:")
print(data.isna().sum().sum())
print()

# Summary statistics; the last four columns of the summary are left out.
print("Basic Statistics:")
display(data.describe().iloc[:, :-4])
print()

## Feature selection

### Feature variance for all ADLs in group

In [None]:
# Variance of every feature column (all columns except the last four) over the whole group.
variance_features = data.iloc[:, :-4].var()
variance_features_df = pd.DataFrame({
    "Feature": data.columns.to_list()[:-4],
    "Variance": variance_features.values,
})

# Ascending order, so the least informative features are listed first.
sorted_variances = variance_features_df.sort_values('Variance')

display(sorted_variances)

### Feature variance based on ADL ID

List the features to remove for each ADL ID based on their variance. \
1 means remove, 0 means keep. \
**Change the variable `variance_threshold` according to your goal.**

In [None]:
# One row per feature; a column per ADL is added below (1 = remove, 0 = keep).
feature_remove = pd.DataFrame({"Feature_remove": data.columns.to_list()[:-4]})

variance_threshold = 0.00
for adl in data.adl.unique():
    # Variance of each feature restricted to the windows of this ADL.
    variance_features = data.loc[data['adl'] == adl].iloc[:, :-4].var()
    feature_keep_index = variance_features[variance_features > variance_threshold].index.tolist()

    result = [
        0 if feature in feature_keep_index else 1
        for feature in feature_remove['Feature_remove']
    ]
    feature_remove[adl] = result

# Per-feature count of ADLs that flagged it, then a final summary row with the column sums.
feature_remove['Total'] = feature_remove.iloc[:, 1:].sum(axis=1)
feature_column = feature_remove.sum(axis=0).values
feature_column[0] = 'Total'
feature_remove.loc[len(feature_remove)] = feature_column
display(feature_remove)

Remove the features that were found to have low variance in at least one of the ADLs.

In [None]:
# Work on the per-feature rows only; the trailing 'Total' summary row is excluded.
new_feature_remove = feature_remove.iloc[:-1, :]

# A feature is dropped as soon as at least one ADL flagged it.
drop_features = [
    new_feature_remove['Feature_remove'][i]
    for i in range(len(new_feature_remove['Total']))
    if new_feature_remove['Total'][i] > 0
]

print(len(drop_features))
kept_rows = ~new_feature_remove['Feature_remove'].isin(drop_features)
feature_remove = new_feature_remove[kept_rows].reset_index(drop=True)

# Rebuild the summary row for the remaining features.
feature_column = feature_remove.sum(axis=0).values
feature_column[0] = 'Total'
feature_remove.loc[len(feature_remove)] = feature_column

data = data.drop(columns=drop_features)

### Feature correlation based on ADL ID

The features with highest variance will be checked first for correlation.

**Change variable `correlation_threshold` accordingly to goal**

In [None]:
correlation_threshold = 0.95

# Drop the summary row and the 'Total' column. An explicit copy is taken because the flags are
# written into this frame below; writing through a slice of `feature_remove` with chained
# indexing is unreliable and silently does nothing under pandas Copy-on-Write.
new_feature_remove = feature_remove.iloc[:-1,:-1].copy()
for adl in data.adl.unique():
    feature_columns = data.columns.tolist()[0:-4]

    # Feature values of the windows belonging to this ADL only.
    adl_rows = data.loc[data['adl'] == adl, feature_columns]

    # Order the features by descending within-ADL variance, so that of two highly correlated
    # features the one with the higher variance is visited first and therefore kept.
    variance_features = adl_rows.var()
    variance_features_df = pd.DataFrame({
        "Feature": feature_columns,
        "Variance": variance_features.values,
    })
    variance_features_df = variance_features_df.sort_values('Variance', ascending=False)
    highest_variance_features = variance_features_df['Feature'].tolist()

    correlation_matrix = adl_rows[highest_variance_features].corr()

    #Remove the high correlation features and only keep one left.
    #Keep the one with highest variance.
    correlation_features_remove = []
    for column in correlation_matrix.columns:
        # A feature already marked for removal must not knock out further features.
        if column in correlation_features_remove:
            continue

        feature_column = correlation_matrix[column].abs()
        selected_features = feature_column[feature_column > correlation_threshold].index.tolist()

        for feature in selected_features:
            if feature != column and feature not in correlation_features_remove:
                correlation_features_remove.append(feature)

    # Flag all correlated features for this ADL in one label-based assignment.
    flagged_rows = new_feature_remove['Feature_remove'].isin(correlation_features_remove)
    new_feature_remove.loc[flagged_rows, adl] = 1

# Per-feature count of ADLs that flagged it, then a final summary row with the column sums.
new_feature_remove['Total'] = new_feature_remove.iloc[0:,1:].sum(axis=1)
feature_column = new_feature_remove.sum(axis=0).values
feature_column[0] = 'Total'
new_feature_remove.loc[len(new_feature_remove)] = feature_column
feature_remove = new_feature_remove
display(feature_remove)

Removing high correlation features

In [None]:
# Work on the per-feature rows only; the trailing 'Total' summary row is excluded.
new_feature_remove = feature_remove.iloc[:-1, :]

# A feature is dropped as soon as at least one ADL flagged it as highly correlated.
drop_features = [
    new_feature_remove['Feature_remove'][i]
    for i in range(len(new_feature_remove['Total']))
    if new_feature_remove['Total'][i] > 0
]

kept_rows = ~new_feature_remove['Feature_remove'].isin(drop_features)
feature_remove = new_feature_remove[kept_rows].reset_index(drop=True)

# Rebuild the summary row for the remaining features.
feature_column = feature_remove.sum(axis=0).values
feature_column[0] = 'Total'
feature_remove.loc[len(feature_remove)] = feature_column

data = data.drop(columns=drop_features)

### Feature selection methods

In [None]:
def get_important_features_GRF(data):
    """Select the most important features with a grid-searched random forest.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``'adl'`` column (the target); every other column is
        used as a feature.

    Returns
    -------
    list
        Names of the features kept by ``SelectFromModel`` (default threshold),
        ordered by descending impurity-based importance. When the number of
        selected features is odd, the least important one is dropped so that
        the count is even; the split cell later reshapes the features into
        pairs (``reshape(..., -1, 2)``).

    Notes
    -----
    As a side effect a bar chart of the returned importances is drawn on a
    new matplotlib figure.
    """
    # Split data into features and target
    X = data.drop(columns=['adl'])
    y = data['adl']

    # Define parameter grid for GridSearchCV
    param_grid = {
        'n_estimators': [100, 200, 300],  # Number of trees in the forest
        'max_depth': [None, 10, 20],  # Maximum depth of the tree
        'min_samples_split': [2, 5, 10]  # Minimum number of samples required to split an internal node
    }

    # 5-fold grid search over a seeded random forest
    grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X, y)

    # Best forest found by the search and its impurity-based importances
    best_clf = grid_search.best_estimator_
    feature_importances = best_clf.feature_importances_

    # Discard irrelevant features: only the selection mask is needed, not the transformed matrix.
    selector = SelectFromModel(best_clf, prefit=True)
    selected_positions = selector.get_support(indices=True)

    # Importance of every selected feature, sorted from most to least important
    feature_importance_dict = {
        X.columns[position]: feature_importances[position]
        for position in selected_positions
    }
    feature_importance_dict = dict(sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True))

    # Keep an even number of features by dropping the least important one.
    if len(feature_importance_dict) % 2 != 0:
        feature_importance_dict.popitem()

    x_labels = list(feature_importance_dict.keys())
    y_values = list(feature_importance_dict.values())

    # Plot the data using a bar plot
    plt.figure(figsize=(8, 6))  # Set figure size
    plt.bar(x_labels, y_values, color='skyblue')  # Create a bar plot

    # Customize the plot; tick labels are hidden because feature names are long
    plt.title('Feature importance MDI')
    plt.xlabel('')
    plt.ylabel('Mean decrease impurity')
    ax = plt.gca()
    ax.xaxis.set_ticklabels([])

    return x_labels

Finding top features to use based on the remaining features \
**Change variable `feature_selection_method` based on which feature selection method to use.**

In [None]:
# Run the selection on all columns except the last three, i.e. the features plus 'adl'.
# NOTE(review): the selection is fitted on all rows, before the train/validation/test split
# further down — confirm that this is acceptable for the reported test scores.
important_features = get_important_features_GRF(data.iloc[:,:-3])

print(important_features)

### Only use the top features

In [None]:
# Build a new list instead of extending `important_features` in place: otherwise re-running
# this cell appends the four metadata columns a second time and selects duplicate columns.
columns = list(important_features) + ['adl', 'session', 'subject', 'fileID']
data = data[columns]
data

## Distribution

In [None]:
def display_distribution(data,column,fs,w):
    """Draw a bar chart with the number of rows per distinct value of ``column``.

    Every bar is annotated with its count and its share of all rows in
    percent. ``w`` only appears in the title; ``fs`` is accepted but unused.
    """
    plt.figure(figsize=(16, 5))
    total_rows = len(data)
    counts = data[column].value_counts().sort_index()
    counts.plot(kind='bar')

    # Annotate each bar as "count(percent%)" just above its top.
    for position, count in enumerate(counts):
        share = int(round((count / total_rows) * 100, 0))
        plt.text(position, count, f"{count}({share}%)", ha='center', va='bottom')

    plt.title(f'Distribution of {column} samples in terms of segments of {w} second')
    plt.xlabel(column)
    plt.ylabel('Count')
    plt.show()

In [None]:
# Number of segments per ADL before undersampling.
display_distribution(data, 'adl',FS,w)

In [None]:
# Number of segments per subject before undersampling.
display_distribution(data, 'subject',FS, w)

In [None]:
# Number of segments per session before undersampling.
display_distribution(data, 'session',FS, w)

## Undersample

In [None]:
# Segment count per ADL and the size of the smallest class (kept for inspection).
adl_distribution = data['adl'].value_counts().sort_index()
adl_smallest_value = adl_distribution.min()

# Randomly undersample every class except the minority class, with a fixed seed.
y = data['adl']
X = data.drop(columns=['adl'])
rus = RandomUnderSampler(random_state=42, sampling_strategy='not minority')
X_resampled, y_resampled = rus.fit_resample(X, y)

# 'adl' is re-attached as the LAST column, so the four non-feature columns stay at the end.
X_resampled['adl'] = y_resampled
data = X_resampled

In [None]:
# Number of segments per ADL after undersampling.
display_distribution(data, 'adl',FS,w)

In [None]:
# Number of segments per subject after undersampling.
display_distribution(data, 'subject',FS,w)

In [None]:
# Number of segments per session after undersampling.
display_distribution(data, 'session',FS, w)

### Rename activities from 0 to N

In [None]:
# Map the original ADL ids to consecutive codes 0..N-1.
# The ids are sorted first so that code i always belongs to the i-th smallest ADL id; this is
# the order plot_confusion_matrix assumes when it labels classes with sorted(value_mapping.keys()).
unique_values = np.sort(data['adl'].unique())
codes, unique_labels = pd.factorize(unique_values)
value_mapping = dict(zip(unique_values, codes))
data['adl'] = data['adl'].map(value_mapping)
# NOTE(review): this maps every original id to itself (unique_labels equals unique_values);
# use `value_mapping` for the original-id -> code mapping.
# NOTE(review): re-running this cell replaces the original ids in `value_mapping` with the codes.
old_new_mapping = dict(zip(unique_values, unique_labels))

In [None]:
def plot_confusion_matrix( y_test, y_pred):
    """Show the classification report and a row-normalised confusion matrix.

    Parameters
    ----------
    y_test, y_pred : array-like of shape (n_samples, n_classes)
        One-hot ground truth and per-class scores; the class of a sample is
        the ``argmax`` along axis 1.

    Classes are labelled with the original ADL ids taken from the global
    ``value_mapping`` (code ``i`` is shown as the i-th smallest id). All
    classes are always reported, including those that occur neither in the
    test labels nor in the predictions, so the report and the matrix keep a
    fixed shape.
    """
    true_codes = y_test.argmax(axis=1)
    predicted_codes = y_pred.argmax(axis=1)

    class_names = sorted(value_mapping.keys())
    class_codes = list(range(len(class_names)))

    # Passing `labels` keeps one row per class even if a class is missing from this split.
    report = classification_report(true_codes, predicted_codes, labels=class_codes, output_dict=True)
    classification_report_df = pd.DataFrame(report).transpose()
    # Rename only the class rows; the summary rows keep the names scikit-learn gave them.
    classification_report_df = classification_report_df.rename(
        index={str(code): name for code, name in zip(class_codes, class_names)}
    )

    print("Classification Report:")
    display(classification_report_df)

    # Percentages per true class (each row sums to 100 when the class is present).
    cm = np.round(confusion_matrix(true_codes, predicted_codes, labels=class_codes, normalize='true')*100,2)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='g',
                xticklabels=class_names, yticklabels=class_names)

    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()


def plot_training_validation_history(history):
    """Plot training vs. validation accuracy (left) and loss (right) per epoch.

    ``history`` must expose a ``history`` dict with the keys ``accuracy``,
    ``val_accuracy``, ``loss`` and ``val_loss``.
    """
    fig, axs = plt.subplots(1, 2, figsize=(8, 2))

    # (training key, validation key, training label, validation label, y-axis label)
    panels = [
        ("accuracy", "val_accuracy", "Training Accuracy", "Validation Accuracy", "Accuracy"),
        ("loss", "val_loss", "Training Loss", "Validation Loss", "Loss"),
    ]
    for ax, (train_key, val_key, train_label, val_label, y_label) in zip(axs, panels):
        ax.plot(history.history[train_key], label=train_label)
        ax.plot(history.history[val_key], label=val_label)
        ax.set_xlabel("Epoch")
        ax.set_ylabel(y_label)
        ax.legend()

    plt.tight_layout()
    plt.show()

def time_model_inference(model: Model, input_data):
    """Time one ``model.predict`` call on ``input_data`` and print the result.

    Prints the total elapsed time in seconds, the number of samples and the
    average time per sample in milliseconds. The measurement covers a single
    call, so any one-off overhead of that call is included.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring durations;
    # time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    model.predict(input_data)
    elapsed = time.perf_counter() - start_time
    print('Elapsed Time ', elapsed)
    print('# of Test samples', len(input_data))
    print('Average Inference time (ms):', elapsed/len(input_data)*1000)

In [None]:
def prepare_data_for_the_model(data):
    """Split ``data`` into train/validation/test arrays according to ``FLAG_SPLIT``.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table whose last four columns are non-feature columns and
        which contains the columns ``'adl'``, ``'subject'`` and ``'session'``.
        The ``'adl'`` values are passed to ``to_categorical`` unchanged, so
        they are assumed to be integer codes ``0..num_classes-1`` — TODO
        confirm when calling with data that was not remapped.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train, y_val, y_test, num_classes,
        input_shape)`` where the ``X_*`` are 2-D numpy arrays, the ``y_*`` are
        one-hot encoded and ``input_shape`` is ``(n_features,)``.

    Raises
    ------
    ValueError
        If the global ``FLAG_SPLIT`` is not 0, 1 or 2.

    Notes
    -----
    ``FLAG_SPLIT == 2`` splits by session, ``1`` by subject (whole sessions or
    subjects go to exactly one split) and ``0`` splits rows at random,
    stratified by ADL.
    """
    X = data.iloc[:, :-4]
    y = data['adl']

    subject = data['subject'].values
    session = data['session'].values
    # The encoder is only used to count the distinct classes.
    adl_encoder = LabelEncoder().fit(data['adl'].values)

    if FLAG_SPLIT == 2:
        train_session, test_session = train_test_split(np.unique(session), test_size=0.2, random_state=28)
        train_session, val_session = train_test_split(train_session, test_size=0.2, random_state=28)

        train_idx = np.where(np.isin(session, train_session))[0]
        val_idx = np.where(np.isin(session, val_session))[0]
        test_idx = np.where(np.isin(session, test_session))[0]

        print('Train Sessions: ', train_session)
        print('Validation Sessions: ', val_session)
        print('Test Sessions: ', test_session)

        # np.where yields row positions, so select by position (.iloc): label-based .loc is
        # only equivalent when the index happens to be 0..n-1.
        X_train, X_val, X_test = X.iloc[train_idx], X.iloc[val_idx], X.iloc[test_idx]
        y_train, y_val, y_test = y.iloc[train_idx], y.iloc[val_idx], y.iloc[test_idx]

    elif FLAG_SPLIT == 1:
        train_subjects, test_subjects = train_test_split(np.unique(subject), test_size=0.2, random_state=28)
        train_subjects, val_subjects = train_test_split(train_subjects, test_size=0.25, random_state=28)

        train_idx = np.where(np.isin(subject, train_subjects))[0]
        val_idx = np.where(np.isin(subject, val_subjects))[0]
        test_idx = np.where(np.isin(subject, test_subjects))[0]

        print("Train Subjects :", train_subjects)
        print("Validation Subjects :", val_subjects)
        print("Test Subjects :", test_subjects)

        # Positional selection, see the comment in the session branch.
        X_train, X_val, X_test = X.iloc[train_idx], X.iloc[val_idx], X.iloc[test_idx]
        y_train, y_val, y_test = y.iloc[train_idx], y.iloc[val_idx], y.iloc[test_idx]

    elif FLAG_SPLIT == 0:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)

    else:
        # Without this the function would fail later with an UnboundLocalError.
        raise ValueError(f"Unknown FLAG_SPLIT {FLAG_SPLIT!r}; expected 0, 1 or 2.")

    X_train = X_train.values
    X_val = X_val.values
    X_test = X_test.values

    num_classes = len(adl_encoder.classes_)
    y_train = to_categorical(y_train, num_classes=num_classes)
    y_val = to_categorical(y_val, num_classes=num_classes)
    y_test = to_categorical(y_test, num_classes=num_classes)

    # Shape of one sample; taken from the array shape so an empty split does not raise.
    input_shape = X_train.shape[1:]

    return X_train, X_val, X_test, y_train, y_val, y_test, num_classes, input_shape


def evaluate_my_model(model, history, X_test, y_test):
    """Report the test performance of a trained model.

    Plots the training history, prints the test metrics, shows the
    classification report and confusion matrix, and times the inference.

    Parameters
    ----------
    model : compiled and fitted Keras model
    history : object returned by ``model.fit``
    X_test, y_test : test inputs and one-hot test labels
    """
    plot_training_validation_history(history)

    # Look metrics up by name: the models are compiled with metrics=[pr_metric, "accuracy"],
    # so in the positional result index 1 is the PR-AUC, not the accuracy.
    scores = model.evaluate(X_test, y_test, verbose=0, return_dict=True)
    print('Test loss:', scores['loss'])
    if 'accuracy' in scores:
        print('Test accuracy:', scores['accuracy'])
    for metric_name, metric_value in scores.items():
        if metric_name not in ('loss', 'accuracy'):
            print('Test ' + metric_name + ':', metric_value)

    # Keep the raw probabilities: rounding first turns every row whose largest probability is
    # below 0.5 into all zeros, and argmax would then report class 0 for it.
    y_pred = model.predict(X_test)
    plot_confusion_matrix(y_test, y_pred)
    time_model_inference(model,X_test)

def _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr):
    """Compile, fit and evaluate ``model``; shared by all ``my_*_model`` functions.

    The model is compiled with categorical cross-entropy, Adam at learning
    rate ``lr`` and the metrics PR-AUC and accuracy. Training stops early when
    ``val_loss`` has not improved by at least 1e-3 for 20 epochs, and the best
    weights are restored before ``evaluate_my_model`` is called on the test set.
    """
    pr_metric = AUC(curve='PR', num_thresholds=100)
    optimizer = Adam(learning_rate=lr)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=[pr_metric,"accuracy"])
    stop_early = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=20, verbose=0, mode='auto', restore_best_weights=True)
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(X_val, y_val), callbacks=[stop_early])
    evaluate_my_model(model, history, X_test, y_test)


def my_LSTM_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, epochs, batch_size, lr):
    """Train and evaluate two stacked LSTM layers (128 -> 64 units) with dropout and a softmax head.

    ``X_*`` are expected to be 3-D arrays (samples, steps, channels); the
    per-sample shape is taken from ``X_train``.
    """
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=X_train.shape[1:]))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr)


def my_CNN_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, epochs, batch_size, lr):
    """Train and evaluate a 1-D CNN: three Conv1D/MaxPooling1D stages (64, 128, 256 filters) and a dense head.

    Each pooling stage halves the number of steps, so the input needs enough
    steps to survive three poolings.
    """
    model = Sequential()
    model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=X_train.shape[1:], padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(256, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr)


def my_CNNRNN_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, epochs, batch_size, lr):
    """Train and evaluate a CNN + bidirectional LSTM model.

    Three unpadded Conv1D layers (256, 128, 64 filters, kernel size 3), each
    followed by batch normalisation and dropout, feed two bidirectional LSTM
    layers and a softmax head. Every unpadded convolution shortens the
    sequence by two steps, so the input needs at least seven steps.
    """
    model = Sequential()
    model.add(Conv1D(filters=256, kernel_size=3, activation="relu", input_shape=X_train.shape[1:]))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Conv1D(filters=128, kernel_size=3, activation="relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Conv1D(filters=64, kernel_size=3, activation="relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(256)))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))
    _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr)


def my_RNN_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, epochs, batch_size, lr):
    """Train and evaluate four stacked SimpleRNN layers (128 units each) with a softmax head."""
    model = Sequential()
    model.add(SimpleRNN(128, input_shape=X_train.shape[1:], return_sequences=True))
    model.add(SimpleRNN(128, return_sequences=True))
    model.add(SimpleRNN(128, return_sequences=True))
    model.add(SimpleRNN(128))
    model.add(Dense(num_classes, activation='softmax'))
    _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr)


def my_MLP_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, epochs, batch_size, lr):
    """Train and evaluate an MLP on the flattened input.

    Four dense layers (32, 64, 128, 256 units), each followed by dropout 0.4,
    and a softmax head. ``X_train`` must be 3-D.
    """
    input_layer = Input(shape=(X_train.shape[1], X_train.shape[2]))
    hidden = Flatten()(input_layer)
    for units in (32, 64, 128, 256):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(0.4)(hidden)
    output_layer = Dense(num_classes, activation='softmax')(hidden)
    model = Model(inputs=input_layer, outputs=output_layer)
    _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr)


def my_LSTMBiAtt_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, epochs, batch_size, lr):
    """Train and evaluate a bidirectional LSTM with attention over the steps.

    A score per step is computed from the BiLSTM output, normalised with
    softmax across the steps and used to re-weight the BiLSTM output before a
    final LSTM layer and the softmax head.
    """
    lstm_units = 128
    input_layer = Input(shape=X_train.shape[1:])
    lstm_layer = Bidirectional(LSTM(lstm_units, return_sequences=True))(input_layer)

    # One score per step -> softmax over the steps.
    attention = Dense(1, activation='tanh')(lstm_layer)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    # Broadcast the weights to the BiLSTM output width (forward + backward = 2 * lstm_units).
    attention = RepeatVector(2 * lstm_units)(attention)
    attention = Permute([2, 1])(attention)
    attention_mul = Multiply()([lstm_layer, attention])

    output_layer = LSTM(128)(attention_mul)
    output_layer = Dense(num_classes, activation='softmax')(output_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    _train_and_evaluate(model, X_train, X_val, X_test, y_train, y_val, y_test, epochs, batch_size, lr)

## Train/test split

In [None]:
X_train, X_val, X_test, y_train, y_val, y_test, num_classes, input_shape = prepare_data_for_the_model(data)

# Report the shape of every split before reshaping.
for label, split in (
    ("X_train shape:", X_train),
    ("X_val shape:", X_val),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_val shape:", y_val),
    ("y_test shape:", y_test),
):
    print(label, split.shape)

print("Number of classes:", num_classes)
print("Input shape:", input_shape)

# Turn each flat feature vector into (steps, 2) so the sequence models get 3-D input;
# this requires an even number of features per sample.
X_train, X_val, X_test = (
    split.reshape(len(split), -1, 2) for split in (X_train, X_val, X_test)
)

## Deep Learning Model

In [None]:
# Train and evaluate the 1-D CNN with the hyper-parameters from the configuration cell.
my_CNN_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, EPOCHS, BATCH_SIZE, LR)

In [None]:
# Train and evaluate the CNN + bidirectional LSTM model with the same hyper-parameters.
my_CNNRNN_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, EPOCHS, BATCH_SIZE, LR)

In [None]:
# Train and evaluate the stacked LSTM model with the same hyper-parameters.
my_LSTM_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, EPOCHS, BATCH_SIZE, LR)

In [None]:
# Train and evaluate the stacked SimpleRNN model with the same hyper-parameters.
my_RNN_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, EPOCHS, BATCH_SIZE, LR)

In [None]:
# Train and evaluate the MLP with the same hyper-parameters.
my_MLP_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, EPOCHS, BATCH_SIZE, LR)

In [None]:
# Train and evaluate the bidirectional LSTM with attention, using the same hyper-parameters.
my_LSTMBiAtt_model(X_train, X_val, X_test, y_train, y_val, y_test, num_classes, EPOCHS, BATCH_SIZE, LR)