In [13]:
import mne
import pandas as pd
from scipy.signal import butter, filtfilt
from tqdm import tqdm
import numpy as np

from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, BatchNormalization, Attention
from keras.optimizers import Adam

In [14]:
path='C:/Users/karme/Downloads/new_data_encoded/'

In [15]:
def load_emg_data(signals):
    data = mne.io.read_raw_edf(signals, preload=True, verbose=False, encoding='latin1')
    data.resample(1000)
    return data

In [16]:
def emg_data_preprocessing(signals):
    # Convert to DataFrame and select relevant columns
    raw_data = signals.to_data_frame()
    raw_data = raw_data.loc[:, ~raw_data.columns.str.startswith('time')]
    raw_data = raw_data.iloc[:, -6:].to_numpy(dtype='float64')

    # Bandpass filter (10-450 Hz)
    low_band = 20 / 500
    high_band = 450 / 500
    a, b = butter(2, [low_band, high_band], btype='band')
    emg_filtered = filtfilt(a, b, raw_data, method='gust')

    # Rectify the signal
    emg_rectified = np.abs(emg_filtered)
    # Normalize the data
    emg_normalized = (emg_rectified - np.mean(emg_rectified, axis=0)) / np.std(emg_rectified, axis=0)

    return emg_normalized

In [17]:
def get_datalabels(non_data, seizure_data, insert_col):
    seizure_labels = [1] * len(seizure_data)
    non_labels = [0] * len(non_data)

    all_data = np.concatenate((non_data, seizure_data), axis = 0)
    all_label = non_labels + seizure_labels

    data_label = np.insert(all_data, insert_col, all_label, axis=1)
    np.random.shuffle(data_label)

    data_label = pd.DataFrame(data_label)

    print(data_label)
    sig , label = data_label.iloc[:, :-1], data_label.iloc[:, -1]

    return sig , label

In [18]:
def splitting_data(sig , label):
    oversampler = RandomOverSampler(
        sampling_strategy=1.0, random_state=42
    )
    data_input_balanced, data_output_balanced = oversampler.fit_resample(
        sig, label
    )


    '''
    Split the data into training, validation and testing with ratios 60:20:20 respectively
    '''
    X, X_test, y, y_test = train_test_split(
        data_input_balanced, data_output_balanced, test_size = 0.20, random_state = 42
    )

    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size = 0.25, random_state = 0
    )

    return X_train, y_train, X_val, y_val, X_test, y_test

In [19]:
Start_End=pd.read_csv('Start_End.csv')
Start_End=Start_End.loc[:,~Start_End.columns.str.startswith('Unnamed: 0')]
Start_End

Unnamed: 0,Signal,Non_start,Non_end,Seizure_start,Seizure_end
0,P1.1.edf,120,150,54,68
1,P1.2.edf,100,120,39,56
2,P1.3.edf,126,140,65,76
3,P1.4.edf,130,150,69,82
4,P10.1.edf,116,130,53,66
5,P10.2.edf,105,135,47,58
6,P11.1.edf,50,70,26,48
7,P11.2.edf,130,170,42,58
8,P11.3.edf,100,130,25,40
9,P12.1.edf,100,130,28,37


In [20]:
all_seizure_data = []
all_non_data = []

for i in tqdm(range(len(Start_End.index))):
    temp = str(Start_End.iloc[i, 0])
    file = load_emg_data(path + temp)
    preprocessed = emg_data_preprocessing(file)


    Nonstart_time = (Start_End.iloc[i, 1])*1024
    Nonend_time = (Start_End.iloc[i, 2])*1024
    Seizurestart_time = (Start_End.iloc[i, 3])*1024
    Seizureend_time = (Start_End.iloc[i, 4])*1024


    non_data = preprocessed[:][Nonstart_time:Nonend_time]
    seizure_data = preprocessed[:][Seizurestart_time:Seizureend_time]

    all_seizure_data.append(pd.DataFrame(seizure_data))
    all_non_data.append(pd.DataFrame(non_data))

all_seizure_data = pd.concat(all_seizure_data, ignore_index=True)
all_non_data = pd.concat(all_non_data, ignore_index=True)

100%|██████████| 15/15 [00:30<00:00,  2.02s/it]


In [21]:
import tensorflow as tf

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [22]:
sig , label = get_datalabels(all_non_data, all_seizure_data, 6)
# print(label.value_counts())

X_train, y_train, X_val, y_val, X_test, y_test = splitting_data(sig , label)

checkpoint = ModelCheckpoint(
    filepath='best-model/best_model.weights.h5',
    save_weights_only=True,
    save_best_only=True,
    verbose=1
)

               0         1         2         3         4         5    6
0       0.105276  1.550028  0.101586  0.195083  0.987401  3.288814  1.0
1       5.518878 -0.168667  3.052647 -0.451323 -0.171620  1.252200  0.0
2       1.573374 -0.437759  0.378517  3.318596  0.178550  0.100831  1.0
3       1.570637  2.316245  4.387606  1.193490  9.571342  2.430719  1.0
4      -0.426869 -0.453103  0.197178  0.392171  1.132225 -0.362813  0.0
...          ...       ...       ...       ...       ...       ...  ...
604155 -0.447926 -0.586444 -0.427571 -0.520714 -0.490685 -0.667375  0.0
604156  1.266325 -0.352104  3.590747  0.413340  1.077991  0.236407  1.0
604157 -0.839811 -0.097092 -0.766393 -0.711786 -0.312977 -0.488010  0.0
604158 -0.085439  0.372500 -0.740664  0.621390 -0.415309 -0.334758  1.0
604159  2.695051 -0.674836  5.716072 -0.708466  1.675557  1.821291  1.0

[604160 rows x 7 columns]


In [23]:
X_train_array = X_train.to_numpy()
X_val_array = X_val.to_numpy()
X_test_array = X_test.to_numpy()

# Reshape the input data
X_train_reshaped = X_train_array.reshape((X_train_array.shape[0], 1, X_train_array.shape[1]))
X_val_reshaped = X_val_array.reshape((X_val_array.shape[0], 1, X_val_array.shape[1]))
X_test_reshaped = X_test_array.reshape((X_test_array.shape[0], 1, X_test_array.shape[1]))

y_train_reshaped = np.expand_dims(y_train, axis=1)
y_val_reshaped = np.expand_dims(y_val, axis=1)
y_test_reshaped = np.expand_dims(y_test, axis=1)

## 1. Additional and Different Layers:
Convolutional Layers: Convolutional Neural Networks (CNNs) are excellent at capturing spatial hierarchies in data. Adding a few convolutional layers before the LSTM layers can help in capturing local patterns in the EMG signals.
More LSTM Layers: Adding more LSTM layers can help capture more complex temporal dependencies in the data.
Attention Mechanism: Integrating attention layers can help the model focus on the most relevant parts of the input sequence.
Batch Normalization: Adding batch normalization can help stabilize and accelerate the training process.
## 2. Data Augmentation:
Noise Injection: Adding noise to the EMG signals during training can make the model more robust.
Signal Transformation: Applying various transformations such as scaling, shifting, and jittering can help the model generalize better.
## 3. Hyperparameter Tuning:
Experiment with different numbers of LSTM units, dropout rates, learning rates, and batch sizes.
## 4. Regularization:
Dropout: Consider increasing the dropout rate or applying dropout to more layers to prevent overfitting.
L2 Regularization: Add L2 regularization to the dense layers.
## 5. Model Evaluation:
Use a more comprehensive evaluation metric, such as the F1 score, precision, recall, or the area under the ROC curve (AUC-ROC).
## Summary of Changes:
Added Conv1D Layers: To capture spatial features in the EMG signals.
Batch Normalization: To stabilize and accelerate training.
Dropout Increased: To prevent overfitting.
Regularization in Dense Layer: To further prevent overfitting.
AUC Metric: Added as an additional evaluation metric.
## Further Steps:
Cross-Validation: Use cross-validation to evaluate the model performance.
Experimentation: Experiment with different architectures and hyperparameters to find the best model configuration for your specific data and task.
These changes should help improve the performance and robustness of your model for detecting convulsive seizures from EMG signals.

In [24]:
input_shape=(1, 6)

model = Sequential()
# Bidirectional LSTM layers
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(128)))
model.add(Dropout(0.3))

# Dense layer with L2 regularization
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))

model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy', 'AUC'])


history = model.fit(X_train_reshaped, y_train_reshaped, epochs=10, batch_size=32, validation_data=(X_val_reshaped, y_val_reshaped), verbose=1, callbacks=[checkpoint])

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.26319, saving model to best-model\best_model.weights.h5
Epoch 2/10
Epoch 2: val_loss improved from 0.26319 to 0.22584, saving model to best-model\best_model.weights.h5
Epoch 3/10
Epoch 3: val_loss improved from 0.22584 to 0.20991, saving model to best-model\best_model.weights.h5
Epoch 4/10
Epoch 4: val_loss improved from 0.20991 to 0.19805, saving model to best-model\best_model.weights.h5
Epoch 5/10
Epoch 5: val_loss improved from 0.19805 to 0.18847, saving model to best-model\best_model.weights.h5
Epoch 6/10
Epoch 6: val_loss improved from 0.18847 to 0.18421, saving model to best-model\best_model.weights.h5
Epoch 7/10
Epoch 7: val_loss improved from 0.18421 to 0.17975, saving model to best-model\best_model.weights.h5
Epoch 8/10
Epoch 8: val_loss improved from 0.17975 to 0.17677, saving model to best-model\best_model.weights.h5
Epoch 9/10
Epoch 9: val_loss improved from 0.17677 to 0.17327, saving model to best-model\best_model.weights

In [25]:
# Evaluate the model
loss, accuracy, auc = model.evaluate(X_test_reshaped, y_test_reshaped)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')
print(f'Test AUC: {auc}')

Test Loss: 0.1711205542087555
Test Accuracy: 0.9480628967285156
Test AUC: 0.9814354181289673
