In [None]:
import tensorflow as tf

# Ask TensorFlow for the visible GPUs and switch on memory growth for each one.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    # Only announce success when at least one GPU was actually configured.
    if gpus:
        print("GPUs are available and memory growth is set")
except RuntimeError as e:
    # A RuntimeError from TensorFlow is reported but does not stop the notebook.
    print(e)

In [None]:
import os
import pandas as pd

# Path prefix shared by the 13 per-subject recordings. The two-digit subject
# number and ".csv" are appended directly to this string (no path separator is
# inserted), so the files read are "<prefix>01.csv" ... "<prefix>13.csv".
directory = "/kaggle/input/vollmer-csv/vollmer_s0"  # Replace with the actual path prefix

# Collect one signal column per subject and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration.
signal_columns = []
for file_number in range(1, 14):
    # Zero-padded two-digit subject number: 01, 02, ..., 13
    file_name = f"{file_number:02d}.csv"
    file_path = directory + file_name
    data = pd.read_csv(file_path)
    # Column at positional index 4 (the fifth column of the CSV);
    # presumably the ECG channel, since later cells name it `ecg` - TODO confirm.
    signal_columns.append(data.iloc[:, 4])

# df has one column per subject, labelled 0..12 in file order.
df = pd.concat(signal_columns, axis=1, ignore_index=True)

print(df.shape)


In [None]:
import os
import pandas as pd

# Path prefix shared by the 13 per-subject label files. The two-digit subject
# number and ".csv" are appended directly to this string (no path separator is
# inserted), so the files read are "<prefix>01.csv" ... "<prefix>13.csv".
directory = "/kaggle/input/vollmer-csv/vollmer_labels_s0"  # Replace with the actual path prefix

# Marker columns kept from every label file, in this order. Later cells pick
# them by position (0 = Rest, 1 = Walking, 2 = 2-Back, 3 = Running).
marker_columns = ['FAROS_Marker/Rest', 'FAROS_Marker/Walking', 'FAROS_Marker/2-Back', 'Manual/Running']

# Collect the per-subject marker rows and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration.
marker_frames = []
for file_number in range(1, 14):
    # Zero-padded two-digit subject number: 01, 02, ..., 13
    file_name = f"{file_number:02d}.csv"
    file_path = directory + file_name
    cut = pd.read_csv(file_path)
    marker_frames.append(cut[marker_columns])

# Rows are stacked in file order and renumbered from 0.
# NOTE(review): later cells read row i as subject i, which assumes each label
# file contributes exactly one row - confirm against the data.
cut_points = pd.concat(marker_frames, axis=0, ignore_index=True)

print(cut_points.shape)


In [None]:
# Sanity check on the first subject: show the full signal and the window that
# starts at its first marker.
ecg = df[0].to_frame().reset_index(drop=True)

cut_point = cut_points.iloc[0]
# Positional access must go through .iloc: the row is labelled by marker-column
# names, and integer keys passed to plain [] on such a Series are deprecated.
cut_point1 = cut_point.iloc[0]

print(ecg.T)

# 76800 samples starting at the first marker, re-indexed from 0.
rest = ecg.iloc[cut_point1:cut_point1 + 76800].reset_index(drop=True)
print(rest.T)

# Leave-one-out cross-validation (LOOCV) train/test split: exactly one subject is held out as validation data and all remaining subjects are used as training data. Subject indices run from 0 to 12; `sub = 0` means subject 1 of the Vollmer dataset is used as test data, while the other subjects are used as training data.

In [None]:
import numpy as np
import pandas as pd
# Number of samples taken for each condition window.
SEGMENT_LEN = 76800

# Index (0..12) of the subject held out as the test fold; all other subjects
# go to the training set. sub = 0 holds out subject 1, sub = 2 holds out subject 3.
sub = 2


def _condition_window(signal, start):
    """Return SEGMENT_LEN samples of `signal` starting at row `start`, re-indexed from 0."""
    start = int(start)  # plain int so it is always a valid positional slice bound
    return signal.iloc[start:start + SEGMENT_LEN].reset_index(drop=True)


train_columns, test_columns = [], []
labels_train, labels_test = [], []

for i in range(13):
    ecg = df[i]
    cut_point = cut_points.iloc[i]

    # Marker positions within the row: 0 = 'FAROS_Marker/Rest', 2 = 'FAROS_Marker/2-Back'.
    # .iloc is required because integer keys passed to plain [] on a
    # string-labelled Series are deprecated.
    # The walking (position 1) and running (position 3) markers are not used.
    rest = _condition_window(ecg, cut_point.iloc[0])
    stress = _condition_window(ecg, cut_point.iloc[2])

    if i == sub:
        columns, labels = test_columns, labels_test
    else:
        columns, labels = train_columns, labels_train

    # Column order per subject is always (rest, stress), labelled (0, 1).
    columns.extend([rest, stress])
    labels.extend([0, 1])

# One column per window, built with a single concat per split.
result_train = pd.concat(train_columns, axis=1, ignore_index=True)
result_test = pd.concat(test_columns, axis=1, ignore_index=True)

print(result_train.shape)
print(labels_train)
print(result_test.shape)
print(labels_test)

In [None]:
# Length of one chunk: each 76800-sample window is cut into 5 equal parts.
midpoint = int(76800/5)  # Adjust the value as needed

# Cut result_train row-wise into 5 consecutive chunks, each re-indexed from 0.
# NOTE: a later cell rebinds `result_train` to a NumPy array, so this cell
# cannot be re-run after that one without re-running the split cell first.
train_chunks = [
    result_train.iloc[k * midpoint:(k + 1) * midpoint, :].reset_index(drop=True)
    for k in range(5)
]

# Put the chunks side by side; the label list is repeated once per chunk so it
# follows the same column order.
X_trai = pd.concat(train_chunks, axis=1, ignore_index=True)
y_tra = labels_train * 5
y_train = pd.DataFrame(y_tra)

# After transposing, every row is one chunk and every column one sample position.
X_train = X_trai.T
print(X_train.shape)
print(y_train.shape)

# Splitting the training samples into 10-second segments.

In [None]:
import pandas as pd

# Cut every row of X_train into fixed-width windows and stack them row-wise.
# NOTE(review): the notebook text calls these 10-second segments, while an
# earlier comment here described 15360 samples as a 30-second segment - confirm
# the sampling rate before quoting durations.
WINDOW_WIDTH = 2560

# Start columns of the windows, in stacking order: six back-to-back windows
# covering columns 0..15359, then three overlapping windows used as extra
# training samples. (Three more overlapping windows, starting at 1792, 6912
# and 12032, were disabled in the original cell.)
window_starts = [0, 2560, 5120, 7680, 10240, 12800, 768, 5888, 11008]
train_segments = [X_train.iloc[:, s:s + WINDOW_WIDTH] for s in window_starts]
print(train_segments[2].shape)

# Rows of result_train: all rows of window 0, then all rows of window 1, ...
result_train = np.vstack(train_segments)
print(result_train.shape)

# Repeat the labels once per stacked window group. The repeat count is derived
# from the window list so labels and samples cannot drift apart (the previous
# hard-coded 12 did not match the 9 groups that are actually stacked).
repeated_y_train = np.tile(y_train, (len(train_segments), 1))
assert repeated_y_train.shape[0] == result_train.shape[0], "labels and samples are out of step"

print(repeated_y_train.shape)

# Splitting the test samples into 10-second segments.

In [None]:
# Length of one chunk: each 76800-sample window is cut into 5 equal parts.
midpoint = int(76800/5)  # Adjust the value as needed

# Cut result_test row-wise into 5 consecutive chunks, each re-indexed from 0.
# NOTE: a later cell rebinds `result_test` to a NumPy array, so this cell
# cannot be re-run after that one without re-running the split cell first.
test_chunks = [
    result_test.iloc[k * midpoint:(k + 1) * midpoint, :].reset_index(drop=True)
    for k in range(5)
]

# Put the chunks side by side; the label list is repeated once per chunk so it
# follows the same column order.
X_tes = pd.concat(test_chunks, axis=1, ignore_index=True)
y_tes = labels_test * 5
y_test = pd.DataFrame(y_tes)

# After transposing, every row is one chunk and every column one sample position.
X_test = X_tes.T
print(X_test.shape)
print(y_test.shape)

In [None]:
import pandas as pd

# Cut every row of X_test into six back-to-back 2560-column windows and stack
# them row-wise. No overlapping windows are added for the test split.
window_bounds = [
    (0, 2560),
    (2560, 5120),
    (5120, 7680),
    (7680, 10240),
    (10240, 12800),
    (12800, None),  # last window runs to the final column
]
test_segments = [X_test.iloc[:, lo:hi] for lo, hi in window_bounds]
print(test_segments[2].shape)

# Rows of result_test: all rows of window 0, then all rows of window 1, ...
result_test = np.vstack(test_segments)

# Shape check: (6 * number of X_test rows, 2560)
print(result_test.shape)

# Labels repeated once per stacked window group (6 groups).
repeated_y_test = np.tile(y_test, (6, 1))

print(repeated_y_test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler,scale,MaxAbsScaler

# Labels as NumPy arrays; from here on `y_test` holds the per-window test labels.
repeated_y_train = np.array(repeated_y_train)
y_test = np.array(repeated_y_test)

# Standardise the features: the scaler is fitted on the training windows only
# and the same fitted transform is then applied to the test windows.
scaling = StandardScaler()
scaling.fit(result_train)
X_train = scaling.transform(result_train)
X_test = scaling.transform(result_test)

In [None]:
import numpy as np

# Report the shapes going in.
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of repeated_y_train: {repeated_y_train.shape}")

# A row mask must be 1-D, so flatten the label array when it has extra axes.
repeated_y_train = repeated_y_train.ravel() if repeated_y_train.ndim > 1 else repeated_y_train

# Row masks per class label: 0 was assigned to rest windows, 1 to stress windows.
label0_rows = repeated_y_train == 0
label1_rows = repeated_y_train == 1
X_majority = X_train[label0_rows, :]
X_minority = X_train[label1_rows, :]

print(f"Shape of X_majority: {X_majority.shape}")
print(f"Shape of X_minority: {X_minority.shape}")


# Checking the minority class. Because the dataset is balanced, no augmentation is applied.

In [None]:
# No class re-balancing is applied: both classes are passed through unchanged.
#
# The earlier sliding-window augmentation loop was removed. It wrote
# 2560-sample rows into a buffer that was only 2304 columns wide, which raises
# a ValueError, and its result was discarded anyway because both *_augmented
# names were rebound to the untouched class arrays right after the loop.
X_majority_augmented = X_majority
X_minority_augmented = X_minority
print(X_majority_augmented.shape)
print(X_minority_augmented.shape)

In [None]:
# Stack both classes and build the matching label vector:
# 0 for every row of the first class, 1 for every row of the second.
X_train_augmented = np.vstack((X_majority_augmented, X_minority_augmented))
y_train_augmented = np.hstack((
    np.zeros(X_majority_augmented.shape[0]),
    np.ones(X_minority_augmented.shape[0]),
))

# Shuffle samples and labels with one shared permutation. A dedicated, seeded
# generator makes the sample order identical on every run.
# NOTE: this does not seed Keras weight initialisation or batch shuffling.
SHUFFLE_SEED = 42
idx = np.random.default_rng(SHUFFLE_SEED).permutation(X_train_augmented.shape[0])
X_train_augmented = X_train_augmented[idx]
y_train_augmented = pd.DataFrame(y_train_augmented[idx])
print(X_train_augmented.shape)

# These are the arrays handed to model.fit.
X_train = X_train_augmented
y_train = y_train_augmented

In [None]:
# Final shapes, printed in the order: X_train, X_test, y_test, y_train.
for dataset in (X_train, X_test, y_test, y_train):
    print(dataset.shape)

# Model Buildup

In [None]:
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, LSTM, Bidirectional, Dense, Dropout, BatchNormalization, GlobalMaxPooling1D, Attention, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2

# Model input: one window of 2560 samples with a single channel.
window_length, n_channels = 2560, 1
input_layer = Input(shape=(window_length, n_channels))

# Shared CNN layers
def shared_cnn(input_layer, filters1, kernel_size1, filters2, kernel_size2, filters3, kernel_size3):
    """Stack three Conv1D -> MaxPooling1D -> BatchNormalization stages on `input_layer`.

    Args:
        input_layer: Keras tensor the first convolution is applied to.
        filters1, kernel_size1: filter count and kernel size of the first stage.
        filters2, kernel_size2: filter count and kernel size of the second stage.
        filters3, kernel_size3: filter count and kernel size of the third stage.

    Returns:
        The output tensor of the third stage's BatchNormalization layer.

    Note:
        New layer objects are created on every call, so two calls do not share
        weights even though the function is called "shared".
    """
    stage_specs = (
        (filters1, kernel_size1),
        (filters2, kernel_size2),
        (filters3, kernel_size3),
    )

    features = input_layer
    for n_filters, kernel_width in stage_specs:
        features = Conv1D(filters=n_filters, kernel_size=kernel_width, activation='relu')(features)
        features = MaxPooling1D(pool_size=2)(features)
        features = BatchNormalization()(features)

    return features

# Shared BiLSTM layer with attention
def shared_bilstm_with_attention(shared_cnn_output, units, return_sequences, num_heads):
    """Run a bidirectional LSTM, apply `num_heads` self-attention layers, and max-pool.

    Args:
        shared_cnn_output: Keras tensor fed to the bidirectional LSTM.
        units: number of units of the wrapped LSTM.
        return_sequences: forwarded to the LSTM constructor.
        num_heads: how many `Attention()` layers are applied to the LSTM output.

    Returns:
        The GlobalMaxPooling1D output of the concatenated attention outputs.

    NOTE(review): every head is a default-constructed `Attention()` called with
    the same tensor as query and value. With the Keras defaults the layer
    appears to have no trainable weights, so all heads would produce identical
    outputs - confirm whether a true multi-head attention layer was intended.
    """
    recurrent = Bidirectional(LSTM(units=units, return_sequences=return_sequences))
    bilstm_output = recurrent(shared_cnn_output)

    # One self-attention layer per head, each with the LSTM output as query and value.
    attention_heads = [
        Attention()([bilstm_output, bilstm_output])
        for _ in range(num_heads)
    ]

    # Join the heads along the feature axis, then take the maximum over the sequence axis.
    multi_attention = Concatenate(axis=-1)(attention_heads)
    return GlobalMaxPooling1D()(multi_attention)

# Two parallel CNN -> BiLSTM -> attention branches read the same input with
# different filter counts, kernel sizes, LSTM widths and head counts.
branch1_cnn = shared_cnn(
    input_layer,
    filters1=64, kernel_size1=10,
    filters2=128, kernel_size2=5,
    filters3=256, kernel_size3=3,
)
branch1_features = shared_bilstm_with_attention(branch1_cnn, units=64, return_sequences=True, num_heads=4)

branch2_cnn = shared_cnn(
    input_layer,
    filters1=32, kernel_size1=14,
    filters2=64, kernel_size2=8,
    filters3=128, kernel_size3=5,
)
branch2_features = shared_bilstm_with_attention(branch2_cnn, units=32, return_sequences=True, num_heads=2)

# Merge both branches along the feature axis.
concatenated_output = Concatenate(axis=-1)([branch1_features, branch2_features])

# Classification head: two L2-regularised Dense blocks (256 then 128 units),
# each followed by batch normalisation and 40% dropout.
head = concatenated_output
for n_units in (256, 128):
    head = Dense(units=n_units, activation='relu', kernel_regularizer=l2(0.01))(head)
    head = BatchNormalization()(head)
    head = Dropout(0.4)(head)

# Single sigmoid unit for the binary label.
output = Dense(units=1, activation='sigmoid')(head)

# Assemble and compile the model.
model = Model(inputs=input_layer, outputs=output)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Training the model

In [None]:
# Callbacks come from the same `tensorflow.keras` namespace the model was built
# with; mixing standalone `keras` and `tensorflow.keras` objects can break when
# the two resolve to different Keras installations.
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

# Stop once val_loss has not improved for 100 epochs; independently, keep the
# weights of the epoch with the best val_accuracy in 'best_model.h5'.
es = EarlyStopping(monitor='val_loss', mode='min', patience=100)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

# NOTE(review): the held-out test fold is passed as validation data, so both
# early stopping and checkpoint selection see the data that is later used for
# the reported metrics. Those metrics are therefore likely optimistic -
# consider carving a validation split out of the training subjects instead.
history = model.fit(
    X_train,
    y_train,
    epochs=600,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[es, mc],
)

# Calculating the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
# Load with the same `tensorflow.keras` namespace the model was built and saved
# with, rather than the standalone `keras` package.
from tensorflow.keras.models import load_model

# Restore the checkpoint with the best validation accuracy seen during training.
model = load_model('best_model.h5')

# Predicted probability of class 1 for every test window.
y_pred = model.predict(X_test)

# Threshold the probabilities at 0.5 to obtain hard 0/1 class labels.
y_pred_classes = (y_pred > 0.5).astype(int)

# Confusion matrix: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_classes)
print(cm)

# Loss vs Epoch curve plotting

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

# Training and validation loss per epoch, drawn on one set of axes.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# AUC score calculating

In [None]:
# Predicted probabilities for the test windows, scored against the true labels
# with the area under the ROC curve.
y_pred = model.predict(X_test)
auc_score = roc_auc_score(y_true=y_test, y_score=y_pred)

print('AUC Score:', auc_score)

# ROC curve plotting

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# Predicted probabilities for the test windows.
y_pred = model.predict(X_test)

# ROC operating points and the area under the curve.
fpr, tpr, thresholds = roc_curve(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_pred)

# ROC curve plus the diagonal of a random classifier for reference.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC Curve (AUC = {:.2f})'.format(auc_score))
ax.plot([0, 1], [0, 1], linestyle='--', color='r', label='Random')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend()
plt.show()