Training and testing with ECG

In [None]:
# Importing libraries
import pandas as pd
import seaborn as sns
import numpy as np
import os
import glob
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
%matplotlib inline
from keras import layers
import keras
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Load the training data

In [None]:
# Column names for the MHEALTH CSV files. The files are read without a header row, so
# these names are assigned positionally. The trailing 'subject' column was introduced
# so that the activities performed by a specific subject can be extracted later.
# NOTE(review): some right-lower-arm names carry a leading space (' acc_RLAsensor_X', ...).
# They are kept byte-for-byte because other cells reference the columns by these strings.
headers=('acc_CHsensor_X','acc_CHsensor_Y','acc_CHsensor_Z','ecg_i','ecg_ii','acc_LAsensor_X','acc_LAsensor_Y','acc_LAsensor_Z',
       'gyro_LAsensor_X','gyro_LAsensor_Y','gyro_LAsensor_Z','magn_LAsensor_X','magn_LAsensor_Y','magn_LAsensor_Z',
      ' acc_RLAsensor_X',' acc_RLAsensor_Y',' acc_RLAsensor_Z', 'gyro_RLAsensor_sensor_X',' gyro_RLAsensor_sensor_Y',
      ' gyro_RLAsensor_sensor_Z','magn_RLAsensor_X','magn_RLAsensor_Y','magn_RLAsensor_Z','classes', 'subject')
# File location (glob pattern matching one .csv per subject)
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
path = r"C:/Users/val-c/Machine Learning_Deep Learning/MHEALTHDATASET-csv/*.csv" # get files from the introduced subjects, .csv
# glob returns matches in arbitrary order; sort them so the row order of the merged
# frame (and everything derived from row positions) is the same on every run.
data_files = sorted(glob.glob(path))
if not data_files:
    # Fail here with a clear message instead of a later "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files matched the pattern: {path}")
# Read every matched file into its own DataFrame
data_frames = [pd.read_csv(file, header=None, names=headers) for file in data_files]

In [None]:
# Merge the per-file frames into one DataFrame; ignore_index=True gives the result
# a fresh 0..n-1 row index instead of repeating each file's own index.
movement_activity_data = pd.concat(data_frames, ignore_index=True)
movement_activity_data.head() #display first-five rows

In [None]:
# Count the missing values per column and show them as a one-column table named 'Total'
movement_activity_data.isna().sum(axis=0).to_frame('Total')

In [None]:
# Print the DataFrame.info() summary of the merged training frame
movement_activity_data.info()

In [None]:
# Display the distinct class labels present in the 'classes' column of the training data
movement_activity_data.classes.unique()

In [None]:
# Plot the number of samples present in each class using a bar plot.
# class_label[i] is the display name of class id i.
class_label = ['Null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                      'Jogging','Running','Jump front & back']
num_classes = len(class_label)
# value_counts() orders its result by frequency, not by class id. Reindex by class id so
# that bar i really belongs to class_label[i]; classes with no samples get a zero-height bar.
class_counts = movement_activity_data["classes"].value_counts().reindex(range(num_classes), fill_value=0)
plt.figure(figsize=(8, 7), dpi=80) # figure sizing
plt.bar(range(num_classes), class_counts, color='k', linewidth=0.1) #plot data
# Replace the numerical x-axis ticks with class labels
plt.xticks(range(num_classes), class_label, rotation=45, ha='right')
# the x-axis and y-axis labels, title
plt.xlabel('\nClasses', fontsize=12)
plt.ylabel('\nCount', fontsize=12)
plt.title("\nNumber of samples by class", fontsize=14)
plt.show() #display plot

In [None]:
# Examining the class label imbalance.
# counts[k] is the number of rows with class id k. The total must cover every class
# (the data is multi-class), and the share reported is that of the null class (id 0),
# which is the class that is downsampled afterwards.
counts = np.bincount(movement_activity_data["classes"])
null_count = counts[0]
total = counts.sum()
print('samples:\nTotal: {}\nNull class (0): {} ({:.2f}% of total)\n'.format(total, null_count, 100 * null_count / total))

In [None]:
# Downsampling null_classes_0 observation to account for the imbalance distribution
def downsample_data(movement_activity_data, n_samples=30720, random_state=1):
    """Reduce the number of null-class (classes == 0) rows to balance the classes.

    Parameters
    ----------
    movement_activity_data : pandas.DataFrame
        Frame with a 'classes' column.
    n_samples : int, default 30720
        Maximum number of null-class rows to keep. When fewer null-class rows
        exist, all of them are kept instead of raising.
    random_state : int, default 1
        Seed passed to DataFrame.sample so the draw is repeatable.

    Returns
    -------
    pandas.DataFrame
        The sampled null-class rows followed by all other rows, in their original
        relative order, with a fresh 0..n-1 index.
    """
    mask = movement_activity_data['classes'] == 0
    null_rows = movement_activity_data[mask]
    # Never ask for more rows than exist: sample(n=...) raises in that case.
    n_keep = min(n_samples, len(null_rows))
    HAR_data_classes0 = null_rows.sample(n=n_keep, random_state=random_state)
    HAR_data_classes_else = movement_activity_data[~mask]
    return pd.concat([HAR_data_classes0, HAR_data_classes_else], ignore_index=True)
# Apply the null-class downsampling to the merged training frame
movement_data = downsample_data(movement_activity_data)
# Display the number of rows per class after downsampling
movement_data.classes.value_counts()

In [None]:
# Display the number of rows and columns after downsampling the null class
print('rows, cols :',movement_data.shape)

In [None]:
# Feature aggregation of the ECG measurements: add a column 'Mean_ecg' holding the
# row-wise average of 'ecg_i' and 'ecg_ii', placed at column position 5.
# DataFrame.insert raises ValueError when the column already exists, so the insert is
# guarded to keep this cell re-runnable.
if 'Mean_ecg' not in movement_data.columns:
    movement_data.insert(5, 'Mean_ecg', (movement_data['ecg_i'] + movement_data['ecg_ii']) / 2)
# Display the DataFrame
movement_data.head(n=2) # display first-two rows

In [None]:
# Display the descriptive statistics of the dataframe, transposed (one row per column)
movement_data.describe().T

# Filtering the Training data

In [None]:
# Columns of the dataframe to low-pass filter (every signal column; 'classes' and 'subject' are excluded)
cols=['acc_CHsensor_X','acc_CHsensor_Y','acc_CHsensor_Z','ecg_i','ecg_ii','Mean_ecg','acc_LAsensor_X','acc_LAsensor_Y','acc_LAsensor_Z',
       'gyro_LAsensor_X','gyro_LAsensor_Y','gyro_LAsensor_Z','magn_LAsensor_X','magn_LAsensor_Y','magn_LAsensor_Z',
      ' acc_RLAsensor_X',' acc_RLAsensor_Y',' acc_RLAsensor_Z', 'gyro_RLAsensor_sensor_X',' gyro_RLAsensor_sensor_Y',
      ' gyro_RLAsensor_sensor_Z','magn_RLAsensor_X','magn_RLAsensor_Y','magn_RLAsensor_Z']
# Low-pass filtering to remove high-frequency noise from the sensor signals.
## Low-pass filter parameters
cutoff_freq = 20 # hz
fs = 50  # hz
nyquist_freq = 0.5 * fs
n_order = 3 #filter_order
# Design the Butterworth filter; the cutoff is passed normalised to the Nyquist frequency
b, a = butter(n_order , cutoff_freq/nyquist_freq, btype='low')
# Apply the filter forwards and backwards (filtfilt) down the rows of every selected column.
# NOTE(review): the filter runs over the whole frame at once, i.e. across the randomly
# sampled null-class rows and across the boundaries between concatenated recordings -
# confirm this is intended rather than filtering each recording separately.
filtered_data = filtfilt(b, a, movement_data[cols].values, axis=0)
# Rebuild a DataFrame from the filtered array. The source index is carried over so that
# re-attaching the labels below pairs rows correctly even if movement_data does not
# have a default 0..n-1 index (column assignment aligns on the index).
filtered_movement_data = pd.DataFrame(filtered_data, columns=cols, index=movement_data.index)
filtered_movement_data[['classes','subject']] = movement_data[['classes','subject']] # assign back the class labels and subject

# Feature selection for training

In [None]:
# Keep only the model input ('Mean_ecg') and the target ('classes'); every other
# sensor channel, the two individual ECG leads and 'subject' are left out.
move_data = filtered_movement_data[['Mean_ecg', 'classes']].copy()

In [None]:
# Feature and label extraction from the dataframe.
# Work on a copy so that move_data itself keeps its 'classes' column;
# pop() removes 'classes' from Feature and returns it as the label Series.
Feature =move_data.copy() #feature
Label = Feature.pop('classes') # label

In [None]:
# Display the columns of the feature frame (the input variables used for training)
Feature.columns

# Normalize the training data

In [None]:
# StandardScaler instance used to normalize the input variables (train set)
scaler = StandardScaler()
# Fit the scaler on the training features and transform them in one step
X_train_normalized = scaler.fit_transform(Feature)

# Data segmentation

In [None]:
# Creating a function for the sliding_window  ( this function would be applied to the training set database and testing set movesense data)
#num_time_step: which specifies the length of each segment
#step_size: which specifies the step size used to slide the window over the data
def create_sequences(X, y, num_time_step, stepsize=1): # X:input y: output/target
    """Cut X into fixed-length sliding windows and pick one label per window.

    Parameters
    ----------
    X : array-like
        Samples along the first axis.
    y : array-like
        One label per sample of X. A pandas Series is read by position, so its
        index does not have to be the default 0..n-1 range.
    num_time_step : int
        Length of each window, in samples.
    stepsize : int, default 1
        Number of samples the window advances between consecutive segments.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked windows and, for each window, the label of its last sample.

    Raises
    ------
    ValueError
        If X and y do not contain the same number of samples.
    """
    X = np.asarray(X)
    # Convert to a plain array: y[i] on a pandas Series is a label lookup, which
    # fails or returns the wrong row when the index is not 0..n-1.
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(f"X and y must have the same length, got {len(X)} and {len(y)}")
    window_starts = range(0, len(X) - num_time_step + 1, stepsize)
    segment = [X[start:start + num_time_step] for start in window_starts]
    # Each window is labelled with the class of its final sample.
    label = [y[start + num_time_step - 1] for start in window_starts]
    return np.array(segment), np.array(label)

# Creating training set

In [None]:
# Create the training sequences with create_sequences: windows of 100 samples,
# advanced by 50 samples each time (consecutive windows overlap by half).
X_train_seq,y_train_seq = create_sequences(X_train_normalized, Label, num_time_step=100, stepsize=50)
print (X_train_seq.shape, y_train_seq.shape)

In [None]:
# Extract the window length, number of features and number of classes used for the
# input and output of the neural network.
window_length, num_features = X_train_seq.shape[1], X_train_seq.shape[2]
# The sparse categorical loss uses each label as an index into the softmax output,
# so the output layer needs (largest class id + 1) units. Counting the distinct
# labels would be too small whenever a class id is missing from the window labels.
num_classes = int(np.max(y_train_seq)) + 1
print(window_length,num_features,num_classes)

# Configure the neural network

In [None]:
# CNN-1D + LSTM network architecture, assembled from three stages.
input_shape = (window_length, num_features)
network_input = layers.Input(input_shape)  # one window of shape (time steps, features)

# Stage 1: two Conv1D -> BatchNormalization -> ReLU groups
conv_stage = [
    layers.Conv1D(filters=64, kernel_size=3, padding='same'),  # CNN-1D layer 1
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv1D(filters=64, kernel_size=3, padding='same'),  # CNN-1D layer 2
    layers.BatchNormalization(),
    layers.Activation('relu'),
]

# Stage 2: two stacked LSTM layers, each followed by a ReLU activation.
# The first returns the full sequence so the second can consume it.
recurrent_stage = [
    layers.LSTM(units=128, return_sequences=True),   # LSTM layer 1
    layers.Activation('relu'),
    layers.LSTM(units=128, return_sequences=False),  # LSTM layer 2
    layers.Activation('relu'),
]

# Stage 3: dense head; softmax output with one unit per class (multi-class classification)
dense_stage = [
    layers.Dense(128, activation="relu"),             # Dense layer 1
    layers.Dense(num_classes, activation='softmax'),  # output layer
]

model = keras.Sequential([network_input] + conv_stage + recurrent_stage + dense_stage)

# Summary of the model configuration (all summary options left at their defaults)
model.summary()

# Compile the model

In [None]:
# Compiling the model: RMSprop with an exponentially decaying learning rate,
# sparse categorical cross-entropy loss (integer class labels) and sparse categorical accuracy.
initial_learning_rate =0.001
# NOTE(review): per the Keras ExponentialDecay documentation decay_steps counts optimizer
# steps (batches), not epochs - confirm that decaying by 0.9 every 10 steps is intended.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10,
    decay_rate=0.9) #decay_rate/factor
opt = keras.optimizers.RMSprop(learning_rate=lr_schedule) # optimizer
model.compile(optimizer=opt, loss= keras.losses.SparseCategoricalCrossentropy(), metrics=["sparse_categorical_accuracy"],)

# Train the model

In [None]:
# Train for 10 epochs with a batch size of 64; validation_split=0.1 sets aside a tenth
# of the training sequences for validation. The returned History object is kept for plotting.
history=model.fit(X_train_seq, y_train_seq,validation_split=0.1,epochs=10,batch_size=64)

In [None]:
# View the per-epoch training history (losses and metrics) as a pandas DataFrame
train_history= pd.DataFrame(history.history)
train_history #display history

In [None]:
# Training progress visualization: accuracy on the left, loss on the right.
# Solid lines are the training curves, dashed lines the validation curves.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(11, 8), dpi=80)

# Accuracy panel
ax_accuracy.plot(history.history['sparse_categorical_accuracy'], c='g')
ax_accuracy.plot(history.history['val_sparse_categorical_accuracy'], c='g', linestyle='--')
ax_accuracy.set_title('Training Accuracy')
ax_accuracy.set_ylabel('Accuracy')
ax_accuracy.set_xlabel('Epoch')
ax_accuracy.legend(['accuracy', 'val_accuracy'])

# Loss panel
ax_loss.plot(history.history['loss'], c='r')
ax_loss.plot(history.history['val_loss'], c='r', linestyle='--')
ax_loss.set_title('Training Loss')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['loss', 'val_loss'])

# Display plot
plt.show()

# Evaluate the training session

In [None]:
# Evaluate the trained model on the full set of training sequences
# (this includes the part that fit() used for validation) and print loss and accuracy.
loss, accuracy =model.evaluate(X_train_seq,y_train_seq,verbose=0)
print(f'Train loss: {loss:.4f}')
print(f'\nTrain accuracy: {accuracy:.4f}')

In [None]:
# Predict on the training sequences; the model outputs one score per class for each window
classify_train_data_ecg =model.predict(X_train_seq,verbose=0)
# Reduce the per-class scores to the index of the highest-scoring class
classify_train_data_ecg= np.argmax(classify_train_data_ecg, axis=1)

In [None]:
# Confusion matrix for the classifications on the train set.
# class_labels[i] is the display name of class id i.
class_labels  = ['Null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)', 'Cycling',
                      'Jogging','Running','Jump front & back' ]
# Pin the matrix to class ids 0..12. Without `labels`, the matrix only contains the ids
# that occur in the data, and the tick labels below would then be attached to the wrong
# rows/columns whenever a class is absent.
cm = confusion_matrix(y_train_seq, classify_train_data_ecg, labels=np.arange(len(class_labels)))
# Plot the confusion matrix for the classification
plt.figure(figsize=(11, 9),dpi=75)
sns.heatmap(cm,fmt="d",annot=True, cmap='GnBu', xticklabels=class_labels, yticklabels=class_labels,linewidths = 0.1)
plt.title('\nConfusion Matrix : Classification-training', fontsize=14, fontweight='bold')  #title
plt.xlabel('\nPredicted Activities', fontsize=12, fontweight='bold') #x-axis label
plt.ylabel('\nActual Activities', fontsize=12, fontweight='bold') #y-axis label
#display cm plot
plt.show()

# Load the testing data

In [None]:
# Load the ECG test files from the resampled, labelled Movesense folder.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
folder_path_ecg ='C:/Users/val-c/Desktop/movesense_lblled_rsp_ecg'
# os.listdir returns names in arbitrary order; sort them so the merged frame has the
# same row order on every run.
file_list = sorted(os.listdir(folder_path_ecg))
# Read every .csv file of the folder into its own DataFrame
dataframes = [
    pd.read_csv(os.path.join(folder_path_ecg, file_name))
    for file_name in file_list
    if file_name.endswith(".csv")
]
if not dataframes:
    # Fail here with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No .csv files found in: {folder_path_ecg}")
# Combine all the frames into one DataFrame with a fresh 0..n-1 index
combined_df_ecg = pd.concat(dataframes, ignore_index=True)
combined_df_ecg.head(n=5)# display dataframe

In [None]:
# Convert the 'classes' attribute from float to integer values
combined_df_ecg['classes'] = combined_df_ecg['classes'].astype('int')
# info() prints its summary itself and returns None, so it is called directly
# (wrapping it in print() would emit an extra "None" line).
combined_df_ecg.info()

In [None]:
# Count the missing values per column of the test data
combined_df_ecg.isna().sum()

In [None]:
# Print the DataFrame.info() summary of the combined test frame
combined_df_ecg.info()

In [None]:
# Display the distinct class labels present in the test dataframe
combined_df_ecg.classes.unique()

In [None]:
# Examine whether class imbalance exists in the test data:
## number of rows per class label, ordered from the rarest to the most frequent class
combined_df_ecg["classes"].value_counts().sort_values()

In [None]:
# Descriptive statistics of the test dataframe, transposed (one row per column)
combined_df_ecg.describe().T

In [None]:
# Conversion of the raw ECG sample values to mV.
## Conversion constants
## NOTE(review): 60 mV range and 15-bit resolution are presumably taken from the
## sensor specification - confirm against the device documentation.
peak_to_peak_volt = 60  # dynamic range/full-scale input voltage in mV
digital_resolution = 15  # resolution in bits
full_scale_digital_value = 2 ** digital_resolution  # number of steps in the digital output
full_scale_input_volt = peak_to_peak_volt / 2  # half of the peak-to-peak range, in mV
# sample_mV = sample / 2**15 * 30, inserted at column position 1.
# The conversion is guarded so the cell can be re-run: DataFrame.insert raises when the
# column already exists, and 'sample' is no longer present once it has been dropped.
if 'sample_mV' not in combined_df_ecg.columns:
    digital_output_value = combined_df_ecg['sample']  # digital output values of ECG measurements
    combined_df_ecg.insert(1, 'sample_mV', (digital_output_value / full_scale_digital_value) * full_scale_input_volt)
# Display new df
combined_df_ecg.head() # display first-five rows

In [None]:
# Drop the raw 'sample' column. errors='ignore' keeps the cell re-runnable: a second
# run, when the column is already gone, is a no-op instead of a KeyError.
combined_df_ecg = combined_df_ecg.drop(columns=['sample'], errors='ignore')

# Visualize ECG activities of the MHEALTH and Movesense datasets

- Visualization plot without filtering

In [None]:
# Performed activities with the ecg sensor >>
# To be used for 1 min/20x plot
activity_map_ecg = {1:'Standing still (1 min)',2:'Sitting and relaxing (1 min)',3:'Lying down (1 min)',
                    4:'Walking (1 min)', 5: 'Climbing stairs (1 min)',6:'Waist bends forward (20x)',
                    7:'Frontal elevation of arms (20x)',8:'Knees bending (crouching) (20x)',9:'Cycling (1 min)',
                    10:'Jogging (1 min)',11:'Running (1 min)',12:'Jump front & back (20x)'}

# To be used for 100 to 500 samples plot
_activity_map_ecg_ = {1:'Standing still',2:'Sitting and relaxing',3:'Lying down',4:'Walking',
                      5:'Climbing stairs',6:'Waist bends forward',7:'Frontal elevation of arms',
                      8:'Knees bending (crouching)',9:'Cycling',10:'Jogging',11:'Running',
                      12:'Jump front & back'}

In [None]:
# Unfiltered ECG per activity over the whole recording (1 min / 20x), one row per activity:
# left panel = MHEALTH ECG leads 1 and 2 of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(activity_map_ecg), 2, figsize=(12, 4 * len(activity_map_ecg)))
# Select a subject from the list of subjects in the experiment
subject = movement_data[movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(activity_map_ecg.items()):
    # Right panel: Movesense samples of this activity, re-indexed from 0
    movesense_trace = combined_df_ecg.loc[combined_df_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace, c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: both MHEALTH leads of the chosen subject for the same activity
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['ecg_i'], color='b', alpha=0.7)
    ax_mhealth.plot(mhealth_rows['ecg_ii'], color='green', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["ECG Lead 1", "ECG Lead 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Unfiltered ECG per activity over the whole recording (1 min / 20x), one row per activity:
# left panel = MHEALTH 'Mean_ecg' column of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(activity_map_ecg), 2, figsize=(12, 4 * len(activity_map_ecg)))
# Select a subject from the list of subjects in the experiment
subject = movement_data[movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(activity_map_ecg.items()):
    # Right panel: Movesense samples of this activity, re-indexed from 0
    movesense_trace = combined_df_ecg.loc[combined_df_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace, c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: mean of the two MHEALTH leads for the same activity
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['Mean_ecg'], color='purple', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["Mean of ECG Lead 1 & 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Unfiltered ECG per activity, zoomed to samples 100..500 of each activity:
# left panel = MHEALTH ECG leads 1 and 2 of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(_activity_map_ecg_), 2, figsize=(12, 4 * len(_activity_map_ecg_)))
# Select a subject from the list of subjects in the experiment
subject = movement_data[movement_data['subject'] == 'subject_f']  # subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(_activity_map_ecg_.items()):
    # Right panel: Movesense samples of this activity, re-indexed from 0, positions 100..500
    movesense_trace = combined_df_ecg.loc[combined_df_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace.iloc[100:501], c='r', alpha=0.9, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: both MHEALTH leads of the chosen subject, same sample range
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['ecg_i'].iloc[100:501], color='b', alpha=0.7)
    ax_mhealth.plot(mhealth_rows['ecg_ii'].iloc[100:501], color='green', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["ECG Lead 1", "ECG Lead 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Unfiltered ECG per activity, zoomed to samples 100..500 of each activity:
# left panel = MHEALTH 'Mean_ecg' column of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(_activity_map_ecg_), 2, figsize=(12, 4 * len(_activity_map_ecg_)))
# Select a subject from the list of subjects in the experiment
subject = movement_data[movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(_activity_map_ecg_.items()):
    # Right panel: Movesense samples of this activity, re-indexed from 0, positions 100..500
    movesense_trace = combined_df_ecg.loc[combined_df_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace.iloc[100:501], c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: mean of the two MHEALTH leads, same sample range
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['Mean_ecg'].iloc[100:501], color='purple', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["Mean of ECG Lead 1 & 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

# Filtering the test data

In [None]:
# Low-pass filter the test ECG signal to remove high-frequency noise.
## Low-pass filter parameters
cutoff_freq = 20  # hz
fs = 50  # sampling rate of the ecg_sample, hz
nyquist_freq = 0.5 * fs
n_order = 3
# Design the Butterworth low-pass filter; the cutoff is passed normalised to the Nyquist frequency
b, a = butter(n_order, cutoff_freq/nyquist_freq, btype='low')
# Apply the filter forwards and backwards (filtfilt) down the rows of the ECG column
filterd_data = filtfilt(b, a, combined_df_ecg[['sample_mV']].values, axis=0)
# Rebuild a DataFrame from the filtered array. The source index is carried over so that
# re-attaching the labels below pairs rows correctly even if combined_df_ecg does not
# have a default 0..n-1 index (column assignment aligns on the index).
col=['sample_mV'] #column of df
df_filtered_ecg = pd.DataFrame(filterd_data, columns=col, index=combined_df_ecg.index)
df_filtered_ecg['classes'] =  combined_df_ecg['classes'] # assign its class labels

- Visualization plot with filtering

In [None]:
# Filtered ECG per activity over the whole recording (1 min / 20x), one row per activity:
# left panel = MHEALTH ECG leads 1 and 2 of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(activity_map_ecg), 2, figsize=(12, 4 * len(activity_map_ecg)))
# Select a subject from the list of subjects in the experiment
subject = filtered_movement_data[filtered_movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(activity_map_ecg.items()):
    # Right panel: filtered Movesense samples of this activity, re-indexed from 0
    movesense_trace = df_filtered_ecg.loc[df_filtered_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace, c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: both filtered MHEALTH leads of the chosen subject for the same activity
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['ecg_i'], color='b', alpha=0.7)
    ax_mhealth.plot(mhealth_rows['ecg_ii'], color='green', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["ECG Lead 1", "ECG Lead 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Filtered ECG per activity over the whole recording (1 min / 20x), one row per activity:
# left panel = MHEALTH 'Mean_ecg' column of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(activity_map_ecg), 2, figsize=(12, 4 * len(activity_map_ecg)))
# Select a subject from the list of subjects in the experiment
subject = filtered_movement_data[filtered_movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(activity_map_ecg.items()):
    # Right panel: filtered Movesense samples of this activity, re-indexed from 0
    movesense_trace = df_filtered_ecg.loc[df_filtered_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace, c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: filtered mean of the two MHEALTH leads for the same activity
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['Mean_ecg'], color='purple', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["Mean of ECG Lead 1 & 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Filtered ECG per activity, zoomed to samples 100..500 of each activity:
# left panel = MHEALTH ECG leads 1 and 2 of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(_activity_map_ecg_), 2, figsize=(12, 4 * len(_activity_map_ecg_)))
# Select a subject from the list of subjects in the experiment
subject = filtered_movement_data[filtered_movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(_activity_map_ecg_.items()):
    # Right panel: filtered Movesense samples of this activity, re-indexed from 0, positions 100..500
    movesense_trace = df_filtered_ecg.loc[df_filtered_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace.iloc[100:501], c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: both filtered MHEALTH leads of the chosen subject, same sample range
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['ecg_i'].iloc[100:501], color='b', alpha=0.7)
    ax_mhealth.plot(mhealth_rows['ecg_ii'].iloc[100:501], color='green', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["ECG Lead 1", "ECG Lead 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Filtered ECG per activity, zoomed to samples 100..500 of each activity:
# left panel = MHEALTH 'Mean_ecg' column of one subject, right panel = Movesense ECG.
fig, axs = plt.subplots(len(_activity_map_ecg_), 2, figsize=(12, 4 * len(_activity_map_ecg_)))
# Select a subject from the list of subjects in the experiment
subject = filtered_movement_data[filtered_movement_data['subject'] == 'subject_f'] #subject_f=subject-6

for row, (activity_id, activity_title) in enumerate(_activity_map_ecg_.items()):
    # Right panel: filtered Movesense samples of this activity, re-indexed from 0, positions 100..500
    movesense_trace = df_filtered_ecg.loc[df_filtered_ecg['classes'] == activity_id, 'sample_mV'].reset_index(drop=True)
    ax_movesense = axs[row, 1]
    ax_movesense.plot(movesense_trace.iloc[100:501], c='r', alpha=0.7, label='ECG Sample')
    ax_movesense.set_xlabel('Sample points', fontsize=9.5)
    ax_movesense.set_ylabel('Amplitude (mV)', fontsize=10)
    ax_movesense.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    ax_movesense.legend(loc='upper left', fontsize=9)

    # Left panel: filtered mean of the two MHEALTH leads, same sample range
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    ax_mhealth = axs[row, 0]
    ax_mhealth.plot(mhealth_rows['Mean_ecg'].iloc[100:501], color='purple', alpha=0.7)
    ax_mhealth.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    ax_mhealth.set_ylabel('\nAmplitude (mV)', fontsize=10)
    ax_mhealth.set_xlabel('Sample points', fontsize=9.5)
    ax_mhealth.legend(["Mean of ECG Lead 1 & 2"], fontsize=9, loc="upper left")

# Display plots
plt.tight_layout()
plt.show()

# Feature selection

In [None]:
# Split the filtered test frame into feature and label.
# Work on a copy so that df_filtered_ecg itself keeps its 'classes' column;
# pop() removes 'classes' from feature_ecg and returns it as the label Series.
feature_ecg=df_filtered_ecg.copy() #feature/input
label_ecg=feature_ecg.pop('classes') #label/output
# display feature and label shape
feature_ecg.shape , label_ecg.shape

# Normalize the test data

In [None]:
# Create a new StandardScaler instance to normalize the test data.
# NOTE(review): this rebinds the name `scaler`, so the scaler fitted on the training
# features is no longer reachable under that name after this cell runs.
scaler = StandardScaler()
# Fit the scaler on the test features and transform them.
# NOTE(review): the test data is standardised with its own mean/variance rather than the
# training statistics - confirm this per-dataset normalisation is intended.
scaled_feature = scaler.fit_transform(feature_ecg)

# Apply data segmentation function to the Test data

In [None]:
# Create the test sequences with the sliding-window function, using the same
# window length (100 samples) and step (50 samples) as for the training sequences.
_X_test_seq,_y_test_seq =create_sequences(scaled_feature,label_ecg,num_time_step=100,stepsize=50)
print (_X_test_seq.shape, _y_test_seq.shape)

# Evaluate the model on the Test data

In [None]:
# Evaluate the trained model on the Movesense ECG test sequences and print loss and accuracy
loss, accuracy =model.evaluate(_X_test_seq,_y_test_seq,verbose=0)
print(f'Test loss: {loss:.4f}')
print(f'\nTest accuracy: {accuracy:.4f}')

In [None]:
# Predict on the test sequences; the model outputs one score per class for each window
classify_test_data_ecg =model.predict(_X_test_seq,verbose=0)
# Reduce the per-class scores to the index of the highest-scoring class
classify_test_data_ecg= np.argmax(classify_test_data_ecg, axis=1)

In [None]:
# Confusion matrix for the classifications on the test data.
# class_labels[i] is the display name of class id i. 'Null' (id 0) is included because
# the model has an output for it and may predict it even if no test window is labelled 0.
class_labels  = ['Null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                      'Jogging','Running','Jump front & back' ]
# Pin the matrix to class ids 0..12. Without `labels`, its size depends on which ids occur
# in the true or predicted labels, and a fixed list of tick labels would then be attached
# to the wrong rows/columns (e.g. every label shifted by one when class 0 is predicted).
cm = confusion_matrix(_y_test_seq, classify_test_data_ecg, labels=np.arange(len(class_labels)))
# Plot the confusion matrix for the classification
plt.figure(figsize=(12, 8),dpi=75)
sns.heatmap(cm,fmt="d",annot=True, cmap='GnBu', xticklabels=class_labels, yticklabels=class_labels,linewidths = 0.1)
plt.title('\nConfusion Matrix : Classification-testing', fontsize=14, fontweight='bold')  #title
plt.xlabel('\nPredicted Activities', fontsize=12, fontweight='bold') #x-axis label
plt.ylabel('\nActual Activities', fontsize=12, fontweight='bold') #y-axis label
#display cm plot
plt.show()