Training and Testing with stat-consult sensor measurements (IMU-ECG-Pressure)

In [None]:
# Importing libraries
import pandas as pd
import seaborn as sns
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
from keras import layers
import keras
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Folder that holds the synchronised measurement workbooks
folder_path = 'C:/Users/val-c/Desktop/synchronised_measurements'
# os.listdir() returns entries in arbitrary order. Sort the names so that the
# row order of the combined dataframe (and therefore every window cut from it
# later) is identical on every run and on every machine.
file_list = sorted(os.listdir(folder_path))
# Read each Excel workbook into its own dataframe
dataframes = []
for file_name in file_list:
    # Skip "~$<name>.xlsx" owner files that Excel creates while a workbook is
    # open; they match the extension but are not readable workbooks.
    if file_name.endswith(".xlsx") and not file_name.startswith("~$"):
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_excel(file_path)
        dataframes.append(df)
# Fail with an explicit message instead of pd.concat's generic
# "No objects to concatenate" when the folder holds no workbooks.
if not dataframes:
    raise FileNotFoundError(f"No .xlsx files found in '{folder_path}'")
# Combine all the files into a single dataframe with a fresh 0..n-1 index
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.head(n=8)  # display the first 8 rows

In [None]:
# Optional: save the combined dataframe to a CSV file (built_homogenous_dataset.csv)
#combined_df.to_csv('built_homogenous_dataset.csv', index=False)

In [None]:
# Print a summary of the combined dataframe (columns, non-null counts, dtypes)
# to check that every file was read with the expected schema.
combined_df.info()

In [None]:
# Cast the "ECG-Data" column to floating point; all other columns keep their dtype
combined_df = combined_df.astype({"ECG-Data": float})

In [None]:
# Distinct values of the "classes" column, in ascending order
unique_label = np.unique(combined_df["classes"])
class_label = unique_label.copy()
class_label.sort()
print(class_label)

In [None]:
# Count the missing entries of every column and show them as a one-column table
combined_df.isnull().sum().to_frame(name='Total')

In [None]:
# Plot the number of samples present in each class_label
# Activity names, listed in ascending order of their numeric class id
class_label = ['Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
               'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)', 'cycling',
              'Jogging','Running','Jump front & back'] # ,
plt.figure(figsize=(7, 6)) # figure sizing
num_of_classes = len(class_label)
# value_counts() orders its result by descending frequency, not by class id.
# Re-order by class id so the i-th count belongs to the i-th name above;
# otherwise bars are paired with the wrong activity.
class_counts = combined_df["classes"].value_counts().sort_index()
if len(class_counts) != num_of_classes:
    raise ValueError(
        f"Expected {num_of_classes} classes but the data contains {len(class_counts)}")
# Creating barplot for the activity distribution in the df
# (plain array: the pairing with class_label is purely positional)
sns.barplot(x=class_label, y=class_counts.to_numpy(), color='olive', width=0.55)
# Replace the numerical x-axis ticks with class labels
plt.xticks(range(num_of_classes), class_label, rotation=45, ha='right')
# the x_axis & y_axis labels, title
plt.xlabel('\nClasses', fontsize=10)
plt.ylabel('\nCount', fontsize=10)
plt.title("\n Activity distribution of the stat-consult dataset", fontsize=12)
#display plot
plt.tight_layout()
plt.show()

- Visualizing the measured data

In [None]:
# Performed activities (class id -> description used in the subplot titles)
activity_dict = {1: 'Standing still (1 min)',2: 'Sitting and relaxing (1 min)', 3: 'Lying down (1 min)', 4: 'Walking (1 min)',
    5: 'Climbing stairs (1 min)',6: 'Waist bends forward (20x)',7: 'Frontal elevation of arms (20x)',8: 'Knees bending (crouching) (20x)',
    9: 'Cycling (1 min)',10: 'Jogging (1 min)',11: 'Running (1 min)', 12: 'Jump front & back (20x)'}
# One entry per subplot column:
# (title suffix, y-axis label, dataframe columns to draw, line colours, legend entries)
sensor_panels = [
    ('Accelerometer', 'Acceleration (m/s^2)',
     ['a0X_LSM6DSL', 'a0Y_LSM6DSL', 'a0Z_LSM6DSL'], ['r', 'b', 'y'], ["acc. x", "acc. y", "acc. z"]),
    ('Gyroscope', 'Rotation (rad/s)',
     ['g0X_LSM6DSL', 'g0Y_LSM6DSL', 'g0Z_LSM6DSL'], ['r', 'b', 'y'], ["gyro. x", "gyro. y", "gyro. z"]),
    ('Magnetometer', 'Magnetic Field (mT)',
     ['m0X_LSM303AH', 'm0Y_LSM303AH', 'm0Z_LSM303AH'], ['r', 'b', 'y'], ["mag. x", "mag. y", "mag. z"]),
    ('ECG', 'Amplitude (v)', ['ECG-Data'], ['purple'], ["ECG"]),
    ('Pressure', 'atm (hPa)', ['Pres'], ['g'], ["air-pressure"]),
]
# Determining the number of rows and columns for subplots
nos_row = len(activity_dict)
nos_col = len(sensor_panels)  # (accelerometer, gyroscope, magnetometer, ECG, and pressure)
# Setting the subplots: one row per activity, one column per sensor
fig, axs = plt.subplots(nos_row, nos_col, figsize=(23, 5 * nos_row)) #(dpi=90)
for i, (activity_id, activity_name) in enumerate(activity_dict.items()):
    # Select the rows of this activity once per row of subplots (instead of once
    # per plotted line) and renumber them so the x-axis starts at sample 0.
    activity_df = combined_df[combined_df['classes'] == activity_id].reset_index(drop=True)
    for j, (sensor_name, y_label, columns, colours, legend_entries) in enumerate(sensor_panels):
        ax = axs[i, j]
        for column, colour in zip(columns, colours):
            ax.plot(activity_df[column], color=colour, alpha=0.9)
        ax.set_title(f'{activity_name} - {sensor_name}', fontsize=13)
        ax.set_ylabel(y_label, fontsize=12)
        ax.set_xlabel('Sample points', fontsize=11)
        ax.legend(legend_entries, fontsize=9, loc="upper left")
# Adjust the layout
plt.tight_layout()
plt.show()# Display plots

In [None]:
# Separate the target column from the model inputs
Label = combined_df['classes'].copy()  # class/output/target
Features = combined_df.drop(columns='classes')  # features/input: every remaining column

In [None]:
# Standardise the input variables: learn the scaling parameters from the
# feature table, then apply them to that same table.
scaler = StandardScaler().fit(Features)
normalized_feature = scaler.transform(Features)

In [None]:
# Data-Segmentation
# Creating function for the sliding-window
#num_time_step: specifies the length of each segment/sequence
#stepsize: specifies the step-size used to slide the window over the data
def create_sequences(X, y, num_time_step, stepsize=1): # X:input y: output/target
    """Cut a recording into fixed-length windows with a sliding window.

    Each window covers ``num_time_step`` consecutive rows of ``X`` and is
    labelled with the entry of ``y`` at the window's last row.

    Parameters
    ----------
    X : array-like, first axis = samples
        Input measurements.
    y : array-like, at least ``len(X)`` entries
        Target value per sample. Entries are read by position, so a pandas
        Series with any index is handled the same way as a list or array.
    num_time_step : int
        Number of rows per window (>= 1).
    stepsize : int, default 1
        Number of rows the window advances between consecutive windows (>= 1).

    Returns
    -------
    (segment, label) : tuple of np.ndarray
        ``segment`` has shape ``(n_windows, num_time_step, *X.shape[1:])`` and
        ``label`` has shape ``(n_windows,)``. Both have zero windows when ``X``
        is shorter than ``num_time_step``.

    Raises
    ------
    ValueError
        If ``num_time_step`` or ``stepsize`` is smaller than 1, or ``y`` has
        fewer entries than ``X``.
    """
    if num_time_step < 1:
        raise ValueError("num_time_step must be >= 1")
    if stepsize < 1:
        raise ValueError("stepsize must be >= 1")
    # Plain arrays guarantee positional access; indexing a pandas Series with
    # an integer is label-based and breaks for a non-default index.
    X_arr = np.asarray(X)
    y_arr = np.asarray(y)
    if len(y_arr) < len(X_arr):
        raise ValueError("y must provide a label for every row of X")
    # First row of every window that still fits completely inside X
    starts = range(0, len(X_arr) - num_time_step + 1, stepsize)
    if len(starts) == 0:
        # Keep the window dimensions so callers can still read .shape[1:]
        empty_segment = np.empty((0, num_time_step) + X_arr.shape[1:], dtype=X_arr.dtype)
        return empty_segment, np.empty((0,), dtype=y_arr.dtype)
    segment = np.stack([X_arr[start:start + num_time_step] for start in starts])
    # Label of a window = target of its last sample
    label = y_arr[[start + num_time_step - 1 for start in starts]]
    return segment, label

In [None]:
# Segment the normalized measurements into windows and their labels
X_seq, y_seq = create_sequences(
    normalized_feature,
    Label,
    num_time_step=390,  # rows per window
    stepsize=195,       # the window advances by half its length
)
# Report the resulting array shapes
print(X_seq.shape, y_seq.shape)

In [None]:
# Dimensions needed by the input and output layers of the neural network
window_length, num_features = X_seq.shape[1:3]
# Width of the one-hot encoding of all window labels = number of output units
num_outputs = to_categorical(y_seq).shape[1]
print(window_length,num_features,num_outputs)

In [None]:
# Perform cross-validation
def _build_cnn_lstm(input_shape, num_outputs):
    """Return an uncompiled CNN-1D + LSTM classifier.

    input_shape: (window_length, num_features) of one window.
    num_outputs: number of units of the softmax output layer.
    """
    return keras.Sequential([
        # Input layer
        layers.Input(input_shape),
        layers.Conv1D(filters=64, kernel_size=3, padding='same'), # CNN-1D layer-1
        layers.BatchNormalization(), # batch_normalization
        layers.Activation('relu'), # activation function
        # Hidden Conv Layer
        layers.Conv1D(filters=64, kernel_size=3, padding='same'), # CNN-1D layer-2
        layers.BatchNormalization(), # batch_normalization
        layers.Activation('relu'), # activation function
        # Recurrent LSTM Layers
        layers.LSTM(units=128, return_sequences=True), # LSTM layer-1
        layers.Activation('relu'), # activation function
        layers.LSTM(units=128, return_sequences=False), # LSTM layer-2
        layers.Activation('relu'), # activation function
        # Dense layers
        layers.Dense(128, activation="relu"),
        # Activation function for the output layer 'softmax' for Multi-classification
        layers.Dense(num_outputs, activation='softmax')])

scores = []
# Creating the k-fold cross-validator: 5 folds, i.e. each fold tests on 1/5 of the windows.
# random_state fixes the shuffling so the folds are the same on every run
# (weight initialisation is still random).
# NOTE(review): the folds are drawn from individual, shuffled windows and the
# features were scaled before splitting. If neighbouring windows overlap, the
# same samples can appear in both train and test, which inflates the scores --
# consider a group/recording-wise split and per-fold scaling.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
for i, (train, test) in enumerate(kfold.split(X_seq, y_seq)):
    print(f'Fold {i}:')
    print(train.shape)
    print(test.shape)
    # Drop the previous fold's model from the Keras session so memory does not
    # grow with every fold.
    keras.backend.clear_session()
    # Convert output variables to categorical (one-hot encoding).
    # num_classes pins the width to the model's output size; without it the
    # width follows the largest id present in the fold and can mismatch.
    y_train_seq = to_categorical(y_seq[train], num_classes=num_outputs)
    y_test_seq = to_categorical(y_seq[test], num_classes=num_outputs)

    # Configuring the CNN-1D_LSTM network architecture (fresh weights per fold)
    input_shape = (window_length, num_features)
    model = _build_cnn_lstm(input_shape, num_outputs)

    # Compiling the model
    initial_learning_rate = 0.001
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=10,
        decay_rate=0.9)
    opt = keras.optimizers.RMSprop(learning_rate=lr_schedule)#optimizer
    model.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

    # Training the model (the held-out fold is only monitored, not used for fitting)
    model.fit(X_seq[train], y_train_seq , validation_data=(X_seq[test], y_test_seq), epochs=10, batch_size=64)

    # Predicting on the testing set
    y_pred = model.predict(X_seq[test])
    # Converting predictions to class_labels (index of the most probable output unit)
    predctd_label = np.argmax(y_pred, axis=1)
    true_label = y_seq[test]

    # Calculate accuracy score for the current fold
    fold_accuracy = accuracy_score(true_label, predctd_label)
    # Storing the accuracy score for the fold
    scores.append(fold_accuracy)

# Print the accuracy scores for each fold and finally compute the Mean accuracy
print('\n')
print(scores)
print('\n')
avg_acc = np.mean(scores)
print("Mean_Accuracy:", avg_acc)

In [None]:
# Express the cross-validated mean accuracy as a percentage
avg_accuracy_percent = 100 * avg_acc
# Show it rounded to one decimal place
print("Mean_Accuracy: {:.1f} %".format(avg_accuracy_percent))

In [None]:
# Classes of the performed activities, listed in ascending order of their class id
class_labels  = ['Standing still','Sitting and relaxing','Lying down','Walking','Climbing stairs',
                 'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                 'Jogging','Running','Jump front & back']
# Class ids present in the windowed data, ascending; fixing them as the matrix
# axes keeps every row/column aligned with its name even when the evaluated
# fold lacks a class or the model predicts an id that never occurs.
label_ids = np.unique(y_seq)
if len(label_ids) != len(class_labels):
    raise ValueError(
        f"Expected {len(class_labels)} classes but the data contains {len(label_ids)}")
# Create a confusion matrix >> test set.
# y_test_seq and predctd_label are overwritten in every cross-validation
# iteration, so this shows the LAST fold only, not all folds.
conf_matrix = confusion_matrix(np.argmax(y_test_seq, axis=1), predctd_label, labels=label_ids)
# Row-normalise to percentages of each actual class; rows without any test
# sample stay at 0 instead of becoming NaN through a division by zero.
row_totals = conf_matrix.sum(axis=1)[:, np.newaxis]
class_lbl_accuracy = np.divide(conf_matrix.astype('float'), row_totals,
                               out=np.zeros(conf_matrix.shape, dtype=float),
                               where=row_totals > 0) * 100
# Plot the confusion matrix with accuracy percentages for the predictions
plt.figure(figsize=(8, 6))
sns.heatmap(class_lbl_accuracy, annot=True, fmt='.1f', cmap='OrRd', xticklabels=class_labels, yticklabels=class_labels, linewidths=0.5) #plot data
plt.title('\nConfusion Matrix : Testing Classification Accuracy (%)', fontsize=11, fontweight='bold')
plt.xlabel('\nPredicted Activities', fontsize=11, fontweight='bold')
plt.ylabel('\nActual Activities', fontsize=11, fontweight='bold')
plt.show() # display plot