Training and Testing with MHealth & Movesense IMU (acceleration)

In [None]:
# Importing libraries
import pandas as pd
import seaborn as sns
import numpy as np
import os
import glob
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
%matplotlib inline
from keras import layers
import keras
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Load the training data

In [None]:
# Column names for the MHEALTH CSV files (the files are read without a header row).
# The 'subject' column was introduced so that the activities performed by a
# specific subject can be extracted later on.
# NOTE: some names carry a leading space (e.g. ' acc_RLAsensor_X'); later cells
# refer to the columns by these exact strings, so they are left unchanged here.
headers=('acc_CHsensor_X','acc_CHsensor_Y','acc_CHsensor_Z','ecg_i','ecg_ii','acc_LAsensor_X','acc_LAsensor_Y','acc_LAsensor_Z',
       'gyro_LAsensor_X','gyro_LAsensor_Y','gyro_LAsensor_Z','magn_LAsensor_X','magn_LAsensor_Y','magn_LAsensor_Z',
      ' acc_RLAsensor_X',' acc_RLAsensor_Y',' acc_RLAsensor_Z', 'gyro_RLAsensor_sensor_X',' gyro_RLAsensor_sensor_Y',
      ' gyro_RLAsensor_sensor_Z','magn_RLAsensor_X','magn_RLAsensor_Y','magn_RLAsensor_Z','classes', 'subject')
# File location: every .csv file of the subjects in the dataset folder
path = r"C:/Users/val-c/Machine Learning_Deep Learning/MHEALTHDATASET-csv/*.csv"
# glob.glob() returns its matches in arbitrary order; sort them so that the
# concatenated frame (and every window cut from it later) is reproducible.
data_files = sorted(glob.glob(path))
if not data_files:
    # Fail here with a clear message instead of a vague pd.concat error later
    raise FileNotFoundError(f"No CSV files matched the pattern: {path}")
# Read every file into its own DataFrame
data_frames = []
for file in data_files:
    movement_activity_data = pd.read_csv(file, header=None, names=headers)
    data_frames.append(movement_activity_data)

In [None]:
# Merge the per-file DataFrames into one frame with a fresh 0..N-1 index
movement_activity_data = pd.concat(data_frames, ignore_index=True)
movement_activity_data.head() # display the first five rows

In [None]:
# Count the missing values per column and show them in a one-column table named 'Total'
movement_activity_data.isna().sum(axis=0).to_frame('Total')

In [None]:
# Print the summary (columns, dtypes, non-null counts) of the merged training frame
movement_activity_data.info()

In [None]:
# List the distinct class labels that occur in the 'classes' column of the training data
movement_activity_data.classes.unique()

In [None]:
# Plot the number of samples present in each class_label using a bar plot.
class_label = ['Null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                      'Jogging','Running','Jump front & back']
num_classes = len(class_label)
# value_counts() orders its result by frequency, not by class id, so bar i would
# not necessarily belong to class_label[i]. Re-index by class id
# (0..num_classes-1); classes that never occur get a count of 0.
samples_per_class = movement_activity_data["classes"].value_counts().reindex(range(num_classes), fill_value=0)
plt.figure(figsize=(8, 6),dpi=80) # figure sizing
plt.bar(range(num_classes), samples_per_class, color='k', linewidth=0.1) # plot data
# Replace the numerical x-axis ticks with the class names
plt.xticks(range(num_classes), class_label, rotation=45, ha='right')
# x-axis and y-axis labels, title
plt.xlabel('\nClasses', fontsize=12)
plt.ylabel('\nCount', fontsize=12)
plt.title("\nNumber of samples by class", fontsize=14)
# display plot
plt.tight_layout()
plt.show()

In [None]:
# Examining the class-label imbalance: share of the Null class (label 0) among ALL samples.
# np.bincount returns one count per integer label, indexed by the label itself.
counts = np.bincount(movement_activity_data["classes"])
# The total must cover every class, not only labels 0 and 1
total = counts.sum()
null_count = counts[0]
print('samples:\nTotal: {}\nNull class: {} ({:.2f}% of total)\n'.format(total, null_count, 100 * null_count / total))

In [None]:
# Downsampling the null class (label 0) to account for the imbalanced class distribution
def downsample_data(movement_activity_data, n_null=30720, random_state=1):
    """Return a copy of the data with the null class (label 0) downsampled.

    Parameters
    ----------
    movement_activity_data : pandas.DataFrame
        Frame with an integer 'classes' column; rows with ``classes == 0``
        are treated as the null class.
    n_null : int, default 30720
        Maximum number of null-class rows to keep. If fewer null rows are
        available, all of them are kept instead of raising an error.
    random_state : int, default 1
        Seed passed to ``DataFrame.sample`` so the selection is repeatable.

    Returns
    -------
    pandas.DataFrame
        The sampled null-class rows followed by all other rows (in their
        original order), with a fresh 0..N-1 index.
    """
    mask = movement_activity_data['classes'] == 0
    null_rows = movement_activity_data[mask]
    # Never ask for more rows than exist: sample() without replacement raises otherwise
    n_keep = min(n_null, len(null_rows))
    # NOTE(review): sample() returns the rows in random order, so the kept
    # null-class rows are no longer consecutive in time - confirm this is
    # acceptable for the filtering / windowing applied to the result later.
    HAR_data_classes0 = null_rows.sample(n=n_keep, random_state=random_state)
    HAR_data_classes_else = movement_activity_data[~mask]
    return pd.concat([HAR_data_classes0, HAR_data_classes_else], ignore_index=True)
# Apply the downsampling to the merged training data
movement_data = downsample_data(movement_activity_data)
# Show the number of samples per class after downsampling
movement_data.classes.value_counts()

In [None]:
# Plot the number of samples present in each class_label after downsampling the null class.
class_label = ['Null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                      'Jogging','Running','Jump front & back']
num_classes = len(class_label)
# value_counts() orders its result by frequency, not by class id, so the counts
# must be re-indexed by class id (0..num_classes-1) before they are paired with
# the class names; classes that never occur get a count of 0.
samples_per_class = movement_data["classes"].value_counts().reindex(range(num_classes), fill_value=0)
plt.figure(figsize=(8, 6), dpi=80) # figure sizing
# Pass plain arrays so the bars are paired with the names purely by position
sns.barplot(x=class_label, y=samples_per_class.to_numpy(), color='k', width=0.7) # plot data
# Rotate the class names on the x-axis
plt.xticks(range(num_classes), class_label, rotation=45, ha='right')
# x-axis and y-axis labels, title
plt.xlabel('\nClasses', fontsize=12)
plt.ylabel('\nCount', fontsize=12)
plt.title("\nActivity distribution of the MHealth dataset", fontsize=14)
# display plot
plt.tight_layout()
plt.show()

In [None]:
# Print the number of rows and columns of the frame after downsampling the null class
print('rows, cols :',movement_data.shape)

In [None]:
# Show the descriptive statistics of the downsampled frame, one row per column (transposed)
movement_data.describe().T

# Filtering the Training data

In [None]:
# Sensor columns that get low-pass filtered (everything except 'classes' and 'subject')
cols = [
    'acc_CHsensor_X', 'acc_CHsensor_Y', 'acc_CHsensor_Z',
    'ecg_i', 'ecg_ii',
    'acc_LAsensor_X', 'acc_LAsensor_Y', 'acc_LAsensor_Z',
    'gyro_LAsensor_X', 'gyro_LAsensor_Y', 'gyro_LAsensor_Z',
    'magn_LAsensor_X', 'magn_LAsensor_Y', 'magn_LAsensor_Z',
    ' acc_RLAsensor_X', ' acc_RLAsensor_Y', ' acc_RLAsensor_Z',
    'gyro_RLAsensor_sensor_X', ' gyro_RLAsensor_sensor_Y', ' gyro_RLAsensor_sensor_Z',
    'magn_RLAsensor_X', 'magn_RLAsensor_Y', 'magn_RLAsensor_Z',
]
# Low-pass filter parameters (removes high-frequency noise from the sensor signals)
cutoff_freq = 20  # Hz
fs = 50  # Hz, sampling rate
nyquist_freq = 0.5 * fs
n_order = 3  # filter order
# Butterworth design; the cutoff is given as a fraction of the Nyquist frequency
b, a = butter(N=n_order, Wn=cutoff_freq / nyquist_freq, btype='low')
# Run the filter forwards and backwards (filtfilt) down every selected column
filtered_data = filtfilt(b, a, movement_data[cols].to_numpy(), axis=0)
# Rebuild a DataFrame from the filtered values and re-attach the label and subject columns
filtered_movement_data = pd.DataFrame(filtered_data, columns=cols)
filtered_movement_data[['classes', 'subject']] = movement_data[['classes', 'subject']]
# Display the first five rows of the filtered frame
filtered_movement_data.head()

# Feature selection for training

In [None]:
# Feature selection: discard every column except the chest acceleration
# (acc_CHsensor_X/Y/Z) and the class label
move_data = filtered_movement_data.drop(
    columns=[
        'ecg_i', 'ecg_ii',
        'acc_LAsensor_X', 'acc_LAsensor_Y', 'acc_LAsensor_Z',
        'gyro_LAsensor_X', 'gyro_LAsensor_Y', 'gyro_LAsensor_Z',
        'magn_LAsensor_X', 'magn_LAsensor_Y', 'magn_LAsensor_Z',
        ' acc_RLAsensor_X', ' acc_RLAsensor_Y', ' acc_RLAsensor_Z',
        'gyro_RLAsensor_sensor_X', ' gyro_RLAsensor_sensor_Y', ' gyro_RLAsensor_sensor_Z',
        'magn_RLAsensor_X', 'magn_RLAsensor_Y', 'magn_RLAsensor_Z',
        'subject',
    ]
)

In [None]:
# Split the selected columns into the model input and the target
Label = move_data['classes'].copy()  # class labels / output
Features = move_data.drop(columns=['classes'])  # features / input

In [None]:
# Show the column names of the selected input features
Features.columns

# Normalize the training data

In [None]:
# StandardScaler standardises each input feature (zero mean, unit variance with its default settings)
scaler = StandardScaler()
# Fit the scaler on the training features and transform them in one step
X_train_normalized = scaler.fit_transform(Features)

# Data segmentation

In [None]:
# Sliding-window segmentation (applied to both the training data and the Movesense test data)
# num_time_step: the length of each segment
# stepsize: the step used to slide the window over the data
def create_sequences(X, y, num_time_step, stepsize=1): # X:input y: output/target
    """Cut X into fixed-length windows and label each one by its last sample.

    Parameters
    ----------
    X : array-like of shape (n_samples, ...)
        Input samples in time order (NumPy array or DataFrame).
    y : array-like of shape (n_samples,)
        One label per sample. It is read by position, so a pandas Series
        with any index is handled correctly.
    num_time_step : int
        Number of consecutive samples in each window (>= 1).
    stepsize : int, default 1
        Number of samples the window advances between segments (>= 1).

    Returns
    -------
    tuple of numpy.ndarray
        ``(segments, labels)`` where ``segments`` has shape
        ``(n_windows, num_time_step, ...)`` and ``labels[k]`` is the label of
        the last sample of window ``k``. When the input is shorter than one
        window, both arrays are empty but keep these dimensions.

    Raises
    ------
    ValueError
        If ``num_time_step`` or ``stepsize`` is smaller than 1.
    """
    if num_time_step < 1:
        raise ValueError("num_time_step must be a positive integer")
    if stepsize < 1:
        raise ValueError("stepsize must be a positive integer")
    # Plain arrays make every lookup positional; y[i] on a pandas Series is a
    # label-based lookup and breaks as soon as the index is not 0..n-1.
    X = np.asarray(X)
    y = np.asarray(y)
    starts = range(0, len(X) - num_time_step + 1, stepsize)
    if len(starts) == 0:
        # Not enough samples for a single window: return well-shaped empties
        return (np.empty((0, num_time_step) + X.shape[1:], dtype=X.dtype),
                np.empty((0,), dtype=y.dtype))
    segment = np.stack([X[start:start + num_time_step] for start in starts])
    label = np.array([y[start + num_time_step - 1] for start in starts])
    return segment, label

In [None]:
# Cut the normalised training data into windows of 100 samples, advancing 50 samples between windows
X_train_seq,y_train_seq = create_sequences(X_train_normalized, Label, num_time_step=100, stepsize=50)
# Print the shapes of the window array and of the label array
print (X_train_seq.shape, y_train_seq.shape)

In [None]:
# Extract the window length and the number of features (network input) and the number of classes (network output)
window_length, num_features = X_train_seq.shape[1], X_train_seq.shape[2]
# The sparse integer labels index the softmax outputs directly, so the output
# layer needs max(label) + 1 units. Counting the distinct labels would be too
# small whenever some class id does not occur among the window labels.
num_classes = int(y_train_seq.max()) + 1
print(window_length,num_features,num_classes)

# Configure the neural network

In [None]:
# CNN-1D + LSTM network architecture
input_shape = (window_length, num_features)
network_layers = [layers.Input(input_shape)]  # input

# Two identical convolutional blocks: Conv1D -> batch normalisation -> ReLU
for block_index in range(2):
    network_layers.extend([
        layers.Conv1D(filters=64, kernel_size=3, padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ])

# Two recurrent LSTM layers; only the first one returns the full sequence
for keep_sequence in (True, False):
    network_layers.extend([
        layers.LSTM(units=128, return_sequences=keep_sequence),
        layers.Activation('relu'),
    ])

# Dense head; softmax output for the multi-class classification
network_layers.extend([
    layers.Dense(128, activation="relu"),
    layers.Dense(num_classes, activation='softmax'),
])

model = keras.Sequential(network_layers)

# Summary of the model configuration (all summary options left at their defaults)
model.summary()

In [None]:
# display block diagram of the network architecture
#tf.keras.utils.plot_model(model, show_shapes=True)

# Compile the model

In [None]:
# Compiling the model
# Learning-rate schedule: start at 0.001 and decay exponentially.
# NOTE(review): per the Keras documentation the rate is
# initial_learning_rate * decay_rate ** (step / decay_steps), where a step is
# one optimizer update (one batch). With decay_steps=10 the rate therefore
# shrinks by 10% every 10 batches - confirm that such a fast decay is intended.
initial_learning_rate =0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10,
    decay_rate=0.9) # decay rate / factor
opt = keras.optimizers.RMSprop(learning_rate=lr_schedule) # optimizer
# Sparse categorical loss/metric: the targets are integer class ids, not one-hot vectors
model.compile(optimizer=opt, loss= keras.losses.SparseCategoricalCrossentropy(), metrics=["sparse_categorical_accuracy"],)

# Train the model

In [None]:
# Train for 10 epochs with batches of 64 windows; 10% of the windows are held out for validation.
# NOTE(review): Keras takes the validation_split fraction from the end of the
# arrays before shuffling - confirm this split suits the ordering of the windows.
history=model.fit(X_train_seq, y_train_seq,validation_split=0.1,epochs=10,batch_size=64)

In [None]:
# Put the per-epoch training history (loss and metric values) into a pandas DataFrame
train_history= pd.DataFrame(history.history)
train_history # display the history table

In [None]:
# Visualise the training session: accuracy (left panel) and loss (right panel),
# training curve solid and validation curve dashed
plt.figure(figsize=(8, 7), dpi=75)
panels = [
    # (history key, colour, title, y-axis label, legend entries)
    ('sparse_categorical_accuracy', 'g', 'Training Accuracy', 'Accuracy', ['accuracy', 'val_accuracy']),
    ('loss', 'r', 'Training Loss', 'Loss', ['loss', 'val_loss']),
]
for position, (metric, colour, panel_title, y_label, legend_entries) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], c=colour)  # training curve
    plt.plot(history.history['val_' + metric], c=colour, linestyle='--')  # validation curve
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(legend_entries)
# display plot
plt.tight_layout()
plt.show()

# Evaluate the training session

In [None]:
# Evaluate the trained model on the training windows; evaluate() returns the loss and the compiled metric
loss, accuracy =model.evaluate(X_train_seq, y_train_seq,verbose=0)
print(f'Train loss: {loss:.4f}')
print(f'\nTrain accuracy: {accuracy:.4f}')

In [None]:
# Classify the training windows: take the class with the highest predicted
# probability for every window
train_probabilities = model.predict(X_train_seq, verbose=0)
classify_train_data = train_probabilities.argmax(axis=1)

In [None]:
# Confusion matrix for the classifications on the training windows
class_labels = ['null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)', 'Cycling',
                      'Jogging','Running','Jump front & back']
# Pass the full list of class ids explicitly: without `labels`, a class that
# appears neither in the targets nor in the predictions is dropped from the
# matrix and the tick labels below would no longer line up with its rows/columns.
cm = confusion_matrix(y_train_seq, classify_train_data, labels=np.arange(len(class_labels)))
# Plot the confusion matrix for the classification > training
plt.figure(figsize=(12, 8),dpi=80)
sns.heatmap(cm,fmt="d",annot=True, cmap='cividis', xticklabels=class_labels, yticklabels=class_labels,linewidths = 0.1)
plt.title('\nConfusion Matrix : Classification-training', fontsize=14, fontweight='bold')  # title
plt.xlabel('\nPredicted Activities', fontsize=12, fontweight='bold') # x-axis label
plt.ylabel('\nActual Activities', fontsize=12, fontweight='bold') # y-axis label
# display the confusion-matrix plot
plt.show()

# Load the testing data

In [None]:
# Folder holding the resampled, labelled Movesense IMU recordings
folder_path_imu ='C:/Users/val-c/Desktop/movesense_lblled_rsp_imu'
# os.listdir() returns the entries in arbitrary order; sort them so that the
# combined frame (and every window cut from it later) is reproducible.
file_list = sorted(os.listdir(folder_path_imu))
# Read every CSV file of the folder into its own DataFrame
dataframes = []
for file_name in file_list:
    if file_name.endswith(".csv"):  # files in csv format
        file_path = os.path.join(folder_path_imu, file_name)
        df = pd.read_csv(file_path)
        dataframes.append(df)
if not dataframes:
    # Fail with a clear message instead of the vague pd.concat error
    raise FileNotFoundError(f"No CSV files found in: {folder_path_imu}")
# Combine all test DataFrames into one frame with a fresh 0..N-1 index
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.head(n=5) # display the first five rows

In [None]:
# Count the missing values per column of the test data
combined_df.isna().sum()

In [None]:
# Print the summary (columns, dtypes, non-null counts) of the combined test frame
combined_df.info()

In [None]:
# Convert the 'classes' attribute from float to integer values
combined_df['classes'] = combined_df['classes'].astype('int')

In [None]:
# List the distinct class labels that occur in the test data
combined_df.classes.unique()

In [None]:
# Check whether the test data is imbalanced:
## show the number of samples per class label, smallest count first
combined_df["classes"].value_counts().sort_values()

In [None]:
# Drop the gyroscope measurements (gx, gy, gz) from the test frame
combined_df=combined_df.drop(['gx','gy','gz'],axis=1)

In [None]:
# Show the descriptive statistics of the test frame, one row per column (transposed)
combined_df.describe().T

# Visualize activities of the Mhealth and Movesense datasets

- Visualization plot without filtering

In [None]:
# Performed activities, keyed by class id (the null class 0 is not plotted)

# Plain activity names: used for the plots of samples 100 to 500
activity_map_accrl = {
    1: 'Standing still',
    2: 'Sitting and relaxing',
    3: 'Lying down',
    4: 'Walking',
    5: 'Climbing stairs',
    6: 'Waist bends forward',
    7: 'Frontal elevation of arms',
    8: 'Knees bending (crouching)',
    9: 'Cycling',
    10: 'Jogging',
    11: 'Running',
    12: 'Jump front & back',
}

# Names with the amount performed appended: used for the full 1 min / 20x plots.
# Activities 6, 7, 8 and 12 are marked '(20x)'; all others '(1 min)'.
activity_map_acc = {
    activity_id: f"{name} ({'20x' if activity_id in (6, 7, 8, 12) else '1 min'})"
    for activity_id, name in activity_map_accrl.items()
}

In [None]:
# Unfiltered acceleration per activity (1 min / 20x):
# MHEALTH chest sensor in the left column, Movesense in the right column
n_activities = len(activity_map_acc)
fig, axs = plt.subplots(n_activities, 2, figsize=(12, 4 * n_activities))
# MHEALTH recordings of one subject chosen from the experiment (subject_f = subject 6)
subject = movement_data[movement_data['subject'] == 'subject_f']
for row, (activity_id, activity_title) in enumerate(activity_map_acc.items()):
    # Right column: Movesense acceleration
    movesense_ax = axs[row, 1]
    movesense_rows = combined_df[combined_df['classes'] == activity_id].reset_index(drop=True)
    for axis_name, colour in zip('xyz', 'rby'):
        movesense_ax.plot(movesense_rows[axis_name], c=colour, alpha=0.7, label='acc. ' + axis_name)
    movesense_ax.set_xlabel('Sample points', fontsize=9.5)
    movesense_ax.set_ylabel('Acceleration (m/s^2)', fontsize=10)
    movesense_ax.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    movesense_ax.legend(loc='upper left', fontsize=9)

    # Left column: MHEALTH chest acceleration of the selected subject
    mhealth_ax = axs[row, 0]
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    for axis_name, colour in zip('XYZ', 'rby'):
        mhealth_ax.plot(mhealth_rows['acc_CHsensor_' + axis_name], color=colour, alpha=0.7)
    mhealth_ax.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    mhealth_ax.set_ylabel('\nAcceleration (m/s^2)', fontsize=10)
    mhealth_ax.set_xlabel('Sample points', fontsize=9.5)
    mhealth_ax.legend(["acc. x", "acc. y", "acc. z"], fontsize=9, loc="upper left")
# display plots
plt.tight_layout()
plt.show()

In [None]:
# Unfiltered acceleration per activity, restricted to samples 100 to 500:
# MHEALTH chest sensor in the left column, Movesense in the right column
n_activities = len(activity_map_accrl)
fig, axs = plt.subplots(n_activities, 2, figsize=(12, 4 * n_activities))
# MHEALTH recordings of one subject chosen from the experiment
subject = movement_data[movement_data['subject'] == 'subject_f']
for idx, (activity_id, activity_name) in enumerate(activity_map_accrl.items()):
    # Right column: Movesense acceleration, rows 100..500 of this activity
    movesense_ax = axs[idx, 1]
    movesense_rows = combined_df[combined_df['classes'] == activity_id].reset_index(drop=True)[100:501]
    for axis_name, colour in zip('xyz', 'rby'):
        movesense_ax.plot(movesense_rows[axis_name], c=colour, alpha=0.7, label='acc. ' + axis_name)
    movesense_ax.set_xlabel('Sample points', fontsize=9.5)
    movesense_ax.set_ylabel('Acceleration (m/s^2)', fontsize=10)
    movesense_ax.set_title(f'{activity_name} -  Movesense Dataset', fontsize=11)
    movesense_ax.legend(loc='upper left', fontsize=9)

    # Left column: MHEALTH chest acceleration, rows 100..500 of this activity
    mhealth_ax = axs[idx, 0]
    data_subset = subject[subject['classes'] == activity_id].reset_index(drop=True)[100:501]
    for axis_name, colour in zip('XYZ', 'rby'):
        mhealth_ax.plot(data_subset['acc_CHsensor_' + axis_name], color=colour, alpha=0.7)
    mhealth_ax.set_title(f'{activity_name} - MHEALTH Dataset', fontsize=11)
    mhealth_ax.set_ylabel('\nAcceleration (m/s^2)', fontsize=10)
    mhealth_ax.set_xlabel('Sample points', fontsize=9.5)
    mhealth_ax.legend(["acc. x", "acc. y", "acc. z"], fontsize=9, loc="upper left")
# display plots
plt.tight_layout()
plt.show()

# Re-order the Movesense acceleration measurements

In [None]:
# Re-order the Movesense axes to (y, z, x) so they line up with the x, y, z
# columns of the training features, and flip the sign of all three axes
reordered_axes = ['y', 'z', 'x']
new_df_acc = combined_df[reordered_axes + ['classes']].copy()
new_df_acc[reordered_axes] = new_df_acc[reordered_axes].mul(-1)
# Display the first five rows of the modified DataFrame
new_df_acc.head()

# Filtering the test data

In [None]:
# Low-pass filter the re-ordered Movesense acceleration to remove high-frequency noise
# Filter parameters
cutoff_freq = 20
fs = 50
nyquist_freq = 0.5 * fs
n_order = 3
# Butterworth design; the cutoff is given as a fraction of the Nyquist frequency
b, a = butter(N=n_order, Wn=cutoff_freq / nyquist_freq, btype='low')
# Columns of the re-ordered frame that hold the accelerometer measurements
col = ['y', 'z', 'x']
# Run the filter forwards and backwards (filtfilt) down every column
filter_data = filtfilt(b, a, new_df_acc[['y', 'z', 'x']].to_numpy(), axis=0)
# Rebuild a DataFrame from the filtered values and re-attach the class labels
df_filtered_acc = pd.DataFrame(filter_data, columns=col)
df_filtered_acc['classes'] = new_df_acc['classes']

- Visualization plot with filtering

In [None]:
# Filtered acceleration per activity (1 min / 20x):
# MHEALTH chest sensor in the left column, Movesense in the right column.
# The filter is applied to the Movesense axes in their original order (x, y, z),
# not to the re-ordered columns.
colmn = ['x', 'y', 'z']
filter_data_orl = filtfilt(b, a, combined_df[['x', 'y', 'z']].to_numpy(), axis=0)
orgl_filtered_acc = pd.DataFrame(filter_data_orl, columns=colmn)
orgl_filtered_acc['classes'] = combined_df['classes']  # re-attach the class labels

n_activities = len(activity_map_acc)
fig, axs = plt.subplots(n_activities, 2, figsize=(12, 4 * n_activities))
# Filtered MHEALTH recordings of one subject chosen from the experiment (subject_f = subject 6)
subject = filtered_movement_data[filtered_movement_data['subject'] == 'subject_f']
for row, (activity_id, activity_title) in enumerate(activity_map_acc.items()):
    # Right column: filtered Movesense acceleration
    movesense_ax = axs[row, 1]
    movesense_rows = orgl_filtered_acc[orgl_filtered_acc['classes'] == activity_id].reset_index(drop=True)
    for axis_name, colour in zip('xyz', 'rby'):
        movesense_ax.plot(movesense_rows[axis_name], c=colour, alpha=0.7, label='acc. ' + axis_name)
    movesense_ax.set_xlabel('Sample points', fontsize=9.5)
    movesense_ax.set_ylabel('Acceleration (m/s^2)', fontsize=10)
    movesense_ax.set_title(f'{activity_title} - Movesense Dataset', fontsize=11)
    movesense_ax.legend(loc='upper left', fontsize=9)

    # Left column: filtered MHEALTH chest acceleration of the selected subject
    mhealth_ax = axs[row, 0]
    mhealth_rows = subject[subject['classes'] == activity_id].reset_index(drop=True)
    for axis_name, colour in zip('XYZ', 'rby'):
        mhealth_ax.plot(mhealth_rows['acc_CHsensor_' + axis_name], color=colour, alpha=0.7)
    mhealth_ax.set_title(f'{activity_title} - MHEALTH Dataset', fontsize=11)
    mhealth_ax.set_ylabel('\nAcceleration (m/s^2)', fontsize=10)
    mhealth_ax.set_xlabel('Sample points', fontsize=9.5)
    mhealth_ax.legend(["acc. x", "acc. y", "acc. z"], fontsize=9, loc="upper left")
# Display plots
plt.tight_layout()
plt.show()

In [None]:
# Filtered acceleration per activity, restricted to samples 100 to 500:
# MHEALTH chest sensor in the left column, re-ordered Movesense columns in the right column
n_activities = len(activity_map_accrl)
fig, axs = plt.subplots(n_activities, 2, figsize=(12, 4 * n_activities))
# Filtered MHEALTH recordings of one subject chosen from the experiment
subject = filtered_movement_data[filtered_movement_data['subject'] == 'subject_f']
for idx, (activity_id, activity_name) in enumerate(activity_map_accrl.items()):
    # Right column: re-ordered Movesense axes (y, z, x) drawn in red, blue, yellow
    movesense_ax = axs[idx, 1]
    movesense_rows = df_filtered_acc[df_filtered_acc['classes'] == activity_id].reset_index(drop=True)[100:501]
    for axis_name, colour in zip('yzx', 'rby'):
        movesense_ax.plot(movesense_rows[axis_name], c=colour, alpha=0.9, label='acc. ' + axis_name)
    movesense_ax.set_xlabel('Sample points', fontsize=9.5)
    movesense_ax.set_ylabel('Acceleration (m/s^2)', fontsize=10)
    movesense_ax.set_title(f'{activity_name} -  Movesense Dataset', fontsize=11)
    movesense_ax.legend(loc='upper left', fontsize=9)

    # Left column: MHEALTH chest acceleration (X, Y, Z), rows 100..500 of this activity
    mhealth_ax = axs[idx, 0]
    data_subset = subject[subject['classes'] == activity_id].reset_index(drop=True)[100:501]
    for axis_name, colour in zip('XYZ', 'rby'):
        mhealth_ax.plot(data_subset['acc_CHsensor_' + axis_name], color=colour, alpha=0.9)
    mhealth_ax.set_title(f'{activity_name} - MHEALTH Dataset', fontsize=11)
    mhealth_ax.set_ylabel('\nAcceleration (m/s^2)', fontsize=10)
    mhealth_ax.set_xlabel('Sample points', fontsize=9.5)
    mhealth_ax.legend(["acc. x", "acc. y", "acc. z"], fontsize=9, loc="upper left")
# Display plots
plt.tight_layout()
plt.show()

# Feature selection

In [None]:
# Feature selection for the test data: filtered acceleration (y, z, x) as input, 'classes' as label
_feature_ = df_filtered_acc.copy() # features / input (copy, so df_filtered_acc keeps its 'classes' column)
_label_ = _feature_.pop('classes') # labels / output; pop() also removes the column from _feature_
# display the shapes of the features and of the labels
_feature_.shape , _label_.shape

In [None]:
# Show the column names of the test features
_feature_.columns

# Normalize the test data

In [None]:
# Create a new StandardScaler instance to normalise the Movesense acceleration test data (input)
# NOTE(review): this rebinds `scaler`, replacing the instance fitted on the
# training features, and fits the scaling statistics on the test data itself.
# Confirm that per-dataset scaling is intended for this cross-sensor test;
# otherwise reuse the training scaler with transform() only.
scaler = StandardScaler()
# Fit on and transform the test acceleration data
feature_scaled = scaler.fit_transform(_feature_)

# Apply data segmentation function to the Test data

In [None]:
# Cut the scaled test data into windows of 100 samples, advancing 50 samples between windows
# (same settings as used for the training sequences)
X_test_seq,y_test_seq = create_sequences(feature_scaled, _label_, num_time_step=100,stepsize=50)
# Print the shapes of the window array and of the label array
print (X_test_seq.shape, y_test_seq.shape)

# Evaluate the model on the Test data

In [None]:
# Evaluate the trained model on the Movesense test windows; evaluate() returns the loss and the compiled metric
loss, accuracy =model.evaluate(X_test_seq,y_test_seq,verbose=0)
print(f'Test loss: {loss:.4f}')
print(f'\nTest accuracy: {accuracy:.4f}')

In [None]:
# Classify the test windows: take the class with the highest predicted
# probability for every window
test_probabilities = model.predict(X_test_seq, verbose=0)
classify_test_data = test_probabilities.argmax(axis=1)

In [None]:
# Confusion matrix for the classifications on the test windows
class_labels  = ['null','Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
                       'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                      'Jogging','Running','Jump front & back' ]
# Pass the full list of class ids explicitly: without `labels`, a class that
# appears neither in the test targets nor in the predictions (for example the
# null class) is dropped from the matrix, and the tick labels below would no
# longer line up with its rows/columns.
cm = confusion_matrix(y_test_seq, classify_test_data, labels=np.arange(len(class_labels)))
# Plot the confusion matrix for the classification >> test data
plt.figure(figsize=(12, 9),dpi=75)
sns.heatmap(cm,fmt="d",annot=True, cmap='cividis', xticklabels=class_labels, yticklabels=class_labels,linewidths = 0.1)
plt.title('\nConfusion Matrix : Classification-testing', fontsize=14, fontweight='bold')  # title
plt.xlabel('\nPredicted Activities', fontsize=12, fontweight='bold') # x-axis label
plt.ylabel('\nActual Activities', fontsize=12, fontweight='bold') # y-axis label
# display the confusion-matrix plot
plt.show()