- Training and testing with IMU (acceleration) measurements (Movesense dataset)


In [None]:
# Importing libraries
import pandas as pd
import seaborn as sns
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
from keras import layers
import keras
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.signal import butter, filtfilt
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the labelled Movesense dataset: every .csv file in the folder is read
# and the rows are stacked into a single dataframe.
folder_path_imu ='C:/Users/val-c/Desktop/movesense_lblled_imu'
# os.listdir returns entries in arbitrary order; sort them so the rows are
# concatenated in the same order on every run (the sliding-window segmentation
# further down depends on row order).
file_list = sorted(os.listdir(folder_path_imu))
# Loop over the files and read each csv into a dataframe
dataframes = []
for file_name in file_list:
    if file_name.endswith(".csv"):  # files in csv format
        file_path = os.path.join(folder_path_imu, file_name)
        df = pd.read_csv(file_path)
        dataframes.append(df)
# Fail early with an explicit message instead of pd.concat's generic error
if not dataframes:
    raise FileNotFoundError(f"No .csv files found in {folder_path_imu!r}")
# Combine all the dataframes into one with a fresh 0..N-1 index
movement_data = pd.concat(dataframes, ignore_index=True)
movement_data.head(n=5) # display first five rows

In [None]:
# Drop the gyroscope measurements from the dataframe (only acceleration is used).
# errors='ignore' keeps this cell re-runnable: on a second execution the columns
# are already gone and a plain drop would raise KeyError.
movement_data = movement_data.drop(columns=['gx', 'gy', 'gz'], errors='ignore')

In [None]:
# Print the summary produced by DataFrame.info() for the combined dataframe
movement_data.info()

In [None]:
# Count the missing values per column and show them as a one-column table
movement_data.isnull().sum().to_frame(name='Total')

In [None]:
# Distinct class ids found in the 'classes' column, printed in ascending order
unique_label = np.unique(movement_data["classes"])
class_label = np.sort(unique_label)
print(class_label)

In [None]:
# Plot the number of samples present in each class
# Activity names, listed in ascending order of the numeric class id
# (assumes every class id occurs in the data - TODO confirm).
class_label = ['Standing still','Sitting and relaxing','Lying down','Walking', 'Climbing stairs',
               'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)',
               'Cycling','Jogging','Running','Jump front & back']
plt.figure(figsize=(6, 6)) # figure sizing
num_of_classes = len(class_label)
# value_counts() orders its result by frequency, not by class id, so the counts
# must be re-sorted by class id before pairing them with the names above;
# otherwise each bar is labelled with the wrong activity.
class_counts = movement_data["classes"].value_counts().sort_index()
# Bar plot of the activity distribution; plain arrays avoid any index alignment
sns.barplot(x=class_label, y=class_counts.to_numpy(), color='k', width=0.7)
# Replace the numerical x-axis ticks with class labels
plt.xticks(range(num_of_classes), class_label, rotation=45, ha='right')
# the x-axis and y-axis labels, title
plt.xlabel('\nClasses', fontsize=10)
plt.ylabel('\nCount', fontsize=10)
plt.title("\n Activity distribution of the movesense dataset", fontsize=12)
# display plot
plt.tight_layout()
plt.show()

In [None]:
# Descriptive statistics, transposed so each dataframe column becomes a row
movement_data.describe().transpose()

In [None]:
# Data filtering: low-pass Butterworth filter applied to the accelerometer axes
# to remove high-frequency noise.
cols = ['x', 'y', 'z']  # dataframe columns to filter

## Low-pass filter parameters
fs = 52  # sampling rate (Hz)
cutoff_freq = 20  # cut-off frequency (Hz)
n_order = 3  # filter order
nyquist_freq = 0.5 * fs

# Design the filter; butter() takes the cut-off as a fraction of the Nyquist frequency
b, a = butter(n_order, cutoff_freq / nyquist_freq, btype='low')

# filtfilt applies the filter forwards and backwards along the sample axis.
# NOTE(review): this filters the concatenated recordings as one continuous
# signal, so samples near file boundaries are mixed - confirm that is acceptable.
filtered_data = filtfilt(b, a, movement_data[cols].to_numpy(), axis=0)

# Rebuild a dataframe from the filtered signal and re-attach the labels
filtered_movement_data = pd.DataFrame(filtered_data, columns=cols)
filtered_movement_data['classes'] = movement_data['classes']

# Show the first five rows of the filtered dataframe
filtered_movement_data.head()

In [None]:
# Split the filtered dataframe into model inputs and target
Label = filtered_movement_data['classes'].copy()  # class labels / output
Features = filtered_movement_data.drop(columns='classes')  # features / input

In [None]:
# Standardise the input variables with StandardScaler
# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validation split, so test-fold statistics influence the scaling -
# confirm whether that is intended.
scaler = StandardScaler().fit(Features)
normalized_feature = scaler.transform(Features)

In [None]:
# Data segmentation with a sliding window
def create_sequences(X, y, num_time_step, stepsize=1):
    """Cut a signal into fixed-length windows and assign one label to each.

    Parameters
    ----------
    X : array-like
        Input samples, indexed by sample along the first axis.
    y : array-like
        One target label per sample. Converted to a NumPy array so that labels
        are always looked up by position, even when ``y`` is a pandas Series
        whose index does not start at 0.
    num_time_step : int
        Length of each segment, in samples.
    stepsize : int, default 1
        Number of samples the window advances between consecutive segments.

    Returns
    -------
    tuple of np.ndarray
        ``(segments, labels)``. Each window takes the label of its last sample.

    Raises
    ------
    ValueError
        If ``num_time_step`` or ``stepsize`` is smaller than 1.
    """
    if num_time_step < 1:
        raise ValueError("num_time_step must be a positive integer")
    if stepsize < 1:
        raise ValueError("stepsize must be a positive integer")

    # Positional arrays: avoids label-based Series lookups (KeyError or wrong
    # label on a non-default index) and is faster inside the loop.
    X = np.asarray(X)
    y = np.asarray(y)

    # Start offsets of every window that fits entirely inside X
    starts = range(0, len(X) - num_time_step + 1, stepsize)
    segment = [X[start:start + num_time_step] for start in starts]
    label = [y[start + num_time_step - 1] for start in starts]
    return np.array(segment), np.array(label)

In [None]:
# Segment the normalised signal: 104-sample windows advanced by 52 samples
# (at the 52 Hz rate set in the filtering cell: 2 s windows with 50 % overlap)
X_seq, y_seq = create_sequences(
    normalized_feature,
    Label,
    num_time_step=104,
    stepsize=52,
)
# Shapes of the windowed inputs and of the per-window labels
print(X_seq.shape, y_seq.shape)

In [None]:
# Dimensions used for the input and output layers of the neural network
window_length, num_features = X_seq.shape[1:3]
# Width of the one-hot encoding as inferred by to_categorical from the labels
num_outputs = to_categorical(y_seq).shape[1]
print(window_length,num_features,num_outputs)

In [None]:
# Perform cross-validation: train a fresh CNN-1D + LSTM model on each fold and
# record its accuracy on the held-out windows.
scores = []
# 5-fold cross-validator: each fold holds out 1/5 (20 %) of the windows as test data.
# random_state fixes the shuffled split so the folds are the same on every run.
# NOTE(review): windows overlap by 50 % and are shuffled across folds, so
# neighbouring windows can land in both train and test - confirm this is acceptable.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
for i, (train, test) in enumerate(kfold.split(X_seq, y_seq)):
    print(f'Fold {i}:')
    print(train.shape)
    print(test.shape)
    # One-hot encode the targets. num_classes is pinned to the model's output
    # width so the encoding cannot shrink when a fold lacks the highest class id.
    y_train_seq = to_categorical(y_seq[train], num_classes=num_outputs)
    y_test_seq = to_categorical(y_seq[test], num_classes=num_outputs)

    # Configuring the CNN-1D_LSTM network architecture
    input_shape = (window_length, num_features)
    model = keras.Sequential([
        # Input layer
        layers.Input(input_shape),
        layers.Conv1D(filters=64, kernel_size=3, padding='same'), # CNN-1D layer 1
        layers.BatchNormalization(), # batch_normalization
        layers.Activation('relu'), # activation function
        # Hidden Conv Layer
        layers.Conv1D(filters=64, kernel_size=3, padding='same'), # CNN-1D layer 2
        layers.BatchNormalization(), # batch_normalization
        layers.Activation('relu'), # activation function
        # Recurrent LSTM Layers
        layers.LSTM(units=128, return_sequences=True), # LSTM layer 1
        layers.Activation('relu'), # activation function
        layers.LSTM(units=128, return_sequences=False), # LSTM layer 2
        layers.Activation('relu'), # activation function
        # Dense layers
        layers.Dense(128, activation="relu"),
        # Activation function for the output layer 'softmax' for Multi-classification
        layers.Dense(num_outputs, activation='softmax')])

    # Compiling the model with an exponentially decaying learning rate
    initial_learning_rate = 0.001
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=10,
        decay_rate=0.9)
    opt = keras.optimizers.RMSprop(learning_rate=lr_schedule)#optimizer
    model.compile(loss= keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

    # Train the model; the held-out fold is passed as validation data
    model.fit(X_seq[train], y_train_seq , validation_data=(X_seq[test], y_test_seq), epochs=10, batch_size=64)

    # Predicting on the testing set
    y_pred = model.predict(X_seq[test])
    # Converting predicted probabilities to class labels
    predctd_label = np.argmax(y_pred, axis=1)
    true_label = y_seq[test]

    # Calculate accuracy score for the current fold
    fold_accuracy = accuracy_score(true_label, predctd_label)
    # Storing the accuracy score for the fold
    scores.append(fold_accuracy)

# Print the accuracy scores for each fold and finally compute the mean accuracy
print('\n')
print(scores)
print('\n')
avg_acc = np.mean(scores)
print("Mean_Accuracy:", avg_acc)

In [None]:
# Performance of the model on the testing set
## Mean cross-validation accuracy expressed as a percentage (%)
avg_accuracy_percent = 100 * avg_acc
# Report the mean accuracy with one decimal place
print(f"Mean_Accuracy: {avg_accuracy_percent:.1f} %")

In [None]:
# Names of the performed activities, in ascending order of the numeric class id
class_labels  = ['Standing still','Sitting and relaxing','Lying down','Walking','Climbing stairs',
                 'Waist bends forward', 'Frontal elevation of arms','Knees bending (crouching)','Cycling',
                 'Jogging','Running','Jump front & back']
# Pin the label set to the class ids present in the whole dataset so the matrix
# always has one row/column per activity, in the same order as class_labels,
# even if a class is missing from this fold or the network predicts an unused index.
label_ids = np.unique(y_seq)
# Confusion matrix of the test set.
# NOTE: y_test_seq / predctd_label hold the values of the LAST cross-validation
# fold only, so this figure describes that single fold.
conf_matrix = confusion_matrix(np.argmax(y_test_seq, axis=1), predctd_label, labels=label_ids)
# Row-normalise to percentages of each actual class; rows without any test
# sample stay at 0 instead of becoming NaN through a division by zero.
row_totals = conf_matrix.sum(axis=1, keepdims=True)
class_lbl_accuracy = np.divide(conf_matrix.astype('float'), row_totals,
                               out=np.zeros(conf_matrix.shape, dtype=float),
                               where=row_totals != 0) * 100
# Plot the confusion matrix with accuracy percentages
plt.figure(figsize=(10, 7), dpi=85)
sns.heatmap(class_lbl_accuracy, annot=True, fmt='.1f', cmap='Purples', xticklabels=class_labels, yticklabels=class_labels, linewidths=0.5) #plot data
plt.title('\nConfusion Matrix : Testing Classification Accuracy (%)', fontsize=13, fontweight='bold')
plt.xlabel('\nPredicted Activities', fontsize=12, fontweight='bold')
plt.ylabel('\nActual Activities', fontsize=12, fontweight='bold')
plt.show() # display plot

- Visualize accelerometer measurements

In [None]:
# Performed activities, keyed by the numeric class id used in the 'classes' column
activity_acc = {1:'Standing still(1 min)',2:'Sitting and relaxing (1 min)',3:'Lying down (1 min)',4:'Walking (1 min)',
                    5: 'Climbing stairs (1 min)',6:'Waist bends forward (20x)',7:'Frontal elevation of arms (20x)',
                    8:'Knees bending (crouching) (20x)',9:'Cycling', 10:'Jogging (1 min)',11:'Running (1 min)',12:'Jump front & back (20x)'}
# One subplot per activity on a 6 x 2 grid
fig, axes = plt.subplots(6,2, figsize=(12,14),dpi=80)
# (column, line colour, legend entry) for each accelerometer axis, in draw order
axis_styles = (('x', 'r', 'acc. x'), ('y', 'b', 'acc. y'), ('z', 'y', 'acc. z'))
# Plot the filtered acceleration measurements of every performed activity
for ax, i in zip(axes.flat, activity_acc):
    # Select this activity's samples once and renumber them from 0
    activity_samples = filtered_movement_data[filtered_movement_data['classes']==i].reset_index(drop=True)
    for column, colour, legend_text in axis_styles:
        ax.plot(activity_samples[column], c=colour, alpha=0.9, label=legend_text)
    ax.set_title(f'\n{activity_acc[i]} - Accelerometer', fontsize=11) # title
    ax.set_xlabel('\nSample points', fontsize=10) # x label
    ax.set_ylabel('\nAcceleration (m/s^2)', fontsize=10)  # y label
    ax.legend(loc="upper left", fontsize=9)
# Adjust the layout and display the plots
plt.tight_layout()
plt.show()