# IMU Data Classification

In [1]:
# Statistics
import pandas as pd
import numpy as np

# Visualisation
import matplotlib.pyplot as plt
from tabulate import tabulate
import seaborn as sns
import warnings

# Data processing
from sklearn.impute import KNNImputer

# Machine learning
from sklearn.model_selection import train_test_split

# Helper functions
from helper.helper_filter import *
from helper.helper_preprocess import *
from helper.helper_train import *

## Filter data

In [2]:
# Load the raw recordings of every target action, one folder per action.
# The folders are read in a fixed order; that same order is kept in `folders`
# and later decides which integer label each action receives.
(
    lqw_raw,
    ramp_ascend_raw,
    ramp_descend_raw,
    sit_to_stand_raw,
    stand_to_sit_raw,
) = [
    load_data(path)
    for path in (
        "./IMU_Data/LGW",
        "./IMU_Data/Ramp_ascend",
        "./IMU_Data/Ramp_descend",
        "./IMU_Data/Sit_to_stand",
        "./IMU_Data/Stand_to_sit",
    )
]

# One entry per action; each entry is iterated recording-by-recording in the following cells
folders = [lqw_raw, ramp_ascend_raw, ramp_descend_raw, sit_to_stand_raw, stand_to_sit_raw]

In [3]:
def _select_imu_columns(frame):
    """Return a cleaned copy of one recording with a single leading "Timestamp" column.

    Columns whose (lower-cased) name contains "sync", "offset" or "annotation"
    are removed, as is every timestamp column other than the left-shank one.
    The left-shank timestamp is renamed to "Timestamp" and placed first; all
    remaining columns keep their original relative order.

    The function does not modify ``frame``. It also accepts a frame that has
    already been cleaned (i.e. that already carries a "Timestamp" column and no
    "Shank_L_Timestamp"), so the cell can be re-run without raising.

    Raises:
        KeyError: if neither a left-shank timestamp nor a "Timestamp" column exists.
    """
    excluded_tags = ("sync", "offset", "annotation")

    # Match the reference clock case-insensitively, consistent with the other
    # timestamp checks, so that a differently-cased header is still renamed.
    reference = next(
        (col for col in frame.columns if col.lower() == "shank_l_timestamp"), None
    )
    if reference is None and "Timestamp" in frame.columns:
        # Already cleaned by a previous run of this cell
        reference = "Timestamp"
    if reference is None:
        raise KeyError("Timestamp")

    signal_columns = [
        col
        for col in frame.columns
        if col != reference
        and "timestamp" not in col.lower()
        and not any(tag in col.lower() for tag in excluded_tags)
    ]
    return frame[[reference] + signal_columns].rename(columns={reference: "Timestamp"})


# Keep only the signal columns plus one reference timestamp in every recording
for folder in folders:
    for file in folder:
        file.data_filtered = _select_imu_columns(file.data_filtered)

In [4]:
# Fill missing samples with k-Nearest-Neighbour imputation (k = 5), one recording at a time.
# Recordings without any NaN are left untouched.
for folder in folders:
    for file in folder:
        frame = file.data_filtered
        if frame.isnull().sum().sum() > 0:
            imputer = KNNImputer(n_neighbors=5)
            imputed_values = imputer.fit_transform(frame)
            # fit_transform returns a bare array, so the column names are re-attached here
            file.data_filtered = pd.DataFrame(imputed_values, columns=frame.columns)

# Sanity check: print one message for every recording that still contains NaN values
for file in (recording for folder in folders for recording in folder):
    remaining_nans = file.data_filtered.isnull().sum().sum()
    if remaining_nans > 0:
        print("NaN values left")

## Preprocess

In [5]:
# Smooth every signal column of every recording with the low-pass filter helper
for folder in folders:
    for file in folder:
        # Sampling time: median difference between consecutive timestamps
        ts = file.data_filtered["Timestamp"].diff().median()

        # list(...) snapshots the column names so that writing columns back
        # cannot interfere with the iteration.
        for name in list(file.data_filtered.columns):
            if name == "Timestamp":
                continue

            filtered = low_pass_filter(ts, file.data_filtered[name])

            # The previous version assigned the result to the loop variable only,
            # so the filtered signal never reached the dataframe.
            # NOTE(review): assumes low_pass_filter returns the filtered signal with
            # the same length as its input; if it returns None (in-place helper)
            # the column is left as it is — confirm against helper_filter.
            if filtered is not None:
                file.data_filtered[name] = filtered

In [6]:
# Sliding-window parameters handed to generate_features
# (units are not visible in this notebook — presumably samples; confirm in helper_preprocess)
tw = 350        # window size
dt = 50         # window step

# Compute the windowed features of every recording
for folder in folders:
    for file in folder:
        # generate_features may emit pandas PerformanceWarnings; they are silenced for this call only
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
            windowed = generate_features(file.data_filtered, tw, dt)

        # Skip the first row (per the original note, its gradient is 0)
        file.data_processed = windowed.iloc[1:]

In [7]:
# Combine all five actions into one dataframe and label every row with an integer class id.
# Labels follow the order of `folders`: the first folder gets 1, the last gets 5
# (plain integer label encoding, not one-hot).
all_df = []

for label, folder in enumerate(folders, start=1):
    # Stack the processed recordings of this action.
    # NOTE(review): only the first two recordings of each action are used (folder[:2]) —
    # confirm this truncation is intentional and not a debugging leftover.
    action_df = pd.concat([file.data_processed for file in folder[:2]])

    # Append the target label as the last column; later cells rely on "Action" being last
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
        action_df["Action"] = label

    all_df.append(action_df)

# Combine all dataframes into one
df = pd.concat(all_df)

In [None]:
# Persist the combined, labelled feature table to the working directory (row index is not written)
df.to_csv("combined_data.csv", index=False)

## Train models

In [8]:
# Separate inputs from targets: "Action" was appended last, so it is the final column.
# y is kept as a one-column DataFrame (slice -1:), not a Series.
X, y = df.iloc[:, :-1], df.iloc[:, -1:]

# Hold out 30% of the rows for testing (70% training); the seed is fixed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=109,
)

In [None]:
# ANN: build, train and evaluate the neural-network classifier on the full feature set.
# ANN is not defined in this notebook; it comes from the star-imported helper modules
# (presumably helper.helper_train — confirm).
# Arguments are passed as (X_train, y_train, X_test, y_test).
ann = ANN(X_train, y_train, X_test, y_test)
ann.run_pipeline()
ann.evaluate()

In [None]:
# SVM: build, train and evaluate the support-vector classifier on the full feature set.
# SVM is not defined in this notebook; it comes from the star-imported helper modules
# (presumably helper.helper_train — confirm).
# Arguments are passed as (X_train, y_train, X_test, y_test).
svm = SVM(X_train, y_train, X_test, y_test)
svm.run_pipeline()
svm.evaluate()

In [None]:
# CNN
import keras
from keras import Sequential
from keras.layers import (Conv1D, Dense, Flatten)

def create_model(X_tra, y_tra, X_tes, y_tes):
    """Build, compile and train a small 1D-CNN classifier.

    Args:
        X_tra: training inputs shaped (samples, n_features, 1).
        y_tra: integer class labels of the training inputs.
        X_tes: validation inputs, shaped like ``X_tra``.
        y_tes: integer class labels of the validation inputs.

    Returns:
        A ``(model, history)`` tuple: the fitted Keras model and the object
        returned by ``model.fit``.
    """
    # Take the network dimensions from the data instead of hard-coding them.
    # Sparse categorical cross-entropy uses the label value as the output index,
    # so the softmax layer needs (largest label + 1) units. For labels 1..5 this
    # gives the 6 units that were previously hard-coded.
    n_features = X_tra.shape[1]
    n_outputs = int(max(y_tra.max(), y_tes.max())) + 1

    # Init model
    model = Sequential()

    # Add layers
    model.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_features, 1), padding='same'))
    model.add(Flatten())   # 1D array
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss="sparse_categorical_crossentropy", optimizer='adam', metrics=['accuracy'])

    # The held-out set is only used for validation metrics after each epoch
    history = model.fit(X_tra, y_tra, epochs=10, batch_size=32, verbose=2, validation_data=(X_tes, y_tes))
    return model, history

# Conv1D consumes 3-D input, so every feature row is reshaped to (n_features, 1).
# The feature count is read from the data: with a hard-coded 441, reshape(-1, 441, 1)
# would silently yield a wrong number of samples whenever the real count is a
# different multiple of 441.
n_features = X_train.shape[1]

model, hist = create_model(
    X_train.to_numpy().reshape(-1, n_features, 1),
    y_train.to_numpy(),
    X_test.to_numpy().reshape(-1, n_features, 1),
    y_test.to_numpy(),
)
model.summary()

## Comparison

In [None]:
# Most relevant features according to the ANN
# (the helper is expected to return the top 15 — per the original note; confirm in helper_train)
ann_features, ann_scores = ann.get_most_relevant_features(
    X_train.columns,
    X_train.values,
    y_train.values,
)

# Horizontal bar chart of the returned scores, one bar per feature
ax = plt.gca()
ax.barh(ann_features, ann_scores)
ax.set_xlabel("Permutation Importance")

In [None]:
# Most relevant features according to the SVM
# (the helper is expected to return the top 15 — per the original note; confirm in helper_train)
svm_features, svm_scores = svm.get_most_relevant_features(
    X_train.columns,
    X_train.values,
    y_train.values,
)

# Horizontal bar chart of the returned scores, one bar per feature
ax = plt.gca()
ax.barh(svm_features, svm_scores)
ax.set_xlabel("Permutation Importance")

In [None]:
# Separate the data by body segment.
# "Shank_L" used to be listed twice, so the right shank was never evaluated.
# NOTE(review): assumes the right-shank columns are prefixed "Shank_R", following the
# L/R naming of the foot and thigh sensors — confirm against the column names.
sensors = ["Foot_L", "Foot_R", "Thigh_L", "Thigh_R", "Shank_L", "Shank_R", "Pelvis"]

# For every sensor keep the columns whose name contains the sensor name, plus the
# "Action" label. Column order is preserved, so "Action" stays the last column.
sensors_data = {
    sensor: df[[col for col in df.columns if (sensor in col or "Action" in col)]]
    for sensor in sensors
}

In [None]:
# Train one ANN per sensor and keep the one with the highest accuracy
models = []

for key, data in sensors_data.items():
    # "Action" is the last column of every per-sensor frame
    X_sensor = data.iloc[:, :-1]     # Input features
    y_sensor = data.iloc[:, -1:]     # Target labels

    # Split data into training (70%) and testing set (30%).
    # test_size was 0.7, which inverted the split described here and used for the full model.
    X_tr, X_te, y_tr, y_te = train_test_split(X_sensor, y_sensor, test_size=0.3, random_state=42)

    # Create network and get accuracy.
    # Arguments follow the (X_train, y_train, X_test, y_test) order used for the full-feature ANN;
    # the previous call passed the test features where the training labels belong.
    sensor_ann = ANN(X_tr, y_tr, X_te, y_te)
    sensor_ann.run_pipeline()

    # Add model to the list.
    # NOTE(review): ModelToCompare's parameter order is not visible in this notebook,
    # so its arguments are left exactly as they were — confirm against the helper.
    models.append(ModelToCompare(key, sensor_ann.get_accuracy(), sensor_ann, X_tr, X_te, y_tr, y_te))

# Check highest accuracy
max_acc_model = max(models, key=lambda item: item.accuracy)
max_acc_model.model.evaluate()

In [None]:
# Classification error of the ANN that was trained on the best-performing sensor only
print(f"Classification error of the ANN using {max_acc_model.sensor}: {max_acc_model.model.get_classification_error()}")

In [None]:
# Using only the best-performing sensor, calculate the classification error using an SVM
sensor_df = sensors_data[max_acc_model.sensor]

# "Action" is the last column of every per-sensor frame
X_sensor = sensor_df.iloc[:, :-1]     # Input features
y_sensor = sensor_df.iloc[:, -1:]     # Target labels

# Split data into training (70%) and testing set (30%).
# test_size was 0.7, which inverted the split described here and used for the full model.
X_tr, X_te, y_tr, y_te = train_test_split(X_sensor, y_sensor, test_size=0.3, random_state=42)

# Arguments follow the (X_train, y_train, X_test, y_test) order used for the full-feature SVM;
# the previous call passed the test features where the training labels belong.
max_svm_model = SVM(X_tr, y_tr, X_te, y_te)
max_svm_model.run_pipeline()
print(f"Accuracy of the SVM using {max_acc_model.sensor}: {max_svm_model.get_accuracy()}")
print(f"Classification error of the SVM using {max_acc_model.sensor}: {max_svm_model.get_classification_error()}")
max_svm_model.evaluate()