# IMU Data Classification

In [1]:
import matplotlib.pyplot as plt
from tabulate import tabulate

from helper_filter import *

## Filter data
### Extract data tables

In [2]:
# Read the raw data from each target action and store them in a list
lqw_raw = load_data("./IMU_Data/LGW")
ramp_ascend_raw = load_data("./IMU_Data/Ramp_ascend")
ramp_descend_raw = load_data("./IMU_Data/Ramp_descend")
sit_to_stand_raw = load_data("./IMU_Data/Sit_to_stand")
stand_to_sit_raw = load_data("./IMU_Data/Stand_to_sit")

In [3]:
# Preview some of the data to check format
lqw_raw[0].data.head()

Unnamed: 0,Thigh_R_Timestamp,Thigh_R_Gyroscope_X,Thigh_R_Gyroscope_Y,Thigh_R_Gyroscope_Z,Thigh_R_Accelerometer_X,Thigh_R_Accelerometer_Y,Thigh_R_Accelerometer_Z,Thigh_R_Magnetometer_X,Thigh_R_Magnetometer_Y,Thigh_R_Magnetometer_Z,...,Pelvis_Gyroscope_Y,Pelvis_Gyroscope_Z,Pelvis_Accelerometer_X,Pelvis_Accelerometer_Y,Pelvis_Accelerometer_Z,Pelvis_Magnetometer_X,Pelvis_Magnetometer_Y,Pelvis_Magnetometer_Z,Annotation_Pulse,Annotation_Level
0,241787.1094,0.5345,1.222,-0.8404,-2.1075,9.1086,3.2014,-0.2819,-0.7146,-0.2299,...,0.56,0.0212,-0.1209,9.6397,-1.0211,-0.1688,-0.6494,-0.1636,0.0,0.0
1,241796.875,0.5345,1.222,-0.8404,-2.109,9.0322,3.0518,-0.2819,-0.7223,-0.2321,...,0.56,0.0212,-0.1209,9.7162,-1.0218,-0.1725,-0.6264,-0.1728,0.0,0.0
2,241806.6406,-0.2664,0.2175,-1.1036,-2.1872,9.185,3.2015,-0.28,-0.7049,-0.2148,...,0.56,0.0212,-0.1209,9.7926,-1.0226,-0.1781,-0.6533,-0.1751,0.0,0.0
3,241816.4062,-0.2664,0.2175,-1.1036,-2.1082,9.185,3.1255,-0.2876,-0.7049,-0.2169,...,1.0746,-0.0255,-0.2044,9.7123,-1.4106,-0.1744,-0.6513,-0.1728,0.0,0.0
4,241826.1719,-0.2664,0.2175,-1.1036,-2.0293,9.0322,3.051,-0.2743,-0.6893,-0.2169,...,1.0746,-0.0255,-0.2793,9.7934,-0.9448,-0.1725,-0.6284,-0.1728,0.0,0.0


In [4]:
# Plot histograms to visualize all data
%matplotlib inline
#lqw_raw[0].hist(bins=50,figsize=(30,30))

### Remove unwanted columns

In [5]:
# Check number of columns in each dataframe
data_table = []
head = ["Action","File name", "Column Nr", "Non-standard columns names"]
folders = [lqw_raw, ramp_ascend_raw, ramp_descend_raw, sit_to_stand_raw, stand_to_sit_raw]

for folder in folders:
    for file in folder:
        filtered_columns =[col for col in file.data.columns if 
                           not any(info in col.lower() for info in ["accelerometer", "magnetometer", "gyroscope"])]
        data_table.append([file.folder_name, file.file_name, len(file.data.columns), filtered_columns])

print(tabulate(data_table, headers=head, tablefmt='grid'))

+--------------+---------------------------------------+-------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Action       | File name                             |   Column Nr | Non-standard columns names                                                                                                                                                                                                                                                                                                                                                 |
| LGW          | normal_walk_J_trial_02.dat            |          72 | ['Thigh_R_Timestamp', 'Shank_R_Timestamp', 'Foot_R_Timest

From the table above, we can see how multiple timestamps have been used across different files. It was decided to investigate further whether the timestamps are aligned and can be ignored. It can be seen how, under the LGW some files are missing "Sync" and "Offset" timestamp files, so it was decided to remove all columns that contains them to ensure consistency across the data. Additionally, the LWR from SV is missing the timestamp from the Right sensors and Thigh.

In [15]:
# Drop all columns that contain sync and offset timestamps
for folder in folders:
    for file in folder:
        file.data.drop(columns=[col for col in file.data.columns if 
                                any(info in col for info in ["Sync", "Offset"])], inplace=True)

In [31]:
# Check if all timestamps columns have the same data inside a dataframe and check what is the difference in time between them

for file in folders[0]:
    # Filter columns to get only those containing time
    time_columns = [col for col in file.data.columns if 'timestamp' in col.lower()]

    # Reference column for comparison
    identical_data = pd.Series([True] * len(file.data), index=file.data.index)
    ref_column = file.data[time_columns[0]]

    for col in time_columns[1:]:
        identical_data &= (file.data[col] == ref_column)  # Element-wise AND operation
        print(f"Time difference between {time_columns[0]} and {col}: {file.data[col] - ref_column}")

    are_time_columns_identical = identical_data.all()
    #print(f"Identical data? {are_time_columns_identical}")

Time difference between Thigh_R_Timestamp and Shank_R_Timestamp: 0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
1621    0.0
1622    0.0
1623    0.0
1624    0.0
1625    0.0
Length: 1626, dtype: float64
Time difference between Thigh_R_Timestamp and Foot_R_Timestamp: 0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
1621    0.0
1622    0.0
1623    0.0
1624    0.0
1625    0.0
Length: 1626, dtype: float64
Time difference between Thigh_R_Timestamp and Thigh_L_Timestamp: 0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
1621    0.0
1622    0.0
1623    0.0
1624    0.0
1625    0.0
Length: 1626, dtype: float64
Time difference between Thigh_R_Timestamp and Shank_L_Timestamp: 0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
1621    0.0
1622    0.0
1623    0.0
1624    0.0
1625    0.0
Length: 1626, dtype: float64
Time difference between Thigh_R_Timestamp and Foot_L_Timestamp: 0       0.0
1       0.0
2       0

## Preprocess data
### Apply filtering

In [9]:
# TODO 

### Apply the slinding window technique

In [10]:
# TODO

## Train models
### ANN

In [11]:
# TODO

### SVM

In [12]:
# TODO

### CNN

In [13]:
# TODO

### Comparison

In [14]:
# TODO