# This file's purpose is to preprocess self-collected data. It includes trimming, manual labelling, calibration, and scaling of the values.

In [None]:
# Import Libraries

from google.colab import drive
import pandas as pd
import numpy as np
import torch
from torch.optim import Adam
import torch.nn as nn


# Mount Google Drive at /content/drive so the dataset can be read by file path from this Colab runtime
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Load the self-gathered dataset into a DataFrame

In [None]:
# Location of the raw, self-collected sensor recording on the mounted Google Drive
file_path = "/content/drive/My Drive/PROJECT/data_sheets/sensor_data2.csv"

# Read the whole CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows (the last expression of the cell is rendered)
df.head(5)


Unnamed: 0,RArm_ax,RArm_ay,RArm_az,RArm_gx,RArm_gy,RArm_gz,RArm_mx,RArm_my,RArm_mz,RShank_ax,...,RThigh_mz,LArm_ax,LArm_ay,LArm_az,LArm_gx,LArm_gy,LArm_gz,LArm_mx,LArm_my,LArm_mz
0,15984,-1492,3184,-270,122,-15,-5,-109,9,16572,...,100,12308,-3056,10940,-2381,743,441,17,-67,147
1,15996,-1776,3236,-125,144,-54,-7,-105,9,16672,...,107,12372,-3356,10408,-2948,603,243,19,-65,147
2,16144,-1720,3076,-1,189,-30,-6,-114,6,16608,...,98,12248,-3648,10332,-2813,288,-19,24,-64,148
3,16064,-1580,3128,-3,227,-27,-9,-109,9,16540,...,105,12364,-3980,10200,-2476,-91,-143,18,-68,152
4,16112,-1656,3248,-276,221,-96,-8,-104,12,16644,...,103,12528,-3852,10104,-2116,-373,-200,20,-62,150


In [None]:
# Report (rows, columns) of the raw recording
print("Test Data shape: {}".format(df.shape))

Test Data shape: (14768, 54)


Trim the dataset at the beginning and at the end, as no activities take place in the first and last seconds of the recording (these seconds were spent starting and terminating the session).

In [None]:
# Number of rows cut from the start and the end of the recording
# (session start-up / shutdown, no activity performed).
TRIM_START_ROWS = 58
TRIM_END_ROWS = 85

# .copy() gives df_trim its own data instead of a slice of df, so adding
# columns to it later does not raise SettingWithCopyWarning.
df_trim = df.iloc[TRIM_START_ROWS:-TRIM_END_ROWS].copy()
print(df_trim.shape, " - trimmed shape")

(14625, 54)  - trimmed shape


# Create label set for my dataset

17 sets of exercises were performed. Each set consisted of 5 exercises. Each set's length was recorded in seconds (with a possible error of 0.1 seconds). I will assume that every exercise within a set had the same length, and will therefore divide each set into equal exercise lengths.

In [None]:
import statistics

# Lengths of each set in seconds
set_to_sec = [14.4, 14.8, 16.1, 16.5, 18.6, 16.9, 13.9, 16.8, 14.6, 17.4, 21, 22.2, 21.4, 16.4, 15.6, 19, 16.9]

label_num_in_seq = [4, 21, 10, 22, 19]  # Activity labels in correct order: Jumps, hand claps, trunk twist, hand crossing, arm elevation
lens = []  # Length of one exercise in each set, in milliseconds
labels = []  # One activity label per frame (y_test)

# Split every set into equally long exercises and emit one label per frame
for sec in set_to_sec:
  # Convert the set length to milliseconds and divide it between the exercises of the set
  len_of_exercise = sec*1000/len(label_num_in_seq)
  lens.append(len_of_exercise)
  # 20 = milliseconds in 1 frame.
  # round() rather than int(): the quotient comes from float arithmetic, and a value such as
  # 143.99999999999997 would otherwise be truncated to 143 and silently drop a frame per exercise.
  frames_per_exercise = round(len_of_exercise / 20)
  # Add labels for each activity in the set in order
  for label in label_num_in_seq:
    labels.extend([label] * frames_per_exercise)

print(f"Mean length of exercise is: {statistics.mean(lens)/1000} seconds")

Mean length of exercise is: 3.441176470588235 seconds


In [None]:
# Add labels to dataframe.
# assign() builds a new DataFrame with the extra column instead of writing into
# a slice of df, which avoids SettingWithCopyWarning. A length mismatch between
# labels and the frame still raises ValueError.
df_trim = df_trim.assign(labels=labels)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_trim["labels"] = labels


Calibration and Scaling factors for each sensor

Each list is ordered as: [X_Acc_Offset, Y_Acc_Offset, Z_Acc_Offset, X_Gyr_Offset, Y_Gyr_Offset, Z_Gyr_Offset, X_Mag_Offset, Y_Mag_Offset, Z_Mag_Offset, X_Mag_Scale, Y_Mag_Scale, Z_Mag_Scale]

In [None]:
# Per-sensor calibration values. Every list holds 12 numbers, grouped below as
# accelerometer offsets, gyroscope offsets, magnetometer offsets and
# magnetometer scales (x, y, z each).
calibration_bases = {
    "Back": [
        -2182.9, -1486.6, -16023.6,  # accelerometer offsets
        -359.6, -207.7, 233.4,       # gyroscope offsets
        -32.0, 71.0, -34.5,          # magnetometer offsets
        48.0, 49.0, 38.5,            # magnetometer scales
    ],
    "RThigh": [
        -2663.2, -461.2, 18270.2,
        22.6, 145.3, 9.6,
        -34.0, -27.5, 158.5,
        43.0, 42.0, 23.5,
    ],
    "LArm": [
        -977.2, -1529.1, 17148.9,
        -221.9, -79.2, 68.1,
        -20.5, 34.0, 127.5,
        48.5, 62.0, 31.5,
    ],
    "RArm": [
        -2677.8, -518.6, 16873.4,
        -72.1, 102.4, -2.8,
        -37.5, 52.0, 71.0,
        42.5, 44.0, 26.0,
    ],
    "RShank": [
        1269.5, -348.9, 16164.3,
        -2143.2, 1893.4, -143.2,
        15.5, -32.0, 114.0,
        47.5, 46.0, 41.0,
    ],
    "LShank": [
        -375.4, -753.7, 16413.5,
        -87.1, 84.7, 160.8,
        -22.5, 126.0, 118.0,
        49.5, 50.0, 26.0,
    ],
}

# Order in which the sensors' columns are laid out in the processed DataFrame.
# Alternative ordering kept for reference: ["RArm", "RShank", "LShank", "Back", "RThigh", "LArm"]
sensors_in_order = [
    "LArm",
    "Back",
    "RThigh",
    "RShank",
    "LShank",
    "RArm",
]

Sensor Ordering: 1 -RLA;  3 - BACK; 5 - LLA; 6 - RC; 7 - RT; 9 - LC
Sensors to Remove: 2, 4, 8

In [None]:
def convert_raw_to_real(sensor_name, df, calibration=None, acc_scale=16384, gyro_scale=131):
    """Calibrate and scale the nine raw channels of one sensor.

    For every channel the result is ``(raw - offset) / divisor``.

    Parameters
    ----------
    sensor_name : str
        Column prefix of the sensor (e.g. ``"LArm"``); the columns
        ``<sensor_name>_ax`` ... ``<sensor_name>_mz`` are read from ``df``.
    df : pandas.DataFrame
        Frame holding the raw readings.
    calibration : dict, optional
        Maps a sensor name to a list of 12 values: accelerometer offsets
        (x, y, z), gyroscope offsets (x, y, z), magnetometer offsets (x, y, z)
        and magnetometer scales (x, y, z). Defaults to the notebook-level
        ``calibration_bases``.
    acc_scale : float, optional
        Divisor for the accelerometer channels.
    gyro_scale : float, optional
        Divisor for the gyroscope channels.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index as ``df`` and the nine converted columns
        in the order ax, ay, az, gx, gy, gz, mx, my, mz.

    Raises
    ------
    KeyError
        If ``sensor_name`` is missing from the calibration table or one of its
        columns is missing from ``df``.
    IndexError
        If the calibration list for the sensor has fewer than 12 values.
    """
    # NOTE(review): 16384 and 131 look like the counts-per-g / counts-per-(deg/s)
    # sensitivities of an MPU-6050-class IMU at +-2 g and +-250 deg/s -- confirm
    # against the actual sensor configuration.
    if calibration is None:
        calibration = calibration_bases  # notebook-level table

    params = calibration[sensor_name]
    if len(params) < 12:
        raise IndexError(
            f"calibration for {sensor_name!r} needs 12 values, got {len(params)}"
        )

    suffixes = ["ax", "ay", "az", "gx", "gy", "gz", "mx", "my", "mz"]
    offsets = params[:9]
    # Accelerometer and gyroscope use fixed divisors; the magnetometer uses
    # the per-axis scales stored at positions 9..11 of the calibration list.
    divisors = [acc_scale] * 3 + [gyro_scale] * 3 + list(params[9:12])

    converted = {}
    for suffix, offset, divisor in zip(suffixes, offsets, divisors):
        column = sensor_name + "_" + suffix
        converted[column] = (df[column] - offset) / divisor

    return pd.DataFrame(converted)

# Save processed values in a DataFrame

In [None]:
# Convert each sensor separately, then join all results column-wise in one step.
# Collecting the frames first avoids re-copying a growing DataFrame on every
# iteration and avoids concatenating with an empty placeholder frame.
converted_frames = []
for sensor in sensors_in_order:
  df_c = convert_raw_to_real(sensor, df_trim)
  print(df_c.shape)
  converted_frames.append(df_c)

df_trim2 = pd.concat(converted_frames, axis=1)


(14625, 9)
(14625, 9)
(14625, 9)
(14625, 9)
(14625, 9)
(14625, 9)


# Add the label set to the DataFrame

In [None]:
# Attach the per-frame activity labels as the last column, then preview the first ten rows
df_trim2 = df_trim2.assign(labels=labels)
df_trim2.head(10)

Unnamed: 0,LArm_ax,LArm_ay,LArm_az,LArm_gx,LArm_gy,LArm_gz,LArm_mx,LArm_my,LArm_mz,Back_ax,...,RArm_ax,RArm_ay,RArm_az,RArm_gx,RArm_gy,RArm_gz,RArm_mx,RArm_my,RArm_mz,labels
58,2.059583,-0.744562,-0.604547,135.510687,-9.036641,-40.932061,0.443299,-2.080645,-1.412698,1.562189,...,1.595813,0.3422,-1.60116,30.458779,-23.514504,-38.459542,-1.023529,-2.840909,-5.307692,4
59,2.059583,-0.751154,-0.649957,126.083206,22.665649,-67.183969,0.443299,-2.016129,-1.222222,1.867609,...,2.163379,0.114172,-1.599695,134.038931,-5.461069,-21.818321,-0.929412,-2.75,-5.461538,4
60,2.059583,-0.412531,-0.713434,82.693893,9.589313,-54.725954,0.257732,-2.032258,-1.31746,2.133173,...,1.930774,0.375403,-2.003015,70.88626,9.974046,-15.741985,-0.6,-2.75,-5.307692,4
61,2.059583,-0.104181,-0.780084,17.167176,11.963359,8.083206,0.195876,-2.048387,-1.222222,2.133173,...,1.384387,0.123694,-2.239832,-9.594656,22.775573,-13.635115,-0.764706,-2.681818,-5.346154,4
62,2.059583,0.153387,-0.674615,-50.527481,4.520611,47.670992,0.154639,-2.112903,-1.222222,2.133173,...,1.363391,0.297766,-2.363611,-48.022137,60.042748,-6.207634,-0.717647,-2.954545,-5.346154,4
63,2.059583,-0.166437,-0.658014,-75.580916,-6.181679,30.739695,0.319588,-2.016129,-1.15873,2.133173,...,1.774768,0.436438,-2.304529,-50.396183,99.340458,0.326718,-0.835294,-2.886364,-5.153846,4
64,2.059583,-0.559503,-0.528619,-28.199237,-14.044275,-15.199237,0.319588,-1.887097,-1.412698,2.133173,...,2.163379,0.317297,-2.252527,1.435878,135.050382,22.39542,-0.788235,-2.931818,-5.0,4
65,2.059583,-0.580499,-0.489557,27.098473,-18.01374,-32.88626,0.298969,-2.064516,-1.380952,2.133173,...,2.163379,-8.5e-05,-1.943445,61.985496,166.287023,53.937405,-0.811765,-3.045455,-4.807692,4
66,1.732495,-0.431329,-0.535211,25.396183,-16.349618,-30.932061,0.402062,-2.048387,-1.539683,1.854181,...,2.163379,-0.331873,-1.15072,97.145802,192.00458,74.196947,-0.788235,-3.25,-4.769231,4
67,1.127026,-0.293878,-0.61651,5.167176,-9.952672,-8.535115,0.175258,-2.064516,-1.31746,1.41546,...,2.058704,-0.482507,-0.318201,140.283206,213.951145,73.975573,-0.505882,-3.386364,-4.538462,4


In [None]:
# Report (rows, columns) of the processed, labelled frame
print(f"{df_trim2.shape}")

(14625, 55)


# Save the .csv file

In [None]:
# Write the processed, labelled frame to a CSV file in the current working directory.
# The DataFrame index is written as the first (unnamed) column.
df_trim2.to_csv(path_or_buf="sensor_data_updated3.csv")