In [1]:
import pandas as pd
import os

In [2]:
data_dir = "./processed/imu/"
print(os.listdir(data_dir))

['participant_01.csv', 'participant_02.csv', 'participant_03.csv', 'participant_04.csv', 'participant_05.csv', 'participant_06.csv', 'participant_07.csv', 'participant_08.csv', 'participant_09.csv', 'participant_10.csv', 'participant_11.csv', 'participant_12.csv', 'participant_13.csv', 'participant_14.csv']


# **Data Preparation**

In [27]:
participant = 14
df = pd.read_csv(data_dir+ os.listdir(data_dir)[participant-1])
print(os.listdir(data_dir)[participant-1])
print(df.head(10))

participant_14.csv
   Timestamp   Sensor         QuatI     QuatJ     QuatK       QuatSum
0      0.000        0 -4.329637e-17 -0.707107  0.707107 -4.329637e-17
1      0.001        1 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
2      0.001        2 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
3      0.001        3 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
4      0.001        4 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
5      0.002        5 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
6      0.034        0 -4.329637e-17 -0.707107  0.707107 -4.329637e-17
7      0.034        1 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
8      0.035        2 -5.000000e-01 -0.500000  0.500000 -5.000000e-01
9      0.035        3 -5.000000e-01 -0.500000  0.500000 -5.000000e-01


In [None]:
#Extracting Sensor 0 timestamps
sensor_0_rows = df[df[' Sensor'] == 0].reset_index(drop=True)

#Iterate over Sensor 0 rows and collect the next five sensor readings
aligned_data = []

for idx, row in sensor_0_rows.iterrows():
    timestamp = row['Timestamp']
    
    #Collect the next five sensors(assuming they appear sequentially)
    sensor_group = df[df['Timestamp'] > timestamp].head(5).reset_index(drop=True)
    
    if len(sensor_group) == 5:  #Ensure all sensors exist before aligning
        aligned_row = {
            'Timestamp': timestamp,  #Preserve Sensor 0 timestamp
            'QuatI_0': row['QuatI'], 'QuatJ_0': row['QuatJ'], 'QuatK_0': row['QuatK'], 'QuatSum_0': row['QuatSum']
        }
        
        for i in range(5):  #Assign sensors 1 to 5 in the same row
            aligned_row.update({
                f'QuatI_{i+1}': sensor_group.loc[i, 'QuatI'],
                f'QuatJ_{i+1}': sensor_group.loc[i, 'QuatJ'],
                f'QuatK_{i+1}': sensor_group.loc[i, 'QuatK'],
                f'QuatSum_{i+1}': sensor_group.loc[i, 'QuatSum']
            })
        
        aligned_data.append(aligned_row)

#Convert aligned data into a new DataFrame
aligned_df = pd.DataFrame(aligned_data)

# Display the final structured dataframe
aligned_df.head(10)

## **Data Augmentation**

## **Ensuring Data Integrity**

In [21]:
dummy = pd.DataFrame()
for i in range(6):  # Check for all six sensors
    dummy[f'Quaternion_Norm_{i}'] = (
        aligned_df[f'QuatI_{i}']**2 +
        aligned_df[f'QuatJ_{i}']**2 +
        aligned_df[f'QuatK_{i}']**2 +
        aligned_df[f'QuatSum_{i}']**2
    ) ** 0.5

print("Quaternion norms:\n", dummy[[f'Quaternion_Norm_{i}' for i in range(6)]].describe())

Quaternion norms:
        Quaternion_Norm_0  Quaternion_Norm_1  Quaternion_Norm_2  \
count             7977.0             7977.0             7977.0   
mean                 1.0                1.0                1.0   
std                  0.0                0.0                0.0   
min                  1.0                1.0                1.0   
25%                  1.0                1.0                1.0   
50%                  1.0                1.0                1.0   
75%                  1.0                1.0                1.0   
max                  1.0                1.0                1.0   

       Quaternion_Norm_3  Quaternion_Norm_4  Quaternion_Norm_5  
count             7977.0             7977.0             7977.0  
mean                 1.0                1.0                1.0  
std                  0.0                0.0                0.0  
min                  1.0                1.0                1.0  
25%                  1.0                1.0                1.

In [22]:
expected_sensors = ['QuatI_0', 'QuatI_1', 'QuatI_2', 'QuatI_3', 'QuatI_4', 'QuatI_5']
missing_sensors = [sensor for sensor in expected_sensors if sensor not in aligned_df.columns]

if missing_sensors:
    print(f"Warning! Missing sensor columns: {missing_sensors}")
else:
    print("Sensor order is correctly preserved.")

Sensor order is correctly preserved.


In [23]:
missing_values = aligned_df.isnull().sum()
print("Missing Values Per Column:\n", missing_values)

Missing Values Per Column:
 Timestamp    0
QuatI_0      0
QuatJ_0      0
QuatK_0      0
QuatSum_0    0
QuatI_1      0
QuatJ_1      0
QuatK_1      0
QuatSum_1    0
QuatI_2      0
QuatJ_2      0
QuatK_2      0
QuatSum_2    0
QuatI_3      0
QuatJ_3      0
QuatK_3      0
QuatSum_3    0
QuatI_4      0
QuatJ_4      0
QuatK_4      0
QuatSum_4    0
QuatI_5      0
QuatJ_5      0
QuatK_5      0
QuatSum_5    0
dtype: int64


### **Annotating data**

In [24]:
activity_datadir = "../activity_timestamps/"
os.listdir(activity_datadir)

['participant_01.csv',
 'participant_02.csv',
 'participant_03.csv',
 'participant_04.csv',
 'participant_05.csv',
 'participant_06.csv',
 'participant_07.csv',
 'participant_08.csv',
 'participant_09.csv',
 'participant_10.csv',
 'participant_11.csv',
 'participant_12.csv',
 'participant_13.csv',
 'participant_14.csv']

In [25]:
activity_df = pd.read_csv(activity_datadir+os.listdir(activity_datadir)[participant-1])
print(os.listdir(activity_datadir)[participant-1])
activity_df.head()

participant_13.csv


Unnamed: 0,Activity,Timestamp
0,pouring water,5.070043
1,pointing finger,50.375619
2,thumbs up,58.205179
3,writing,62.371104
4,using controller,116.814917


In [26]:
aligned_df['Timestamp'] = pd.to_numeric(aligned_df['Timestamp'])  # Ensure numeric timestamps

activity_df['Timestamp'] = pd.to_numeric(activity_df['Timestamp'])  # Ensure numeric timestamps

aligned_df = aligned_df.sort_values(by='Timestamp').reset_index(drop=True)
activity_df = activity_df.sort_values(by='Timestamp').reset_index(drop=True)

# Function to find the nearest activity timestamp
def find_nearest_activity(imu_time, activity_df):
    nearest_row = activity_df[activity_df['Timestamp'] <= imu_time].tail(1)  # Find the closest previous timestamp
    return nearest_row['Activity'].values[0] if not nearest_row.empty else "Unknown"

# Apply annotation by finding the closest activity timestamp for each IMU entry
aligned_df['Activity'] = aligned_df['Timestamp'].apply(lambda t: find_nearest_activity(t, activity_df))

# Save the annotated IMU data
aligned_df.to_csv(f"./processed/annotated/participant_{participant}.csv", index=False)

#Display the first few rows of the annotated dataset
print(aligned_df.head())

   Timestamp       QuatI_0   QuatJ_0   QuatK_0     QuatSum_0  QuatI_1  \
0      0.000 -4.329637e-17 -0.707107  0.707107 -4.329637e-17     -0.5   
1      0.031 -4.329637e-17 -0.707107  0.707107 -4.329637e-17     -0.5   
2      0.066 -4.329637e-17 -0.707107  0.707107 -4.329637e-17     -0.5   
3      0.100 -4.329637e-17 -0.707107  0.707107 -4.329637e-17     -0.5   
4      0.131 -4.329637e-17 -0.707107  0.707107 -4.329637e-17     -0.5   

   QuatJ_1  QuatK_1  QuatSum_1  QuatI_2  ...     QuatSum_3       QuatI_4  \
0     -0.5      0.5       -0.5     -0.5  ... -4.329637e-17 -5.000000e-01   
1     -0.5      0.5       -0.5     -0.5  ... -4.329637e-17 -5.000000e-01   
2     -0.5      0.5       -0.5     -0.5  ... -5.000000e-01 -4.329637e-17   
3     -0.5      0.5       -0.5     -0.5  ... -5.000000e-01 -4.329637e-17   
4     -0.5      0.5       -0.5     -0.5  ... -5.000000e-01 -4.329637e-17   

    QuatJ_4   QuatK_4     QuatSum_4  QuatI_5  QuatJ_5  QuatK_5  QuatSum_5  \
0 -0.500000  0.500000 -5.00