In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os

from datetime import datetime, timedelta

In [33]:
### file paths: one [left, right] pair of raw-data exports per slot
# Slots f1..f4 map to sessions S7..S10, in that order. If a session's files do not
# follow the shared naming pattern, replace this with an explicit dict of path pairs.
files = {
    'f{}'.format(slot): [
        'RawData/20240712_Trials/Extract data from database/data/download/S{}_20240712_rawData{}.txt'.format(session, side)
        for side in ('Left', 'Right')
    ]
    for slot, session in enumerate((7, 8, 9, 10), start=1)
}

### date of the activity (combined with the per-row time of day in resample)
date = datetime(year=2024, month=7, day=12)

### participants
# NOTE(review): 'weight' holds 6 values while 'id', 'name' and 'shoesize' hold 12 each.
# Presumably the lists are meant to be parallel — confirm the missing weights before
# building a DataFrame from this dict (pandas rejects columns of unequal length).
participants = {
    'id': ['P{}'.format(number) for number in range(1, 13)],
    'name': [
        'Syawal', 'Salihin', 'Luther', 'Ilyas', 'Bryan', 'Benyamin',
        'Ridwan', 'Asrudin', 'Sean', 'Shawn', 'Afiq', 'STAR',
    ],
    'weight': [77.4, 60.9, 69.2, 67.85, 63.8, 68.9],
    'shoesize': [9, 8, 10, 9, 9, 9.5, 8, 7.5, 10.5, 9.5, 9.5, 10.5],
}


In [30]:
# load one raw-data export
def load_file(path):
    """Read a header-less, comma-separated raw-data file into a DataFrame.

    Files with 26 columns have their last two columns discarded, so that the
    24 expected column names can be applied. The 'index_no' and 'distance_ph'
    columns are then removed.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns raw_1..raw_14, acc_x/y/z, gyro_x/y/z, sess_no and Timestamp.
    """
    column_names = (
        ['index_no']
        + ['raw_{}'.format(channel) for channel in range(1, 15)]
        + ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
        + ['sess_no', 'Timestamp', 'distance_ph']
    )

    frame = pd.read_csv(path, sep=',', header=None)
    if frame.shape[1] == 26:
        # Keep only the first 24 columns; the two trailing ones are not named below.
        frame = frame.iloc[:, :-2]
    frame.columns = column_names
    return frame.drop(columns=['index_no', 'distance_ph'])

In [31]:
def resample(df, activity_date=None):
    """Spread per-second readings over each second and resample to a 10 ms grid.

    The 'Timestamp' column only has one-second resolution (HHMMSS). The rows
    that share a second are spaced evenly inside it (``1000 // rows`` ms apart,
    in row order), the resulting datetimes become the index, and the frame is
    forward-filled onto a regular 10 ms grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Timestamp' column holding the time of day as HHMMSS
        (integer or string). The frame is not modified.
    activity_date : datetime.datetime, optional
        Calendar day the readings belong to. Defaults to the notebook-level
        ``date`` variable.

    Returns
    -------
    pandas.DataFrame
        A new frame with an 'ms' datetime column on a 10 ms grid followed by
        the original columns, without 'Timestamp'.
    """
    if activity_date is None:
        # Fall back to the notebook-level configuration value.
        activity_date = date
    day = activity_date.date()

    seconds = df['Timestamp']

    # Position of every row inside its own second and the number of rows that
    # share that second. Both are computed per row, so each generated stamp stays
    # attached to its own row even when a second's rows are not contiguous.
    position = df.groupby('Timestamp', sort=False).cumcount().to_numpy()
    rows_in_second = seconds.map(seconds.value_counts()).to_numpy()
    offsets_ms = (position * (1000 // rows_in_second)).tolist()  # 1000 ms in one second

    # Parse every distinct second once. HHMMSS read as an integer loses its
    # leading zeros (010000 -> 10000), which '%H%M%S' would misread, so pad back
    # to six digits before parsing.
    second_start = {
        second: datetime.combine(
            day, datetime.strptime(str(second).zfill(6), '%H%M%S').time()
        )
        for second in seconds.unique().tolist()
    }
    stamps = [
        second_start[second] + timedelta(milliseconds=offset)
        for second, offset in zip(seconds.tolist(), offsets_ms)
    ]

    # assign() returns a new frame, leaving the caller's frame untouched. The
    # stable sort is a no-op for chronologically ordered input.
    stamped = df.assign(ms=stamps).set_index('ms').sort_index(kind='mergesort')

    # resample timeseries to 10ms
    resampled = stamped.resample('10ms').ffill().reset_index()
    return resampled.drop(columns=['Timestamp'])

In [32]:
def merge_lr(left, right):
    """Inner-join the left and right frames on their shared 'ms' column.

    Columns present in both inputs get a '_left' / '_right' suffix. The left
    frame's session number is kept as the first column, named 'session', and
    both suffixed 'sess_no' columns are dropped.

    Parameters
    ----------
    left, right : pandas.DataFrame
        Frames that each contain 'ms' and 'sess_no' columns.

    Returns
    -------
    pandas.DataFrame
        Columns 'session', 'ms', then the remaining suffixed columns.
    """
    joined = left.merge(right, on='ms', how='inner', suffixes=('_left', '_right'))
    session_numbers = joined['sess_no_left']
    combined = joined.drop(columns=['sess_no_left', 'sess_no_right'])
    # Put the session number in front of every other column.
    combined.insert(0, 'session', session_numbers)
    return combined

In [6]:
# left = resample(load_file(files['f1'][0]))
# right = resample(load_file(files['f1'][1]))

# df = merge_lr(left, right)
# df

In [34]:
### load, resample and merge the left/right files of every configured slot
# To load further slots, add their keys to the tuple below and extend the
# unpacking target with a matching name (df5, df6, ...).
df1, df2, df3, df4 = (
    merge_lr(resample(load_file(left_path)), resample(load_file(right_path)))
    for left_path, right_path in (files[key] for key in ('f1', 'f2', 'f3', 'f4'))
)
df1

Unnamed: 0,session,ms,raw_1_left,raw_2_left,raw_3_left,raw_4_left,raw_5_left,raw_6_left,raw_7_left,raw_8_left,...,raw_11_right,raw_12_right,raw_13_right,raw_14_right,acc_x_right,acc_y_right,acc_z_right,gyro_x_right,gyro_y_right,gyro_z_right
0,7,2024-07-12 08:10:48.000,14,24,0,19,2,20,0,0,...,1,-1,2,4,-3261,1662,-1957,78,151,-67
1,7,2024-07-12 08:10:48.010,14,24,0,19,2,20,0,0,...,1,-1,2,4,-3261,1662,-1957,78,151,-67
2,7,2024-07-12 08:10:48.020,13,24,1,18,2,19,0,1,...,-2,0,3,4,-3234,1693,-1984,79,131,-61
3,7,2024-07-12 08:10:48.030,13,24,1,18,2,19,0,1,...,-2,0,3,4,-3234,1693,-1984,79,131,-61
4,7,2024-07-12 08:10:48.040,12,25,0,18,2,21,-1,0,...,1,-2,3,4,-3212,1707,-1986,82,122,-52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243189,7,2024-07-12 08:51:19.890,5,10,4,7,2,7,-5,4,...,1,3,3,0,-3649,1089,-1612,-12,-7,17
243190,7,2024-07-12 08:51:19.900,6,9,4,8,3,8,-4,4,...,1,3,3,0,-3649,1089,-1612,-12,-7,17
243191,7,2024-07-12 08:51:19.910,6,9,4,8,3,8,-4,4,...,1,3,3,0,-3649,1089,-1612,-12,-7,17
243192,7,2024-07-12 08:51:19.920,6,9,4,8,3,8,-4,4,...,1,3,3,0,-3649,1089,-1612,-12,-7,17


In [15]:
### join dataframes if applicable
# df = pd.concat([df2, df3], ignore_index=True)

In [38]:
# Write the first merged session to CSV. The file name is built from the session
# number stored in the frame, the configured activity date and a participant id.
# participants['id'][7] selects 'P8': refer to the participants list for the
# htxid -> participant id mapping and adjust the index for the session being exported.
df1.to_csv(
    'S{}_{}_rawData_{}.csv'.format(
        df1['session'].iloc[0],      # first row's session number, read by position
        date.strftime('%Y%m%d'),     # follows the configured `date` instead of a hard-coded day
        participants['id'][7],
    ),
    index=False,
)