Goal: Integrate the raw files of several fall-detection datasets into a single file with a common schema
---
+ Single file columns -> SubjectID, Device, Activity, Acc, Gyr  
+ Dataset -> FallAllD, SisFall, UMAFall

In [1]:
import numpy as np
import pandas as pd

from pathlib import Path

### Preprocessing - FallAllD

In [2]:
# Build the preliminary FallAllD table: label every recording as Fall/ADL and
# keep only the columns shared by all datasets.

# Read dataset
# NOTE: unpickling can execute arbitrary code; only load a FallAllD.pkl that
# comes from a trusted source.
df_fallAllD = pd.read_pickle(Path.cwd().joinpath('raw', 'FallAllD.pkl'))

# ActivityID >= 100 is labelled 'Fall'; everything below 100 is labelled 'ADL' (non-fall).
df_fallAllD['Activity'] = np.where(df_fallAllD['ActivityID'] >= 100, 'Fall', 'ADL')
df_fallAllD = df_fallAllD[['SubjectID', 'Device', 'Activity', 'Acc', 'Gyr']]

# Make sure the output folder exists so the export cannot fail on a fresh checkout.
Path.cwd().joinpath('processed').mkdir(parents=True, exist_ok=True)
pd.to_pickle(df_fallAllD, Path.cwd().joinpath('processed', 'FallAllD-Preliminary.pkl'))

### Preprocessing - UMAFall

In [3]:
# Folder holding the UMAFall recordings (one CSV file per trial), resolved
# relative to the current working directory.
UMA_dataFolder = Path.cwd().joinpath('raw', 'UMAFall', 'UMAFall_Dataset')
# glob() yields files in filesystem order, which differs between machines;
# sorting keeps the row order of the resulting table reproducible.
UMA_dataFiles = sorted(UMA_dataFolder.glob('*.csv'))

In [4]:
# Parse every UMAFall CSV into one row per (recording, sensor position) and
# export the result as the preliminary UMAFall table.

# Accumulators, one entry per output row.
list_sugjectID = []
list_device = []
list_activity = []
list_acc = []
list_gyr = []

# Value of the 'Sensor ID' column -> body position of the sensor.
sensor_ID = {
    0: 'Right Pocket',
    1: 'Chest',
    2: 'Waist',
    3: 'Wrist',
    4: 'Ankle'
}

# Value of the 'Sensor Type' column -> kind of measurement.
# Only used as documentation for the literals 0 and 1 in the filters below.
sensor_type = {
    0: 'Accelerometer',
    1: 'Gyroscope'
}

axis_columns = ['X-Axis', 'Y-Axis', 'Z-Axis']

for dataFile in UMA_dataFiles:
    # Skip the first 40 lines; the next line is consumed as the header row and
    # its names are replaced just below.
    df = pd.read_csv(dataFile, header=0, skiprows=40, sep=';')
    # Drop the last parsed column (presumably an empty column produced by a
    # trailing ';' on every line - TODO confirm against a raw file).
    df = df.iloc[:, :-1]
    df.columns = ['Time', 'Sample No', 'X-Axis', 'Y-Axis', 'Z-Axis', 'Sensor Type', 'Sensor ID']

    # File stems look like:
    # UMAFall_Subject_01_ADL_Aplausing_1_2017-04-14_23-38-23
    fileName = str(dataFile.stem)
    name_fields = fileName.split('_')
    subjectID = name_fields[2]
    activity = name_fields[3]

    # `sensor_id` instead of `id`: the loop variable is a notebook global and
    # must not overwrite the builtin id() for later cells.
    for sensor_id, device in sensor_ID.items():
        on_this_sensor = df['Sensor ID'] == sensor_id
        acc = df.loc[(df['Sensor Type'] == 0) & on_this_sensor, axis_columns].to_numpy()
        gyr = df.loc[(df['Sensor Type'] == 1) & on_this_sensor, axis_columns].to_numpy()

        # append to data list
        # NOTE(review): a row is appended for every sensor position, even when
        # the file holds no samples for it (Acc/Gyr are then empty (0, 3)
        # arrays) - confirm downstream code expects these rows.
        list_sugjectID.append(np.uint8(subjectID))
        list_activity.append(activity)
        list_device.append(device)
        list_acc.append(acc)
        list_gyr.append(gyr)

df_UMAFall = pd.DataFrame(list(zip(list_sugjectID,list_device,list_activity,list_acc,list_gyr)), columns=['SubjectID','Device','Activity','Acc','Gyr'])

# Make sure the output folder exists so the export cannot fail after the
# whole dataset has already been parsed.
Path.cwd().joinpath('processed').mkdir(parents=True, exist_ok=True)
pd.to_pickle(df_UMAFall, Path.cwd().joinpath('processed', 'UMAFall-Preliminary.pkl'))

### Preprocessing - SisFall

In [5]:
# Root folder of the SisFall dataset, resolved relative to the current
# working directory.
Sis_dataFolder = Path.cwd() / 'raw' / 'SisFall' / 'SisFall_Dataset'
# Per-subject sub-folder names: SA01, SA02, ..., SA23.
Sis_dataFolderName = ['SA' + str(subject_no).zfill(2) for subject_no in range(1, 24)]

In [6]:
# Start from five fresh, independent accumulators (one per output column) so
# that nothing collected earlier under the same names carries over.
list_sugjectID, list_device, list_activity, list_acc, list_gyr = ([] for _ in range(5))

In [7]:
# Parse every SisFall trial of the subjects listed in Sis_dataFolderName into
# one row each and export the result as the preliminary SisFall table.
for i, folderName in enumerate(Sis_dataFolderName):
    # sorted(): glob() order depends on the filesystem; sorting keeps the row
    # order of the resulting table reproducible.
    Sis_dataFiles = sorted(Sis_dataFolder.joinpath(folderName).glob('*.txt'))
    for dataFile in Sis_dataFiles:
        # check ADL or Fall from the first letter of the file name
        trial_code = dataFile.stem[:1]
        if trial_code == 'D':
            activity = 'ADL'
        elif trial_code == 'F':
            activity = 'Fall'
        else:
            # Not a D*/F* recording: skip it instead of silently reusing the
            # label of the previously processed file (or hitting a NameError
            # when it is the very first file).
            continue

        # read data
        df = pd.read_csv(dataFile, sep=',', header=None)
        # Column layout according to the SisFall readme:
        #   ADXL345  -> 0, 1, 2 (Acc)
        #   ITG3200  -> 3, 4, 5 (Gyr)
        #   MMA8451Q -> 6, 7, 8 (second accelerometer, not used here)
        # The trailing ';' of every line only affects column 8, which is not
        # read, so no clean-up of the last column is required.
        acc = df.iloc[:, [0, 1, 2]].to_numpy()
        gyr = df.iloc[:, [3, 4, 5]].to_numpy()

        # append to data list
        # Subject number is the 1-based position of the folder in Sis_dataFolderName.
        list_sugjectID.append(np.uint8(i+1))
        # Every SisFall row is tagged with the 'Waist' device position.
        list_device.append('Waist')
        list_activity.append(activity)
        list_acc.append(acc)
        list_gyr.append(gyr)

df_SisFall = pd.DataFrame(list(zip(list_sugjectID,list_device,list_activity,list_acc,list_gyr)), columns=['SubjectID','Device','Activity','Acc','Gyr'])

# Make sure the output folder exists so the export cannot fail after the
# whole dataset has already been parsed.
Path.cwd().joinpath('processed').mkdir(parents=True, exist_ok=True)
pd.to_pickle(df_SisFall, Path.cwd().joinpath('processed', 'SisFall-Preliminary.pkl'))