# Demo of importing a dataset from CSV files
Aside from the basic import, we also run a few sanity checks on the data.

In [71]:
import pandas as pd
import os

In [72]:
# Lookup table: angle token as it appears in a dataset filename -> numeric angle
# (the 'deg' in the tokens suggests degrees).
# NOTE(review): "180DEG" maps to -180 rather than +180 — presumably deliberate; confirm.
library = dict(
    [
        ("60deg", 60),
        ("minus90deg", -90),
        ("0DEG", 0),
        ("180DEG", -180),
        ("90deg", 90),
        ("minus60", -60),
    ]
)

In [73]:
base_dir = 'data/'

# Columns the notebook expects. They are kept first and in this order, and are
# created (filled with NaN) if no CSV file provides them.
expected_columns = ['subject', 'force', 'angle', 'trial', 'type', 'trial_id', 'step', 'handrail']

# Collect one dataframe per file and concatenate once at the end. Calling
# pd.concat inside the loop re-copies all previously loaded rows on every
# iteration, and concatenating onto an empty frame is deprecated in recent pandas.
frames = []

# os.listdir returns entries in arbitrary order; sorting makes the row order reproducible.
for subject in sorted(os.listdir(base_dir)):
    subject_dir = os.path.join(base_dir, subject)
    if not os.path.isdir(subject_dir):    # Skip stray files lying next to the subject folders
        continue

    for dataset in sorted(os.listdir(subject_dir)):
        if 'with' in dataset:    # Skip empty dummy datasets
            continue

        # Metadata is encoded in the filename as '<force>N_<angle>_<type>...'
        name_parts = dataset.split('_')

        # Read into pandas dataframe and add metadata from filename to dataframe as columns
        tmp = pd.read_csv(os.path.join(subject_dir, dataset), sep=';')
        tmp['subject'] = subject
        tmp['force'] = int(name_parts[0][:-1])  # Remove 'N' at the end
        # Numeric value from 'library', a lookup dictionary (None if the token is not listed there)
        tmp['angle'] = library.get(name_parts[1])
        tmp['type'] = name_parts[2]
        tmp['trial_id'] = (subject + '_' + dataset).split('.')[0]   # Remove file extension

        frames.append(tmp)

if frames:
    df = pd.concat(frames, ignore_index=True)
    # Expected columns first, then any additional columns found in the CSV files
    extra_columns = [col for col in df.columns if col not in expected_columns]
    df = df.reindex(columns=expected_columns + extra_columns)
else:
    # No dataset found: fall back to an empty dataframe with the expected layout
    df = pd.DataFrame(columns=expected_columns)

## Some stats and checks on the dataset

In [83]:
# Headline numbers: distinct trial ids (one per loaded file) and distinct subjects in `df`
n_datasets = len(df['trial_id'].unique())
n_subjects = len(df['subject'].unique())
print(f"Found a total of {n_datasets} different datasets across {n_subjects} different subjects.")

# Rows of type 'walking' are expected to carry no value in the 'step' column
walking_rows = df[df['type'] == 'walking']
walking_steps = walking_rows['step']

# First check: the 'step' values of all walking rows should add up to nothing
print(f"Sum of 'step' column when performing a 'walking' trial: {walking_steps.sum()}")
# Second, stricter check: the number of NaN entries must equal the number of walking rows
print(walking_steps.isna().sum() == len(walking_rows))

Found a total of 36 different datasets across 3 different subjects.
Sum of 'step' column when performing a 'walking' trial: 0
True


In [75]:
# Print a short report per subject: the angles and forces present, handrail
# statistics per trial type, and handrail/step counts per force level.
for subject in df['subject'].unique():
    df_sub = df.loc[df['subject'] == subject]
    directions = df_sub['angle'].unique()
    forces = df_sub['force'].unique()

    print(f"\n\n|--- Subject {subject} has: " + "-"*50 + "|")
    print(f"    Directions: {directions}      Forces: {forces}      Total trials: {len(df_sub)}\n")

    # Statistics: 'handrail' values split by trial type
    walking_handrail = df_sub.loc[df_sub['type'] == 'walking', 'handrail']
    standing_handrail = df_sub.loc[df_sub['type'] == 'standing', 'handrail']

    print(f"    Walking trials ({len(walking_handrail)}):")
    print(f"            sum(handrail): {walking_handrail.sum()}      mean(handrail): {walking_handrail.mean()}")
    print(f"    Standing trials ({len(standing_handrail)}):")
    print(f"            sum(handrail): {standing_handrail.sum()}      mean(handrail): {standing_handrail.mean()}")

    # Breakdown per force level
    for force in forces:
        df_sub_force = df_sub.loc[df_sub['force'] == force]
        n_rows = len(df_sub_force)
        # Steps are reported relative to the standing rows only
        n_standing = len(df_sub_force.loc[df_sub_force['type'] == 'standing'])

        print(f"    Force {force}N ({n_rows} trials):")
        print(f"            Touched handrail:   {df_sub_force['handrail'].sum()}/{n_rows}")
        print(f"            Steps:              {df_sub_force['step'].sum()}/{n_standing}")
        



|--- Subject sub02 has: --------------------------------------------------|
    Directions: [60 -90]      Forces: [100 200 50]      Total trials: 61

    Walking trials (30):
            sum(handrail): 18      mean(handrail): 0.6
    Standing trials (31):
            sum(handrail): 5      mean(handrail): 0.16129032258064516
    Force 100N (20 trials):
            Touched handrail:   5/20
            Steps:              0/10
    Force 200N (20 trials):
            Touched handrail:   15/20
            Steps:              10/10
    Force 50N (21 trials):
            Touched handrail:   3/21
            Steps:              0/11


|--- Subject sub03 has: --------------------------------------------------|
    Directions: [0 -180]      Forces: [100 200 50]      Total trials: 62

    Walking trials (31):
            sum(handrail): 0      mean(handrail): 0.0
    Standing trials (31):
            sum(handrail): 2      mean(handrail): 0.06451612903225806
    Force 100N (20 trials):
          