# Data exploration PAMAP2

In [1]:
# Column names applied to each row of a PAMAP2 subject file: 3 leading fields
# followed by three IMU groups (hand, chest, ankle) of 17 fields each, 54 in total.
# Names starting with an underscore are placeholders for fields that are not given
# a descriptive name in this notebook; their numbering is not contiguous.
# NOTE(review): per the PAMAP2 readme, the three placeholders after acc_x/y/z are
# presumably the second (+-6g) accelerometer and the four placeholders closing each
# group the orientation values, which the readme marks as invalid — confirm before use.
columns = [
    'timestamp',
    'activityID',
    'heart_rate',
    # IMU - HAND
    'imu_hand-temp',
    'imu_hand-acc_x',
    'imu_hand-acc_y',
    'imu_hand-acc_z',
    '_1',
    '_2',
    '_3',
    'imu_hand-gyro_x',
    'imu_hand-gyro_y',
    'imu_hand-gyro_z',
    'imu_hand-magneto_x',
    'imu_hand-magneto_y',
    'imu_hand-magneto_z',
    '_4',
    '_5',
    '_6',
    '_26',
    # IMU - chest
    'imu_chest-temp',
    'imu_chest-acc_x',
    'imu_chest-acc_y',
    'imu_chest-acc_z',
    '_7',
    '_8',
    '_9',
    'imu_chest-gyro_x',
    'imu_chest-gyro_y',
    'imu_chest-gyro_z',
    'imu_chest-magneto_x',
    'imu_chest-magneto_y',
    'imu_chest-magneto_z',
    '_10',
    '_11',
    '_12',
    '_22',
    # IMU - ankle
    'imu_ankle-temp',
    'imu_ankle-acc_x',
    'imu_ankle-acc_y',
    'imu_ankle-acc_z',
    '_13',
    '_14',
    '_15',
    'imu_ankle-gyro_x',
    'imu_ankle-gyro_y',
    'imu_ankle-gyro_z',
    'imu_ankle-magneto_x',
    'imu_ankle-magneto_y',
    'imu_ankle-magneto_z',
    '_16',
    '_17',
    '_18',
    '_28',
]

import pandas as pd
# Root directory of the PAMAP2 dataset; load_data_for_user reads from its
# Protocol/ subfolder.
# NOTE(review): hard-coded absolute path — adjust for the local environment.
path = '/mnt/data/PAMAP2'

def load_data_for_user(user):
    """Read one subject's protocol recording into a DataFrame.

    The file is looked up at ``{path}/Protocol/subject10{user}.dat`` (using
    the module-level ``path``), parsed as single-space-separated values, and
    its columns are labelled with the module-level ``columns`` list.

    Parameters
    ----------
    user
        Value interpolated after ``subject10`` in the file name; the
        notebook passes ``1``, which selects ``subject101.dat``.

    Returns
    -------
    pandas.DataFrame
        The parsed recording.
    """
    data_file = f'{path}/Protocol/subject10{user}.dat'
    return pd.read_csv(data_file, sep=' ', names=columns)

# Load the recording for user=1 (file subject101.dat) and print summary
# statistics plus the first rows as a quick sanity check.
df = load_data_for_user(1)
print(df.describe())
print(df.head())

           timestamp     activityID    heart_rate  imu_hand-temp  \
count  376417.000000  376417.000000  34389.000000  374963.000000   
mean     1890.460000       5.525765    124.135479      32.429895   
std      1086.623725       6.617002     24.000340       1.236379   
min         8.380000       0.000000     78.000000      30.000000   
25%       949.420000       0.000000    104.000000      31.437500   
50%      1890.460000       3.000000    125.000000      32.812500   
75%      2831.500000       7.000000    136.000000      33.500000   
max      3772.540000      24.000000    183.000000      33.875000   

       imu_hand-acc_x  imu_hand-acc_y  imu_hand-acc_z             _1  \
count   374963.000000   374963.000000   374963.000000  374963.000000   
mean        -3.349118        6.278528        3.407735      -3.290532   
std          6.365465        5.196436        3.383241       6.395511   
min       -127.649000      -81.234400      -34.124800     -56.984200   
25%         -7.840210      

In [2]:
# Activity IDs that split_data_into_activities looks up in the 'activityID' column.
# NOTE(review): the labels below are taken from the PAMAP2 readme as recalled by the
# reviewer, not from anything in this notebook — confirm against the dataset's
# readme before relying on them.
activities = [
    1,   # lying
    2,   # sitting
    3,   # standing
    4,   # walking
    5,   # running
    6,   # cycling
    7,   # Nordic walking
    9,   # watching TV
    10,  # computer work
    11,  # car driving
    12,  # ascending stairs
    13,  # descending stairs
    16,  # vacuum cleaning
    17,  # ironing
    18,  # folding laundry
    19,  # house cleaning
    20,  # playing soccer
    24,  # rope jumping
    0,   # other / transient activity
]


def split_data_into_activities(df, activity_ids=None):
    """Split a recording into one DataFrame per activity ID.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with an ``'activityID'`` column.
    activity_ids : iterable, optional
        Activity IDs to extract. When omitted, the module-level
        ``activities`` list is used, which matches the previous behaviour.

    Returns
    -------
    dict
        Maps the string form of each requested ID (e.g. ``'1'``) to the rows
        of ``df`` whose ``'activityID'`` equals that ID. Every requested ID
        gets an entry; IDs absent from ``df`` map to an empty DataFrame.
    """
    if activity_ids is None:
        # Resolved at call time so the default follows the notebook's list.
        activity_ids = activities

    # Keys are strings because callers index the result as data_per_activity['1'].
    return {
        f'{activity_id}': df[df['activityID'] == activity_id]
        for activity_id in activity_ids
    }

# Split the loaded recording by activity and inspect the result.
data_per_activity = split_data_into_activities(df)
print(data_per_activity.keys())
# NOTE(review): this prints the repr of every per-activity DataFrame; a summary
# such as the row count per key would be easier to read.
print(data_per_activity)

dict_keys(['1', '2', '3', '4', '5', '6', '7', '9', '10', '11', '12', '13', '16', '17', '18', '19', '20', '24', '0'])
{'1':        timestamp  activityID  heart_rate  imu_hand-temp  imu_hand-acc_x  \
2928       37.66           1         NaN        30.3750         2.21530   
2929       37.67           1         NaN        30.3750         2.29196   
2930       37.68           1         NaN        30.3750         2.29090   
2931       37.69           1         NaN        30.3750         2.21800   
2932       37.70           1       100.0        30.3750         2.30106   
...          ...         ...         ...            ...             ...   
30110     309.48           1         NaN        31.9375         2.12997   
30111     309.49           1         NaN        31.9375         2.16723   
30112     309.50           1        90.0        31.9375         2.27471   
30113     309.51           1         NaN        31.9375         2.20261   
30114     309.52           1         NaN        31.9

In [16]:
def rolling_frame(df, length, hop):
    """Cut ``df`` into overlapping fixed-length windows.

    Windows start at positions ``0, hop, 2*hop, ...`` and each spans
    ``length`` consecutive rows. Only complete windows are returned; a
    trailing partial window is dropped. In the PAMAP2 recordings used in
    this notebook one row equals 10 ms.

    Parameters
    ----------
    df : pandas.DataFrame or sequence
        Data to window; must support ``len()`` and positional slicing.
    length : int
        Number of rows per window; must be positive.
    hop : int
        Number of rows between consecutive window starts; must be positive.

    Returns
    -------
    list
        The windows, in order. Empty when ``df`` has fewer than ``length`` rows.

    Raises
    ------
    ValueError
        If ``length`` or ``hop`` is not positive (a non-positive ``hop``
        previously made the loop run forever).
    """
    if length <= 0 or hop <= 0:
        raise ValueError(
            f'length and hop must be positive, got length={length}, hop={hop}'
        )

    # The last valid start is len(df) - length, hence the +1 on the exclusive
    # range bound. The earlier `frame_end < end` test skipped a window that
    # ended exactly on the final row.
    last_start_exclusive = len(df) - length + 1
    return [
        df[frame_begin:frame_begin + length]
        for frame_begin in range(0, last_start_exclusive, hop)
    ]


# 300-row windows advanced 16 rows at a time; at one row per 10 ms that is a
# 3 s window with a 160 ms hop. Prints how many windows activity '1' yields.
print(len(rolling_frame(data_per_activity['1'], 300, 16)))

1681


In [14]:
# NOTE(review): this rebinds `df` from the full recording to the activity-'1'
# subset, so re-running the earlier split cell afterwards would split only this
# subset. A separate name would avoid the hidden state.
df = data_per_activity['1']

# Positional slice: the rows at positions 1 and 2 of the subset.
print(df[1:3])

      timestamp  activityID  heart_rate  imu_hand-temp  imu_hand-acc_x  \
2929      37.67           1         NaN         30.375         2.29196   
2930      37.68           1         NaN         30.375         2.29090   

      imu_hand-acc_y  imu_hand-acc_z       _1       _2       _3  ...  \
2929         7.67288         5.74467  2.27373  8.14592  5.78739  ...   
2930         7.14240         5.82342  2.26966  7.66268  5.78846  ...   

      imu_ankle-gyro_x  imu_ankle-gyro_y  imu_ankle-gyro_z  \
2929          0.020882          0.000945          0.006007   
2930         -0.035392         -0.052422         -0.004882   

      imu_ankle-magneto_x  imu_ankle-magneto_y  imu_ankle-magneto_z  _16  _17  \
2929             -60.8916             -36.3197             -58.3656  1.0  0.0   
2930             -60.3407             -35.7842             -58.6119  1.0  0.0   

      _18  _28  
2929  0.0  0.0  
2930  0.0  0.0  

[2 rows x 54 columns]
