In [206]:
import numpy as np
import pandas as pd
import scipy.signal as signal
import altair as alt

from pathlib import Path
from operator import and_
from functools import reduce
from itertools import chain, groupby, product
from sklearn.preprocessing import LabelEncoder

# Use Altair's default data transformer with the row limit switched off
# (max_rows=None), so charts can be built from frames of any length.
alt.data_transformers.enable('default', max_rows=None)

DataTransformerRegistry.enable('default')

## Read Data

In [24]:
# Column names for the raw log files, in file order: chest accelerometer,
# two ECG leads, then accelerometer / gyro / magnetometer triples for the
# left ankle and the right lower arm, and finally the activity label.
cols_names = [
    *(f'acceleration, chest ({axis} axis)' for axis in 'XYZ'),
    'ECG-1',
    'ECG-2',
    *(
        f'{sensor}, {placement} ({axis} axis)'
        for placement in ('left-ankle', 'right-lower-arm')
        for sensor in ('acceleration', 'gyro', 'magnetometer')
        for axis in 'XYZ'
    ),
    'activity',
]

In [64]:
# Human-readable activity names, positioned so that the numeric activity code
# found in the logs can be used directly as an index into this array
# (code 0 marks samples without an activity label).
activities_names = np.array(list({
    0: 'N/A',
    1: 'Standing still',
    2: 'Sitting and relaxing',
    3: 'Lying down',
    4: 'Walking',
    5: 'Climbing stairs',
    6: 'Waist bends forward',
    7: 'Frontal elevation of arms',
    8: 'Knees bending (crouching)',
    9: 'Cycling',
    10: 'Jogging',
    11: 'Running',
    12: 'Jump front & back',
}.values()))

In [57]:
# Sampling rate in samples per second; 1/fs is the spacing between consecutive
# values of the `timepoint` column built while loading the logs.
fs = 50

In [58]:
# Load the logs of subjects 1..10 and tag every sample with its subject id and
# with the time elapsed since the start of that subject's recording.
partial_dfs = []
for subject_id in range(1, 11):
    log_path = Path('data') / f'mHealth_subject{subject_id}.log'
    partial_df = pd.read_table(log_path, header=None, names=cols_names)
    # Sample k was taken at k / fs seconds. An integer range divided by fs always
    # yields exactly len(partial_df) values, unlike np.arange with a float step.
    partial_df['timepoint'] = np.arange(len(partial_df)) / fs
    partial_df['subject'] = subject_id
    partial_dfs.append(partial_df)

# NOTE: the per-file row index is kept, so index labels repeat across subjects;
# use positional access (.iloc) or filter on `subject` rather than .loc.
df = pd.concat(partial_dfs)

## Data Exploration

In [54]:
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,"acceleration, chest (X axis)","acceleration, chest (Y axis)","acceleration, chest (Z axis)",ECG-1,ECG-2,"acceleration, left-ankle (X axis)","acceleration, left-ankle (Y axis)","acceleration, left-ankle (Z axis)","gyro, left-ankle (X axis)","gyro, left-ankle (Y axis)",...,"acceleration, right-lower-arm (Z axis)","gyro, right-lower-arm (X axis)","gyro, right-lower-arm (Y axis)","gyro, right-lower-arm (Z axis)","magnetometer, right-lower-arm (X axis)","magnetometer, right-lower-arm (Y axis)","magnetometer, right-lower-arm (Z axis)",activity,timepoint,subject
0,-9.8184,0.009971,0.29563,0.004186,0.004186,2.1849,-9.6967,0.63077,0.1039,-0.84053,...,0.18776,-0.44902,-1.0103,0.034483,-2.35,-1.6102,-0.030899,0,0.0,1
1,-9.8489,0.52404,0.37348,0.004186,0.016745,2.3876,-9.508,0.68389,0.085343,-0.83865,...,0.023595,-0.44902,-1.0103,0.034483,-2.1632,-0.88254,0.32657,0,0.02,1
2,-9.6602,0.18185,0.43742,0.016745,0.037677,2.4086,-9.5674,0.68113,0.085343,-0.83865,...,0.27572,-0.44902,-1.0103,0.034483,-1.6175,-0.16562,-0.030693,0,0.04,1
3,-9.6507,0.21422,0.24033,0.07954,0.11722,2.1814,-9.4301,0.55031,0.085343,-0.83865,...,0.36752,-0.45686,-1.0082,0.025862,-1.0771,0.006945,-0.38262,0,0.06,1
4,-9.703,0.30389,0.31156,0.22187,0.20513,2.4173,-9.3889,0.71098,0.085343,-0.83865,...,0.40729,-0.45686,-1.0082,0.025862,-0.53684,0.1759,-1.0955,0,0.08,1


In [67]:
# Replace the numeric activity codes with their readable names.
# The dtype guard makes this cell safe to re-run: once the column holds names,
# indexing `activities_names` with it again would raise.
if pd.api.types.is_integer_dtype(df['activity']):
    df['activity'] = activities_names[df['activity'].to_numpy()]

In [144]:
def get_activity_intervals_df(df):
    """Collapse per-sample activity labels into contiguous activity intervals.

    For every subject, consecutive rows that share the same ``activity`` value
    are merged into one interval. An interval ends where the next one starts;
    the last interval of a subject ends at that subject's last timepoint.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``subject``, ``activity`` and ``timepoint``
        (seconds). Rows are processed in their existing order.

    Returns
    -------
    pandas.DataFrame
        One row per interval with the columns ``start`` and ``end`` (timepoints
        converted with ``pd.to_datetime(..., unit='s')``), ``activity``,
        ``subject`` and ``length`` (duration in seconds). The index restarts at
        0 for every subject. An empty frame with these columns is returned when
        ``df`` has no rows.
    """
    columns = ['start', 'end', 'activity', 'subject', 'length']
    activity_intervals = []
    for subject, subject_df in df.groupby('subject'):
        labels = subject_df['activity'].to_numpy()
        # Take the timepoints from this subject's own rows. Looking them up in
        # the full frame by the position inside the group returns rows that
        # belong to other subjects.
        times = subject_df['timepoint'].to_numpy()

        prev = None
        boundaries = []
        activities = []
        for label, time in zip(labels, times):
            if label != prev:
                prev = label
                boundaries.append(time)
                activities.append(label)
        # Close the last interval at the end of the recording.
        boundaries.append(times[-1])

        activity_df = pd.DataFrame({
            'start': pd.to_datetime(boundaries[:-1], unit='s'),
            'end': pd.to_datetime(boundaries[1:], unit='s'),
            'activity': activities,
            'subject': subject,
        })
        activity_df['length'] = (activity_df['end'] - activity_df['start']) / np.timedelta64(1, 's')
        activity_intervals.append(activity_df)

    if not activity_intervals:
        # pd.concat raises on an empty list; return an empty, well-formed frame.
        return pd.DataFrame(columns=columns)
    return pd.concat(activity_intervals)

In [145]:
activity_length_df = get_activity_intervals_df(df)

In [146]:
# Timeline of the activity intervals, one panel per subject.
# `columns` only wraps panels when the chart is faceted through `facet=`;
# with `column=` every subject ends up in one long row. The subject id is a
# category, so it is encoded as nominal.
alt.Chart(activity_length_df).mark_rule().encode(
    y = alt.Y('activity:N'),
    x = alt.X('start:T', axis=alt.Axis(title='time, mm:ss', format =('%M:%S'))),
    x2 = alt.X2('end:T')
).properties(
    width = 800/2,
    height= 300
).facet(
    facet='subject:N',
    columns=2
)

We can see that, even though we have nearly 1 hour of recording, only about 12 minutes of it are useful signals.

In [147]:
# Duration of every labelled activity interval, one panel per subject
# (intervals without an activity label are left out).
# The 'length, s' title belongs to the length axis (x), and `columns` only
# wraps panels when the chart is faceted through `facet=`.
alt.Chart(activity_length_df[activity_length_df['activity'] != activities_names[0]]).encode(
    x = alt.X('length:Q', title='length, s'),
    y = alt.Y('activity:N'),
).mark_bar().facet(facet='subject:N', columns=5)

Jump front & back is much shorter than the other activities (20 s vs 1 min), so I decided to drop it. Also, subject #7 has a much shorter Climbing stairs recording (only 15 seconds), so we can drop it as well in order to use a larger window size.

In [195]:
def get_signal(subject, activity, data=None, intervals=None):
    """Return the samples recorded while ``subject`` performed ``activity``.

    Parameters
    ----------
    subject
        Value of the ``subject`` column to select.
    activity
        Value of the ``activity`` column of the interval table to select. If the
        subject has several intervals of this activity, the first one is used.
    data : pandas.DataFrame, optional
        Sample table with ``subject`` and ``timepoint`` (seconds) columns.
        Defaults to the notebook-level ``df``.
    intervals : pandas.DataFrame, optional
        Interval table with ``subject``, ``activity``, ``start`` and ``end``
        (datetime) columns. Defaults to the notebook-level ``activity_length_df``.

    Returns
    -------
    pandas.DataFrame
        The subject's rows from the sample closest to the interval start up to,
        but not including, the sample closest to the interval end.

    Raises
    ------
    IndexError
        If the subject has no interval of the requested activity.
    """
    data = df if data is None else data
    intervals = activity_length_df if intervals is None else intervals

    matches = intervals[(intervals['subject'] == subject) & (intervals['activity'] == activity)]
    if matches.empty:
        raise IndexError(f'no {activity!r} interval found for subject {subject!r}')

    # Convert the datetime bounds back to seconds without assuming a particular
    # datetime64 resolution.
    epoch = np.datetime64(0, 's')
    one_second = np.timedelta64(1, 's')
    st = (matches['start'].values[0] - epoch) / one_second
    end = (matches['end'].values[0] - epoch) / one_second

    # Search only this subject's samples: every subject's time axis restarts, so
    # a search over the whole table always lands on the first subject's rows.
    subject_df = data[data['subject'] == subject]
    times = subject_df['timepoint'].to_numpy()
    if len(times) == 0:
        return subject_df

    # Nearest-sample lookup tolerates the rounding introduced by the
    # seconds -> datetime -> seconds round trip.
    first = np.argmin(np.abs(times - st))
    last = np.argmin(np.abs(times - end))
    return subject_df.iloc[first:last]

In [203]:
def plot_ecg(sig, lead=1):
    """Build a line chart of one ECG lead of ``sig`` against time.

    ``sig`` supplies the chart data and is expected to contain the columns
    ``timepoint`` and ``ECG-<lead>``; ``lead`` selects which ECG column is drawn.
    Returns the Altair chart without displaying it.
    """
    time_axis = alt.X('timepoint:Q', axis=alt.Axis(labels=True), title='time, s')
    voltage_axis = alt.Y(f'ECG-{lead}:Q', title='Voltage, mV')
    line_chart = alt.Chart(sig).mark_line()
    return line_chart.encode(x=time_axis, y=voltage_axis)

In [None]:
# One ECG chart per labelled activity of subject 1, stacked vertically
# (`&` is Altair's vertical concatenation operator).
subject_1_ecg_charts = [
    plot_ecg(get_signal(1, activity)).properties(title=activity)
    for activity in activities_names[1:]
]
reduce(and_, subject_1_ecg_charts)

1761.0