In [10]:
# Silence every warning category. The filter is installed before numpy/pandas are
# imported, so it is already active while those imports run.
# NOTE(review): this also hides deprecation warnings raised by later cells — consider narrowing.
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
# Lift pandas' display limits so displayed frames are never truncated.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
from collections import OrderedDict, Counter
import os
import sys
from tqdm import tqdm
import re

# data_dir: folder the raw recordings and labels.txt are read from.
# data_res: folder the processed arrays and CSV are written to.
data_dir = '../data/raw_data'
data_res = '../data/processed_data'

freq = 50 # sampling frequency in Hz
window_len = 2.56 # window length in seconds (window_len * freq = 128 samples)
overlap = 0.5 # fraction of a window shared with the next window

In [12]:
def create_sliding_window(X, y, data, window_len, freq, overlap,
                          user_id=None, exp_id=None, start_id=None, end_id=None,
                          activity=None, columns=None):
    """Cut one labelled segment of ``data`` into fixed-length overlapping windows.

    The segment is every row of ``data`` whose 'user ID' and 'exp ID' match and
    whose 'ID' lies in ``[start_id, end_id]`` (both ends inclusive). Windows of
    ``int(window_len * freq)`` rows are taken every
    ``window - int(window_len * freq * overlap)`` rows; trailing rows that do
    not fill a complete window are dropped.

    Parameters
    ----------
    X, y : list
        Accumulators, mutated in place and returned. When at least one window
        fits, ``X`` gains one nested list of shape (n_windows, window, n_columns)
        and ``y`` gains a list holding ``activity`` repeated ``n_windows`` times.
    data : pandas.DataFrame
        Must contain the columns 'user ID', 'exp ID', 'ID' and ``columns``.
    window_len : float
        Window length in seconds.
    freq : float
        Sampling frequency in Hz.
    overlap : float
        Fraction of a window shared with the next one.
    user_id, exp_id, start_id, end_id, activity, columns : optional
        Segment selector, label and list of feature columns. Any argument left
        as ``None`` falls back to the module-level name ``user``, ``exp``,
        ``start``, ``end``, ``l`` or ``col`` respectively, which is how the
        6-argument call in the aggregation loop supplies them.

    Returns
    -------
    (X, y) : the same two list objects that were passed in.

    Raises
    ------
    ValueError
        If the window length or the step between windows is not positive.
    NameError
        If an optional argument is omitted and its module-level fallback is
        not defined.
    """
    # Backward compatibility: the original implementation read these values
    # from module scope. The globals are only touched when needed.
    if user_id is None:
        user_id = user
    if exp_id is None:
        exp_id = exp
    if start_id is None:
        start_id = start
    if end_id is None:
        end_id = end
    if activity is None:
        activity = l
    if columns is None:
        columns = col

    win = int(window_len * freq)
    shared = int(window_len * freq * overlap)
    step = win - shared
    if win <= 0 or step <= 0:
        raise ValueError(
            'window length (%d samples) and step (%d samples) must both be positive'
            % (win, step)
        )

    in_segment = (
        (data['user ID'] == user_id)
        & (data['exp ID'] == exp_id)
        & data['ID'].between(start_id, end_id)
    )
    segment = data.loc[in_segment, columns].to_numpy()

    # Segments shorter than one window yield nothing (the stride-based version
    # could compute a negative window count here and crash).
    if len(segment) < win:
        return X, y
    n_windows = (len(segment) - win) // step + 1

    # Row k of `rows` holds the row indices of window k; fancy indexing copies
    # the data and is bounds-checked, unlike as_strided.
    rows = step * np.arange(n_windows)[:, None] + np.arange(win)[None, :]
    windows = segment[rows]

    X.append(windows.tolist())
    y.append([activity] * n_windows)
    return X, y

# Aggregate data from the raw txt files and attach activity labels

In [13]:
# Recordings are the files in data_dir whose name contains 'user'. The sorted
# list is split into two halves and the i-th entries of each half are paired;
# this assumes one sensor's files sort entirely before the other's — the
# asserts in the loop verify that each pair shares user and experiment.
filepath = np.array(sorted([i for i in os.listdir(data_dir) if 'user' in i])).reshape(2,-1)
filepath = [[i,j] for i,j in zip(filepath[0,:], filepath[1,:])]

# sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
label = pd.read_csv(os.path.join(data_dir, 'labels.txt'), header=None, sep=r'\s+')
label.columns = ['exp ID','user ID','activity ID','start','end']

frames = []  # one frame per recording; concatenated once after the loop
X,y = [], []
col = ['accX','accY','accZ','gyroX','gyroY','gyroZ']

for path in tqdm(filepath, position=0):
    # File names are split on '_': field 1 carries the experiment, field 2 the user.
    acc_parts, gyro_parts = path[0].split('_'), path[1].split('_')
    user_acc, exp_acc = acc_parts[2], acc_parts[1]
    user_gyro, exp_gyro = gyro_parts[2], gyro_parts[1]

    assert user_acc == user_gyro, 'user mismatch between %s and %s' % (path[0], path[1])
    assert exp_acc == exp_gyro, 'experiment mismatch between %s and %s' % (path[0], path[1])

    # Keep only the digits of each field (raw string: '\D' is not a valid str escape).
    user, exp = int(re.sub(r'\D', '', user_acc)), int(re.sub(r'\D', '', exp_acc))
    acc = pd.read_csv(os.path.join(data_dir, path[0]), header=None, sep=r'\s+')
    gyro = pd.read_csv(os.path.join(data_dir, path[1]), header=None, sep=r'\s+')

    data = pd.concat((acc,gyro),axis=1)
    data.columns = col
    n_samples = data.shape[0]
    data['ID'] = np.arange(n_samples)
    data['timestep'] = np.arange(n_samples)/freq  # seconds since the first sample
    data['user ID'] = user
    data['exp ID'] = exp
    data['label'] = -1  # stays -1 for samples outside every labelled interval

    # create_sliding_window is called with six arguments and picks up the
    # module-level names user, exp, start, end, l and col, so those exact
    # names must be bound before each call.
    for index, row in label[(label['user ID'] == user) & (label['exp ID'] == exp)].iterrows():
        l, start, end = row['activity ID'], row['start'], row['end']
        # `data` holds a single user/experiment, so the ID range alone selects the segment.
        data.loc[data['ID'].between(start, end), 'label'] = l
        X, y = create_sliding_window(X, y, data, window_len, freq, overlap)
    frames.append(data)

# A single concat avoids re-copying the accumulated frame on every iteration.
df = pd.concat(frames, axis=0)
df = df[['ID','timestep','user ID','exp ID','accX','accY','accZ','gyroX','gyroY','gyroZ','label']]
# Stack the per-segment window lists along the first axis and flatten the label lists to match.
X, y = np.vstack(X), np.array([i for s in y for i in s])

df.head()

100%|██████████████████████████████████████████████████████████████████████████████████| 61/61 [02:52<00:00,  2.82s/it]


Unnamed: 0,ID,timestep,user ID,exp ID,accX,accY,accZ,gyroX,gyroY,gyroZ,label
0,0,0.0,1,1,0.918056,-0.1125,0.509722,-0.054978,-0.069639,-0.030849,-1
1,1,0.02,1,1,0.911111,-0.093056,0.5375,-0.012523,0.019242,-0.038485,-1
2,2,0.04,1,1,0.881944,-0.086111,0.513889,-0.023518,0.276417,0.006414,-1
3,3,0.06,1,1,0.881944,-0.086111,0.513889,-0.093462,0.367741,0.001222,-1
4,4,0.08,1,1,0.879167,-0.1,0.505556,-0.124311,0.47678,-0.022907,-1


# Save results

In [17]:
# Create the output folder on first run; without this the writes below fail
# with FileNotFoundError on a fresh checkout.
os.makedirs(data_res, exist_ok=True)

# The file names already end in '.npy', so np.save writes exactly these paths.
np.save(os.path.join(data_res, 'X.npy'), X)
np.save(os.path.join(data_res, 'y.npy'), y)
print(X.shape, y.shape)
# The index is written as the first (unnamed) CSV column, as before.
df.to_csv(os.path.join(data_res, 'data.csv'))

(10929, 128, 6) (10929,)
