<div class="alert" style="background-color:#29C5F6; color:white; padding:0px 10px; border-radius:5px;">
    <h1 style='margin:15px 15px; color:#000000; font-size:32px'><b>Data Generation (Processing)</b></h1>
        <h2 style='margin:15px 15px; color:#000000; font-size:24px'>Human Activity Recognition Problem</h2>
            <div style='color:#000000'>
                <ul>
                  <li>WISDM - WIreless Sensor Data Mining</li>
                  <li>UCI HAR - Human Activity Recognition using Smartphones at UCI</li>
                  <li><b>MotionSense</b></li>
                </ul>
            </div>
</div>

This work is part of the **Master's thesis** by **Chau Tran**, supervised by **Prof. Roland Olsson**.

<div class="alert" style="background-color:#29C5F6; border-radius:5px; padding:0px 10px; "><h3 style='margin:15px 15px'>6_3. MotionSense</h3></div>
<div>
    <p>
        Source1: <a href="https://github.com/mmalekzadeh/motion-sense">https://github.com/mmalekzadeh/motion-sense</a> <br>
        Source2: <a href="https://www.kaggle.com/malekzadeh/motionsense-dataset">https://www.kaggle.com/malekzadeh/motionsense-dataset</a> <br>
    </p>
    <p> Folder's format: </p>
    <ul>
        <li>A_DeviceMotion_data:  All accelerometer and gyroscope data for all 15 trials</li>
        <li>B_Accelerometer_data: Only accelerometer data for all 15 trials</li>
        <li>C_Gyroscope_data:     Only gyroscope for all 15 trials</li>
    </ul>
    <p>
        Raw's format: <b>[user],[activity],[x-acceleration],[y-accel],[z-accel]</b> <br>
        Number of samples for non-hand-oriented activities (6 activities): <b>???</b><br>
    </p> 
    <ul>
      <li>Walking - wlk:      ???</li>
      <li>Jogging - jog:      ???</li>
      <li>UpStairs - ups:     ???</li>
      <li>Sitting - sit:      ???</li>
      <li>Standing - std:     ???</li>
      <li>DownStairs - dws:   ???</li>
    </ul> 
    <p>Fields:<br></p>
    <ul>
      <li>user: 1..24</li>
      <li>activity: {Walking, Jogging, Sitting, Standing, Upstairs, <b>Downstairs</b>}</li>
      <li>timestamp: microsecond (Unix Time)</li>
      <li>x-accel: floating-point values between -20 .. 20</li>
      <li>y-accel: floating-point values between -20 .. 20</li>
      <li>z-accel: floating-point values between -20 .. 20</li>
    </ul>
    <p> Each x-accel value is the acceleration in the x direction as measured by the Android phone's accelerometer. A value of 10 = 1g = 9.81 m/s^2, and 0 = no acceleration. The recorded acceleration includes gravitational acceleration toward the center of the Earth, so when the phone is at rest on a flat surface the vertical axis registers &plusmn;10. <br></p>
</div>

In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, RobustScaler
import sys, os

# Window lengths (in rows) for which a dataset is generated.
TIME_STEPS_arr = [90, 60, 50, 40]
# True  -> stride is int(round(window/2, -1)), i.e. overlapping windows;
# False -> stride equals the window length, i.e. no overlap.
isSTEPS_arr = [True, False]
# Fraction of each (user, activity) group that goes to the training split.
SPLIT = 0.5
# Written as the first line of every generated CSV and embedded in the file names.
# NOTE(review): presumably the number of input features / output classes — confirm.
NO_IN, NO_OUT = 12, 6

def divideData_perUser(data, per=0.5):
    """Split ``data`` into train/validation parts per (user, activity) group.

    Rows are grouped by the ``user`` column and, within each user, by the
    ``activity`` column (both in ``np.unique`` order). Inside every group the
    first ``int(n * per)`` rows, in their existing order, go to the training
    frame and the remaining rows go to the validation frame.

    Args:
        data: DataFrame with at least the columns ``user`` and ``activity``.
        per: Fraction of each group assigned to the training split.

    Returns:
        Tuple ``(X_df, train_df, val_df)``: all rows regrouped by
        (user, activity), the training rows, and the validation rows.
        The original index labels are preserved. Empty DataFrames are
        returned when ``data`` has no rows.
    """
    all_parts, train_parts, val_parts = [], [], []
    for user in np.unique(data['user']):
        dataPerUser = data[data['user'] == user]
        for tag in np.unique(dataPerUser['activity']):
            dataPerActivity = dataPerUser[dataPerUser['activity'] == tag]
            cut = int(len(dataPerActivity) * per)
            # Positional slicing: the split follows row order, not index labels.
            train_parts.append(dataPerActivity.iloc[:cut])
            val_parts.append(dataPerActivity.iloc[cut:])
            all_parts.append(dataPerActivity)

    def _combine(parts):
        # DataFrame.append was removed in pandas 2.0; a single concat is also
        # linear instead of re-copying the accumulated frame on every step.
        return pd.concat(parts) if parts else pd.DataFrame()

    return _combine(all_parts), _combine(train_parts), _combine(val_parts)

# Utils functions for segmenting windows
def windows(data, window_size, step):
    """Yield ``(start, end)`` bounds of successive windows over ``data``.

    Args:
        data: Object exposing ``count()`` (a pandas Series in this file);
            windows start at 0 and keep being produced while the start
            offset is below ``data.count()``.
        window_size: Length of each window; ``end`` is ``start + window_size``
            and may exceed the data length (the caller handles the tail).
        step: Distance between consecutive window starts; must be positive.

    Yields:
        Tuples ``(int(start), int(start + window_size))``.

    Raises:
        ValueError: If ``step`` is not positive (raised on first iteration);
            the loop would otherwise never terminate.
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")
    total = data.count()
    start = 0
    while start < total:
        yield int(start), int(start + window_size)
        start += step
def segment_signal(data, window_size=90, step=40, columns=None):
    """Cut every (user, activity) group of ``data`` into fixed-length windows.

    Window starts advance by ``step`` from 0 while they are below the group
    length. A window that would run past the end of the group is shifted back
    so that it ends on the group's last row; consequently several trailing
    starts can produce the same final window. Groups shorter than
    ``window_size`` yield no window.

    Args:
        data: DataFrame with ``user`` and ``activity`` columns plus the
            feature columns named in ``columns``.
        window_size: Number of rows per window.
        step: Distance between consecutive window starts; must be positive.
        columns: Feature column names to extract, in output order.
            ``None`` is treated as an empty list.

    Returns:
        Tuple ``(segments, labels)`` where ``segments`` has shape
        ``(n_windows, window_size, len(columns))`` and ``labels`` has shape
        ``(n_windows, 1)`` holding the ``activity`` value of each window.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")
    columns = [] if columns is None else list(columns)

    # Collect windows in lists and stack once at the end; stacking inside the
    # loop re-copies every previously collected window on each iteration.
    # The leading empty float array keeps the result dtype/shape of the
    # incremental version, also when no window is produced.
    chunks = [np.empty((0, window_size, len(columns)))]
    tags = []
    for user in np.unique(data['user']):
        userdata = data[data['user'] == user]
        for tag in np.unique(userdata['activity']):
            sub_class_data = userdata[userdata['activity'] == tag]
            n_rows = sub_class_data.shape[0]
            offset = 0
            while offset < n_rows:
                start, end = int(offset), int(offset + window_size)
                offset += step
                if end > n_rows - 1:
                    # Tail window: pull it back so it ends on the last row.
                    end = n_rows
                    start -= window_size - (end - start)
                window = sub_class_data.iloc[start:end]
                if window.isnull().values.any():
                    print(window.isnull().sum())
                # A negative start (group shorter than window_size) yields a
                # slice of the wrong length, which is skipped here.
                if window.shape[0] == window_size:
                    chunks.append(np.dstack([window[column].to_numpy() for column in columns]))
                    tags.append(tag)
    segments = np.vstack(chunks)
    labels = np.append(np.empty(0), tags)
    return segments, labels.reshape(-1, 1)

# Dataset root and the folder holding the raw DeviceMotion trial folders.
motionsense_path = '../../../../Datasets/6_har/2_MotionSense/1_data'
motionsense_phone_path = f'{motionsense_path}/A_DeviceMotion_data'
# Output folders for the overlapping ("w") and non-overlapping ("wt") datasets.
# makedirs(exist_ok=True) also creates the missing parent folder and is a
# no-op when the folder already exists.
os.makedirs(f"{motionsense_path}/A_DeviceMotion_data_processed/motionsense_w_overlap/", exist_ok=True)
os.makedirs(f"{motionsense_path}/A_DeviceMotion_data_processed/motionsense_wt_overlap/", exist_ok=True)

# Load every per-user CSV of every trial folder into one DataFrame.
# Folder names start with the activity code followed by '_' and the file
# stem ends with '_<user id>'; both are parsed from the names below.
motionsense_phone_frames = []
# Sorted listings make the row order (and therefore the train/val split)
# reproducible across machines; os.listdir order is arbitrary.
motionsense_phone_subpaths = sorted(os.listdir(motionsense_phone_path))
for motionsense_phone_subpath in motionsense_phone_subpaths:
    trial_dir = f'{motionsense_phone_path}/{motionsense_phone_subpath}'
    if not os.path.isdir(trial_dir):
        # Skip stray files (e.g. OS metadata) next to the trial folders.
        continue
    activity = motionsense_phone_subpath.split('_')[0]
    for user_data_path in sorted(os.listdir(trial_dir)):
        if not user_data_path.endswith('.csv'):
            continue
        user_id = user_data_path.split('.')[0].split('_')[-1]
        user_data = pd.read_csv(f'{trial_dir}/{user_data_path}', header=0, index_col=0)
        user_data.insert(0, 'user', user_id)
        # Tag and collect inside the per-file loop so that every user's file
        # is kept, not only the last one read in each trial folder.
        user_data.insert(1, 'activity', activity)
        motionsense_phone_frames.append(user_data)

# Single concat instead of DataFrame.append (removed in pandas 2.0).
motionsense_phone_data = (
    pd.concat(motionsense_phone_frames, ignore_index=True)
    if motionsense_phone_frames
    else pd.DataFrame()
)

# Drop, in place, every row that has a missing value in any column.
motionsense_phone_data.dropna(axis=0, how='any', inplace=True)
# Activity code -> integer class id used as the label.
# NOTE(review): 'ups' and 'dws' share id 5 and 'LyingDown' (id 6) is not one of
# the activity codes listed for this dataset — confirm this merge is intended
# rather than a copy-paste slip ('dws': 6).
mapping_dict = {'wlk': 1,'jog': 2, 'sit': 3, 'std': 4, 'ups': 5, 'dws': 5, 'LyingDown': 6}
# Codes that are not keys of mapping_dict become NaN after map().
motionsense_phone_data['activity'] = motionsense_phone_data.activity.map(mapping_dict)

# Feature columns: every column except the 'user' and 'activity' columns.
COLUMNS = list(motionsense_phone_data.columns)
COLUMNS.remove('user')
COLUMNS.remove('activity')
# Regroup the data per (user, activity) and split each group by row order.
X_df, train_df, val_df = divideData_perUser(motionsense_phone_data, SPLIT)

def _write_dataset_csv(path, samples):
    """Write the ``NO_IN,NO_OUT`` header line followed by one CSV row per window."""
    with open(path, 'w') as csvfile:
        np.savetxt(csvfile, np.array([[NO_IN, NO_OUT]]), fmt='%d', delimiter=",")
        np.savetxt(csvfile, samples, fmt='%.4f', delimiter=",")


# The encoder depends only on mapping_dict, so it is fitted once for all
# window configurations.
enc = OneHotEncoder().fit(np.array(list(mapping_dict.values())).reshape(-1,1))

# Generate one "all", one "train" and one "val" CSV per (overlap, window) pair.
for isSTEPS in isSTEPS_arr:
    for TIME_STEPS in TIME_STEPS_arr:
        # Overlapping: stride is half the window rounded to a multiple of 10;
        # otherwise the stride equals the window length.
        STEP = int(round(TIME_STEPS/2,-1)) if isSTEPS else TIME_STEPS
        print(TIME_STEPS, STEP)

        X, y = segment_signal(X_df, window_size=TIME_STEPS, step=STEP,columns=COLUMNS)
        X_train, y_train = segment_signal(train_df, window_size=TIME_STEPS, step=STEP,columns=COLUMNS)
        X_val, y_val = segment_signal(val_df, window_size=TIME_STEPS, step=STEP,columns=COLUMNS)

        y_train = enc.transform(y_train).toarray()
        y_val   = enc.transform(y_val).toarray()
        y       = enc.transform(y).toarray()

        # Repeat each window's one-hot label for every time step:
        # (n_windows, n_classes) -> (n_windows, TIME_STEPS, n_classes).
        y_train = np.tile(y_train, TIME_STEPS).reshape((y_train.shape[0], TIME_STEPS, y_train.shape[1]))
        y_val   = np.tile(y_val, TIME_STEPS).reshape((y_val.shape[0], TIME_STEPS, y_val.shape[1]))
        y       = np.tile(y, TIME_STEPS).reshape((y.shape[0], TIME_STEPS, y.shape[1]))

        # One row per window: for each time step the features followed by the
        # one-hot label, flattened over time.
        df_train = np.concatenate((X_train, y_train), axis=2).reshape((X_train.shape[0], -1))
        df_val = np.concatenate((X_val, y_val), axis=2).reshape((X_val.shape[0], -1))
        df = np.concatenate((X,y), axis=2).reshape((X.shape[0], -1))

        print(X_train.shape, y_train.shape, df_train.shape)
        print(X_val.shape, y_val.shape, df_val.shape)
        print(X.shape, y.shape, df.shape)

        motionsense_phone_result_path = f"{motionsense_path}/A_DeviceMotion_data_processed/motionsense_wt_overlap/" if TIME_STEPS==STEP else f"{motionsense_path}/A_DeviceMotion_data_processed/motionsense_w_overlap/"

        # The "spit" spelling in the file names is kept as-is so that existing
        # consumers of these files keep working.
        file_prefix = fr"{motionsense_phone_result_path}/motionsense.ni={NO_IN}.no={NO_OUT}.ts={TIME_STEPS}.os={STEP}"
        _write_dataset_csv(f"{file_prefix}.spit={0}.all.csv", df)
        # The training rows go to the same "motionsense" file as their header
        # (they were previously appended to a separate "ucihar" file).
        _write_dataset_csv(f"{file_prefix}.spit={int(SPLIT*100)}.train.csv", df_train)
        _write_dataset_csv(f"{file_prefix}.spit={int(SPLIT*100)}.val.csv", df_val)

Unnamed: 0,user,activity,attitude.roll,attitude.pitch,attitude.yaw,gravity.x,gravity.y,gravity.z,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z
0,9,dws,-0.678384,-1.421411,-0.311412,-0.093381,0.988863,-0.115859,-0.191071,2.035295,-0.258152,1.469968,1.067671,-0.357377
1,9,dws,-0.575516,-1.423473,-0.233990,-0.079881,0.989168,-0.123124,0.521395,-0.011679,0.599752,0.026322,0.183898,0.447267
2,9,dws,-0.543335,-1.422046,-0.182984,-0.076608,0.988957,-0.126840,0.055017,-1.245934,0.105626,-0.044059,0.069148,0.199380
3,9,dws,-0.574607,-1.422407,-0.192469,-0.080342,0.989011,-0.124083,-0.103405,-0.865276,-0.111749,-0.116756,-0.054013,-0.037539
4,9,dws,-0.613945,-1.422876,-0.222563,-0.084892,0.989080,-0.120448,-0.198397,-0.256292,-0.207197,-0.174278,-0.123067,0.220179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57131,9,wlk,0.890518,-1.300928,0.089878,0.207249,0.963806,-0.167690,-0.920226,0.406762,-0.383194,0.044079,-0.204574,0.204906
57132,9,wlk,0.929499,-1.319499,0.124063,0.199247,0.968591,-0.148750,-0.923594,0.231559,-0.521862,0.066348,-0.222604,0.225136
57133,9,wlk,0.966103,-1.340284,0.159858,0.187951,0.973550,-0.129883,-0.970054,0.003083,-0.621238,-0.005959,-0.054040,0.240632
57134,9,wlk,1.005158,-1.362495,0.201540,0.174576,0.978384,-0.110827,-1.005371,-0.108297,-0.708937,-0.007767,-0.003927,0.194735


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
