<div class="alert" style="background-color:#29C5F6; color:white; padding:0px 10px; border-radius:5px;">
    <h1 style='margin:15px 15px; color:#000000; font-size:32px'><b>Data Generation (Processing)</b></h1>
        <h2 style='margin:15px 15px; color:#000000; font-size:24px'>Human Activity Recognition Problem</h2>
            <div style='color:#000000'>
                <ul>
                  <li>WISDM - WIreless Sensor Data Mining</li>
                  <li>UCI HAR - Human Activity Recognition using Smartphones at UCI</li>
                  <li><b>MotionSense</b></li>
                </ul>
            </div>
</div>

This work is part of the **Master's thesis** by **Chau Tran**, supervised by **Prof. Roland Olsson**.

<div class="alert" style="background-color:#29C5F6; border-radius:5px; padding:0px 10px; "><h3 style='margin:15px 15px'>6_3. MotionSense</h3></div>
<div>
    <p>
        Source: <a href="https://www.cis.fordham.edu/wisdm/dataset.php#activityprediction">https://www.cis.fordham.edu/wisdm/dataset.php#activityprediction</a> <br>
        Raw data format: <b>[user],[activity],[timestamp],[x-acceleration],[y-accel],[z-accel]</b> <br>
        Number of samples for non-hand-oriented activities (6 activities): <b>1,098,207</b><br>
    </p> 
    <ul>
      <li>Walking:  424,400</li>
      <li>Jogging:  342,177</li>
      <li>UpStairs: 122,869</li>
      <li>Sitting: 100,427</li>
      <li>Standing:  59,939</li>
      <li><b>LyingDown: 48,395</b></li>
    </ul> 
    <p>Fields:<br></p>
    <ul>
      <li>user: 1..36</li>
      <li>activity: {Walking, Jogging, Sitting, Standing, Up/Downstairs, <b>LyingDown</b>}</li>
      <li>timestamp: microsecond (Unix Time)</li>
      <li>x-accel: floating-point values between -20 .. 20</li>
      <li>y-accel: floating-point values between -20 .. 20</li>
      <li>z-accel: floating-point values between -20 .. 20</li>
    </ul>
    <p> The x-accel field is the acceleration in the x direction as measured by the Android phone's accelerometer. A value of 10 = 1g = 9.81 m/s^2, and 0 = no acceleration. The recorded acceleration includes gravitational acceleration toward the center of the Earth, so that when the phone is at rest on a flat surface the vertical axis will register ±10. <br></p>
</div>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, RobustScaler
import sys

# Window lengths (number of rows per segment) for which datasets are generated.
TIME_STEPS_arr = [90, 60, 50, 40]
# For each window length, generate one dataset with a stride derived from half
# the window length (True) and one with a stride equal to the window length (False).
isSTEPS_arr = [True, False]
# Fraction of every (user, activity) group passed to divideData_perUser as the
# training share; the remainder becomes the validation share.
SPLIT = 0.5

def divideData_perUser(data, per=0.5):
    """Split ``data`` into train/validation parts per user and per activity.

    Users are visited in sorted order, and for each user the activities are
    visited in sorted order. For every (user, activity) group with ``n`` rows,
    the first ``int(n * per)`` rows go to the training frame and the remaining
    rows go to the validation frame. Rows keep their original index labels and
    their original relative order inside a group.

    Args:
        data: DataFrame containing at least the columns ``'user'`` and
            ``'activity'``.
        per: Fraction of each (user, activity) group assigned to training.

    Returns:
        A tuple ``(X_df, train_df, val_df)`` holding all rows regrouped by
        user and activity, the training rows, and the validation rows. A
        frame that receives no rows is returned empty, with the columns of
        ``data``.
    """

    def _concat_or_empty(parts):
        # pd.concat raises on an empty list, so fall back to an empty frame
        # that still carries the input's columns.
        non_empty = [part for part in parts if len(part)]
        if not non_empty:
            return data.iloc[0:0].copy()
        return pd.concat(non_empty)

    all_parts = []
    train_parts = []
    val_parts = []
    for user in np.unique(data['user']):
        dataPerUser = data[data['user'] == user]
        for tag in np.unique(dataPerUser['activity']):
            dataPerActivity = dataPerUser[dataPerUser['activity'] == tag]
            cut = int(len(dataPerActivity) * per)
            train_parts.append(dataPerActivity.iloc[:cut])
            val_parts.append(dataPerActivity.iloc[cut:])
            all_parts.append(dataPerActivity)

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0,
    # and appending inside the loop re-copied the growing frame every time.
    return (
        _concat_or_empty(all_parts),
        _concat_or_empty(train_parts),
        _concat_or_empty(val_parts),
    )

# Utils functions for segmenting windows
def windows(data, window_size, step):
    """Yield ``(start, end)`` bounds of successive sliding windows.

    Window starts advance by ``step`` from 0 while they are smaller than
    ``data.count()``; ``end`` is always ``start + window_size`` and may
    therefore exceed the data length (the caller is expected to clip it).

    Args:
        data: Object exposing ``count()`` (e.g. a ``pandas.Series``), used
            only to determine how many positions are available.
        window_size: Length of every window.
        step: Distance between two consecutive window starts; must be > 0.

    Yields:
        Tuples ``(start, end)`` of ints.

    Raises:
        ValueError: If ``step`` is not positive (the loop would never end).
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")
    total = data.count()  # loop-invariant, so evaluate it only once
    start = 0
    while start < total:
        yield int(start), int(start + window_size)
        start += step
def segment_signal(data, window_size=90, step=40, columns=None):
    """Cut ``data`` into fixed-length windows per user and per activity.

    For every user and every activity of that user (both in sorted order),
    window bounds are taken from ``windows``. A window that would run past
    the last row of the group is re-anchored so that it ends exactly at the
    group's last row. Only windows containing exactly ``window_size`` rows
    are kept, so groups shorter than ``window_size`` contribute nothing.

    NOTE(review): when ``step < window_size`` several window starts can fall
    inside the last ``window_size`` rows of a group; each of them is
    re-anchored to the same tail window, so that window is emitted more than
    once. This behavior is kept unchanged here.

    Args:
        data: DataFrame with the columns ``'user'``, ``'activity'`` and every
            name listed in ``columns``.
        window_size: Number of rows per segment.
        step: Distance between consecutive window starts.
        columns: Names of the value columns copied into each segment
            (``None`` is treated as an empty list).

    Returns:
        A tuple ``(segments, labels)`` where ``segments`` has shape
        ``(n_segments, window_size, len(columns))`` and ``labels`` has shape
        ``(n_segments, 1)`` and holds the activity tag of each segment.
    """
    columns = [] if columns is None else list(columns)
    kept_windows = []
    kept_labels = []
    for user in np.unique(data['user']):
        userdata = data[data['user'] == user]
        for tag in np.unique(userdata['activity']):
            sub_class_data = userdata[userdata['activity'] == tag]
            n_rows = sub_class_data.shape[0]
            positions = pd.Series(sub_class_data.index.values)
            for start, end in windows(positions, window_size, step):
                if end > n_rows - 1:
                    # Re-anchor the window so it ends on the group's last row.
                    end = n_rows
                    start = end - window_size
                chunk = sub_class_data.iloc[start:end]
                if chunk.isnull().values.any():
                    # Diagnostic only: report the per-column count of missing values.
                    print(chunk.isnull().sum())
                if chunk.shape[0] == window_size:
                    kept_windows.append(chunk[columns].to_numpy())
                    kept_labels.append(tag)

    # Stack once at the end instead of growing the arrays inside the loop.
    # The float64 empty arrays keep the result dtypes of the incremental version.
    segments = np.empty((0, window_size, len(columns)))
    if kept_windows:
        segments = np.concatenate([segments, np.stack(kept_windows)])
    labels = np.append(np.empty((0)), kept_labels)
    return segments, labels.reshape(-1, 1)

# Directory holding the raw WISDM v1.1 file, and the value columns used as model inputs.
wisdmdataset_path = '../../../../Datasets/6_wisdm/WISDM_ar_v1.1/WISDM_ar_v1.1_source'
COLUMNS = ['x_axis', 'y_axis', 'z_axis']

rdf = pd.read_csv(
    f'{wisdmdataset_path}/WISDM_ar_v1.1_raw.txt',
    header=None,
    names=['user', 'activity', 'timestamp', 'x_axis', 'y_axis', 'z_axis'],
)
# Remove ';' characters from z_axis before converting it to float. The result
# is assigned back to the frame: an in-place replace on an attribute-accessed
# column is chained assignment and is not guaranteed to update ``rdf``.
rdf['z_axis'] = rdf['z_axis'].replace(regex=True, to_replace=r';', value=r'')
rdf[COLUMNS] = rdf[COLUMNS].astype(np.float64)
# Store the converted timestamps (the conversion result used to be discarded).
rdf['timestamp'] = rdf['timestamp'].astype(np.float64)
# Drop every row with at least one missing value.
rdf.dropna(axis=0, how='any', inplace=True)
# Replace the activity names by integer class ids.
rdf['activity'] = LabelEncoder().fit_transform(rdf['activity'])

# Per-user, per-activity split into all / train / validation rows.
X_df, train_df, val_df = divideData_perUser(rdf, SPLIT)

def _repeat_labels(one_hot, time_steps):
    """Repeat each one-hot row for every time step: (n, c) -> (n, time_steps, c)."""
    return np.tile(one_hot, time_steps).reshape(
        (one_hot.shape[0], time_steps, one_hot.shape[1])
    )


def _flatten_samples(segments, targets):
    """Join inputs and targets per time step and flatten every sample to one row."""
    return np.concatenate((segments, targets), axis=2).reshape((segments.shape[0], -1))


def _write_dataset(path, n_inputs, n_outputs, rows):
    """Write the ``n_inputs,n_outputs`` header line followed by the data rows."""
    with open(path, 'w') as csvfile:
        np.savetxt(csvfile, np.array([[n_inputs, n_outputs]]), fmt='%d', delimiter=",")
        np.savetxt(csvfile, rows, fmt='%.4f', delimiter=",")


for isSTEPS in isSTEPS_arr:
    for TIME_STEPS in TIME_STEPS_arr:
        # Overlapping stride: half the window rounded to a multiple of ten.
        # round() rounds halves to even, so 90 -> 40 and 50 -> 20.
        STEP = int(round(TIME_STEPS/2,-1)) if isSTEPS else TIME_STEPS
        print(TIME_STEPS, STEP)

        X, y = segment_signal(X_df, window_size=TIME_STEPS, step=STEP,columns=COLUMNS)
        X_train, y_train = segment_signal(train_df, window_size=TIME_STEPS, step=STEP,columns=COLUMNS)
        X_val, y_val = segment_signal(val_df, window_size=TIME_STEPS, step=STEP,columns=COLUMNS)

        # Fit ONE encoder on the labels of the full data and reuse it for all
        # splits. Encoders fitted per split would produce fewer or shifted
        # columns whenever a class is missing from a split.
        encoder = OneHotEncoder().fit(y)
        y_train = encoder.transform(y_train).toarray()
        y_val = encoder.transform(y_val).toarray()
        y = encoder.transform(y).toarray()

        # Values for the file header and file name, derived from the data
        # instead of being hard-coded as 3 and 6.
        n_inputs = len(COLUMNS)
        n_outputs = y.shape[1]

        y_train = _repeat_labels(y_train, TIME_STEPS)
        y_val = _repeat_labels(y_val, TIME_STEPS)
        y = _repeat_labels(y, TIME_STEPS)

        df_train = _flatten_samples(X_train, y_train)
        df_val = _flatten_samples(X_val, y_val)
        df = _flatten_samples(X, y)

        print(X_train.shape, y_train.shape, df_train.shape)
        print(X_val.shape, y_val.shape, df_val.shape)
        print(X.shape, y.shape, df.shape)

        # File names keep the established pattern (including the "spit" key).
        out_prefix = f"{wisdmdataset_path}/../wisdm.ni={n_inputs}.no={n_outputs}.ts={TIME_STEPS}.os={STEP}"
        _write_dataset(f"{out_prefix}.spit={0}.all.csv", n_inputs, n_outputs, df)
        _write_dataset(f"{out_prefix}.spit={int(SPLIT*100)}.train.csv", n_inputs, n_outputs, df_train)
        _write_dataset(f"{out_prefix}.spit={int(SPLIT*100)}.val.csv", n_inputs, n_outputs, df_val)