In [14]:

%matplotlib inline
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy import stats
warnings.filterwarnings('ignore')
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import glob
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

### Preparing Data

In [None]:
respeck_filepaths = glob.glob("../Respeck/*")
df1 = pd.DataFrame()
for rfp in respeck_filepaths:
    files = glob.glob(f"{rfp}/*")
    
    for file in files:
        [main_act,sub_act] = file.split(".csv")[0].split('_')[-2:]
        # main_activity = file.split(".csv")[0].split('_')[-2]
        
        df = pd.read_csv(file,index_col=0)
        df['activity'] = main_act
        df['sub_activity'] = sub_act
        df['user'] = rfp.split('\\')[-1]
        # print(df)
        df1 = df1.append(df)

df1 = df1[df1['sub_activity'] == 'breathingNormal']     
df1.loc[df1['activity'].isin(('sitting', 'standing')),'activity'] = 'sitting_standing'

In [None]:
columns = ['user','activity','timestamp', 'accel_x', 'accel_y', 'accel_z']
# df1 = df1[columns]
df_har = df1[columns]
# removing null values
df_har = df_har.dropna()
df_har.shape
# transforming the user to float
df_har['user'] = df_har['user'].str.replace('s', '')
df_har['user'] = df_har['user'].apply(lambda x:int(x))


In [None]:
df_har.to_csv('task1.csv',index=False)

In [6]:
# ONLY RUN THIS AFTER CSV GENERATION
general_act_df = pd.read_csv('task1.csv')
general_act_df

Unnamed: 0,user,activity,timestamp,accel_x,accel_y,accel_z
0,1,ascending,1697605965,0.011963,-0.855774,-0.029846
1,1,ascending,1697606005,-0.001709,-0.826233,-0.036194
2,1,ascending,1697606045,-0.058838,-0.933899,-0.032532
3,1,ascending,1697606085,-0.002441,-1.115051,-0.028870
4,1,ascending,1697606125,-0.036621,-1.035217,-0.076477
...,...,...,...,...,...,...
639844,98,sitting_standing,1697636165,-0.057617,-0.985901,0.050232
639845,98,sitting_standing,1697636205,-0.061523,-0.980042,0.061951
639846,98,sitting_standing,1697636245,-0.067627,-1.000793,0.043396
639847,98,sitting_standing,1697636285,-0.057617,-0.976379,0.068054


### Segment Generation

In [7]:
random_seed = 42   
n_time_steps = 50 
n_features = 3 
step = 10
n_epochs = 20      
batch_size = 32

In [8]:
def segments_overlap(data):
    segments = []
    labels = []

    for i in range(0,  data.shape[0]- n_time_steps, step):  

        xs = data['accel_x'].values[i: i + n_time_steps]

        ys = data['accel_y'].values[i: i + n_time_steps]

        zs = data['accel_z'].values[i: i + n_time_steps]

        label = stats.mode(data['activity'][i: i + 50])[0][0]

        segments.append([xs, ys, zs])

        labels.append(label)
        
    reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, n_time_steps, n_features)
    labels = np.asarray(pd.get_dummies(labels), dtype = np.float32)
    
    return reshaped_segments,labels

In [9]:
def segments_no_overlap(data):
    segments= []
    labels = []
    
    activities = data['activity'].unique()
    for act in activities:
        act_data = data[data['activity'] == act]
        
        # for i in range(0, len(all_data) - n_time_steps, step):
        for i in range(0,  act_data.shape[0]- n_time_steps, step):  

            xs = act_data['accel_x'].values[i: i + n_time_steps]
            ys = act_data['accel_y'].values[i: i + n_time_steps]
            zs = act_data['accel_z'].values[i: i + n_time_steps]

            segments.append([xs, ys, zs])
            labels.append(act)

    #reshape the segments which is (list of arrays) to a list
    reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, n_time_steps, n_features)

    labels = np.asarray(pd.get_dummies(labels), dtype = np.float32)
    
    return reshaped_segments,labels

### Model Training

In [10]:
from sklearn.model_selection import train_test_split

def test_train_split(seg,labls):    
    X_train, X_test, y_train, y_test = train_test_split(seg, labls, test_size = 0.2, random_state = random_seed)
    return X_train, X_test, y_train, y_test

In [11]:
def model_LSTM(X_train,y_train):
    model = Sequential()
    # RNN layer
    model.add(LSTM(units = 128, input_shape = (X_train.shape[1], X_train.shape[2])))
    # Dropout layer
    model.add(Dropout(0.5)) 
    # Dense layer with ReLu
    model.add(Dense(units = 64, activation='relu'))
    # Softmax layer
    model.add(Dense(y_train.shape[1], activation = 'softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.summary()
    return model 

In [15]:
def model_cnn(trainX, trainy):
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=n_epochs, batch_size=batch_size, verbose=1)
    # evaluate model
    return model

### Driver Code

In [16]:
accuracies = []
for user in general_act_df['user'].unique():
    
    train_df = general_act_df[general_act_df['user'] != user]
    test_df = general_act_df[general_act_df['user'] == user]

    X_train, y_train = segments_overlap(train_df)
    X_test, y_test = segments_overlap(test_df)
    
    # model = model_LSTM(X_train,y_train)
    # history = model.fit(X_train, y_train, epochs = n_epochs, validation_split = 0.20, batch_size = batch_size, verbose = 1)
    model  = model_cnn(X_train,y_train)
    
    loss, accuracy = model.evaluate(X_test, y_test, batch_size = batch_size, verbose = 1)
    print(f"Test Accuracy ({user}):", accuracy)
    print(f"Test Loss ({user}):", loss)
    
    accuracies.append((user,loss,accuracy))
    break
    

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Accuracy (1): 0.9746416807174683
Test Loss (1): 0.07231275737285614
