In [147]:

%matplotlib inline
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy import stats
warnings.filterwarnings('ignore')
import os
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import glob
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, Dropout, Conv2D  ,MaxPooling2D
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.optimizers import Adam
import tensorflow as tf


### Preparing Data

In [16]:
respeck_filepaths = glob.glob("../Respeck/*")
df1 = pd.DataFrame()
for rfp in respeck_filepaths:
    files = glob.glob(f"{rfp}/*")
    
    for file in files:
        # [main_act,sub_act] = file.split(".csv")[0].split('_')[-2:]
        main_activity = " ".join(file.split(".csv")[0].split('_')[-2:])
        
        df = pd.read_csv(file,index_col=0)
        df['activity'] = main_activity
        df['user'] = rfp.split('\\')[-1]
        # print(df)
        df1 = df1.append(df)


In [17]:
df1['activity'] = df1['activity'].apply(lambda x: x.replace('standing','sitting/standing'))
df1['activity'] = df1['activity'].apply(lambda x: x.replace('sitting ','sitting/standing '))
df1

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,activity,user
0,1697605965,0.011963,-0.855774,-0.029846,-9.625000,-0.859375,-5.656250,ascending breathingNormal,s1
1,1697606005,-0.001709,-0.826233,-0.036194,0.359375,4.953125,-1.656250,ascending breathingNormal,s1
2,1697606045,-0.058838,-0.933899,-0.032532,3.437500,9.406250,-2.031250,ascending breathingNormal,s1
3,1697606085,-0.002441,-1.115051,-0.028870,3.078125,6.921875,-5.984375,ascending breathingNormal,s1
4,1697606125,-0.036621,-1.035217,-0.076477,4.328125,8.125000,0.625000,ascending breathingNormal,s1
...,...,...,...,...,...,...,...,...,...
755,1697636165,-0.206299,-0.975891,0.181335,0.859375,-1.125000,-0.625000,sitting/standing talking,s98
756,1697636205,-0.196289,-0.944153,0.163513,0.812500,0.390625,0.203125,sitting/standing talking,s98
757,1697636245,-0.194824,-0.959290,0.168640,1.390625,0.421875,0.312500,sitting/standing talking,s98
758,1697636285,-0.191406,-0.976868,0.154480,0.546875,0.859375,-0.406250,sitting/standing talking,s98


In [18]:
columns = ['user','activity','timestamp', 'accel_x', 'accel_y', 'accel_z']
# df1 = df1[columns]
df_har = df1[columns]
# removing null values
df_har = df_har.dropna()
df_har.shape
# transforming the user to float
df_har['user'] = df_har['user'].str.replace('s', '')
df_har['user'] = df_har['user'].apply(lambda x:int(x))


In [13]:
classes = ['lyingBack breathingNormal', 'lyingBack coughing',
       'lyingBack hyperventilating', 'lyingBack laughing',
       'lyingBack singing', 'lyingBack talking',
       'lyingLeft breathingNormal', 'lyingLeft coughing',
       'lyingLeft hyperventilating', 'lyingLeft laughing',
       'lyingLeft singing', 'lyingLeft talking',
       'lyingRight breathingNormal', 'lyingRight coughing',
       'lyingRight hyperventilating', 'lyingRight laughing',
       'lyingRight singing', 'lyingRight talking',
       'lyingStomach breathingNormal', 'lyingStomach coughing',
       'lyingStomach hyperventilating', 'lyingStomach laughing',
       'lyingStomach singing', 'lyingStomach talking',
       'sitting/standing breathingNormal', 'sitting/standing coughing',
       'sitting/standing eating', 'sitting/standing hyperventilating',
       'sitting/standing laughing', 'sitting/standing singing',
       'sitting/standing talking']


df_har = df_har[df_har['activity'].isin(classes)] 

In [26]:
df_har.to_csv('task3.csv',index=False)

In [115]:
# ONLY RUN THIS AFTER CSV GENERATION
all_df = pd.read_csv('task3.csv')
all_df

Unnamed: 0,user,activity,timestamp,accel_x,accel_y,accel_z
0,1,lyingBack breathingNormal,1697605965,-0.596436,-0.181213,0.870056
1,1,lyingBack breathingNormal,1697606005,-0.593994,-0.189026,0.880310
2,1,lyingBack breathingNormal,1697606045,-0.591064,-0.172668,0.883484
3,1,lyingBack breathingNormal,1697606085,-0.580322,-0.176819,0.875671
4,1,lyingBack breathingNormal,1697606125,-0.598145,-0.189758,0.882263
...,...,...,...,...,...,...
2031239,98,sitting/standing talking,1697636165,-0.206299,-0.975891,0.181335
2031240,98,sitting/standing talking,1697636205,-0.196289,-0.944153,0.163513
2031241,98,sitting/standing talking,1697636245,-0.194824,-0.959290,0.168640
2031242,98,sitting/standing talking,1697636285,-0.191406,-0.976868,0.154480


### Segment Generation

In [178]:
random_seed = 42   
n_time_steps = 50 
n_features = 3 
step = 5
n_epochs = 20      
batch_size = 32

In [179]:
def segments_overlap(data):
    segments = []
    labels = []

    for i in range(0,  all_df.shape[0]- n_time_steps, step):  

        xs = all_df['accel_x'].values[i: i + n_time_steps]

        ys = all_df['accel_y'].values[i: i + n_time_steps]

        zs = all_df['accel_z'].values[i: i + n_time_steps]

        label = stats.mode(all_df['activity'][i: i + n_time_steps])[0][0]

        segments.append([xs, ys, zs])

        labels.append(label)
        
    reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, n_time_steps, n_features)
    labels = np.asarray(labels).reshape(-1,1)

    enc = OneHotEncoder(handle_unknown='ignore').fit(labels)
    labels = enc.transform(labels).toarray()
    # labels = np.asarray(pd.get_dummies(labels), dtype = np.float32)
    # print(enc.categories_)
    return reshaped_segments,labels,enc.categories_

In [180]:
def segments_no_overlap(data):
    segments= []
    labels = []
    
    activities = data['activity'].unique()
    for act in activities:
        act_data = data[data['activity'] == act]
        
        # for i in range(0, len(all_data) - n_time_steps, step):
        for i in range(0,  act_data.shape[0]- n_time_steps, step):  

            xs = act_data['accel_x'].values[i: i + n_time_steps]
            ys = act_data['accel_y'].values[i: i + n_time_steps]
            zs = act_data['accel_z'].values[i: i + n_time_steps]

            segments.append([xs, ys, zs])
            labels.append(act)

    #reshape the segments which is (list of arrays) to a list
    reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, n_time_steps, n_features)
    labels_dummies = np.asarray(pd.get_dummies(labels), dtype = np.float32)
    
    return reshaped_segments,labels

### Model Training

In [181]:
from sklearn.model_selection import train_test_split

def test_train_split(seg,labls):    
    X_train, X_test, y_train, y_test = train_test_split(seg, labls, test_size = 0.2, random_state = random_seed)
    return X_train, X_test, y_train, y_test

In [182]:
def model_LSTM(X_train,y_train):
    model = Sequential()
    # RNN layer
    model.add(LSTM(units = 128, input_shape = (X_train.shape[1], X_train.shape[2])))
    # Dropout layer
    model.add(Dropout(0.5)) 
    # Dense layer with ReLu
    model.add(Dense(units = 64, activation='relu'))
    # Softmax layer
    model.add(Dense(y_train.shape[1], activation = 'softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.summary()
    return model 

In [183]:
def model_cnn(trainX, trainy):
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=n_epochs, batch_size=batch_size, verbose=1)
    # evaluate model
    return model

In [184]:
def model_cnn2(trainX, trainy):
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_time_steps,n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=20, batch_size=128, verbose=1)
    # evaluate model
    return model

In [185]:
def model_cnn3(trainX,trainy):
    model = Sequential()
    model.add(Conv2D(16, (2, 2), activation = 'relu', input_shape = (n_time_steps,n_features)))
    model.add(Dropout(0.1))

    model.add(Conv2D(32, (2, 2), activation='relu'))
    model.add(Dropout(0.2))

    model.add(Flatten())

    model.add(Dense(64, activation = 'relu'))
    model.add(Dropout(0.5))

    model.add(Dense(44, activation='softmax'))
    
    model.compile(optimizer=Adam(learning_rate = 0.001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
    model.fit(trainX, trainy, epochs =10, batch_size=32, verbose=1)
    return model

### Driver Code

In [186]:
accuracies = []
for user in all_df['user'].unique():
    
    train_df = all_df[all_df['user'] != user]
    test_df = all_df[all_df['user'] == user]

    X_train, y_train, categories = segments_overlap(train_df)
    X_test, y_test, _ = segments_overlap(test_df)
    
    # model = model_LSTM(X_train,y_train)
    # history = model.fit(X_train, y_train, epochs = n_epochs, validation_split = 0.20, batch_size = batch_size, verbose = 1)
    
    # model = model_cnn2(X_train,y_train)
    
    # loss, accuracy = model.evaluate(X_test, y_test, batch_size = batch_size, verbose = 1)
    # print(f"Test Accuracy ({user}):", accuracy)
    # print(f"Test Loss ({user}):", loss)
    # accuracies.append((user,loss,accuracy))
    
    model = model_cnn(X_train,y_train)
    
    loss, accuracy = model.evaluate(X_test, y_test, batch_size = batch_size, verbose = 1)
    print(f"Test Accuracy ({user}):", accuracy)
    print(f"Test Loss ({user}):", loss)

    accuracies.append((user,loss,accuracy))
    
    break
    

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Accuracy (1): 0.7644686698913574
Test Loss (1): 0.6262620091438293


In [187]:
categories

[array(['lyingBack breathingNormal', 'lyingBack coughing',
        'lyingBack hyperventilating', 'lyingBack laughing',
        'lyingBack singing', 'lyingBack talking',
        'lyingLeft breathingNormal', 'lyingLeft coughing',
        'lyingLeft hyperventilating', 'lyingLeft laughing',
        'lyingLeft singing', 'lyingLeft talking',
        'lyingRight breathingNormal', 'lyingRight coughing',
        'lyingRight hyperventilating', 'lyingRight laughing',
        'lyingRight singing', 'lyingRight talking',
        'lyingStomach breathingNormal', 'lyingStomach coughing',
        'lyingStomach hyperventilating', 'lyingStomach laughing',
        'lyingStomach singing', 'lyingStomach talking',
        'sitting/standing breathingNormal', 'sitting/standing coughing',
        'sitting/standing eating', 'sitting/standing hyperventilating',
        'sitting/standing laughing', 'sitting/standing singing',
        'sitting/standing talking'], dtype='<U33')]

In [188]:
# Convert the model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the model.
with open(f'../models/cnn_model_task{3}_{n_time_steps}_{n_features}.tflite', 'wb') as f:
  f.write(tflite_model)



INFO:tensorflow:Assets written to: C:\Users\chhal\AppData\Local\Temp\tmpha2lg5ic\assets


INFO:tensorflow:Assets written to: C:\Users\chhal\AppData\Local\Temp\tmpha2lg5ic\assets
