<a href="https://colab.research.google.com/github/Aniket-tempest/HAR-Employee-Identification/blob/main/HAR_DeepLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive in the Colab runtime; the dataset is read from paths
# under drive/MyDrive.
# NOTE(review): those paths are relative ('drive/MyDrive/...'), so they resolve
# only while the working directory is /content.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [103]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras import backend as K

In [None]:
# Seed the NumPy and TensorFlow random generators with a fixed value
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
from keras import backend as K
from keras.models import Sequential, load_model
from keras.layers import LSTM, TimeDistributed, Conv1D, MaxPooling1D, Flatten
from keras.layers.core import Dense, Dropout

In [35]:
# Dataset folder name; load_signals and load_y insert it after 'drive/MyDrive/'
DATADIR = 'UCI HAR Dataset'

In [36]:
# Raw inertial signal names (accelerometer and gyroscope, each along x, y, z).
# body_acc_*: accelerometer signal filtered to exclude the acceleration due
#             to gravity.
# total_acc_*: triaxial total acceleration from the accelerometer.
# The order of this list fixes the channel order of the arrays returned by
# load_signals.
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [37]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the signals
def load_signals(subset):
    """Read every signal file of `subset` and combine them into one array.

    One file is read per entry of SIGNALS, in list order. The per-signal
    arrays are then rearranged so that the signal index becomes the last axis.
    """
    per_signal = [
        _read_csv(
            f'drive/MyDrive/{DATADIR}/{subset}/Inertial Signals/{signal}_{subset}.txt'
        ).to_numpy()
        for signal in SIGNALS
    ]

    # Axes go from (signal, sample, timestep) to (sample, timestep, signal).
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(per_signal, (1, 2, 0))

In [38]:
def load_y(subset):
    """
    Load the activity labels of a subset as one-hot vectors.

    The objective that we are trying to predict is an integer, from 1 to 6,
    that represents a human activity. Each label is returned as a 6-element
    binary vector using One Hot Encoding
    (https://pandas.pydata.org/docs/reference/api/pandas.get_dummies.html).

    Args:
        subset: 'train' or 'test'; selects both the folder and the file suffix.

    Returns:
        numpy.ndarray of dtype uint8 with one row per sample and one column
        per distinct label value, columns in ascending label order.
    """
    filename = f'drive/MyDrive/{DATADIR}/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    # Pin the dtype: pandas >= 2.0 makes get_dummies return bool by default,
    # earlier versions returned uint8. An explicit dtype keeps the encoding
    # numeric and identical across pandas versions.
    return pd.get_dummies(y, dtype=np.uint8).to_numpy()

In [39]:
def load_data():
    """
    Assemble the full dataset from the per-subset signal and label files.

    Returns:
        (X_train, X_test, y_train, y_test), where the X arrays come from
        load_signals and the y arrays from load_y.
    """
    X_train = load_signals('train')
    X_test = load_signals('test')
    y_train = load_y('train')
    y_test = load_y('test')
    return X_train, X_test, y_train, y_test

In [45]:
# Initializing parameters
epochs = 30  # epochs for the first (LSTM) fit; reassigned before the CNN-LSTM fit
batch_size = 16  # batch size passed to both fit calls
n_hidden = 32  # LSTM units in both models; also the width of the CNN-LSTM hidden Dense layer

In [46]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [40]:
# Loading the train and test data
# X_* hold the stacked inertial signals, Y_* the one-hot activity labels
X_train, X_test, Y_train, Y_test = load_data()

In [47]:
# Derive the model dimensions from the loaded arrays
timesteps = X_train.shape[1]
input_dim = X_train.shape[2]
n_classes = _count_classes(Y_train)

print(timesteps, input_dim, len(X_train), n_classes, sep='\n')

128
9
7352
6


In [None]:
# Axes are (samples, timesteps, signals)
X_train.shape

(7352, 128, 9)

## LSTM

In [None]:
# Initializing the sequential model
model1 = Sequential()
# LSTM layer with n_hidden units reading windows of shape (timesteps, input_dim)
model1.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model1.add(Dropout(0.5))
# Adding a dense output layer with softmax activation (one unit per class)
model1.add(Dense(n_classes, activation='softmax'))
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                5376      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 6)                 198       
                                                                 
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compiling the model: categorical cross-entropy (targets are one-hot),
# Adam optimizer, accuracy reported as the metric
model1.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Training the model
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are test-set metrics rather than a separate hold-out.
model1.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f8f606d3040>

In [None]:
# Training Evaluation
print("LSTM Training Accuracy")
# evaluate() returns [loss, accuracy] for the loss/metrics given at compile time
score = model1.evaluate(X_train, Y_train)
print("Accuracy: ", score[1])
print("Loss: ", score[0])
# Confusion matrix: true class indices first, predicted class indices second
print(confusion_matrix(np.argmax(Y_train, axis=1), np.argmax(model1.predict(X_train), axis=1)))

LSTM Training Accuracy
Accuracy:  0.946273148059845
Loss:  0.11491026729345322
[[1226    0    0    0    0    0]
 [   1 1071    1    0    0    0]
 [   0    1  985    0    0    0]
 [   0    0    0 1028  258    0]
 [   0    2    0  132 1240    0]
 [   0    0    0    0    0 1407]]


In [None]:
# Testing
print("LSTM Testing Accuracy")
# evaluate() returns [loss, accuracy] for the loss/metrics given at compile time
score = model1.evaluate(X_test, Y_test)
print("Accuracy: ", score[1])
print("Loss: ", score[0])
# Confusion matrix: true class indices first, predicted class indices second
print(confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(model1.predict(X_test), axis=1)))

LSTM Testing Accuracy
Accuracy:  0.8988802433013916
Loss:  0.35367485880851746
[[462   6  24   0   4   0]
 [ 19 437  10   5   0   0]
 [  2   2 416   0   0   0]
 [  0   1   0 356 134   0]
 [  0   0   0  64 468   0]
 [  0  27   0   0   0 510]]


## CNN-LSTM

In [None]:
# Reset Keras' global state before building the second model
tf.keras.backend.clear_session()

In [48]:
# Split each 128-step window into n_steps sub-sequences of n_length steps
# (4 * 32 = 128) so the input becomes (samples, n_steps, n_length, input_dim).
# NOTE(review): this overwrites X_train/X_test with 4-D arrays, so the LSTM
# cells above (built for 3-D input) cannot be re-run afterwards without
# reloading the data.
n_steps, n_length = 4, 32
X_train = X_train.reshape((X_train.shape[0], n_steps, n_length, input_dim))
X_test = X_test.reshape((X_test.shape[0], n_steps, n_length, input_dim))

In [None]:
# define model
# CNN layers for feature extraction on input data combined with LSTMs to support sequence prediction
model2 = Sequential()
# CNN layers, each wrapped in TimeDistributed so it is applied to every
# sub-sequence; the first axis after the batch is left as None (variable length)
model2.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,input_dim)))
model2.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
model2.add(TimeDistributed(Dropout(0.5)))
model2.add(TimeDistributed(MaxPooling1D(pool_size=2)))
# Flatten each sub-sequence's feature map into one vector for the LSTM
model2.add(TimeDistributed(Flatten()))
# LSTM layer
model2.add(LSTM(n_hidden))
model2.add(Dropout(0.5))
# Hidden dense layer followed by the softmax output (one unit per class)
model2.add(Dense(n_hidden, activation='relu'))
model2.add(Dense(n_classes, activation='softmax'))
model2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, None, 30, 64)     1792      
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, None, 28, 64)     12352     
 tributed)                                                       
                                                                 
 time_distributed_2 (TimeDis  (None, None, 28, 64)     0         
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, None, 14, 64)     0         
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeDis  (None, None, 896)        0

In [None]:
# Same loss, optimizer and metric as the LSTM model
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Override the earlier value of 30: the CNN-LSTM is trained for 50 epochs
epochs = 50

In [None]:
# Training the model
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are test-set metrics rather than a separate hold-out.
model2.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f9f3a028e50>

In [None]:
# Training evaluation
print("CNN-LSTM Training Accuracy")
# evaluate() returns [loss, accuracy] for the loss/metrics given at compile time
score = model2.evaluate(X_train, Y_train)
print("Accuracy: ", score[1])
print("Loss: ", score[0])
# Confusion matrix: true class indices first, predicted class indices second
print(confusion_matrix(np.argmax(Y_train, axis=1), np.argmax(model2.predict(X_train), axis=1)))

CNN-LSTM Training Accuracy
Accuracy:  0.9951033592224121
Loss:  0.015014450065791607
[[1226    0    0    0    0    0]
 [   0 1073    0    0    0    0]
 [   0    0  986    0    0    0]
 [   0    0    0 1284    2    0]
 [   0    0    0   34 1340    0]
 [   0    0    0    0    0 1407]]


In [None]:
# Testing
print("CNN-LSTM Testing Accuracy")
# evaluate() returns [loss, accuracy] for the loss/metrics given at compile time
score = model2.evaluate(X_test, Y_test)
print("Accuracy: ", score[1])
print("Loss: ", score[0])
# Confusion matrix: true class indices first, predicted class indices second
print(confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(model2.predict(X_test), axis=1)))

CNN-LSTM Testing Accuracy
Accuracy:  0.910417377948761
Loss:  0.7681483626365662
[[493   0   3   0   0   0]
 [ 14 432  25   0   0   0]
 [ 23   2 395   0   0   0]
 [  1   3   0 393  94   0]
 [  2   0   0  70 460   0]
 [  0  27   0   0   0 510]]


In [None]:
# Per-class outputs of the CNN-LSTM for the test set (one row per sample).
# NOTE(review): the name y_pred is reused further down for arrays of class
# indices from the reloaded model.
y_pred = model2.predict(X_test)



### Classification Report: CNN-LSTM Model (Test Data)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Collapse one-hot targets and model outputs to class indices, then report
test_y = Y_test.argmax(axis=-1)
test_predictions = y_pred.argmax(axis=-1)
print(classification_report(test_y, test_predictions))

              precision    recall  f1-score   support

           0       0.92      0.99      0.96       496
           1       0.93      0.92      0.92       471
           2       0.93      0.94      0.94       420
           3       0.85      0.80      0.82       491
           4       0.83      0.86      0.85       532
           5       1.00      0.95      0.97       537

    accuracy                           0.91      2947
   macro avg       0.91      0.91      0.91      2947
weighted avg       0.91      0.91      0.91      2947



#### Save the Model

In [None]:
# Persist the trained CNN-LSTM to an HDF5 file in the current working directory
model2.save("cnn-lstm.h5")

## Testing the model with real-time data

In [None]:
# Loading the saved model
model = load_model("cnn-lstm.h5")

In [116]:
# Maps a predicted class index (argmax of the model output) to an activity name.
# NOTE(review): assumes the one-hot columns follow the dataset's label order
# 1..6 with these names — confirm against the dataset's label file.
class_labels = {
    0: "WALKING",
    1: "WALKING_UPSTAIRS",
    2: "WALKING_DOWNSTAIRS",
    3: "SITTING",
    4: "STANDING",
    5: "LAYING"
  }

#### Using recorded walking data

In [117]:
# Load the recorded walking session and keep a single 128-row window
# (rows 100-227, the same length as a training window) and columns 1-9,
# which drops the file's first column.
test = pd.read_csv("drive/MyDrive/UCI HAR Dataset/realtime/realtime-walk.csv")
test = test.iloc[100:228, 1:10]
print(test.shape)
test.head()

(128, 9)


Unnamed: 0,gFx,gFy,gFz,ax,ay,az,wx,wy,wz
100,-0.014,-0.2564,0.9831,0.0272,-0.0222,-0.0143,0.0003,-0.0027,-0.0
101,-0.0148,-0.2584,0.9831,0.0035,-0.0252,-0.0183,-0.0008,-0.0005,0.0011
102,-0.0209,-0.2645,0.9843,0.0024,0.0089,0.0396,-0.004,-0.0005,-0.0011
103,-0.0165,-0.2594,0.9885,0.0437,0.0259,-0.0222,-0.0008,0.0027,0.0011
104,-0.016,-0.2581,0.986,-0.0134,-0.0208,0.0129,-0.0008,0.0016,0.0011


In [118]:
# Overwrite each gF* column with (a* - gF*) and rename the results to ba*.
# NOTE(review): presumably meant to derive body acceleration — confirm that
# a* and gF* share the same units and that the sign is as intended.
# NOTE(review): not re-runnable: after the rename the gF* columns no longer
# exist, so a second execution raises a KeyError.
test['gFx'] = test['ax'] - test['gFx']
test['gFy'] = test['ay'] - test['gFy']
test['gFz'] = test['az'] - test['gFz']
test.rename(columns = {'gFx' : 'bax', 'gFy' : 'bay', 'gFz' : 'baz'}, inplace = True)
test.head()

Unnamed: 0,bax,bay,baz,ax,ay,az,wx,wy,wz
100,0.0412,0.2342,-0.9974,0.0272,-0.0222,-0.0143,0.0003,-0.0027,-0.0
101,0.0183,0.2332,-1.0014,0.0035,-0.0252,-0.0183,-0.0008,-0.0005,0.0011
102,0.0233,0.2734,-0.9447,0.0024,0.0089,0.0396,-0.004,-0.0005,-0.0011
103,0.0602,0.2853,-1.0107,0.0437,0.0259,-0.0222,-0.0008,0.0027,0.0011
104,0.0026,0.2373,-0.9731,-0.0134,-0.0208,0.0129,-0.0008,0.0016,0.0011


In [119]:
# Rescale every column of this window using the window's own min and max.
# NOTE(review): the training arrays were passed to fit() without any scaling,
# and the column order here (ba*, a*, w*) differs from SIGNALS
# (body_acc, body_gyro, total_acc) — confirm the input matches what the
# model was trained on.
scaler = MinMaxScaler()
t = scaler.fit_transform(test)

In [120]:
# One sample split into 4 sub-sequences of 32 steps with 9 channels,
# the same layout the CNN-LSTM training data was reshaped to
t = t.reshape(1, 4, 32, 9)

In [121]:
# Sanity check of the reshaped input
t.shape

(1, 4, 32, 9)

In [122]:
# Index of the largest model output for the single sample, mapped to its label
y_pred = model.predict(t).argmax(axis=-1)
print("The predicted class is: ", y_pred, class_labels[y_pred[0]])

The predicted class is:  [0] WALKING


#### Using recorded standing data

In [123]:
# Load the recorded standing session and keep a single 128-row window
# (rows 100-227, the same length as a training window) and columns 1-9,
# which drops the file's first column.
test = pd.read_csv("drive/MyDrive/UCI HAR Dataset/realtime/realtime-stand.csv")
test = test.iloc[100:228, 1:10]
print(test.shape)
test.head()

(128, 9)


Unnamed: 0,gFx,gFy,gFz,ax,ay,az,wx,wy,wz
100,-0.3506,-0.8365,0.2053,-0.2149,0.4639,0.7502,0.0557,-1.6089,0.4957
101,-0.0436,-0.6935,0.2427,4.068,2.0821,3.6742,-0.2607,-2.1128,0.4435
102,-0.3103,-0.9653,0.0992,-1.5046,0.178,-1.564,0.3742,-0.5128,-0.4651
103,-0.5418,-1.0237,0.3525,-2.1094,-0.6024,2.683,0.666,3.4266,-0.7868
104,-0.8933,-0.9548,0.0308,-4.7839,-0.3658,-1.3908,0.0844,-1.2638,-0.0678


In [124]:
# Overwrite each gF* column with (a* - gF*) and rename the results to ba*.
# NOTE(review): presumably meant to derive body acceleration — confirm that
# a* and gF* share the same units and that the sign is as intended.
# NOTE(review): not re-runnable: after the rename the gF* columns no longer
# exist, so a second execution raises a KeyError.
test['gFx'] = test['ax'] - test['gFx']
test['gFy'] = test['ay'] - test['gFy']
test['gFz'] = test['az'] - test['gFz']
test.rename(columns = {'gFx' : 'bax', 'gFy' : 'bay', 'gFz' : 'baz'}, inplace = True)
test.head()

Unnamed: 0,bax,bay,baz,ax,ay,az,wx,wy,wz
100,0.1357,1.3004,0.5449,-0.2149,0.4639,0.7502,0.0557,-1.6089,0.4957
101,4.1116,2.7756,3.4315,4.068,2.0821,3.6742,-0.2607,-2.1128,0.4435
102,-1.1943,1.1433,-1.6632,-1.5046,0.178,-1.564,0.3742,-0.5128,-0.4651
103,-1.5676,0.4213,2.3305,-2.1094,-0.6024,2.683,0.666,3.4266,-0.7868
104,-3.8906,0.589,-1.4216,-4.7839,-0.3658,-1.3908,0.0844,-1.2638,-0.0678


In [125]:
# Rescale every column of this window using the window's own min and max.
# NOTE(review): the training arrays were passed to fit() without any scaling,
# and the column order here (ba*, a*, w*) differs from SIGNALS
# (body_acc, body_gyro, total_acc) — confirm the input matches what the
# model was trained on.
scaler = MinMaxScaler()
t = scaler.fit_transform(test)

In [126]:
# One sample split into 4 sub-sequences of 32 steps with 9 channels,
# the same layout the CNN-LSTM training data was reshaped to
t = t.reshape(1, 4, 32, 9)

In [127]:
# Sanity check of the reshaped input
t.shape

(1, 4, 32, 9)

In [129]:
# Index of the largest model output for the single sample, mapped to its label
y_pred = model.predict(t).argmax(axis=-1)
print("The predicted class is: ", y_pred, class_labels[y_pred[0]])

The predicted class is:  [4] STANDING
