### Mounted the google-drive with colab notebook to access the files/folder

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Building LSTM-model

In [3]:
# importing libraries
import numpy as np
import pandas as pd 

In [4]:
# Here activies are the class labels
# It is a 6 class classification

Activities = {
    0:'WALKING',
    1:'WALKING_UPSTAIRS',
    2:'WALKING_DOWNSTAIRS',
    3:'SITTING',
    4:'STANDING',
    5:'LAYING'
}

### Utility function to print Confusion matrix

In [5]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([Activities[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([Activities[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred,rownames=["Actual_value"], colnames=["Predicted_value"])
    

In [6]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration

SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

### Utility functions to read data from csv_files

In [26]:
# this function will read the text file 
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True,header=None)

# this function will load the different text files from Inertial signals
def load_signals(subject):
    signals_data = []

    for signal in SIGNALS:
        filename = f'/content/drive/MyDrive/UCI_HAR_Dataset/{subject}/Inertial Signals/{signal}_{subject}.txt'
        signals_data.append(_read_csv(filename).to_numpy())

    # Transpose is used to change the dimenstionality of data 
    # Aggregating the signal by the sample of times-steps
    # Resultant shape is 3-D sample, 128-timesteps, 9-classes

    return np.transpose(signals_data, (1,2,0))

In [25]:
# this funtion will load the file for class labels
def load_y(subject):
    """
    The objective that we are trying to predict is an integer, from 1 to 6 that represent the respective classes 
    We return a binary representation of every sample of activity as a 6 bit vector
    using One-Hot Encoding 
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
     """

    filename = f'/content/drive/MyDrive/UCI_HAR_Dataset/{subject}/y_{subject}.txt'
    y = _read_csv(filename)[0]
    return pd.get_dummies(y).to_numpy()

In [9]:
# load the data
# obtain the data from multi-files and return X-train, X_test and y_train, y_test

def load_data():
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

# Importing libraries 

In [10]:
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

In [16]:
# make sure that we are using only cpu not gpu

import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"    

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1858319653676525184
xla_global_id: -1
]


In [15]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout

In [37]:
# Initializing the parameters
epoch = 30
batch_size = 32
n_hidden = 64

In [18]:
def count_classes(y):
    return len(set([tuple(category) for category in y]))
    

In [27]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

In [29]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [31]:
print(X_train)

[[[ 1.808515e-04  1.076681e-02  5.556068e-02 ...  1.012817e+00
   -1.232167e-01  1.029341e-01]
  [ 1.013856e-02  6.579480e-03  5.512483e-02 ...  1.022833e+00
   -1.268756e-01  1.056872e-01]
  [ 9.275574e-03  8.928878e-03  4.840473e-02 ...  1.022028e+00
   -1.240037e-01  1.021025e-01]
  ...
  [-1.147484e-03  1.714439e-04  2.647864e-03 ...  1.018445e+00
   -1.240696e-01  1.003852e-01]
  [-2.222655e-04  1.574181e-03  2.381057e-03 ...  1.019372e+00
   -1.227451e-01  9.987355e-02]
  [ 1.575500e-03  3.070189e-03 -2.269757e-03 ...  1.021171e+00
   -1.213260e-01  9.498741e-02]]

 [[ 1.093752e-03 -4.687588e-03 -2.685954e-02 ...  1.018851e+00
   -1.239760e-01  9.792958e-02]
  [ 4.550077e-03 -7.487894e-03 -2.509841e-02 ...  1.022380e+00
   -1.268078e-01  9.935086e-02]
  [ 2.879173e-03 -8.429991e-03 -2.597534e-02 ...  1.020781e+00
   -1.277862e-01  9.811381e-02]
  ...
  [-4.646144e-03 -5.479850e-03 -2.996671e-04 ...  1.014788e+00
   -1.290268e-01  9.353520e-02]
  [-2.941333e-03 -2.987481e-03 -4.88

In [49]:
# Initializing the sequential model
# initializer = tf.keras.initializer.he_normal
model = Sequential()
# configure the parameters
model.add(LSTM(64, input_shape = (timesteps, input_dim),return_sequences=True)) # layer 0

model.add(LSTM(32)) # layer 1

model.add(Dropout(0.5)) # adding a dropout layer

model.add(Dense(n_classes, activation = 'sigmoid'))
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_7 (LSTM)               (None, 128, 64)           18944     
                                                                 
 lstm_8 (LSTM)               (None, 32)                12416     
                                                                 
 dropout_4 (Dropout)         (None, 32)                0         
                                                                 
 dense_4 (Dense)             (None, 6)                 198       
                                                                 
Total params: 31,558
Trainable params: 31,558
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Compiling the model
model.compile(loss='categorical_crossentropy', optimizer = 'rmsprop',metrics=['accuracy'])

In [51]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epoch)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f3665ed1f50>

In [52]:
# confusion matrix (cross-table form)
Y_pred = model.predict(X_test)
print(confusion_matrix(Y_test, Y_pred))

Predicted_value     LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
Actual_value                                                                 
LAYING                 510        0         0        0                   0   
SITTING                  0      378       108        0                   0   
STANDING                 0       74       456        1                   0   
WALKING                  0        0         0      461                  34   
WALKING_DOWNSTAIRS       0        0         0        1                 419   
WALKING_UPSTAIRS         0        0         0       11                  19   

Predicted_value     WALKING_UPSTAIRS  
Actual_value                          
LAYING                            27  
SITTING                            5  
STANDING                           1  
WALKING                            1  
WALKING_DOWNSTAIRS                 0  
WALKING_UPSTAIRS                 441  


In [53]:
score = model.evaluate(X_test, Y_test)



In [54]:
print(score)

[0.48200029134750366, 0.9043094515800476]


### key take-away:
 __with this simple three layers of model we achieve 90.43%__
 
 __We can further improve the accuracy using hyperparameter tunnings__