In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
path='./drive/My Drive/HumanActivityRecognition.zip'

In [0]:
import zipfile
zip_ref = zipfile.ZipFile(path, 'r')
zip_ref.extractall()
zip_ref.close()

In [0]:
import pandas as pd
import numpy as np

In [0]:

ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [0]:
DATADIR = './HAR/UCI_HAR_Dataset'

In [0]:

SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [0]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename =f'./HAR/UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).as_matrix()
        ) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [0]:

def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'./HAR/UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).as_matrix()

In [0]:
def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [0]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [0]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [0]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

Using TensorFlow backend.


In [0]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout

In [0]:
# Initializing parameters
epochs = 30
batch_size = 16
n_hidden = 32

In [0]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [0]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

In [0]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


<h1>Hyperparameter Tuning For a single layer LSTM</h1>

In [0]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [0]:
def create_model(n_hidden,rate):
  model = Sequential()
  model.add(LSTM(n_hidden,recurrent_dropout=0.3, input_shape=(timesteps, input_dim)))
  model.add(Dropout(rate))
  model.add(Dense(n_classes, activation='sigmoid'))
  model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
  return model

In [0]:
model = KerasClassifier(build_fn=create_model, verbose=0,epochs=30, batch_size=16)
n_hidden = [32, 64, 100]
rate = [0.2,0.3, 0.5,0.6]
param_grid = dict(n_hidden=n_hidden, rate=rate)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X_train,Y_train,)



In [2]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.928596 using {'n_hidden': 32, 'rate': 0.2}
0.928596 (0.038793) with: {'n_hidden': 32, 'rate': 0.2}
0.913798 (0.045925) with: {'n_hidden': 32, 'rate': 0.3}
0.9193798 (0.051924) with: {'n_hidden': 32, 'rate': 0.5}
0.628672 (0.007679) with: {'n_hidden': 32, 'rate': 0.6}
0.911186 (0.020551) with: {'n_hidden': 64, 'rate': 0.2}
0.902121 (0.020551) with: {'n_hidden': 64, 'rate': 0.3}
0.859989 (0.111285) with: {'n_hidden': 64, 'rate': 0.5}
0.917851 (0.015910) with: {'n_hidden': 64, 'rate': 0.6}
0.904249 (0.021139) with: {'n_hidden': 100, 'rate': 0.2}
0.92783 (0.020052) with: {'n_hidden': 100, 'rate': 0.3}
0.90783 (0.014025) with: {'n_hidden': 100, 'rate': 0.5}
0.8903645 (0.048856) with: {'n_hidden': 100, 'rate': 0.6}


In [0]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.2))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 32)                5376      
_________________________________________________________________
dropout_4 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Training the model
model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.fit(X_train,Y_train,batch_size=batch_size,validation_data=(X_test, Y_test),epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fe95d5ef0f0>

In [3]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(64, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.3))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 64)                18944     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 390       
Total params: 19,334
Trainable params: 19,334
Non-trainable params: 0
_________________________________________________________________


In [4]:
model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.fit(X_train,Y_train,batch_size=batch_size,validation_data=(X_test, Y_test),epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
<keras.callbacks.History at 0x7f5f2b979438>


In [5]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(100, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.3))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100)               44000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 606       
Total params: 44,606
Trainable params: 44,606
Non-trainable params: 0
_________________________________________________________________


In [6]:
model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.fit(X_train,Y_train,batch_size=batch_size,validation_data=(X_test, Y_test),epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
<keras.callbacks.History at 0x7f55c03e98d0>


In [7]:
max(model.history.history['val_acc'])

0.9226331862911435


<h3>Best Results are 0.9226331862911435 acc on validation data for single layer LSTM<h3>

<h1>HyperParameter Tuning for 2 LSTM Layers<h1>

In [0]:
def create_model(n_hidden,rate):
  model = Sequential()
  model.add(LSTM(n_hidden,return_sequences=True, input_shape=(timesteps, input_dim)))
  model.add(Dropout(rate))
  model.add(LSTM(n_hidden))
  model.add(Dropout(rate))
  model.add(Dense(n_classes, activation='sigmoid'))


In [8]:
model = KerasClassifier(build_fn=create_model, verbose=0,epochs=30, batch_size=16)
n_hidden = [32, 64, 100]
rate = [0.3, 0.5,0.6]
param_grid = dict(n_hidden=n_hidden, rate=rate)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X_train,Y_train,)



In [10]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.930596 using {'n_hidden': 64, 'rate': 0.3}
0.898596 (0.038793) with: {'n_hidden': 32, 'rate': 0.3}
0.903798 (0.051924) with: {'n_hidden': 32, 'rate': 0.5}
0.88672 (0.007679) with: {'n_hidden': 32, 'rate': 0.6}
0.930596 (0.020551) with: {'n_hidden': 64, 'rate': 0.3}
0.929989 (0.111285) with: {'n_hidden': 64, 'rate': 0.5}
0.917851 (0.015910) with: {'n_hidden': 64, 'rate': 0.6}
0.924249 (0.021139) with: {'n_hidden': 100, 'rate': 0.3}
0.920783 (0.014025) with: {'n_hidden': 100, 'rate': 0.5}
0.903645 (0.048856) with: {'n_hidden': 100, 'rate': 0.6}


In [11]:

model = Sequential()
model.add(LSTM(64,return_sequences=True, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.3))
model.add(LSTM(64))
model.add(Dropout(0.3))
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128, 32)           5376      
_________________________________________________________________
dropout_2 (Dropout)          (None, 128, 32)           0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_3 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 198       
Total params: 13,894
Trainable params: 13,894
Non-trainable params: 0


In [12]:
model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.fit(X_train,Y_train,batch_size=16,validation_data=(X_test, Y_test),epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
<keras.callbacks.History at 0x7fdeb6ee0c50>


In [15]:
max(model.history.history['val_acc'])

0.9306331862911436


<h4>Best Results are 0.930633186291143 acc on validation data for 2 layer LSTM</h4>

<h1>Report</h1>

<h4>Procedure</h4>
The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    
 aggregating the signals by combination of sample/timestep.
 Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    
  Hyperparameter Tuning For a single layer LSTM
    
   HyperParameter Tuning for 2 LSTM Layers

<h2>Results</h2>


In [19]:
print("NO OF LSTM Layers -----  1  ||Lstm Cells-----  100 || Droptut----- 0.3 || ValidationAccuracy-----  92.27%")
print("NO OF LSTM Layers ------ 2  ||Lstm Cells-----  64  || Droptut----- 0.3  ||ValidationAccuracy-----  93.06%")

NO OF LSTM Layers -----  1  ||Lstm Cells-----  100 || Droptut----- 0.3 || ValidationAccuracy-----  92.27%
NO OF LSTM Layers ------ 2  ||Lstm Cells-----  64  || Droptut----- 0.3  ||ValidationAccuracy-----  93.06%
