#Human Activity Recognition

# Importing Data

In [0]:
import pandas as pd
import numpy as np

In [0]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [0]:
# Data directory
DATADIR = 'UCI_HAR_Dataset'

In [0]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [0]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = f'/content/UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).as_matrix()
        ) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [0]:
def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'/content/UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).as_matrix()

In [0]:
def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [0]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [0]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [0]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

Using TensorFlow backend.


In [0]:
# Importing libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Input, Dense, Dropout

In [0]:
 # Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import zipfile

zipref = zipfile.ZipFile('/content/drive/My Drive/UCI_HAR_Dataset.zip', mode = 'r')
zipref.extractall('/content/')
zipref.close()
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

  # This is added back by InteractiveShellApp.init_path()


In [0]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [0]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


#Training
####Training by means of LSTM model with one and two layers. One layer with 32 - 48 units typically yields performance of not more than 90 per cent. Increasing units and layers has been found to achieve performance of more than 93 perc ent on test set. 
#####Also, the test accuracy varies widly within an epoch for later epochs (After first 10), so model checkpoint is used to save the best model in each epoch and also the best amongst all epoch is saved after comparing with new generated model after each model. Ths way the file Best_model.hdf5 contains the best model at each epoch and is rewritten after an epoch. Also, the file HAR_Model.hdf5 contains the best model of all epochs. It gets overwritten is a better model than the already best model is discovered after an epoch.

In [0]:
from tensorflow.keras.optimizers import RMSprop
# Create a model object

def create_model(n_hidden = 32, dropout = 0.5, n_layers = 1):
  # Initiliazing the sequential model
  model = Sequential()
  model.add(Input(shape = (timesteps, input_dim)))
  
  if n_layers > 1:
    for layer in range(n_layers - 1):
      # Configuring the parameters
      model.add(LSTM(n_hidden, return_sequences = True))
      # Adding a dropout layer
      model.add(Dropout(dropout))
    # Configuring the parameters
    model.add(LSTM(units = n_hidden + 6))
    # Adding a dropout layer
    model.add(Dropout(dropout))  
  else:
    # Configuring the parameters
    model.add(LSTM(units = n_hidden))
    # Adding a dropout layer
    model.add(Dropout(dropout))  
    
  # Adding a dense output layer with sigmoid activation
  model.add(Dense(n_classes, activation='softmax'))
  
  # Compiling the mode
  model.compile(loss='categorical_crossentropy',
              optimizer= RMSprop(),
              metrics=['accuracy'])
  
  return model


# Viewing model summary
model = create_model()
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32)                5376      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [0]:
##################################################################
###############  Grid Search using Custom Code ##################
##################################################################


from prettytable import PrettyTable
import itertools
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from tensorflow.keras.models import load_model


lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
                               monitor = 'val_acc',
                               factor = np.sqrt(0.8),
                               cooldown = 0,
                               patience = 2,
                               min_lr = 1e-5)

def lr_schedule(epoch, lr):
  # For Linear Decay [1e-3 to 1e-5]
  new_lr = 1e-3 - ((1e-3 - 1e-4) / 29) * epoch
  
  # For Hyperbolic Decay [1e-3 to 3.3e-5]
  # new_lr = 1e-3 / (epoch + 1)
  
  # For Stepwise Decay
  #if epoch % 10 == 5:
  #  new_lr = lr * 0.6
  
  if new_lr < lr:
    lr = new_lr
    
  print("For epoch {}, the LR is {}".format(epoch + 1, lr)) 
  return lr

#early_stopping = EarlyStopping(monitor = 'val_acc', patience = 7, verbose = 1, mode='auto')



# Setting parameters
dropout = [0.5, 0.7]
n_hidden = [48, 64]
n_layers = [2, 1]
epochs = [30] # can be changed in later versions
batch_size = [32] # can be changed in later versions


params = {'epochs': [], 'n_layers' : [], 'n_hidden': [], 'dropout': [], 'batch_size' : [], 'acc': [0], 'val_acc' : [0]}


for n_layers, n_hidden, dropout, epochs, batch_size in itertools.product(n_layers, n_hidden, dropout, epochs, batch_size):
  
  checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath = '/content/drive/My Drive/Models/Best_model.hdf5', monitor = 'val_acc', verbose = 0, save_best_only = True)
  lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

  model = create_model(n_hidden = n_hidden, dropout = dropout, n_layers = n_layers)
  
  print("For n_layers = {}, n_hidden = {}, dropout = {} and batch_size = {} ........................................"\
        .format(n_layers, n_hidden, dropout, batch_size))

  # Training the model
  fit_model = model.fit(X_train,
          Y_train,
          batch_size = batch_size,
          validation_data = (X_test, Y_test),
          epochs = epochs,
          callbacks = [lr_reducer, checkpoint, lr_scheduler],
          verbose = 2)
  
  model = load_model('/content/drive/My Drive/Models/Best_model.hdf5') 
  score_train = model.evaluate(X_train, Y_train)
  score_val = model.evaluate(X_test, Y_test)
  
  params['epochs'].append(epochs)
  params['n_layers'].append(n_layers)
  params['n_hidden'].append(n_hidden)
  params['dropout'].append(dropout)
  params['batch_size'].append(batch_size)
  params['acc'].append(score_train[1])
  params['val_acc'].append(score_val[1])
  
  acc_train = params['acc']
  acc_valid = params['val_acc']
  
  if acc_valid[-1] > np.max(acc_valid[:-1]):
    model_best = model
    param_best = {'epochs': epochs, 'n_layers' : n_layers, 'n_hidden': n_hidden, 'dropout': dropout, 'batch_size' : batch_size,\
                  'acc':acc_train[-1], 'val_acc' : acc_valid[-1]}
    model_best.save('/content/drive/My Drive/Models/HAR_Model.hdf5')
    print('Found better model, replacing earlier')
    
  print("With n_layers = {}, n_hidden= {} and dropout = {}, and batch_size = {}, the Train_Acc is = {} and Val_Acc is = {} \n\n".\
        format(n_layers, n_hidden, dropout, batch_size, acc_train[-1], acc_valid[-1]))

     
# Display Pretty Table using PrettyTable library
params['acc'].pop(0)
params['val_acc'].pop(0)
df = pd.DataFrame(params)  
DF = PrettyTable()
DF.field_names = df.columns
for row in range(df.count().max()):
  DF.add_row(df.iloc[row])
print(DF)

# Display best parameters
print("The Best parameters are:", param_best, sep = '\n')
                           

For n_layers = 2, n_hidden = 48, dropout = 0.5 and batch_size = 32 ........................................
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 7352 samples, validate on 2947 samples
For epoch 1, the LR is 0.001
Epoch 1/30
7352/7352 - 83s - loss: 1.0626 - acc: 0.5499 - val_loss: 0.8014 - val_acc: 0.6814
For epoch 2, the LR is 0.0009689655172413793
Epoch 2/30
7352/7352 - 82s - loss: 0.7738 - acc: 0.7029 - val_loss: 0.6408 - val_acc: 0.7462
For epoch 3, the LR is 0.0009379310344827586
Epoch 3/30
7352/7352 - 81s - loss: 0.4684 - acc: 0.8322 - val_loss: 0.5324 - val_acc: 0.8079
For epoch 4, the LR is 0.000906896551724138
Epoch 4/30
7352/7352 - 80s - loss: 0.3430 - acc: 0.8833 - val_loss: 0.4747 - val_acc: 0.8480
For epoch 5, the LR is 0.0008758620689655173
Epoch 5/30
7352/7352 - 81s - loss: 0.2728 - acc: 0.9066 - val_loss: 0.5230 - val_acc: 0.8493
For epoch 6, the LR is 0.0008448275862068965
Epoch 6/30
7352/7352 - 81s - los

In [0]:
#Load and test overall best model
model_best = load_model('/content/drive/My Drive/Models/HAR_Model.hdf5')
model_best.evaluate(X_test, Y_test)



[0.2886682210203786, 0.9321344]

In [0]:
#All Parameters
params

{'acc': [0.95579433,
  0.95361805,
  0.96205115,
  0.95905876,
  0.95729053,
  0.9538901,
  0.9576986,
  0.9541621],
 'batch_size': [32, 32, 32, 32, 32, 32, 32, 32],
 'dropout': [0.5, 0.7, 0.5, 0.7, 0.5, 0.7, 0.5, 0.7],
 'epochs': [30, 30, 30, 30, 30, 30, 30, 30],
 'n_hidden': [48, 48, 64, 64, 48, 48, 64, 64],
 'n_layers': [2, 2, 2, 2, 1, 1, 1, 1],
 'val_acc': [0.9205972,
  0.9277231,
  0.9321344,
  0.91007805,
  0.91415,
  0.9036308,
  0.9178826,
  0.9104174]}

In [0]:
# Confusion Matrix
pd.set_option('display.max_columns', 30)
print(confusion_matrix(Y_test, model_best.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 536        0         0        0                   0   
SITTING                  5      399        85        0                   0   
STANDING                 0       67       462        2                   0   
WALKING                  0        0         0      473                  10   
WALKING_DOWNSTAIRS       0        0         0        0                 417   
WALKING_UPSTAIRS         0        0         0        6                   5   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             1  
SITTING                            2  
STANDING                           1  
WALKING                           13  
WALKING_DOWNSTAIRS                 3  
WALKING_UPSTAIRS                 460  


In [0]:
# Confusion Matrix
df = confusion_matrix(Y_test, model_best.predict(X_test)) 
DF = PrettyTable()
DF.field_names = df.columns
for row in range(df.count().max()):
  DF.add_row(df.iloc[row])
print(DF)

+--------+---------+----------+---------+--------------------+------------------+
| LAYING | SITTING | STANDING | WALKING | WALKING_DOWNSTAIRS | WALKING_UPSTAIRS |
+--------+---------+----------+---------+--------------------+------------------+
|  536   |    0    |    0     |    0    |         0          |        1         |
|   5    |   399   |    85    |    0    |         0          |        2         |
|   0    |    67   |   462    |    2    |         0          |        1         |
|   0    |    0    |    0     |   473   |         10         |        13        |
|   0    |    0    |    0     |    0    |        417         |        3         |
|   0    |    0    |    0     |    6    |         5          |       460        |
+--------+---------+----------+---------+--------------------+------------------+


#FineTuning
####(To be used scarcely. Does not show much improvement when used in this case.)
#####Tried various means, such as decreasing LR for another 30 epochs, training only the last Dense layer weights, Adding another dense layer and training it only, training everything apart from the first LSTM layer and so on. But very little improvement was observed. Although on one instance an accuracy of 93.3 per cent was obtained as an improvement from 92 per cent. Maybe checkpoints on loss ought to yield better results.

In [0]:
#Upload best model
from google.colab import files

uploaded = files.upload()

Saving HAR_Model_2.hdf5 to HAR_Model_2.hdf5


In [0]:
# Load Best Model. Name has to be changed accordingly.
from tensorflow.keras.models import load_model
model = load_model('/content/HAR_Model_2.hdf5')

In [0]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 128, 48)           11136     
_________________________________________________________________
dropout_4 (Dropout)          (None, 128, 48)           0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 54)                22248     
_________________________________________________________________
dropout_5 (Dropout)          (None, 54)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 6)                 330       
Total params: 44,850
Trainable params: 33,714
Non-trainable params: 11,136
_________________________________________________________________


In [0]:
model.evaluate(X_test, Y_test)



[0.38214929368716866, 0.9239905]

In [0]:
#Make first LSTM layer untrainable
model.layers[0].trainable = False
new_model = tensorflow.keras.Sequential([model])

In [0]:
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_4 (Sequential)    (None, 6)                 33714     
Total params: 33,714
Trainable params: 22,578
Non-trainable params: 11,136
_________________________________________________________________


In [0]:
#Adding a new dense layer if required

#from tensorflow.keras.models import Model
#from tensorflow.keras.layers import Dense
#import tensorflow

#model.trainable = False
#for layer in model.layers[2:]:
#  layer.trainable = True
  


#new_model = tensorflow.keras.Sequential(
#[
#    model,
#    Dense(6, activation = 'softmax')
#])

#new_model.summary()


In [0]:
from tensorflow.keras.optimizers import SGD

new_model.compile(loss='categorical_crossentropy',
              optimizer= SGD(lr = 1e-4),
              metrics=['accuracy'])
 

In [0]:
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
                               monitor = 'val_acc',
                               factor = np.sqrt(0.8),
                               cooldown = 0,
                               patience = 2,
                               min_lr = 1e-6)

checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath = '/content/drive/My Drive/Models/HAR_model_fine_tuned.hdf5', monitor = 'val_acc', verbose = 0, save_best_only = True)


fit_model = new_model.fit(X_train,
          Y_train,
          batch_size = 16,
          validation_data = (X_test, Y_test),
          epochs = 60,
          initial_epoch = 30,
          callbacks = [lr_reducer, checkpoint],
          verbose = 1)

Train on 7352 samples, validate on 2947 samples
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [0]:
fit_model.model.optimizer.get_config()

{'amsgrad': False,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'decay': 0.0,
 'epsilon': 1e-07,
 'learning_rate': 2.6214399e-05,
 'name': 'Adam'}

In [0]:
from tensorflow.keras.models import load_model
model = load_model('/content/drive/My Drive/Models/HAR_model_fine_tuned.hdf5')

model.evaluate(X_test, Y_test)



[0.35716692178012577, 0.9379029]