## Deep learning model forHAR

In [3]:
# Importing Dependencies
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Flatten, Dropout, Dense, LSTM
from keras.layers.core import Dense, Dropout
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.utils import to_categorical

In [4]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [5]:
# Data directory
DATADIR= os.path.join(os.getcwd(), 'datasets', 'UCI_HAR_Dataset')
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
SIGNALS = ["body_acc_x", "body_acc_y", "body_acc_z",
    "body_gyro_x", "body_gyro_y", "body_gyro_z",
    "total_acc_x", "total_acc_y", "total_acc_z"]

In [6]:
# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename= DATADIR + f'/UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(pd.read_csv(filename, delim_whitespace=True, header=None).values) 
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [7]:
def load_y(subset):
    filename= DATADIR + f'/UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = pd.read_csv(filename, delim_whitespace=True, header=None)[0]
    return pd.get_dummies(y).values

In [8]:
def load_data():
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')
    return X_train, y_train, X_test,  y_test

In [9]:
# setting up seed for random values
import tensorflow as tf
np.random.seed(42)
tf.random.set_seed(42)

# Initializing parameters
epochs = 30
batch_size = 16
n_hidden = 32

In [10]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [21]:
# Loading the train and test data
X_train, Y_train, X_test,  Y_test = load_data()

In [22]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)
#n_classes  = 6
print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


### Base model

In [23]:
# Initiliazing the sequential model
base_model = Sequential()
# Configuring the parameters
base_model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
base_model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
base_model.add(Dense(n_classes, activation='sigmoid'))
base_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                5376      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 6)                 198       
                                                                 
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Compiling the model
base_model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Training the model
History= base_model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### Multi-layer LSTM

In [25]:
# Initiliazing the sequential model
lstm_model = Sequential()
# Configuring the parameters
lstm_model.add(LSTM(32,return_sequences=True,input_shape=(timesteps, input_dim)))
# Adding a dropout layer
lstm_model.add(Dropout(0.5))

lstm_model.add(LSTM(28,input_shape=(timesteps, input_dim)))
# Adding a dropout layer
lstm_model.add(Dropout(0.6))
# Adding a dense output layer with sigmoid activation
lstm_model.add(Dense(n_classes, activation='sigmoid'))
lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 128, 32)           5376      
                                                                 
 dropout_1 (Dropout)         (None, 128, 32)           0         
                                                                 
 lstm_2 (LSTM)               (None, 28)                6832      
                                                                 
 dropout_2 (Dropout)         (None, 28)                0         
                                                                 
 dense_1 (Dense)             (None, 6)                 174       
                                                                 
Total params: 12,382
Trainable params: 12,382
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Compiling the model
lstm_model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Training the model
lstm_History= lstm_model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### Regularized multi-layer LSTM

In [27]:
from keras.regularizers import l2

# Initiliazing the sequential model
reg_lstm_model = Sequential()
# Configuring the parameters
reg_lstm_model.add(LSTM(32,recurrent_regularizer=l2(0.003),return_sequences=True,input_shape=(timesteps, input_dim)))
# Adding a dropout layer
reg_lstm_model.add(Dropout(0.5))

reg_lstm_model.add(LSTM(28,input_shape=(timesteps, input_dim)))
# Adding a dropout layer
reg_lstm_model.add(Dropout(0.6))
# Adding a dense output layer with sigmoid activation
reg_lstm_model.add(Dense(n_classes, activation='sigmoid'))
reg_lstm_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 128, 32)           5376      
                                                                 
 dropout_3 (Dropout)         (None, 128, 32)           0         
                                                                 
 lstm_4 (LSTM)               (None, 28)                6832      
                                                                 
 dropout_4 (Dropout)         (None, 28)                0         
                                                                 
 dense_2 (Dense)             (None, 6)                 174       
                                                                 
Total params: 12,382
Trainable params: 12,382
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Compiling the model
reg_lstm_model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Training the model
reg_lstm_History= reg_lstm_model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Hyperparameter Tuning Using Hyperas

In [11]:
np.random.seed(36)
tf.random.set_seed(36)

In [12]:
import random
import keras
from keras.regularizers import l2
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout

from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.utils import eval_hyperopt_space

In [13]:
def data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    # Data directory
    DATADIR= os.path.join(os.getcwd(), 'datasets', 'UCI_HAR_Dataset')
    SIGNALS= ["body_acc_x", "body_acc_y", "body_acc_z",
        "body_gyro_x", "body_gyro_y", "body_gyro_z",
        "total_acc_x", "total_acc_y", "total_acc_z"]    # Raw data signals

    def _read_csv(filename):
        return pd.read_csv(filename, delim_whitespace=True, header=None)

    def load_signals(subset):
        signals_data = []
        for signal in SIGNALS:
            filename= DATADIR + f'/UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
            signals_data.append(pd.read_csv(filename, delim_whitespace=True, header=None).values) 
        return np.transpose(signals_data, (1, 2, 0))
    
    def load_y(subset):
        """
        The objective that we are trying to predict is a integer, from 1 to 6,
        that represents a human activity. We return a binary representation of 
        every sample objective as a 6 bits vector using One Hot Encoding
        (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
        """
        filename= DATADIR + f'/UCI_HAR_Dataset/{subset}/y_{subset}.txt'
        y = _read_csv(filename)[0]
        return pd.get_dummies(y).values
    
    X_train, X_val = load_signals('train'), load_signals('test')
    Y_train, Y_val = load_y('train'), load_y('test')

    return X_train, Y_train, X_val, Y_val

In [16]:
def model(X_train, Y_train, X_val, Y_val):
    # Importing tensorflow
    np.random.seed(36)
    import tensorflow as tf
    tf.random.set_seed(36)

    # Initiliazing the sequential model
    model= Sequential() 
    # if {{choice(['one', 'two'])}} == 'two':
    if (random.choice(['one', 'two']) == 'two'):
        # Configuring the parameters
        model.add(LSTM(random.choice([28,32,38]), recurrent_regularizer= l2(random.uniform(0,0.0002)), \
            return_sequences=True, input_shape=(128, 9),name='LSTM2_1'))
        # Adding a dropout layer
        model.add(Dropout(random.uniform(0.35,0.65), name='Dropout2_1'))
        model.add(LSTM(random.choice([26,32,36]), recurrent_regularizer= l2(random.uniform(0,0.001)),\
            input_shape=(128, 9),name='LSTM2_2'))
        model.add(Dropout(random.uniform(0.5,0.7), name='Dropout2_2'))
        # Adding a dense output layer with sigmoid activation
        model.add(Dense(6, activation='sigmoid'))
    else:
        # Configuring the parameters
        model.add(LSTM(random.choice([28,32,36]), recurrent_regularizer= l2(random.uniform(0,0.001)),input_shape=(128, 9),name='LSTM1_1'))
        # Adding a dropout layer
        model.add(Dropout(random.uniform(0.35,0.55),name='Dropout1_1'))
        # Adding a dense output layer with sigmoid activation
        model.add(Dense(6, activation='sigmoid'))
        
    adam= keras.optimizers.Adam(learning_rate= random.uniform(0.009,0.025))
    rmsprop= keras.optimizers.RMSprop(learning_rate= random.uniform(0.009,0.025))
    choiceval = random.choice(['adam', 'rmsprop'])
    
    if choiceval == 'adam':
        optim = adam
    else:
        optim = rmsprop
    print(model.summary())
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],optimizer=optim)

    result= model.fit(X_train, Y_train,
              batch_size=16,
              epochs=30,
              verbose=1,
              validation_data=(X_val, Y_val))
                       
    score, acc= model.evaluate(X_val, Y_val, verbose=0)
    print('Test accuracy:', acc)
    print('-------------------------------------------------------------------------------------')
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

In [17]:
X_train, Y_train, X_val, Y_val = data()
trials = Trials()
best_run, best_model, space = optim.minimize(model(X_train, Y_train, X_val, Y_val),
                                      data,
                                      tpe.suggest,
                                      15,
                                      trials)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 LSTM2_1 (LSTM)              (None, 128, 32)           5376      
                                                                 
 Dropout2_1 (Dropout)        (None, 128, 32)           0         
                                                                 
 LSTM2_2 (LSTM)              (None, 36)                9936      
                                                                 
 Dropout2_2 (Dropout)        (None, 36)                0         
                                                                 
 dense_1 (Dense)             (None, 6)                 222       
                                                                 
Total params: 15,534
Trainable params: 15,534
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30

KeyboardInterrupt: 

In [None]:
total_trials = dict()
for t, trial in enumerate(trials):
        vals = trial.get('misc').get('vals')
        print('Model',t+1,'parameters')
        print(vals)
        print()
        z = eval_hyperopt_space(space, vals)
        total_trials['M'+str(t+1)] = z
        print(z)
        print('------------------------------------------------')

In [None]:
best_run

In [None]:
#BEST MODEL PARAMS
total_trials['M14']

In [None]:
_,val_acc = best_model.evaluate(X_val, Y_val, verbose=0)
_,train_acc = best_model.evaluate(X_train, Y_train, verbose=0)
print('Train_accuracy',val_acc)
print('validation accuracy',val_acc)

In [None]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix_rnn(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    #return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])
    return metrics.confusion_matrix(Y_true, Y_pred)

In [None]:
# Confusion Matrix
print(confusion_matrix_rnn(Y_val, best_model.predict(X_val)))

In [None]:
plt.figure(figsize=(8,8))
cm = confusion_matrix_rnn(Y_val, best_model.predict(X_val))
plot_confusion_matrix(cm, classes=labels, normalize=True, title='Normalized confusion matrix', cmap = plt.cm.Greens)
plt.show()