In [1]:
!pip install scikeras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikeras
  Downloading scikeras-0.9.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.9.0


In [2]:
import pandas as pd
import numpy as np
import os


In [14]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
os.chdir(r'/content/drive/My Drive/AI/Datasets/Human activity recognition/HAR/')

In [6]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

### Data

In [7]:
# Data directory
DATADIR = 'UCI_HAR_Dataset'

In [8]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [9]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).to_numpy()
        ) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [10]:

def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).to_numpy()

In [11]:
def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [12]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

In [15]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [16]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [33]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout
from keras.layers import BatchNormalization
#from keras.optimizers import SGD
from tensorflow.keras.optimizers import SGD

In [48]:
# Initializing parameters
epochs = 30
batch_size = 24
n_hidden = 32

In [35]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [36]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

In [37]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)
activation='sigmoid'

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [None]:
'''def lstm_model(dropout_rate, n_hidden, n_classes):
    # create model
    model = Sequential()
    # Configuring the parameters
    model.add(LSTM(n_hidden,return_sequences=True, input_shape=(timesteps, input_dim)))
    # Adding a dropout layer
    model.add(Dropout(dropout_rate))
    model.add(LSTM(n_hidden))
    # Adding a dense output layer with sigmoid activation
    model.add(Dense(n_classes, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model'''

In [None]:
'''#https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier

seed = 7
tf.random.set_seed(seed)

X_train, X_test, Y_train, Y_test = load_data()

model = KerasClassifier(model=lstm_model, verbose=0)

batch_size = [32]
epochs = [200]
neurons = [48, 64]
dropout_rate = [0.4, 0.5,]
n_classes = [_count_classes(Y_train)]

param_grid = dict(model__dropout_rate=dropout_rate, model__n_hidden=neurons, model__n_classes = n_classes,
                   batch_size=batch_size, epochs=epochs)

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

grid_result = grid.fit(X_train, Y_train)
'''

- Defining the Architecture of LSTM

In [42]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden,return_sequences=True, input_shape=(timesteps, input_dim)))
model.add(BatchNormalization())
model.add(LSTM(n_hidden))

#model.add(Dropout(0.7))
#model.add(LSTM(n_hidden))
#opt = SGD(lr=0.01, momentum=0.9)
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation=activation))
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, 128, 32)           5376      
                                                                 
 batch_normalization_4 (Batc  (None, 128, 32)          128       
 hNormalization)                                                 
                                                                 
 lstm_10 (LSTM)              (None, 32)                8320      
                                                                 
 dense_3 (Dense)             (None, 6)                 198       
                                                                 
Total params: 14,022
Trainable params: 13,958
Non-trainable params: 64
_________________________________________________________________


In [53]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [54]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f377c4c7050>

In [56]:
# Confusion Matrix
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 537        0         0        0                   0   
SITTING                  2      383       104        0                   0   
STANDING                 0       43       488        1                   0   
WALKING                  0        0         0      464                  29   
WALKING_DOWNSTAIRS       0        0         0        0                 418   
WALKING_UPSTAIRS         0        0         0        0                   7   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            2  
STANDING                           0  
WALKING                            3  
WALKING_DOWNSTAIRS                 2  
WALKING_UPSTAIRS                 464  


In [57]:
score = model.evaluate(X_test, Y_test)

In [58]:
score

[0.310381090344931, 0.9345097]

### Obsertvations
- Experimented with various hyperparameters like batch size, number of hidden layers, dropouts values and Batch normalization
- The best accuracy achieved is 0.9345
- Model is still ambiguous in predicting the sitting and stading values