In [1]:
# Importing all the necessary packages and libraries

import pandas as pd
import numpy as np
import tensorflow as tf
np.random.seed(42)
tf.set_random_seed(42)

from keras import backend as K
from keras.models import Sequential
from keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, BatchNormalization
from keras.layers.core import Dense, Dropout
from keras.regularizers import l1, l2, l1_l2
from sklearn.metrics import accuracy_score

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [30]:
# Labelling the 6 classes
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Function for Confusion Matrix
def confusion_matrix2(Y_true, Y_pred, ACTIVITIES):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [3]:
DATADIR = 'UCI_HAR_Dataset'

SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [4]:
# Function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Function to load the signals data
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).to_numpy()
        ) 
    return np.transpose(signals_data, (1, 2, 0))


def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return y.values

def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [5]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [6]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

# Loading the Train and Test Data
X_train, X_test, Y_train, Y_test = load_data()

y_train_dif, y_test_dif = pd.Series(Y_train).map(dict(zip(range(1,7), [1]*3+[0]*3))).values, pd.Series(Y_test).map(dict(zip(range(1,7), [1]*3+[0]*3))).values

# Dynamic class data
X_train_Dynamic, X_test_Dynamic = X_train[y_train_dif==1], X_test[y_test_dif==1]
Y_train_Dynamic, Y_test_Dynamic = Y_train[y_train_dif==1], Y_test[y_test_dif==1] 

# Static class data
X_train_Static, X_test_Static = X_train[y_train_dif==0], X_test[y_test_dif==0]
Y_train_Static, Y_test_Static = Y_train[y_train_dif==0], Y_test[y_test_dif==0]

y_train_dif, y_test_dif = pd.get_dummies(y_train_dif).values,pd.get_dummies(y_test_dif).values
Y_train_Dynamic, Y_test_Dynamic = pd.get_dummies(Y_train_Dynamic).values, pd.get_dummies(Y_test_Dynamic).values
Y_train_Static, Y_test_Static = pd.get_dummies(Y_train_Static).values, pd.get_dummies(Y_test_Static).values

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])

print("Time steps : ", timesteps)
print("Input dimensions : ", input_dim)
print("Len of X_train : ", len(X_train))

Time steps :  128
Input dimensions :  9
Len of X_train :  7352


# Divide and Conquer CNN Model

## 2- class Classifier

In [7]:
model = Sequential()

model.add(Conv1D(16, 3, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.0001), input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(Dropout(0.65))
model.add(Dense(2, activation='softmax'))

model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 126, 16)           448       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 63, 16)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1008)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1008)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                16144     
_________________________________________________________________
batch_normalization_1 (Batch (None, 16)                64        
_________________________________________________________________
dropout_2 (Dropout)          (None, 16)              

In [8]:
# Compiling the model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Training the model
model.fit(X_train,
          y_train_dif,
          batch_size=8,
          validation_data=(X_test, y_test_dif),
          epochs=20)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 7352 samples, validate on 2947 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x18f9bfd84c8>

In [9]:
# Confusion Matrix
print(confusion_matrix2(y_test_dif, model.predict(X_test), {0: 'Static', 1: 'Dynamic',}))

Pred     Dynamic  Static
True                    
Dynamic     1387       0
Static         0    1560


In [10]:
score = model.evaluate(X_test, y_test_dif)
print(score)

[0.01575663369774697, 1.0]


In [11]:
model.save('class_model.h5')

<h1><font color="red"> Observations </font></h1> 

- 2- class classifer has 100 % validation accuracy.
- Which means that our model can perfectly distinguish static and dynamic activities.

# Model for Dynamic Class

In [12]:
model = Sequential()

model.add(Conv1D(64, 3, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.0001), input_shape=(timesteps, input_dim)))
model.add(Conv1D(32, 3, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001), input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dropout(0.6))
model.add(Dense(32, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(Dropout(0.6))
model.add(Dense(3, activation='softmax'))

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_2 (Conv1D)            (None, 126, 64)           1792      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 124, 32)           6176      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 62, 32)            0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1984)              0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 1984)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                63520     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32)               

In [13]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Training the model
model.fit(X_train_Dynamic,
          Y_train_Dynamic,
          batch_size=8,
          validation_data=(X_test_Dynamic, Y_test_Dynamic),
          epochs=49)

Train on 3285 samples, validate on 1387 samples
Epoch 1/49
Epoch 2/49
Epoch 3/49
Epoch 4/49
Epoch 5/49
Epoch 6/49
Epoch 7/49
Epoch 8/49
Epoch 9/49
Epoch 10/49
Epoch 11/49
Epoch 12/49
Epoch 13/49
Epoch 14/49
Epoch 15/49
Epoch 16/49
Epoch 17/49
Epoch 18/49
Epoch 19/49
Epoch 20/49
Epoch 21/49
Epoch 22/49
Epoch 23/49
Epoch 24/49
Epoch 25/49
Epoch 26/49
Epoch 27/49
Epoch 28/49
Epoch 29/49
Epoch 30/49
Epoch 31/49
Epoch 32/49
Epoch 33/49
Epoch 34/49
Epoch 35/49
Epoch 36/49
Epoch 37/49
Epoch 38/49
Epoch 39/49
Epoch 40/49
Epoch 41/49
Epoch 42/49
Epoch 43/49
Epoch 44/49
Epoch 45/49
Epoch 46/49
Epoch 47/49
Epoch 48/49
Epoch 49/49


<keras.callbacks.callbacks.History at 0x18ff9e906c8>

In [14]:
# Confusion Matrix
print(confusion_matrix2(Y_test_Dynamic, model.predict(X_test_Dynamic), {0: 'Walking', 1: 'Walking Upstairs', 2: 'Walking Downstairs',}))

Pred                Walking  Walking Downstairs  Walking Upstairs
True                                                             
Walking                 492                   3                 1
Walking Downstairs        2                 418                 0
Walking Upstairs          0                  24               447


In [15]:
score = model.evaluate(X_test_Dynamic, Y_test_Dynamic)
print(score)

[0.23537334086938821, 0.9783706068992615]


In [16]:
model.save('Dynamic_class_model.h5')

<h1><font color="red"> Observations </font></h1> 

- Dynamic class model has 97.83% validation accuracy.
- Our Dynamic class model also performs very good but it is having some issues while identifying walking upstairs and walking downstairs.

# Model for Static class

In [17]:
model = Sequential()

model.add(Conv1D(32, 5, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001), input_shape=(timesteps, input_dim)))
model.add(Conv1D(16, 3, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.45))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(3, activation='softmax'))

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_4 (Conv1D)            (None, 124, 32)           1472      
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 122, 16)           1552      
_________________________________________________________________
dropout_5 (Dropout)          (None, 122, 16)           0         
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 61, 16)            0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 976)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                62528     
_________________________________________________________________
dense_6 (Dense)              (None, 3)                

In [18]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Training the model
model.fit(X_train_Static,
          Y_train_Static,
          batch_size=64,
          validation_data=(X_test_Static, Y_test_Static),
          epochs=30)

Train on 4067 samples, validate on 1560 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x18ffb985948>

In [19]:
# Confusion Matrix
print(confusion_matrix2(Y_test_Static, model.predict(X_test_Static), {0: 'Laying', 1: 'Sitting', 2: 'Standing',}))

Pred      Laying  Sitting  Standing
True                               
Laying       400       91         0
Sitting       55      477         0
Standing       0       27       510


In [20]:
score = model.evaluate(X_test_Static, Y_test_Static)
print(score)

[0.7667308768209739, 0.889102578163147]


In [21]:
model.save('Static_class_model.h5')

<h1><font color="red"> Observations </font></h1> 

- Static class model has 88.91% validation accuracy.
- Our Static class model performs good but it is having issues while identifying Laying and Sitting.

# Final Model

In [22]:
from keras.models import load_model
from scipy.ndimage import gaussian_filter

class PredictActivity:
    def __init__(self):
        self.binary_model = None
        self.dynamic_model = None
        self.static_model = None

    def loadModels(self, binModelPath, dynamicModelpath, staticModelPath):
        self.binary_model = load_model(binModelPath)
        self.dynamic_model = load_model(dynamicModelpath)
        self.static_model = load_model(staticModelPath)
  
    def predict(self, X):
        y_bin = np.argmax(self.binary_model.predict(X), axis=1)

        X_dynamic = X[y_bin==1]
        X_static = X[y_bin==0]

        y_dynamic = np.argmax(self.dynamic_model.predict(X_dynamic), axis=1)
        y_static = np.argmax(self.static_model.predict(X_static), axis=1)

        y_dynamic = y_dynamic + 1
        y_static = y_static + 4

        output = np.zeros((X.shape[0]), dtype='int')
        output[np.where(y_bin==1)[0]] = y_dynamic
        output[np.where(y_bin==0)[0]] = y_static

        return output

In [23]:
# Loading saved models
predictactivity = PredictActivity()
predictactivity.loadModels('class_model.h5', 'Dynamic_class_model.h5', 'Static_class_model.h5')



In [24]:
# Checking and printing the accuracy score on validation Data
accuracy_score(Y_test, predictactivity.predict(X_test))

0.9311163895486936

In [34]:
from sklearn.metrics import confusion_matrix

print(confusion_matrix(Y_test, predictactivity.predict(X_test), labels=range(1,7)))

[[492   1   3   0   0   0]
 [  0 447  24   0   0   0]
 [  2   0 418   0   0   0]
 [  0   0   0 400  91   0]
 [  0   0   0  55 477   0]
 [  0   0   0   0  27 510]]


<h1><font color="red"> Observations </font></h1> 

- Final model has 93.11% validation accuracy.
- Our Final model performs very good but it is having some issues while identifying some classes.
- But the overall performance is preety good as compare to all the models I have previously tried.

In [35]:
from prettytable import PrettyTable

t = PrettyTable()
t.field_names= ("Model Name", "Validation accuracy")
t.add_row(["2 class classifier", "100%"])
t.add_row(["Dynamic class model", "97.83%"])
t.add_row(["Static class model", "88.91%"])
t.add_row(["Divide & Conquer CNN - Final Model", "93.11%"])

print(t)

+------------------------------------+---------------------+
|             Model Name             | Validation accuracy |
+------------------------------------+---------------------+
|         2 class classifier         |         100%        |
|        Dynamic class model         |        97.83%       |
|         Static class model         |        88.91%       |
| Divide & Conquer CNN - Final Model |        93.11%       |
+------------------------------------+---------------------+


<h1><font color="red"> Procedure </font></h1> 

<b> Step - 1 : </b> I have tried several architectures with LSTM but it was giving validation accuracy around 91-92 %.

<b> Step - 2 : </b> So as suggested I have tried Divide and Conquer CNN and I have achieved preety good results as compare to previous models. The steps are given below:

- So divide and Conqure is a stratergy in which we divide our program into smaller parts and after performing operations on smaller parts we combine them.
- Here for Human activity recognition too, we are first breaking our whole task into smaller tasks such as - Identifying Static class and Dyamic class. After identifying we are applying different models for both the classes.
- For the 2 class classifier I have achieved the validation accuracy as 100%.
- For the 2 Dynamic class model I have achieved the validation accuracy as 97.83%.
- For the 2 Static class model I have achieved the validation accuracy as 88.91%.
- After combing the final model gave the accuracy of 93.11%, which is very good because we have not taken any help from the experts and then also we are able to achieve this much accuracy.