# Human Activity Recognition

In [2]:
from IPython.display import HTML
# Embed the introductory video. Every fragment except the last ends with a
# space so the implicitly concatenated attributes stay separated (previously
# the src and frameborder attributes were glued together).
HTML(
    '<iframe width="560" height="315" '
    'src="https://www.youtube.com/embed/XOEN9W05_4A" '
    'frameborder="0" '
    'allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" '
    'allowfullscreen></iframe>'
)

Database built from the recordings of 30 subjects performing activities of daily living while carrying a waist-mounted smartphone with embedded inertial sensors.

Each person performed six activities (WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING)

Sources:

Project: https://github.com/guillaume-chevalier/LSTM-Human-Activity-Recognition

Data: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones

# Load Libraries

In [2]:
import numpy as np
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import ConvLSTM2D
from keras.utils import to_categorical
from hdfs import InsecureClient
import pandas as pd
from datetime import datetime
import mlflow

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Connect to Hadoop

In [7]:
# Create the HDFS client that the later cells use to list, read, write and upload files.
client_hdfs = InsecureClient('http://awscdh6-ma.sap.local:9870', user='dr.who')

In [8]:
client_hdfs.list('/tmp/tbr/BARMER/DSP')

['data_labeled_performance',
 'data_labeled_training',
 'data_unlabeled_predictions',
 'model']

# Load Data (labeled) from Hadoop

In [9]:
# Check the training data: list the files in the group's 'Inertial Signals' folder
client_hdfs.list('/tmp/tbr/BARMER/DSP/data_labeled_training/Inertial Signals')

['body_acc_x.txt',
 'body_acc_y.txt',
 'body_acc_z.txt',
 'body_gyro_x.txt',
 'body_gyro_y.txt',
 'body_gyro_z.txt',
 'total_acc_x.txt',
 'total_acc_y.txt',
 'total_acc_z.txt',
 'y_labels.txt']

Each axis of each signal is stored in a separate file, meaning that each of the train and test datasets has nine input files to load and one output file to load.

Function for loading the entire dataset:

In [10]:
# load one dataset group and one-hot encode its labels
def load_dataset(prefix):
    """Load one dataset group and return its inputs with one-hot labels.

    `prefix` is handed unchanged to `load_dataset_group`, which returns the
    inputs and the class labels. The labels are shifted down by one (so a
    label of 1 becomes 0) and then encoded with `to_categorical`.

    Returns the tuple ``(X, y)`` where ``y`` is the encoded label array.
    """
    features, labels = load_dataset_group(prefix)

    # shift to zero-offset class values, then one hot encode
    encoded = to_categorical(labels - 1)

    return features, encoded

A function for loading a dataset group of files

In [11]:
# load the inputs and labels of one dataset group, such as train or test
def load_dataset_group(group):
    """Load the nine inertial-signal files and the label file of one group.

    The nine signal paths are passed, together with `group`, to `load_group`
    in a fixed order: total acceleration, body acceleration, body gyroscope,
    each for the x, y and z axis. The labels are read with `load_file` from
    ``group + '/Inertial Signals/y_labels.txt'``.

    Returns the tuple ``(X, y)`` of whatever those two helpers return.
    """
    signal_files = [
        # total acceleration
        '/Inertial Signals/total_acc_x.txt',
        '/Inertial Signals/total_acc_y.txt',
        '/Inertial Signals/total_acc_z.txt',
        # body acceleration
        '/Inertial Signals/body_acc_x.txt',
        '/Inertial Signals/body_acc_y.txt',
        '/Inertial Signals/body_acc_z.txt',
        # body gyroscope
        '/Inertial Signals/body_gyro_x.txt',
        '/Inertial Signals/body_gyro_y.txt',
        '/Inertial Signals/body_gyro_z.txt',
    ]

    # input data first, then the class output
    inputs = load_group(signal_files, group)
    labels = load_file(group+'/Inertial Signals/y_labels.txt')

    return inputs, labels

A function for loading a group of files

In [12]:
# load several files and stack them depth-wise into one numpy array
def load_group(filenames, group):
    """Load every file of a group and stack the results along the third axis.

    Each entry of `filenames` is appended to `group` and read with
    `load_file`; the resulting arrays are combined with `numpy.dstack`, so
    the files (features) end up along the third dimension.

    Returns the stacked numpy array.
    """
    per_file = [load_file(group+name) for name in filenames]
    return dstack(per_file)

A function for loading a single file

In [13]:
# load a single whitespace-separated file from HDFS as a numpy array
def load_file(filepath, base_dir='/tmp/tbr/BARMER/DSP/'):
    """Read one headerless, whitespace-separated text file from HDFS.

    Parameters
    ----------
    filepath : str
        Path of the file relative to `base_dir`.
    base_dir : str, optional
        HDFS directory prepended to `filepath` by plain string
        concatenation. Defaults to the project folder used throughout
        this notebook.

    Returns
    -------
    numpy.ndarray
        The parsed table (``DataFrame.values``).
    """
    path = base_dir + filepath

    with client_hdfs.read(path, encoding = 'utf-8') as reader:
        # sep=r'\s+' replaces the deprecated delim_whitespace=True
        dataframe = read_csv(reader, header=None, sep=r'\s+')

    return dataframe.values

Execute Function-Chain

In [14]:
# load training data
trainX, trainy = load_dataset('data_labeled_training')

In [15]:
# load test data
testX, testy = load_dataset('data_labeled_performance')

In [16]:
trainX[0]

array([[ 1.012817e+00, -1.232167e-01,  1.029341e-01, ...,  3.019122e-02,
         6.601362e-02,  2.285864e-02],
       [ 1.022833e+00, -1.268756e-01,  1.056872e-01, ...,  4.371071e-02,
         4.269897e-02,  1.031572e-02],
       [ 1.022028e+00, -1.240037e-01,  1.021025e-01, ...,  3.568780e-02,
         7.485018e-02,  1.324969e-02],
       ...,
       [ 1.018445e+00, -1.240696e-01,  1.003852e-01, ...,  3.985177e-02,
         1.909445e-03, -2.170124e-03],
       [ 1.019372e+00, -1.227451e-01,  9.987355e-02, ...,  3.744932e-02,
        -7.982483e-05, -5.642633e-03],
       [ 1.021171e+00, -1.213260e-01,  9.498741e-02, ...,  2.881781e-02,
        -3.771800e-05, -1.446006e-03]])

In [17]:
trainy[0]

array([0., 0., 0., 0., 1., 0.], dtype=float32)

- 0 WALKING
- 1 WALKING_UPSTAIRS
- 2 WALKING_DOWNSTAIRS
- 3 SITTING
- 4 STANDING
- 5 LAYING

# Analyse Data

There are three main signal types in the raw data:
- total acceleration
- body acceleration
- body gyroscope

Each has three axes of data. This means that there are a total of __nine variables for each time step__. 

Further, each series of data has been partitioned into overlapping windows of 2.56 seconds of data, or 128 time steps.

These windows of data correspond to the windows of engineered features (rows) in the pre-processed version of the dataset.

This means that one row of data has (128 × 9), or 1,152, elements.

In [18]:
len(trainX)

7352

In [19]:
len(testX)

2947

In [20]:
len(trainX[0])

128

# Data Preprocessing

# Train Model

Define and train model:

In [21]:
# build and fit the ConvLSTM classifier
def train_model(trainX, trainy, epochs=2, batch_size=64, verbose=1,
                time_steps=4, columns=32, channels=9):
    """Build a ConvLSTM2D classifier and fit it on the training windows.

    Parameters
    ----------
    trainX : numpy.ndarray
        Training inputs. The first axis indexes samples; every sample must
        hold exactly ``time_steps * columns * channels`` values.
    trainy : numpy.ndarray
        One-hot encoded labels of shape ``(samples, n_classes)``. The size
        of the output layer is taken from the second axis.
    epochs, batch_size, verbose : int, optional
        Passed straight to ``model.fit``.
    time_steps, columns, channels : int, optional
        How each sample is split into subsequences: the input is reshaped
        to ``(samples, time_steps, 1, columns, channels)``.

    Returns
    -------
    The fitted ``Sequential`` model.

    Raises
    ------
    ValueError
        If `trainX` cannot be reshaped to the requested dimensions.
    """
    rows = 1  # every subsequence is fed to the layer as a single row
    samples = trainX.shape[0]
    n_outputs = trainy.shape[1]  # number of classes, one label column each

    # check and reshape before building the model so a wrong shape fails fast
    values_per_sample = time_steps * rows * columns * channels
    if trainX.size != samples * values_per_sample:
        raise ValueError(
            "trainX with shape %s cannot be reshaped to "
            "(samples, %d, %d, %d, %d)"
            % (trainX.shape, time_steps, rows, columns, channels)
        )
    # reshape data into subsequences (samples, time steps, rows, cols, channels)
    trainX = trainX.reshape((samples, time_steps, rows, columns, channels))

    # define model
    model = Sequential()
    model.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='relu', input_shape=(time_steps, rows, columns, channels)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)

    return model

In [22]:
model = train_model(trainX, trainy)

  if isinstance(loss, collections.Mapping):
  if not isinstance(values, collections.Sequence):


Epoch 1/2
Epoch 2/2


Try model

In [23]:
# Dimensions for reshaping a single window before prediction; they match the
# values used in train_model (4 * 32 = 128 time steps per window, 9 channels).
samples = 1
time_steps = 4
rows = 1
columns = 32
channels = 9 # number of features

In [24]:
# take the first training window and reshape it into subsequences
# (samples, time steps, rows, cols, channels)
sample = trainX[:1].reshape((samples, time_steps, rows, columns, channels))

In [25]:
# Class probabilities for the sample. `predict` returns the softmax outputs
# directly; `predict_proba` has been removed from current Keras releases.
model.predict(sample)

array([[1.45889982e-03, 1.18818134e-04, 5.19116884e-06, 7.05881715e-02,
        9.27821219e-01, 7.66064386e-06]], dtype=float32)

In [26]:
# Index of the most probable class. `predict_classes` has been removed from
# current Keras releases; argmax over the softmax outputs gives the same result.
np.argmax(model.predict(sample), axis=-1)

array([4], dtype=int64)

- 0 WALKING
- 1 WALKING_UPSTAIRS
- 2 WALKING_DOWNSTAIRS
- 3 SITTING
- 4 STANDING
- 5 LAYING

# Evaluate Performance

In [27]:
def evaluate_performance(testX, testy, model=None, time_steps=4, columns=32, channels=9):
    """Evaluate a fitted model on held-out data and return (loss, accuracy).

    Parameters
    ----------
    testX : numpy.ndarray
        Test inputs. The first axis indexes samples; every sample must hold
        ``time_steps * columns * channels`` values.
    testy : numpy.ndarray
        Labels passed unchanged to ``model.evaluate``.
    model : optional
        The fitted model to evaluate. When omitted, the notebook-level
        variable ``model`` is used, which keeps existing two-argument calls
        working.
    time_steps, columns, channels : int, optional
        The input is reshaped to ``(samples, time_steps, 1, columns, channels)``.

    Returns
    -------
    tuple
        ``(loss, accuracy)`` as returned by ``model.evaluate``.

    Raises
    ------
    NameError
        If no model is passed and no global ``model`` exists.
    """
    if model is None:
        # fall back to the global model instead of silently requiring it
        try:
            model = globals()['model']
        except KeyError:
            raise NameError("name 'model' is not defined") from None

    # reshape data into subsequences (samples, time steps, rows, cols, channels)
    rows = 1
    samples = testX.shape[0]
    testX = testX.reshape((samples, time_steps, rows, columns, channels))

    loss, accuracy = model.evaluate(testX, testy, verbose=0)
    return loss, accuracy

In [28]:
loss, accuracy = evaluate_performance(testX, testy)

In [29]:
loss, accuracy

(0.43640102741498443, 0.8710553050041199)

# Track Performance on Hadoop

In [1]:
# Record the evaluation metrics in one MLflow run. The context manager ends
# the run even when logging a metric raises, so no run is left open.
with mlflow.start_run():
    mlflow.log_metric("loss", loss)
    mlflow.log_metric("accuracy", accuracy)

NameError: name 'mlflow' is not defined

In [41]:
# Read MLflow's current tracking URI; the upload cell below passes it on as a local path.
# NOTE(review): confirm the URI is a plain filesystem path (no scheme prefix) before uploading.
local_path = mlflow.get_tracking_uri()
local_path

'/tmp/tbr/BARMER/DSP/model/model_performance.csv'

Upload to Hadoop

In [42]:
# Upload the local MLflow tracking data to HDFS, overwriting what is already there.
# The return value is bound to `_` so the cell displays no output.
path = "/tmp/tbr/BARMER/DSP/model/mlflow/"
_ = client_hdfs.upload(hdfs_path=path, local_path=local_path, overwrite=True)

Check result

In [44]:
client_hdfs.list('/tmp/tbr/BARMER/DSP/model/mlflow')

['0']

# Store Model on Hadoop

## Save model structure

Serialize as JSON

In [171]:
model_json = model.to_json()
model_json[:100]

'{"class_name": "Sequential", "config": {"name": "sequential_2", "layers": [{"class_name": "ConvLSTM2'

Write to Hadoop

In [172]:
# Write the JSON model structure to HDFS as UTF-8 text, overwriting any existing file.
path = "/tmp/tbr/BARMER/DSP/model/model_structure.json"
with client_hdfs.write(path, encoding = 'utf-8', overwrite=True) as writer:
    writer.write(model_json)

Check result

In [173]:
client_hdfs.list('/tmp/tbr/BARMER/DSP/model/')

['model_performance.csv', 'model_structure.json', 'model_weights.h5']

## Save model weights

Serialize as local H5 file

In [174]:
# serialize weights to HDF5
model.save_weights("model_weights.h5")

Upload File to Hadoop

In [175]:
path = "/tmp/tbr/BARMER/DSP/model/model_weights.h5"
_ = client_hdfs.upload(hdfs_path=path, local_path="model_weights.h5", overwrite=True)

Check result

In [176]:
client_hdfs.list('/tmp/tbr/BARMER/DSP/model/')

['model_performance.csv', 'model_structure.json', 'model_weights.h5']