# Study based on Chapters 22 and 24

Human activity recognition is the problem of classifying sequences of accelerometer data recorded by specialized harnesses or smartphones into known well-defined movements. It is a challenging problem given the large number of observations produced each second, the temporal nature of the observations, and the lack of a clear way to relate accelerometer data to known movements.
Classical approaches to the problem involve hand crafting features from the time series data based on fixed-sized windows and training machine learning models, such as ensembles of decision trees. The difficulty is that this feature engineering requires deep expertise in the field. Recently, deep learning methods such as recurrent neural networks and one-dimensional convolutional neural networks, or CNNs, have been shown to provide state-of-the-art results on challenging activity recognition tasks with little or no data feature engineering.
In this tutorial, you will discover the Activity Recognition Using Smartphones dataset for time series classification and how to load and explore the dataset in order to make it ready for predictive modeling. This dataset will provide the basis for the remaining tutorials in this part of the book. After completing this tutorial, you will know:
* How to download and load the dataset into memory.
* How to use line plots, histograms, and box plots to better understand the structure of the
motion data.
* How to model the problem, including framing, data preparation, modeling, and evaluation.

### 22.4 Load the Dataset

In [1]:
from math import sqrt
from numpy import mean
from pandas import read_csv
from sklearn.metrics import mean_squared_error

In [41]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.utils import to_categorical

In [4]:
# grid search simple forecasts
from math import sqrt
from numpy import mean
from numpy import median
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from sklearn.metrics import mean_squared_error
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# load a single file as a numpy array
def load_file(filepath):
    """Read a whitespace-delimited, header-less text file into a numpy array.

    Args:
        filepath: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The parsed table as a 2-D numpy array (``DataFrame.values``).
    """
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True option: any run of whitespace separates fields.
    dataframe = pd.read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values

In [14]:
# Relative path to a single inertial-signal file used to try out load_file;
# judging by the file name it holds the y-axis total acceleration of the training split.
file_path = '../data_and_models/UCI_HAR_Dataset/UCI_HAR_Dataset/train/Inertial_Signals/total_acc_y_train.txt'


In [15]:
# Parse the single signal file into a 2-D numpy array.
data = load_file(file_path)

In [16]:
# Show the (rows, columns) dimensions of the loaded array.
print(data.shape)

(7352, 128)


In [19]:
from numpy import dstack
from pandas import read_csv

In [20]:
# load a list of files, such as x, y, z data for a given variable
def load_group(filenames, prefix=''):
    """Load several files and stack the resulting arrays along a third axis.

    Args:
        filenames: Iterable of file names, read in the order given.
        prefix: String prepended to every file name to form its path.

    Returns:
        The array produced by ``dstack`` over the per-file arrays, so each
        file contributes one slice along the third dimension.
    """
    # one array per file, in input order
    per_file = [load_file(prefix + fname) for fname in filenames]

    # depth-wise stack: features end up on the 3rd dimension
    return dstack(per_file)

In [21]:
# Try load_group on the three axis files (x, y, z) of one signal from the
# training split and show the shape of the stacked result.
filenames = ['total_acc_x_train.txt', 'total_acc_y_train.txt', 'total_acc_z_train.txt']
total_acc = load_group(filenames, prefix='../data_and_models/UCI_HAR_Dataset/UCI_HAR_Dataset/train/Inertial_Signals/')
print(total_acc.shape)


(7352, 128, 3)


In [27]:
def load_dataset_group(group, prefix=''):
    """Load the inertial signals and class labels for one dataset group.

    Args:
        group: Name of the split directory and file suffix, e.g. ``'train'``
            or ``'test'``.
        prefix: Path of the dataset root; must end with a path separator
            because paths are built by plain string concatenation.

    Returns:
        A tuple ``(X, y)`` where ``X`` is the stack of the nine signal files
        (features on the third dimension) and ``y`` is the array loaded from
        the ``y_<group>.txt`` label file.
    """
    filepath = prefix + group + '/Inertial_Signals/'

    # build the 9 file names: total acceleration, body acceleration and
    # body gyroscope, each with x, y and z axes (same order as before)
    filenames = list()
    for signal in ('total_acc', 'body_acc', 'body_gyro'):
        for axis in ('x', 'y', 'z'):
            filenames.append(signal + '_' + axis + '_' + group + '.txt')

    # load input data
    X = load_group(filenames, filepath)

    # load class output
    y = load_file(prefix + group + '/y_' + group + '.txt')
    return X, y


# Keep the original name working for existing two-argument calls. The
# per-group loader is also exposed as load_dataset_group because later code
# calls it under that name and rebinds load_dataset to a different function.
load_dataset = load_dataset_group

In [32]:
# load all train: signals and labels of the 'train' group, then show both shapes
# NOTE(review): the dataset root is a machine-specific absolute path — adjust before running elsewhere.
trainX, trainy = load_dataset('train', '/home/gabriel/Documents/Repos/time_series_study/data_and_models/UCI_HAR_Dataset/UCI_HAR_Dataset/')
print(trainX.shape, trainy.shape)

(7352, 128, 9) (7352, 1)


In [48]:
def load_dataset(prefix=''):
    """Load the train and test groups and one-hot encode their labels.

    Relies on a ``load_dataset_group(group, prefix)`` helper being defined
    before this function is called.

    NOTE: this rebinds the name ``load_dataset``, which an earlier cell
    defines with a ``(group, prefix)`` signature; two-argument calls made
    after this definition will fail.

    Args:
        prefix: Path prepended to the ``'HARDataset/'`` directory name.

    Returns:
        Tuple ``(trainX, trainy, testX, testy)`` with the label arrays
        shifted to start at zero and one-hot encoded.
    """
    dataset_root = prefix + 'HARDataset/'

    # training split
    trainX, trainy = load_dataset_group('train', dataset_root)
    print(trainX.shape, trainy.shape)

    # test split
    testX, testy = load_dataset_group('test', dataset_root)
    print(testX.shape, testy.shape)

    # shift class values to be zero-based, then one hot encode them
    trainy = to_categorical(trainy - 1)
    testy = to_categorical(testy - 1)

    print(trainX.shape, trainy.shape, testX.shape, testy.shape)
    return trainX, trainy, testX, testy

In [49]:
# define data shape: window length and channel count come from trainX's 2nd and
# 3rd dimensions, the number of output classes from trainy's 2nd dimension.
# NOTE(review): n_outputs is only the class count if trainy has already been
# one-hot encoded — confirm which trainy is in scope when this cell runs.
n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]

In [43]:
# define the CNN model as an ordered list of layers:
# two 1D convolutions, dropout, max pooling, flatten, then a dense classifier head
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu',
           input_shape=(n_timesteps, n_features)),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    Dropout(0.5),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='softmax'),
])
# multi-class setup: softmax output trained with categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# load all test: signals and labels of the 'test' group, then show both shapes
# NOTE(review): this two-argument call matches the earlier (group, prefix)
# definition of load_dataset; a later cell redefines load_dataset with a single
# `prefix` parameter, so run order matters here — confirm which one is intended.
testX, testy = load_dataset('test', '/home/gabriel/Documents/Repos/time_series_study/data_and_models/UCI_HAR_Dataset/UCI_HAR_Dataset/')
print(testX.shape, testy.shape)

In [45]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, epochs=10, batch_size=32, verbose=0):
    """Fit a fresh 1D CNN on the training data and return its test accuracy.

    Args:
        trainX: Training inputs; dimensions 1 and 2 give the number of
            timesteps and features fed to the first convolution.
        trainy: Training targets; dimension 1 gives the number of outputs
            of the softmax layer (i.e. targets are expected one-hot encoded).
        testX: Test inputs, evaluated after fitting.
        testy: Test targets, same encoding as ``trainy``.
        epochs: Number of training epochs (default 10, the previous fixed value).
        batch_size: Batch size for both fitting and evaluation (default 32).
        verbose: Keras verbosity for fitting and evaluation (default 0, silent).

    Returns:
        The accuracy metric reported by ``model.evaluate`` on the test data.
    """
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                     input_shape=(n_timesteps, n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model; evaluate() returns [loss, accuracy] given the metrics above
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=verbose)
    return accuracy

In [46]:
# summarize scores
def summarize_results(scores):
    """Print the raw scores followed by their mean and standard deviation.

    Args:
        scores: Sequence of numeric accuracy values, one per repeat; the
            summary line prints the mean with a percent sign.

    Returns:
        None. Both lines are written to stdout.
    """
    # No visible file-level import provides `std`, so the original call raised
    # NameError; import it locally to keep this function self-sufficient.
    from numpy import std

    print(scores)
    m, s = mean(scores), std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

In [None]:
def run_experiment(repeats=10):
    """Load the data, evaluate the model several times and summarize the scores.

    Args:
        repeats: Number of times a model is fitted and evaluated.

    Returns:
        None. Per-repeat scores and the final summary are printed.
    """
    # load data
    trainX, trainy, testX, testy = load_dataset()
    # repeat experiment
    scores = list()
    for r in range(repeats):
        score = evaluate_model(trainX, trainy, testX, testy)
        # express accuracy as a percentage
        score = score * 100.0
        print('>#%d: %.3f' % (r+1, score))
        scores.append(score)
    # summarize results
    summarize_results(scores)