## Tutorial PAMAP2 with mcfly

This tutorial is intended to talk you through the functionalities of mcfly. As an example dataset we use the publicly available PAMAP2 dataset. It contains time series data from a variety of sensors worn by nine persons. The data is labelled with the activity types that these individuals performed.

Before you can start, please make sure you have installed all the dependencies of mcfly (listed in requirements.txt) and that your Jupyter notebook has a Python 3 kernel.

## Download and pre-process data

In [1]:
%matplotlib inline

We have created a function for you to fetch and pre-process the data. Please specify the directory_to_extract_to and then execute the cells.

In [7]:
import sys
import os
sys.path.insert(0, os.path.abspath('..'))
from mcfly import tutorial_pamap2

In [3]:
# Directory in which the downloaded and pre-processed data will be stored.
# Change this to a location that exists on your machine.
directory_to_extract_to = "/media/windows-share"

# Specify which columns to use: the x, y and z channels of the
# '<location>_acc_16g_<axis>' columns for hand, ankle and chest.
# You can leave this as it is.
columns_to_use = ['{}_acc_16g_{}'.format(location, axis)
                  for location in ('hand', 'ankle', 'chest')
                  for axis in ('x', 'y', 'z')]

In [4]:
# Fetch and pre-process the data for the selected columns. The returned
# path is passed to tutorial_pamap2.load_data() further down.
outputpath = tutorial_pamap2.fetch_and_preprocess(
    directory_to_extract_to,
    columns_to_use,
)

Data previously downloaded and stored in /media/windows-share/PAMAP2
Data previously pre-processed and np-files saved to /media/windows-share/PAMAP2/PAMAP2_Dataset/slidingwindow512cleaned/


## Load the pre-processed data

In [8]:
import numpy as np
from keras.utils.np_utils import to_categorical
import keras

In [9]:

# Load the train, validation and test sets from the pre-processing output.
(X_train, y_train_binary,
 X_val, y_val_binary,
 X_test, y_test_binary) = tutorial_pamap2.load_data(outputpath)

# Manual alternative (presumably equivalent): load the individual .npy files.
#datapath = outputpath
#ext = '.npy'
#X_train = np.load(datapath+'X_train'+ext)
#y_train_binary = np.load(datapath+'y_train_binary'+ext)
#X_val = np.load(datapath+'X_val'+ext)
#y_val_binary = np.load(datapath+'y_val_binary'+ext)
#X_test = np.load(datapath+'X_test'+ext)
#y_test_binary = np.load(datapath+'y_test_binary'+ext)

NameError: name 'datapath' is not defined

In [None]:
# The number of classes is the size of the second axis of the label array
# (presumably one column per activity class -- confirm against load_data).
num_classes = y_train_binary.shape[1]
print(num_classes)

## Generate models

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Convolution1D, Flatten, MaxPooling1D
from keras.optimizers import Adam
import sys
import os
import numpy as np
sys.path.insert(0, os.path.abspath('..'))
from mcfly import modelgen, find_architecture

In [None]:
%%time
# Generate three candidate models of type 'CNN' for the shape of the
# training data and the number of classes.
models = modelgen.generate_models(
    X_train.shape,
    num_classes,
    number_of_models=3,  # 10,
    model_type='CNN',
    cnn_max_layers=4,
    deepconvlstm_max_conv_layers=3,
    deepconvlstm_max_lstm_layers=2,
)

In [None]:
# Print the hyperparameters and the layer summary of every generated model.
for candidate, hyperparams, _model_type in models:
    print(hyperparams)
    candidate.summary()

## Compare models

In [None]:
%%time
# Train every candidate model for 5 epochs on a subset of 500 samples and
# collect the training histories, validation accuracies and validation losses.
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train, y_train_binary,
    X_val, y_val_binary,
    models,
    nr_epochs=5,
    subset_size=500,
    verbose=True,
)

In [None]:
%%time
# Plot the training history of every candidate model, using its
# hyperparameters (as a string) as the name passed to the plot function.
for index, (_model, hyperparams, _model_type) in enumerate(models):
    find_architecture.plotTrainingProcess(histories[index], str(hyperparams))

In [None]:
import pandas as pd
# One row per candidate model: its hyperparameters (as a string) and the
# value each metric had after the last training epoch.
model_descriptions = [str(params) for _model, params, _model_type in models]
final_metrics = {
    metric: [history.history[metric][-1] for history in histories]
    for metric in ('acc', 'loss', 'val_acc', 'val_loss')
}
modelcomparisons = pd.DataFrame({
    'model': model_descriptions,
    'train_acc': final_metrics['acc'],
    'train_loss': final_metrics['loss'],
    'val_acc': final_metrics['val_acc'],
    'val_loss': final_metrics['val_loss'],
})
modelcomparisons

In [None]:
# Directory in which the results are stored. The trailing slash is kept so
# that a file name can be appended to this path directly.
resultpath = directory_to_extract_to + '/PAMAP2/PAMAP2_Dataset/results/'
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(resultpath, exist_ok=True)

In [None]:
# Write the comparison table to the results directory as a CSV file.
comparison_file = os.path.join(resultpath, 'modelcomparisons.csv')
modelcomparisons.to_csv(comparison_file)

## Train the best model for real

In [None]:
# Pick the index of the candidate with the highest validation accuracy.
best_model_index = np.argmax(val_accuracies)
# To choose a model by hand instead, set the index explicitly:
#best_model_index = 3

In [None]:
# Unpack the (model, hyperparameters, model type) entry of the selected
# candidate and show which one was chosen.
best_model, best_params, best_model_types = models[best_model_index]
print(best_model_index, best_model_types, best_params)

In [None]:
# Build a new CNN model from the hyperparameters of the best candidate,
# so that training starts from fresh.
best_model_copy = modelgen.generate_CNN_model(
    X_train.shape,
    num_classes,
    best_params['filters'],
    best_params['fc_hidden_nodes'],
    best_params['learning_rate'],
    best_params['regularization_rate'],
)

In [None]:
# We can set some backend options to avoid NaNs
from keras import backend as K
# Show the default float type and the epsilon value of the Keras backend.
print(K.floatx())
print(K.epsilon())
# Uncomment to set a different epsilon (suggested above as a way to avoid NaNs):
#K.set_epsilon(1e-5)

In [None]:
# We're going to train the model on the complete data set, and save the
# model weights after each epoch.
nr_epochs = 10
datasize = X_train.shape[0]  # e.g. 1000 for a quick trial run
# Checkpoints are written to the directory of the pre-processed data.
# `outputpath` is used because no `datapath` variable is defined in this
# notebook; os.path.join works with or without a trailing slash.
# The {epoch} and {val_loss} placeholders are filled in by ModelCheckpoint.
checkpoint_path = os.path.join(outputpath,
                               'weights.{epoch:02d}-{val_loss:.2f}.hdf5')
history = best_model_copy.fit(
    X_train[:datasize, :, :], y_train_binary[:datasize, :],
    nb_epoch=nr_epochs,
    validation_data=(X_val, y_val_binary),
    callbacks=[keras.callbacks.ModelCheckpoint(checkpoint_path)])

In [None]:
# Plot the training process of the final model. In the original run, the
# plot suggested that the model is probably already overfitting in the
# first epoch.
find_architecture.plotTrainingProcess(history)

In [None]:
# Load the weights that were saved just before we get the NaNs.
# Fill in the epoch number and validation loss from the name of the
# checkpoint file you want to restore.
import copy

checkpoint_epoch = 3
checkpoint_val_loss = 0.22
weights_file = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'.format(
    epoch=checkpoint_epoch, val_loss=checkpoint_val_loss)

# Use the standard-library copy module directly instead of reaching it
# through keras.models, which only exposes it as an implementation detail.
# NOTE(review): this is a shallow copy, so model_copy presumably shares its
# layers (and therefore its weights) with best_model_copy -- confirm that
# loading weights into the copy is meant to affect the original as well.
model_copy = copy.copy(best_model_copy)
# `outputpath` is used because no `datapath` variable is defined in this
# notebook; the checkpoints are expected in that same directory.
model_copy.load_weights(os.path.join(outputpath, weights_file))
model_copy.compile(best_model_copy.optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Evaluate the trained model on the training set.
# NOTE(review): this rebinds model_copy to best_model_copy, so a model_copy
# created with restored checkpoint weights is no longer referenced here --
# confirm that this is intended.
model_copy = best_model_copy
model_copy.evaluate(X_train, y_train_binary)

In [None]:
## Inspect model predictions
# Predict the class probabilities for all validation samples, one sample
# per batch.
datasize = X_val.shape[0]
validation_inputs = X_val[:datasize, :, :]
probs = best_model_copy.predict_proba(validation_inputs, batch_size=1)
probs

In [None]:
# Number of layers in the selected model (this looks at best_model, the
# instance from the model comparison, not at best_model_copy).
len(best_model.layers)

In [None]:
# Inspect the weights of the first layer of the trained model.
best_model_copy.layers[0].get_weights()

In [None]:
from keras import backend as K

# with a Sequential model: build a backend function that maps the input of
# the first layer (plus the learning-phase flag) to that layer's output.
first_layer = best_model_copy.layers[0]
get_dens_layer_output = K.function(
    [first_layer.input, K.learning_phase()],
    [first_layer.output],
)
# The second input is the learning-phase flag (0 is presumably test mode).
layer_output = get_dens_layer_output([X_val, 0])[0]
layer_output

In [None]:
# Print the layer summary of the selected model (best_model is the
# instance from the model comparison, not best_model_copy).
best_model.summary()

In [None]:
# Evaluate the trained model on the validation set and display the result.
score_val = best_model_copy.evaluate(X_val, y_val_binary, verbose=True)
score_val

In [None]:
## Test on Testset

In [None]:
# Evaluate the fully trained model (best_model_copy) on the held-out test
# set, consistent with the validation score computed from the same model.
# best_model is the instance that went through the model comparison, which
# only used a small subset of the data, so it is not the final model.
score_test = best_model_copy.evaluate(X_test, y_test_binary, verbose=False)
print('Score of best model: ' + str(score_test))