# Experiment PAMAP2 with mcfly

This experiment finds an optimal model for the PAMAP2 dataset.

## Import required Python modules

In [2]:
import sys
import os
sys.path.insert(0, os.path.abspath('../..'))
import numpy as np
import pandas as pd
# mcfly
from mcfly import tutorial_pamap2, modelgen, find_architecture, storage
# The Keras module is used for the deep learning
import keras
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Convolution1D, Flatten, MaxPooling1D
from keras.optimizers import Adam
# We can set some backend options to avoid NaNs
from keras import backend as K

Using Theano backend.


## Load the data

In [4]:
# Directory holding the preprocessed arrays, stored as X_<i>.npy / y_<i>.npy
# file pairs (presumably one pair per PAMAP2 subject -- TODO confirm).
datapath = '/media/sf_VBox_Shared/timeseries/PAMAP2/PAMAP2_Dataset/cleaned_12activities_9vars/'
ext = '.npy'

# Xs[i] and ys[i] hold the arrays read from the i-th file pair; the
# cross-validation cells further down hold out one index at a time.
Xs, ys = [], []
for i in range(9):
    Xs.append(np.load('{}X_{}{}'.format(datapath, i, ext)))
    ys.append(np.load('{}y_{}{}'.format(datapath, i, ext)))

In [6]:
# Directory used for the results: the saved model files and the
# model-comparison JSON file are read from / written to this location.
resultpath = '/media/sf_VBox_Shared/timeseries/PAMAP2/PAMAP2_Dataset/results_tutorial/' 

In [7]:
# Reload a stored model from `resultpath` using mcfly's storage helper.
# NOTE(review): assumes a model with this name was saved there by an earlier
# run (e.g. the tutorial notebook) -- confirm.
modelname = 'my_bestmodel'
model_reloaded = storage.loadmodel(resultpath,modelname)

In [10]:
def split_train_test(X_list, y_list, j):
    """Hold out fold ``j`` as test set and concatenate all other folds for training.

    Parameters
    ----------
    X_list : list of numpy arrays
        One array per fold; arrays are concatenated along their first axis.
    y_list : list of numpy arrays
        One array per fold, in the same order as ``X_list``.
    j : int
        Index of the fold to hold out. Negative values count from the end,
        as in ordinary list indexing.

    Returns
    -------
    X_train, y_train, X_test, y_test : numpy arrays
        ``X_train``/``y_train`` are the concatenation of every fold except
        ``j``; ``X_test``/``y_test`` are fold ``j`` itself (not copied).

    Raises
    ------
    IndexError
        If ``j`` is out of range for ``X_list``.
    ValueError
        If no fold is left for training (raised by ``np.concatenate``).
    """
    if j < 0:
        # Normalise negative indices. With a raw j == -1 the slice
        # X_list[j+1:] is X_list[0:], i.e. *every* fold, so the held-out
        # fold would leak into the training data.
        j += len(X_list)
        if j < 0:
            raise IndexError('fold index out of range')
    X_train = np.concatenate(X_list[0:j]+X_list[j+1:])
    X_test = X_list[j]
    y_train = np.concatenate(y_list[0:j]+y_list[j+1:])
    y_test = y_list[j]
    return X_train, y_train, X_test, y_test

def split_train_small_val(X_list, y_list, j, trainsize=500, valsize=500):
    """Draw small, disjoint train and validation samples from all folds except ``j``.

    Every fold other than ``j`` is concatenated, then ``trainsize + valsize``
    distinct row indices are drawn at random (without replacement) using
    NumPy's global random state. The first ``trainsize`` drawn rows form the
    training set, the remaining ``valsize`` rows the validation set.

    Parameters
    ----------
    X_list : list of numpy arrays
        One array per fold; arrays are concatenated along their first axis.
    y_list : list of numpy arrays
        One array per fold, in the same order as ``X_list``.
    j : int
        Index of the fold to leave out entirely. Negative values count from
        the end, as in ordinary list indexing.
    trainsize : int, optional
        Number of rows in the training sample (default 500).
    valsize : int, optional
        Number of rows in the validation sample (default 500).

    Returns
    -------
    X_train, y_train, X_val, y_val : numpy arrays

    Raises
    ------
    IndexError
        If a negative ``j`` is out of range for ``X_list``.
    ValueError
        If fewer than ``trainsize + valsize`` rows are available (raised by
        ``np.random.choice`` with ``replace=False``).
    """
    if j < 0:
        # Normalise negative indices. With a raw j == -1 the slice
        # X_list[j+1:] is X_list[0:], i.e. *every* fold, so the fold that
        # should be left out would end up in the sampled data.
        j += len(X_list)
        if j < 0:
            raise IndexError('fold index out of range')
    X = np.concatenate(X_list[0:j]+X_list[j+1:])
    y = np.concatenate(y_list[0:j]+y_list[j+1:])
    # One draw without replacement guarantees train and validation rows are disjoint.
    rand_ind = np.random.choice(X.shape[0], trainsize+valsize, replace=False)
    X_train = X[rand_ind[:trainsize]]
    y_train = y[rand_ind[:trainsize]]
    X_val = X[rand_ind[trainsize:]]
    y_val = y[rand_ind[trainsize:]]
    return X_train, y_train, X_val, y_val

In [11]:
from keras.models import model_from_json

def get_fresh_copy(model, lr):
    """Return a newly compiled model built from the JSON description of ``model``.

    The model is serialised with ``model.to_json()`` and rebuilt with
    ``model_from_json``; the rebuilt model is compiled with categorical
    cross-entropy loss, an Adam optimizer with learning rate ``lr`` and
    accuracy as the reported metric.

    NOTE(review): the JSON description presumably contains the architecture
    only, so the copy should not inherit trained weights -- confirm against
    the Keras documentation.
    """
    architecture = model.to_json()
    fresh_model = model_from_json(architecture)
    fresh_model.compile(
        optimizer=Adam(lr=lr),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return fresh_model

## Train the best model for real

Now that we have identified the best model architecture out of our random pool of models, we can continue by training the model on the full training sample. The code below uses all available training data in every cross-validation fold (`datasize = X_train.shape[0]`). To speed up the example you can instead set `datasize` to a smaller value, e.g. 1000, so that the model is trained on the first 1000 values only.

In [8]:
# Use the model reloaded from disk as the template that is copied and
# retrained in the cross-validation loop below.
best_model = model_reloaded

In [16]:
import json

# Read the stored model-comparison results (JSON) from the results directory.
with open(resultpath+'modelcomparison.json', 'r') as comparison_file:
    model_json = json.load(comparison_file)

In [20]:
# Take the first entry of the model-comparison results as the parameters of
# the best model.
# NOTE(review): assumes entry 0 belongs to `best_model` and contains a
# 'learning_rate' key (read when the model is re-compiled) -- confirm.
best_params = model_json[0]

In [22]:
# Number of training epochs per fold.
nr_epochs = 2

# Seed NumPy's global random state before the cross-validation loop.
# NOTE(review): whether this also fixes the Keras weight initialisation
# depends on the backend -- confirm.
np.random.seed(123)
# Per-fold results: objects returned by fit(), final validation accuracies
# and the trained models.
histories, test_accuracies_list, models = [], [], []
for j in range(len(Xs)):
    # Hold out fold j for evaluation and train on all remaining folds.
    X_train, y_train, X_test, y_test = split_train_test(Xs, ys, j)
    # Each fold starts from a newly built and compiled copy of the best model.
    model_copy = get_fresh_copy(best_model, best_params['learning_rate'])
    # Use every training sample; a smaller value trains on the first rows only.
    datasize = X_train.shape[0]
    
    # The held-out fold is passed as validation data, so 'val_acc' is the
    # accuracy on fold j.
    history = model_copy.fit(X_train[:datasize,:,:], y_train[:datasize,:],
              nb_epoch=nr_epochs, validation_data=(X_test, y_test))
    
    histories.append(history)
    # Keep the validation accuracy reached after the last epoch.
    test_accuracies_list.append(history.history['val_acc'][-1] )
    models.append(model_copy)

Train on 14663 samples, validate on 2155 samples
Epoch 1/2
Epoch 2/2
Train on 14528 samples, validate on 2290 samples
Epoch 1/2
Epoch 2/2
Train on 15344 samples, validate on 1474 samples
Epoch 1/2
Epoch 2/2
Train on 14799 samples, validate on 2019 samples
Epoch 1/2
Epoch 2/2
Train on 14438 samples, validate on 2380 samples
Epoch 1/2
Epoch 2/2
Train on 14639 samples, validate on 2179 samples
Epoch 1/2
Epoch 2/2
Train on 14811 samples, validate on 2007 samples
Epoch 1/2
Epoch 2/2
Train on 14543 samples, validate on 2275 samples
Epoch 1/2
Epoch 2/2
Train on 16779 samples, validate on 39 samples
Epoch 1/2
Epoch 2/2


In [23]:
# Mean held-out accuracy over all folds, followed by the per-fold values.
print(np.mean(test_accuracies_list))
test_accuracies_list

0.835116382719


[0.82784222737819024,
 0.72576419198356856,
 0.94776119402985071,
 0.91877166914314012,
 0.81218487384940397,
 0.91005048189977267,
 0.9307424017132131,
 0.46857142858452849,
 0.97435897588729858]

In [24]:
# Calculate the 1-nearest-neighbour accuracy for each fold, using the same
# hold-out splits as for the model trained above.
# NOTE(review): nr_epochs is not used anywhere in this cell.
nr_epochs = 2

np.random.seed(123)
knn_test_accuracies_list = []
for j in range(len(Xs)):
    print("fold ", j)
    # Fold j is the test set, all other folds form the training set.
    X_train, y_train, X_test, y_test = split_train_test(Xs, ys, j)
    acc = find_architecture.kNN_accuracy(X_train, y_train, X_test, y_test, k=1)
    knn_test_accuracies_list.append(acc )

fold  0
fold  1
fold  2
fold  3
fold  4
fold  5
fold  6
fold  7
fold  8


In [25]:
# Mean 1-NN accuracy over all folds, then a table with one row per fold
# comparing the accuracies collected for the CNN and for 1-NN.
print(np.mean(knn_test_accuracies_list))
accs_compared = pd.DataFrame({'CNN': test_accuracies_list, 'kNN':knn_test_accuracies_list})
accs_compared

0.53974709837


Unnamed: 0,CNN,kNN
0,0.827842,0.611601
1,0.725764,0.610044
2,0.947761,0.613976
3,0.918772,0.523031
4,0.812185,0.615966
5,0.91005,0.523176
6,0.930742,0.603886
7,0.468571,0.371429
8,0.974359,0.384615


### Saving, loading and comparing reloaded model with original model

The model can be saved for future use. The savemodel function will save two separate files: a json file for the architecture and a npy (numpy array) file for the weights.

In [26]:
# Base name for the saved models; the fold index is appended to it when the
# per-fold models are stored.
modelname = 'my_bestmodel'

In [27]:
# Store every per-fold model under its own name: the base name followed by
# the index of the fold.
for fold_index, fold_model in enumerate(models):
    fold_modelname = modelname + str(fold_index)
    storage.savemodel(fold_model, resultpath, fold_modelname)