# Experiment PAMAP with mcfly

## Import required Python modules

In [1]:
import sys
import os
import numpy as np
import pandas as pd
# mcfly
from mcfly import modelgen, find_architecture, storage
from keras.models import load_model
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(2)

Using TensorFlow backend.


In [2]:
# Put the directory two levels above the current working directory at the front
# of the module search path, then import the PAMAP tutorial helper from `utils`.
# NOTE(review): presumably '../..' is the project root that holds `utils` -- confirm.
sys.path.insert(0, os.path.abspath('../..'))
from utils import tutorial_pamap2

Load the preprocessed data as stored in NumPy files. Please note that the data has already been split up into training (train), validation (val), and test subsets. It is common practice to call the input data X and the labels y.

In [3]:
# Location of the preprocessed PAMAP NumPy files.
# Set the PAMAP_DATA_PATH environment variable to point at your own copy of the
# data; when it is unset, the original hard-coded local path is used unchanged.
data_path = os.environ.get(
    'PAMAP_DATA_PATH',
    '/media/sf_VBox_Shared/timeseries/PAMAP_Dataset/cleaned_7act/')

In [4]:
# Unpack the seven values returned by the tutorial loader: inputs and labels
# for the train, validation and test subsets, followed by the label names.
(X_train, y_train_binary,
 X_val, y_val_binary,
 X_test, y_test_binary,
 labels) = tutorial_pamap2.load_data(data_path)

In [5]:
# Report the dimensions of the training inputs and of the training labels.
print('x shape:', np.shape(X_train))
print('y shape:', np.shape(y_train_binary))

x shape: (4400, 512, 9)
y shape: (4400, 7)


The data is split into training, validation, and test sets.

In [6]:
# Number of samples (length of the first axis) in each of the three subsets.
print('train set size:', len(X_train))
print('validation set size:', len(X_val))
print('test set size:', len(X_test))

train set size: 4400
validation set size: 3000
test set size: 3000


Let's have a look at the distribution of the labels:

In [7]:
# Column-wise mean of the training label matrix, shown as a one-column table
# with one row per entry of `labels`.
frequencies = y_train_binary.mean(axis=0)
frequencies_df = pd.DataFrame({'frequency': frequencies}, index=labels)
frequencies_df

Unnamed: 0,frequency
lying,0.134318
sitting,0.130909
standing,0.137045
normal_walking,0.185682
cycling,0.117955
ironing,0.161364
vaccuum_cleaning,0.132727


## Generate models

In [8]:
# The number of classes is the width (second axis) of the label matrix.
num_classes = y_train_binary.shape[1]

# Ask mcfly for five candidate models that match the training-data shape.
models = modelgen.generate_models(
    X_train.shape,
    number_of_classes=num_classes,
    number_of_models=5,
)

In [10]:
# Indices of the generated models to describe; by default every model.
models_to_print = range(len(models))
for i, item in enumerate(models):
    if i not in models_to_print:
        continue
    # Each entry is a (model, hyperparameters, model type) triple.
    model, params, model_types = item
    # Header and hyperparameters, one item per output line.
    print("-------------------------------------------------------------------------------------------------------",
          "Model " + str(i),
          " ",
          "Hyperparameters:",
          params,
          " ",
          "Model description:",
          sep="\n")
    # The layer-by-layer table is produced by the model itself.
    model.summary()
    print(" ", "Model type:", model_types, " ", sep="\n")

-------------------------------------------------------------------------------------------------------
Model 0
 
Hyperparameters:
{'filters': array([85, 17, 44]), 'learning_rate': 0.08360289270402858, 'regularization_rate': 0.0022439468517196116, 'fc_hidden_nodes': 443}
 
Model description:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 512, 9)            36        
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 512, 85)           2380      
_________________________________________________________________
batch_normalization_2 (Batch (None, 512, 85)           340       
_________________________________________________________________
activation_1 (Activation)    (None, 512, 85)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 512, 17)   

## Compare models

In [13]:
# Directory where the results (e.g. the JSON comparison file) will be stored.
resultpath = os.path.join(data_path, '..', 'data/models')
# exist_ok=True creates the directory (and any missing parents) only when it is
# absent, avoiding the check-then-create race of a separate os.path.exists() test.
os.makedirs(resultpath, exist_ok=True)

In [14]:
# JSON file that receives the details of the comparison run.
outputfile = os.path.join(resultpath, 'modelcomparison_pamap.json')

# Train each candidate model on a 1000-sample subset for 5 epochs, using the
# validation data for evaluation, and collect the per-model results.
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train, y_train_binary,
    X_val, y_val_binary,
    models,
    nr_epochs=5,
    subset_size=1000,
    verbose=True,
    outputfile=outputfile,
)
print('Details of the training process were stored in ',outputfile)

Training model 0 CNN
Train on 1000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training model 1 DeepConvLSTM
Train on 1000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training model 2 DeepConvLSTM
Train on 1000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [15]:
# Pick the entry of `models` whose position has the largest value in
# `val_accuracies`, then report its type and hyperparameters.
best_model_index = np.argmax(val_accuracies)
best_model, best_params, best_model_types = models[best_model_index]
print('Model type and parameters of the best model:',
      best_model_types,
      best_params,
      sep='\n')

Model type and parameters of the best model:
DeepConvLSTM
{'lstm_dims': [78], 'learning_rate': 0.000893145093504032, 'regularization_rate': 0.00319386451934688, 'filters': [48, 43, 68, 77]}


## Train the best model on the full dataset

In [16]:
# Fit the selected model on the first `datasize` training samples (here: all of
# them) and evaluate on the validation set after each epoch.
# NOTE(review): no copy is made -- `best_model` is the object taken from
# `models`, so if the comparison step trained it in place, this continues from
# those weights rather than starting fresh. TODO confirm.
nr_epochs = 1
datasize = X_train.shape[0]
history = best_model.fit(
    X_train[:datasize],
    y_train_binary[:datasize],
    epochs=nr_epochs,
    validation_data=(X_val, y_val_binary),
)

Train on 500 samples, validate on 100 samples
Epoch 1/1


In [17]:
# File name and full path (inside `resultpath`) for the saved best model.
modelname = 'my_bestmodel.h5'
model_path = os.path.join(resultpath,modelname)

In [18]:
# Write the best model to `model_path`.
best_model.save(model_path)