## Tutorial PAMAP2 with mcfly

#### Note: first check that you are running this notebook with a Python 3 kernel

In [1]:
#import required python modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import listdir
from numpy import genfromtxt
import urllib.request
import os.path
import zipfile

In [2]:
%matplotlib inline

## Fetching the data

In [65]:
# Directory in which the PAMAP2 zip archive is stored and extracted.
# Change this to a writable location on your own machine before running.
directory_to_extract_to = "/media/windows-share"

In [68]:
# Download and extract the PAMAP2 archive, unless the target directory already
# exists from an earlier run.
targetdir = directory_to_extract_to + '/PAMAP2'
if os.path.exists(targetdir):
    print('Data previously loaded and extracted to ' + targetdir)
    print('You are ready to move on to the processing.')
else:
    # download the PAMAP2 data, this is 688 Mb
    path_to_zip_file = directory_to_extract_to + '/PAMAP2_Dataset.zip'
    if not os.path.isfile(path_to_zip_file):
        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00231/PAMAP2_Dataset.zip'
        # retrieve data from url
        local_fn, headers = urllib.request.urlretrieve(url, filename=path_to_zip_file)
        print('The data was downloaded and stored in: ' + path_to_zip_file)
    else:
        print('The data was previously downloaded and stored in: ' + path_to_zip_file)
    # unzip
    with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
        # Create the target directory only after the archive has been opened
        # successfully: an unreadable zip must not leave an empty targetdir
        # behind, because the check above would then treat the data as
        # already extracted on the next run.
        os.makedirs(targetdir)
        zip_ref.extractall(targetdir)

Data previously loaded and extracted to/media/windows-share/PAMAP2
You are ready to move on to the processing


## Pre-processing

In [73]:
# Directory holding the data files inside the extracted archive.
datadir = targetdir + '/PAMAP2_Dataset/Protocol'
# os.listdir returns entries in arbitrary order; sort them so that list
# positions (e.g. datasets[0]) refer to the same file on every run.
filenames = sorted(listdir(datadir))
print('There are ' + str(len(filenames)) + ' files')

In [99]:
# Read every file into its own pandas dataframe. The files are parsed as
# space-separated values without a header row.
datasets = [
    pd.read_csv('/'.join([datadir, fname]), header=None, sep=' ')
    for fname in filenames
]

In [100]:
# The loaded frames only have numeric column labels; build descriptive names.
axes = ['x', 'y', 'z']

# Per-IMU columns: temperature, four 3-axis sensors, then a 4-value orientation.
IMUsensor_columns = (
    ['temperature']
    + [sensor + axis
       for sensor in ('acc_16g_', 'acc_6g_', 'gyroscope_', 'magnometer_')
       for axis in axes]
    + ['orientation_' + str(k) for k in range(4)]
)

# Full header: three leading columns, then the IMU block repeated per body location.
header = ["timestamp", "activityID", "heartrate"] + [
    location + name
    for location in ("hand_", "chest_", "ankle_")
    for name in IMUsensor_columns
]

# Apply the same header to every loaded dataframe.
for frame in datasets:
    frame.columns = header

In [119]:
# Sanity check: display the column names now attached to the first loaded dataframe
datasets[0].columns.values

array(['timestamp', 'activityID', 'heartrate', 'hand_temperature',
       'hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z',
       'hand_acc_6g_x', 'hand_acc_6g_y', 'hand_acc_6g_z',
       'hand_gyroscope_x', 'hand_gyroscope_y', 'hand_gyroscope_z',
       'hand_magnometer_x', 'hand_magnometer_y', 'hand_magnometer_z',
       'hand_orientation_0', 'hand_orientation_1', 'hand_orientation_2',
       'hand_orientation_3', 'chest_temperature', 'chest_acc_16g_x',
       'chest_acc_16g_y', 'chest_acc_16g_z', 'chest_acc_6g_x',
       'chest_acc_6g_y', 'chest_acc_6g_z', 'chest_gyroscope_x',
       'chest_gyroscope_y', 'chest_gyroscope_z', 'chest_magnometer_x',
       'chest_magnometer_y', 'chest_magnometer_z', 'chest_orientation_0',
       'chest_orientation_1', 'chest_orientation_2', 'chest_orientation_3',
       'ankle_temperature', 'ankle_acc_16g_x', 'ankle_acc_16g_y',
       'ankle_acc_16g_z', 'ankle_acc_6g_x', 'ankle_acc_6g_y',
       'ankle_acc_6g_z', 'ankle_gyroscope_x', 'ankle_gyrosc

In [75]:
# Fill missing values in each dataframe using pandas' default interpolation;
# the results are new frames, the originals in `datasets` are left untouched.
datasets_filled = [frame.interpolate() for frame in datasets]

In [76]:
# Select columns: the three axes of the 16g accelerometer at each body location
columns_to_use = [
    'hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z',
    'ankle_acc_16g_x', 'ankle_acc_16g_y', 'ankle_acc_16g_z',
    'chest_acc_16g_x', 'chest_acc_16g_y', 'chest_acc_16g_z',
]
# All columns from index 2 onward, i.e. everything after timestamp and activityID.
# Read from the first filled frame rather than from a bare `data` variable, which
# is not defined at notebook level and fails on a fresh kernel; every frame was
# given the same header, so the first one is representative.
# NOTE(review): assumes `data` was meant to be one of the loaded frames - confirm.
columns = datasets_filled[0].columns[2:]

In [3]:
# Create mapping for class labels
import numpy as np
import keras
from keras.utils.np_utils import to_categorical

Using Theano backend.


In [120]:
# Activity IDs occurring in each recording, with ID 0 removed.
# NOTE(review): 0 is presumably the "no labelled activity" marker - confirm
# against the dataset documentation.
y_set_all = [set(np.array(data.activityID)) - {0} for data in datasets_filled]

# Union over all recordings. Sorting makes the label -> index mapping
# deterministic instead of depending on set iteration order, and starting from
# an empty set keeps the union valid even when there are no recordings.
classlabels = sorted(set().union(*y_set_all))
nr_classes = len(classlabels)
mapclasses = {label: index for index, label in enumerate(classlabels)}


def transform_y(y):
    """Convert a sequence of activity IDs into a one-hot encoded label matrix.

    Parameters
    ----------
    y : iterable
        Activity IDs; each one must be a key of ``mapclasses``.

    Returns
    -------
    The result of ``to_categorical`` applied to the mapped class indices with
    ``nr_classes`` classes.

    Raises
    ------
    KeyError
        If ``y`` contains an ID that is not in ``mapclasses`` (for example the
        excluded ID 0).
    """
    y_mapped = np.array([mapclasses[c] for c in y], dtype='int')
    y_binary = to_categorical(y_mapped, nr_classes)
    return y_binary