# PAMAP2 dataset

In [1]:
import numpy as np
import pandas as pd
from os import listdir
from numpy import genfromtxt
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [3]:
datadir = "/media/sf_VBox_Shared/timeseries/PAMAP2_Dataset/Protocol"
filenames = listdir(datadir)

axes = ['x', 'y', 'z']
IMUsensor_columns = ['temperature'] + \
                    ['acc_16g_' + i for i in axes] + \
                    ['acc_6g_' + i for i in axes] + \
                    ['gyroscope_'+ i for i in axes] + \
                    ['magnometer_'+ i for i in axes] + \
                    ['orientation_' + str(i) for i in range(4)]
header = ["timestamp", "activityID", "heartrate"] + ["hand_"+s for s in IMUsensor_columns]\
        + ["chest_"+s for s in IMUsensor_columns]+ ["ankle_"+s for s in IMUsensor_columns]
datasets = [pd.read_csv(datadir+'/'+fn, header=None, sep=' ') for fn in filenames]
for data in datasets:
    data.columns = header

data = datasets[0]    
print(data.shape)
print(header)

(376417, 54)
['timestamp', 'activityID', 'heartrate', 'hand_temperature', 'hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z', 'hand_acc_6g_x', 'hand_acc_6g_y', 'hand_acc_6g_z', 'hand_gyroscope_x', 'hand_gyroscope_y', 'hand_gyroscope_z', 'hand_magnometer_x', 'hand_magnometer_y', 'hand_magnometer_z', 'hand_orientation_0', 'hand_orientation_1', 'hand_orientation_2', 'hand_orientation_3', 'chest_temperature', 'chest_acc_16g_x', 'chest_acc_16g_y', 'chest_acc_16g_z', 'chest_acc_6g_x', 'chest_acc_6g_y', 'chest_acc_6g_z', 'chest_gyroscope_x', 'chest_gyroscope_y', 'chest_gyroscope_z', 'chest_magnometer_x', 'chest_magnometer_y', 'chest_magnometer_z', 'chest_orientation_0', 'chest_orientation_1', 'chest_orientation_2', 'chest_orientation_3', 'ankle_temperature', 'ankle_acc_16g_x', 'ankle_acc_16g_y', 'ankle_acc_16g_z', 'ankle_acc_6g_x', 'ankle_acc_6g_y', 'ankle_acc_6g_z', 'ankle_gyroscope_x', 'ankle_gyroscope_y', 'ankle_gyroscope_z', 'ankle_magnometer_x', 'ankle_magnometer_y', 'ankle_magnometer_

In [4]:
print(len(datasets))
train_range = range(6)
val_range = [6]
test_range = range(7,len(datasets))

9


## prepare dataset for keras

In [5]:
from keras.utils.np_utils import to_categorical

columns_to_use = ['hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z',
                 'ankle_acc_16g_x', 'ankle_acc_16g_y', 'ankle_acc_16g_z',
                 'chest_acc_16g_x', 'chest_acc_16g_y', 'chest_acc_16g_z']

y_list = [np.array(data.activityID) for data in datasets]
classlabels = list(set.union(*[set(y) for y in y_list]))
nr_classes = len(classlabels)
mapclasses = {classlabels[i] : i for i in range(len(classlabels))}
y_list = [np.array([mapclasses[c] for c in y], dtype='int') for y in  y_list]

y_binary_list = [to_categorical(y, nr_classes) for y in y_list]
X_list = [np.array(data[columns_to_use]) for data in datasets]
print(X_list[0].shape, y_list[0].shape, y_binary_list[0].shape)

Using Theano backend.


(376417, 9) (376417,) (376417, 13)


In [6]:
#Fill missing values
#
for X in X_list:
    print(np.isnan(X).sum())
    X[np.isnan(X)] = 0

9870
16683
3696
10584
11499
8208
8400
16230
189


In [7]:
nr_samples = len(X_list)
sequence_length, nr_channels = X_list[0].shape
print(sequence_length, nr_channels, nr_classes)

376417 9 13


In [10]:
for i in train_range:
    print(X_list[i].shape, y_binary_list[i].shape)
    

(376417, 9) (376417, 13)
(447000, 9) (447000, 13)
(252833, 9) (252833, 13)
(329576, 9) (329576, 13)
(374783, 9) (374783, 13)
(361817, 9) (361817, 13)


## Construct batches

In [64]:
nr_batches_per_series = 1000
batch_length = 100
nr_batches = nr_batches_per_series * len(train_range)

X_batches = np.zeros((nr_batches, batch_length, nr_channels))
y_batches_binary = np.zeros((nr_batches, batch_length, nr_classes))
y_batches_last_binary = np.zeros((nr_batches, nr_classes))

for j in train_range:
    for i in range(nr_batches_per_series):
        k = j*nr_batches_per_series + i
        X = X_list[j]
        y_binary = y_binary_list[j]
        start = np.random.randint(X.shape[0]-batch_length)
        X_batches[k,:,:] = X[start:start+batch_length,:]
        y_batches_binary[k,:,:] = y_binary[start:start+batch_length, :]
        y_batches_last_binary[k,:] = y_binary[start+batch_length-1, :]

In [13]:
# Take sliding-window frames. Target is label of last time step
# Data is 100 Hz
frame_length = 5 * 100
step = 1 * 100

def sliding_window(X, y_binary, frame_length, step, X_samples, y_samples):
    for i in range(0, X.shape[0]-frame_length, step):
        X_sub = X[i:i+frame_length,:]
        y_sub = y_binary[i+frame_length-1, :]
        X_samples.append(X_sub)
        y_samples.append(y_sub)

X_train = []
y_train = []
X_val = []
y_val = []
X_test = []
y_test = []
for j in train_range:
    X = X_list[j]
    y_binary = y_binary_list[j]
    sliding_window(X, y_binary, frame_length, step, X_train, y_train)
for j in val_range:
    X = X_list[j]
    y_binary = y_binary_list[j]
    sliding_window(X, y_binary, frame_length, step, X_val, y_val)
for j in test_range:
    X = X_list[j]
    y_binary = y_binary_list[j]
    sliding_window(X, y_binary, frame_length, step, X_test, y_test)
    
        
X_train = np.array(X_train)
y_train = np.array(y_train)
X_val = np.array(X_val)
y_val = np.array(y_val)
X_test = np.array(X_test)
y_test = np.array(y_test)


#Shuffle around the train set
np.random.seed(123)
neworder = np.random.permutation(X_train.shape[0])
X_train = X_train[neworder,:,:]
y_train = y_train[neworder,:]


print(X_samples.shape, y_samples.shape)
#Test and validation 

(21397, 500, 9) (21397, 13)


## Save the results

In [14]:
# Save binary file
outdatapath = '/media/sf_VBox_Shared/timeseries/PAMAP2_Dataset/slidingwindow500last/'
np.save(outdatapath+'X_train', X_train)
np.save(outdatapath+'y_train_binary', y_train)
np.save(outdatapath+'X_val', X_val)
np.save(outdatapath+'y_val_binary', y_val)
np.save(outdatapath+'X_test', X_test)
np.save(outdatapath+'y_test_binary', y_test)