In [1]:
import csv
import numpy as np
import scipy.io
import glob
import matplotlib.pyplot as plt
%matplotlib inline

PAMAP2 dataset
1. download the dataset at https://archive.ics.uci.edu/ml/datasets/pamap2+physical+activity+monitoring
2. unzip the file to a directory (myDB). The dataset include 9 participants doing 18 daily activities, which can be found at the upzipped dataset description.
3. We will use two subjects(subject105, subject106) as test data, while the rest 7 subjects as training set. Manually set up 'train' directory and 'test' directory, and move the corresponding subjects there.
4. 12 activities (out of the 18 activities) will be used. For the simplified version (with data balanced for each class), 10 activities will be selected.
5. only a subset sensors will be used, including one (dominant side)wrist-IMU, one chest-IMU, and one (dominant side)ankle-IMU,  each with 3D accelerometer (+-16g), 3D gyroscope, and 3D magnetometer. (a total of 3*3*3=27 channels)
6. We will downsample the data from 100Hz to 33Hz

In [2]:


def read_PAMAP2(myDB, filetype):
    print('reading...'+filetype+' set')
    PAMAP2_Dir = myDB+'PAMAP2_Dataset/'+filetype+'/'
    Xy_array = np.empty([0, 53])
    list_of_files = glob.glob(PAMAP2_Dir+'*.dat')
    for fileName in list_of_files:
        print(fileName)
        curr_data = np.loadtxt(fileName)[:,1:]
        Xy_array = np.concatenate((Xy_array, curr_data), axis=0)
    Xy_array = Xy_array[Xy_array[:,0]!=0]
    Xy_array = Xy_array[Xy_array[:,0]!=9]
    Xy_array = Xy_array[Xy_array[:,0]!=10]
    Xy_array = Xy_array[Xy_array[:,0]!=11]
    Xy_array = Xy_array[Xy_array[:,0]!=18]
    Xy_array = Xy_array[Xy_array[:,0]!=19]
    Xy_array = Xy_array[Xy_array[:,0]!=20]
    print('get rid of optional labels . . . done')
    
    Xy_array[Xy_array[:,0]==12,0]=8
    Xy_array[Xy_array[:,0]==13,0]=9
    Xy_array[Xy_array[:,0]==16,0]=10
    Xy_array[Xy_array[:,0]==17,0]=11
    Xy_array[Xy_array[:,0]==24,0]=0
    print('updating labels to 12 activities (0-11)')   
    
    Xy_array = Xy_array[0::3]
    print('subsampling at a step of 3 ... done')
       
    Xy_array = np.nan_to_num(Xy_array)
    print('converting nan to zeros')
    
    
    X52 = Xy_array[:,1:].astype('float32')
    y = Xy_array[:,0:1]
    
    X_wrist = np.concatenate((X52[:,2:5], X52[:,8:11], X52[:,11:14]), axis=1)
    X_chest = np.concatenate((X52[:,19:22], X52[:,25:28], X52[:,28:31]), axis=1)
    X_ankle = np.concatenate((X52[:,35:38], X52[:,41:44], X52[:,44:47]), axis=1)
    print('only keep the IMUs on wrist/chest/ankle, with 27d time-series data... done')
    Xy=np.concatenate((X_wrist, X_chest, X_ankle, y), axis=1)
    print('with 28d output data, with label in the last dimension... done')
    return Xy

In [3]:
filetype = 'train'
myDB = '../../../Documents/ubicomp_datasets/'

Xy_train = read_PAMAP2(myDB, 'train')
Xy_test = read_PAMAP2(myDB, 'test')
print('raw data, without normalisation!')

scipy.io.savemat('PAMAP2.mat', dict(Xy_train=Xy_train,Xy_test=Xy_test))

reading...train set
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject101_op.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject108.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject109.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject109_op.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject107.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject104.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject101.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject102.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject103.dat
../../../Documents/ubicomp_datasets/PAMAP2_Dataset/train/subject108_op.dat
get rid of optional labels . . . done
updating labels to 12 activities (0-11)
subsampling at a step of 3 ... done
converting nan to zeros
only keep the IMUs on wrist/chest/ankle, with 27d time-series data... done
with 28d output data, with label in th