# HDA - Project 3: TASK A
## Classification of Modes of Locomotion
This first cell contains the parameters that can be tuned for code execution:
- subject: selects the subject on which to test the model, in the range [1,4];
- folder: name of the directory where the '.mat' files are stored;
- trim_zeros: if True, the samples whose label is 0 are removed from both the training set and the test set;
- label_col: column of the labels to be selected to perform activity detection, in the range [0,6];
- window_size: length of the temporal windows on which to perform the convolution;
- stride: step length used to choose the next window.

In [11]:
# --- data selection ---
folder = "./data/full/"   # directory holding the '.mat' files
subject = 2               # subject whose sessions are loaded
label_col = 0             # label column to classify; default for task A
trim_zeros = True         # when True, samples whose label is 0 are discarded

# --- windowing ---
window_size = 15          # length of each temporal window
stride = 15               # step used to move to the next window

In [12]:
import utils
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import matplotlib.pyplot as plt
from keras.optimizers import Adam

In [13]:
# Load every recorded session of the selected subject.
(data1, data2, data3, data4, data5, data6) = utils.loadData(subject, folder=folder)

# Sessions 1, 2, 3 and 6 form the training set; sessions 4 and 5 the test set.
train_sessions = (data1, data2, data3, data6)
test_sessions = (data4, data5)

X_train = np.concatenate([session['features'] for session in train_sessions], axis=0)
Y_train = np.concatenate([session['labels'][:, label_col] for session in train_sessions], axis=0)

X_test = np.concatenate([session['features'] for session in test_sessions], axis=0)
Y_test = np.concatenate([session['labels'][:, label_col] for session in test_sessions])

# Number of feature columns, needed later to define the model input shape.
features = X_test.shape[1]

if trim_zeros:
    # Keep only the samples whose label differs from 0, in both splits.
    keep_train = Y_train != 0
    X_train = X_train[keep_train]
    Y_train = Y_train[keep_train]

    keep_test = Y_test != 0
    X_test = X_test[keep_test]
    Y_test = Y_test[keep_test]

print("\nTraining samples: ", X_train.shape[0],
      "\nTest samples:      ", X_test.shape[0],
      "\nFeatures:            ", features)


Session shapes:
ADL1:   (38733, 110)
ADL2:   (26824, 110)
ADL3:   (31242, 110)
ADL4:   (29723, 110)
ADL5:   (27997, 110)
Drill:  (49009, 110)

Training samples:  131959 
Test samples:       48486 
Features:             110


In [14]:
# Missing readings (including columns that are entirely missing) are NaN;
# np.nan_to_num replaces them with 0 so that the scaler and the network
# receive finite values.
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

# Feature normalization: the scaler is fitted on the training set only and
# the same statistics are then applied to the test set.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Switch to one-hot encoded labels.
# `categorical_features='all'` was the default and has since been removed
# from scikit-learn, so it is no longer passed.
onehot_encoder = OneHotEncoder(sparse=False)

# The encoder is fitted on the training labels only and then reused for the
# test labels. Re-fitting it on the test set would silently map columns to
# different classes whenever the two sets do not contain the same labels.
# With the default handle_unknown='error', a test label that never occurs in
# training raises a ValueError instead of being mis-encoded.
Y_train_oh = onehot_encoder.fit_transform(Y_train.reshape(-1, 1))
Y_test_oh = onehot_encoder.transform(Y_test.reshape(-1, 1))
print("\nClasses in training set: ", Y_train_oh.shape[1],\
      "\nClasses in test set:     ", Y_test_oh.shape[1])


Classes in training set:  4 
Classes in test set:      4


Preparation of the data in an input-suitable form

In [15]:
# Turn both splits into windowed samples using the window_size and stride
# chosen in the configuration cell; shuffling is disabled for both.
X_train_s, Y_train_s = utils.prepareData(
    X_train, Y_train_oh, window_size, stride, shuffle=False
)
X_test_s, Y_test_s = utils.prepareData(
    X_test, Y_test_oh, window_size, stride, shuffle=False
)


Features have shape:  (8796, 15, 110) 
Labels have shape:    (8796, 4) 
Fraction of labels:   [0.46737153 0.29115507 0.2186221  0.0228513 ]

Features have shape:  (3231, 15, 110) 
Labels have shape:    (3231, 4) 
Fraction of labels:   [0.36892603 0.28071804 0.31104921 0.03930672]


In [16]:
# Number of output classes: the wider of the two one-hot encodings.
one_hot_widths = (Y_train_oh.shape[1], Y_test_oh.shape[1])
classes = np.max(one_hot_widths)

# Classification Performances

## 1D Convolutional Neural Network

In [17]:
# Each input sample is one window: (window_size, features).
input_shape = (window_size, features)
model_unidim = utils.Model1D(input_shape, classes)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_4 (Conv1D)            (None, 15, 18)            9918      
_________________________________________________________________
batch_normalization_4 (Batch (None, 15, 18)            72        
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 15, 18)            0         
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 8, 18)             0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 8, 36)             4572      
_________________________________________________________________
batch_normalization_5 (Batch (None, 8, 36)             144       
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 8, 36)             0         
__________

In [18]:
# Training hyper-parameters.
learning_rate = 0.01
n_epochs = 15
batch = 128

opt = Adam(lr=learning_rate)
model_unidim.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

# The test windows are passed as validation data, so the per-epoch
# validation metrics are computed on the test set.
model_unidim.fit(
    x=X_train_s,
    y=Y_train_s,
    batch_size=batch,
    epochs=n_epochs,
    validation_data=(X_test_s, Y_test_s),
)

Train on 8796 samples, validate on 3231 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1362c178fd0>

In [9]:
# Run the trained model on the test windows to get its per-class outputs.
Y_pred_s = model_unidim.predict(x=X_test_s)

In [None]:
# Print results.
# Reverse the one-hot encoding: for every window take the index of the
# largest entry, for both the true labels and the model outputs.
Y_test_hard = np.argmax(Y_test_s, axis=1)
Y_pred_hard = np.argmax(Y_pred_s, axis=1)

print("F1-measure: ", utils.f1_score(Y_test_hard, Y_pred_hard, average='weighted'))
print("AUC w.r. to each class: ", utils.AUC(Y_test_s, Y_pred_s, classes))

# Compute and plot the confusion matrix.
cnf_matrix = utils.confusion_matrix(Y_test_hard, Y_pred_hard)
np.set_printoptions(precision=2)

# Tick labels are derived from the training labels instead of being
# hard-coded, so the plot stays correct when `label_col` or `trim_zeros`
# change. np.unique returns the labels sorted ascending, which is the column
# order of the one-hot encoding. For the default configuration this is
# exactly [1, 2, 4, 5].
# NOTE(review): the int cast assumes integer-valued labels - confirm for the
# other label columns.
class_labels = np.unique(Y_train).astype(int).tolist()

plt.figure()
utils.plot_confusion_matrix(cnf_matrix, classes=class_labels,
                      title='Confusion matrix, without normalization')