# HDA - Project 3

This first cell contains the hyper-parameters that can be tuned for code execution:
- subject: select the subject on which to test the model, between [1,4];
- folder: directory name where '.mat' files are stored;
- label_col: column of features to be selected to perform activity detection, between [0,6];
- window_size: parameter that sets the length of temporal windows on which to perform the convolution;
- stride: step length to chose the next window.

In [12]:
import utils
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import matplotlib.pyplot as plt
from keras import regularizers
from keras.activations import relu
from keras.layers import Conv1D, Conv2D, BatchNormalization, Dropout, LeakyReLU, Flatten, Activation, Dense, MaxPooling1D, MaxPooling2D, LSTM, Reshape, TimeDistributed, Bidirectional
from keras.models import Model, Sequential
from keras.optimizers import Adam

In [13]:
# parameters definition

subject = 1
folder = "./data/full/"
label_col = 0     # default for task A
window_size = 64
stride = 3
null_class = True

if(null_class):
    n_classes = 5
    classes = [0,1,2,4,5]
else:
    n_classes = 4
    classes = [1,2,4,5]

# Section 1 - Loading and Preprocessing

### Dataset Loading 

In [14]:
# import all sessions for a subject
(data1, data2, data3, data4, data5, data6) = utils.loadData(subject, folder=folder)

# create training set and test set
X_train = np.concatenate((data1['features_interp'],\
                          data2['features_interp'],\
                          data3['features_interp'],\
                          data6['features_interp']), axis=0)

Y_train = np.concatenate((data1['labels_cut'][:,label_col],\
                          data2['labels_cut'][:,label_col],\
                          data3['labels_cut'][:,label_col],\
                          data6['labels_cut'][:,label_col]), axis=0)

X_test = np.concatenate((data4['features_interp'],\
                         data5['features_interp']), axis=0)

Y_test = np.concatenate((data4['labels_cut'][:,label_col],\
                         data5['labels_cut'][:,label_col]))

features = X_test.shape[1]
print("\nTraining samples: ", X_train.shape[0],\
      "\nTest samples:      ", X_test.shape[0],\
      "\nFeatures:            ", features)

# decision to overcome the problem of entire missing columns
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

# features normalization
scaler = StandardScaler().fit(X_train)
X_train =scaler.transform(X_train)
X_test = scaler.transform(X_test)

# switch to one hot encoded labels
onehot_encoder = OneHotEncoder(sparse=False)
Y_train_oh = onehot_encoder.fit_transform(Y_train.reshape(-1, 1))
Y_test_oh = onehot_encoder.fit_transform(Y_test.reshape(-1, 1))
print("\nClasses in training set: ", Y_train_oh.shape[1],\
      "\nClasses in test set:     ", Y_test_oh.shape[1])

print("Training set:")
X_train_s, Y_train_s = utils.prepareData(X_train, Y_train_oh, window_size, stride, shuffle=False, null_class = null_class)
print("\nTest set:")
X_test_s, Y_test_s = utils.prepareData(X_test, Y_test_oh, window_size, stride, shuffle=False, null_class = null_class)
# add bars plot


Session shapes:
ADL1:   (45810, 110)
ADL2:   (28996, 110)
ADL3:   (30167, 110)
ADL4:   (30228, 110)
ADL5:   (27308, 110)
Drill:  (52152, 110)

Training samples:  157125 
Test samples:       57536 
Features:             110

Classes in training set:  5 
Classes in test set:      5
Training set:
<class 'numpy.ndarray'> (52354, 64, 110) <class 'numpy.ndarray'> (52354, 5)

Features have shape:  (52354, 64, 110) 
Labels have shape:    (52354, 5) 
Fraction of labels:   [0.10988654 0.41987241 0.27476411 0.17119991 0.02427704]

Test set:
<class 'numpy.ndarray'> (19157, 64, 110) <class 'numpy.ndarray'> (19157, 5)

Features have shape:  (19157, 64, 110) 
Labels have shape:    (19157, 5) 
Fraction of labels:   [0.17742862 0.34337318 0.20290233 0.23771989 0.03857598]


# Test with Deep CNN and LSTM

# Conv2D

In [15]:
def ModelDeep2D(input_shape, classes, withSoftmax = True):
    """ 
    Arguments:
    input_shape -- shape of the images of the dataset

    Returns: 
    model -- a Model() instance in Keras
    """
    
    model = Sequential()
  
    model.add(BatchNormalization(input_shape = input_shape))
    model.add(Conv2D(filters = 50,
                    kernel_size = (11,1),
                    activation='relu'))
    
    model.add(MaxPooling2D(pool_size=(2,1)))
    
    model.add(Reshape((27,5500)))
    
    model.add(LSTM(100,
                  return_sequences=True))
    
    model.add(LSTM(100))
    
    model.add(Dense(512,activation = 'relu'))
    
    if withSoftmax:
        model.add(Dense(classes, activation = 'softmax'))
    
    model.summary()
    
    return model

In [16]:
model_temp = ModelDeep2D((window_size,features,1), n_classes)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_4 (Batch (None, 64, 110, 1)        4         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 54, 110, 50)       600       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 27, 110, 50)       0         
_________________________________________________________________
reshape_3 (Reshape)          (None, 27, 5500)          0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 27, 100)           2240400   
_________________________________________________________________
lstm_6 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_4 (Dense)              (None, 512)               51712     
Total para

In [18]:
opt4 = Adam(lr=0.01)
model_temp.compile(optimizer = opt4, loss = "categorical_crossentropy", metrics = ["accuracy"])

# need to reshape in order to fit to the new 2D model
X_train = X_train_s.reshape(X_train_s.shape[0], window_size, features, 1)
X_test = X_test_s.reshape(X_test_s.shape[0], window_size, features, 1)

In [21]:
model_temp.fit(x = X_train, 
               y = Y_train_s, 
               epochs = 25, 
               batch_size = 200,
               verbose = 1,
               validation_data=(X_test, Y_test_s))

ValueError: Error when checking target: expected dense_4 to have shape (512,) but got array with shape (5,)

In [None]:
# Load the data and labels
print('Loading the training and testing data ...')
trainingShape = X_train.shape
testingShape = X_test.shape
assert trainingShape[1] == testingShape[1] # Window size
assert trainingShape[2] == testingShape[2] # Nb of sensors

nbTrainingExamples = trainingShape[0]
assert len(y_train) == nbTrainingExamples
print('   %d training examples loaded' % (nbTrainingExamples))

nbTestingExamples = testingShape[0]
assert len(y_test) == nbTestingExamples
print('   %d testing examples loaded' % (nbTestingExamples))

# Allocate the feature arrays
featureSize = 512
trainingDnnFeatures = np.empty((nbTrainingExamples,featureSize),dtype=np.float32)
testingDnnFeatures = np.empty((nbTestingExamples,featureSize),dtype=np.float32)

print('Computing DNN features on the training set...')
idx = 0
while idx < nbTrainingExamples:
    if idx + batchSize < nbTrainingExamples:
        endIdx = idx+batchSize
        size = batchSize
    else:
        endIdx = nbTrainingExamples
        size = nbTrainingExamples-idx
    predictions = model.predict(x_train[idx:endIdx],batch_size=size)
    trainingDnnFeatures[idx:endIdx] = predictions
    idx += batchSize

print('Computing DNN features on the testing set...')
idx = 0
while idx < nbTestingExamples:
    if idx + batchSize < nbTestingExamples:
        endIdx = idx+batchSize
        size = batchSize
    else:
        endIdx = nbTestingExamples
        size = nbTestingExamples-idx
    predictions = model.predict(x_test[idx:endIdx],batch_size=size)
    testingDnnFeatures[idx:endIdx] = predictions
    idx += batchSize

# Save features and labels
#print('Saving results ...')

#np.save('/dnnFeatures_training.npy',trainingDnnFeatures)
#np.save('/dnnLabels_training.npy',y_train)
#np.save('/dnnFeatures_testing.npy',testingDnnFeatures)
#np.save('/dnnLabels_testing.npy',y_test)

In [None]:

#trainingData = np.load(trainingDataPath)
#trainingLabels = np.load(trainingLabelsPath)
#testingData = np.load(testingDataPath)
#testingLabels = np.load(testingLabelsPath)

# Train the linear SVM model

C = [2**(-6)]

for idx in range(len(C)):
    print('Training the model with C = %.4f' % (C[idx]))
    classifier = LinearSVC(C=C[idx])
    classifier.fit(trainingDnnFeatures,Y_train_s)

    # Evaluate the model on the testing set
    print('   Evaluating the model')
    estimatedLabels = classifier.predict(testingDnnFeatures)

    # Compute the accuracy, weighted F1-score and average F1-score
    accuracy = utils.accuracy_score(Y_test_s,estimatedLabels)
    weightedF1 = utils.f1_score(Y_test_s,estimatedLabels,average='weighted')

    # Print results
    print('   Test accuracy = %.2f %%' % (accuracy*100))
    print('   Weighted F1-score = %.4f' % (weightedF1))


In [11]:
#Y_pred_s = model_temp.predict(X_test)

# print results
#reverse the one-ot encoder procedure
#Y_test_hard = np.argmax(Y_test_s, axis=1)
#Y_pred_hard = np.argmax(Y_pred_s, axis=1)

#print("F1-measure: ", utils.f1_score(Y_test_hard, Y_pred_hard, average='weighted'))
#print("AUC w.r. to each class: ", utils.AUC(Y_test_s, Y_pred_s, 5))

# Compute and plot confusion matrix
#cnf_matrix = utils.confusion_matrix(Y_test_hard, Y_pred_hard)
#np.set_printoptions(precision=2)

#plt.figure()
#utils.plot_confusion_matrix(cnf_matrix, classes=classes,
#                      title='Confusion matrix, without normalization')