# HDA - Project 3: TASK B
Set the subject number `subject` to a value in the range [1, 4] and `root` to the folder where the '.mat' files are stored.

In [1]:
# Subject whose recordings are analysed; valid values are 1 to 4.
subject = 1
# Folder containing the '.mat' session files. File names are appended to it by
# plain string concatenation, so it has to end with a path separator.
root = "./data/full/"

# Fail early on a bad configuration rather than with a file-not-found error later on.
assert subject in (1, 2, 3, 4), "subject must be an integer between 1 and 4"

In [2]:
import itertools
import warnings

import numpy as np
import scipy.io
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, f1_score, roc_curve, auc

with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)

    from keras import regularizers
    from keras.layers import Conv1D, BatchNormalization, Dropout, LeakyReLU, Flatten, Activation, Dense, MaxPooling1D
    from keras.models import Model, Sequential
    from keras.optimizers import Adam
    import keras.backend as K
    
# Tell Keras that input tensors carry the channel/feature axis last.
K.set_image_data_format('channels_last')

# IPython magic: render matplotlib figures inside the notebook output.
%matplotlib inline

Using TensorFlow backend.


In [3]:
# Prefix shared by every file of the selected subject, e.g. "S1-".
s = "S" + str(subject) + "-"


def load_session(session_name):
    """
    Load one recording session of the selected subject.

    Arguments:
    session_name -- session part of the file name, e.g. "ADL1" or "Drill"

    Returns:
    session -- dict produced by scipy.io.loadmat holding the 'filled_features'
               and 'labels' variables of the file
    """
    # `variable_names` restricts what is read from the file. The `mdict` argument
    # is not a rename map: it is the dictionary loadmat inserts the variables
    # into, so passing placeholder strings through it would leave those strings
    # in place whenever a variable is missing from the file.
    return scipy.io.loadmat(root + s + session_name,
                            variable_names=['filled_features', 'labels'])


x1 = load_session("ADL1")
x2 = load_session("ADL2")
x3 = load_session("ADL3")
x4 = load_session("ADL4")
x5 = load_session("ADL5")
x6 = load_session("Drill")

In [4]:
# Report the size of every session and of the two splits built from them.
print("Session shapes:\n")

session_report = (
    ("ADL1:  ", x1),
    ("ADL2:  ", x2),
    ("ADL3:  ", x3),
    ("ADL4:  ", x4),
    ("ADL5:  ", x5),
    ("Drill: ", x6),
)
for tag, sess in session_report:
    print(tag, sess['filled_features'].shape)

# Number of time samples (rows) contributed to each split.
n_train_rows = sum(sess['filled_features'].shape[0] for sess in (x1, x2, x3, x6))
n_test_rows = sum(sess['filled_features'].shape[0] for sess in (x4, x5))

print("\nTraining set: ADL1 + ADL2 + ADL3 + Drill = ", n_train_rows)
print("Test set: ADL4 + ADL5 = ", n_test_rows)

Session shapes:

ADL1:   (51116, 113)
ADL2:   (32224, 113)
ADL3:   (33273, 113)
ADL4:   (32955, 113)
ADL5:   (30127, 113)
Drill:  (54966, 113)

Training set: ADL1 + ADL2 + ADL3 + Drill =  171579
Test set: ADL4 + ADL5 =  63082


In [5]:
# Sessions that make up each split: ADL1-3 and Drill for training, ADL4-5 for testing.
train_sessions = [x1, x2, x3, x6]
test_sessions = [x4, x5]

# Features: stack the sessions of each split along the sample axis.
X_train = np.concatenate([sess['filled_features'] for sess in train_sessions], axis=0)
X_test = np.concatenate([sess['filled_features'] for sess in test_sessions], axis=0)

# Labels (locomotion activity): only the first label column is kept, because it
# is the one that belongs to this task.
Y_train = np.concatenate([sess['labels'][:, 0] for sess in train_sessions], axis=0)
Y_test = np.concatenate([sess['labels'][:, 0] for sess in test_sessions])

# Initial decision to cope with columns that are entirely missing: NaNs become zeros.
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

print("X_train shape: ", X_train.shape)
print("X_test shape:  ", X_test.shape)

X_train shape:  (171579, 113)
X_test shape:   (63082, 113)


In [6]:
# Feature normalization: the scaling statistics are estimated on the training
# split only and then reused unchanged for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
label_max = np.max(Y_train)
print("The higest label value is: ", label_max)

# One column per distinct label value found in the training labels.
onehot_encoder = OneHotEncoder(sparse=False)
YOH_train = onehot_encoder.fit_transform(Y_train.reshape(-1, 1))
# The test labels are encoded with the encoder fitted on the training labels
# (transform, not fit_transform), so column k means the same class in both
# splits. A label that only occurs in the test split now raises an error
# instead of silently shifting the columns.
YOH_test = onehot_encoder.transform(Y_test.reshape(-1, 1))

print("YOH_train shape: ", YOH_train.shape)
print("YOH_test shape: ", YOH_test.shape)

The higest label value is:  5
YOH_train shape:  (171579, 5)
YOH_test shape:  (63082, 5)


In [8]:
# Sliding-window settings: windows of `window_size` samples overlapping by half.
window_size = 10
stride = window_size // 2
num_features = X_train.shape[1]
samples, classes = YOH_train.shape
print("Number of samples: ", samples, "\nNumber of classes: ", classes)

# Number of windows that fit completely inside the data. This equals the former
# `samples // stride - 1` whenever window_size == 2 * stride, and stays correct
# for odd window sizes, where the old formula could run past the last sample.
windows = max((samples - window_size) // stride + 1, 0)
print("Number of windows: ", windows)

Number of samples:  171579 
Number of classes:  5
Number of windows:  34314


In [9]:
num_features_test = X_test.shape[1]
samples_test, classes_test = YOH_test.shape
print("Number of samples: ", samples_test, "\nNumber of classes: ", classes_test)

# Number of windows that fit completely inside the test data. This equals the
# former `samples_test // stride - 1` whenever window_size == 2 * stride, and
# stays correct for odd window sizes.
windows_test = max((samples_test - window_size) // stride + 1, 0)
print("Number of windows: ", windows_test)

Number of samples:  63082 
Number of classes:  5
Number of windows:  12615


In [10]:
def build_windows(features, onehot_labels, n_windows, window_size, stride):
    """
    Cut a recording into overlapping windows and give each window its majority class.

    Arguments:
    features -- 2-D array (samples, features)
    onehot_labels -- 2-D array (samples, classes) with the one-hot label of every sample
    n_windows -- number of windows to extract
    window_size -- number of consecutive samples in one window
    stride -- offset in samples between the starts of two consecutive windows

    Returns:
    data -- array of shape (n_windows, window_size, features)
    majority -- array of shape (n_windows,) holding, for every window, the
                column index of its most frequent class
    """
    n_features = features.shape[1]
    data = np.zeros([n_windows, window_size, n_features])
    majority = np.zeros([n_windows])
    for k in range(n_windows):
        start = k * stride
        stop = start + window_size
        # The reshape makes a window shorter than window_size fail loudly.
        data[k, :, :] = features[start:stop, :].reshape((window_size, n_features))
        # Summing the one-hot rows counts how often each class occurs in the
        # window (window_size x classes block); argmax picks the most frequent.
        majority[k] = np.argmax(np.sum(onehot_labels[start:stop, :], axis=0))
    return data, majority


cube, lab_cum = build_windows(X_train, YOH_train, windows, window_size, stride)
cube_test, lab_cum_test = build_windows(X_test, YOH_test, windows_test, window_size, stride)

# The window labels are already column indices in [0, classes), so they are
# one-hot encoded by indexing an identity matrix. Unlike re-fitting the encoder
# on them, this always yields `classes` columns, even when a class never wins a
# window in one of the splits, and leaves `onehot_encoder` fitted on the
# original label values.
YOH_train_final = np.eye(classes)[lab_cum.astype(int)]
print(YOH_train_final)

YOH_test_final = np.eye(classes)[lab_cum_test.astype(int)]
print(YOH_test_final)

[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]


In [None]:
def Model1D(input_shape, num_classes=None):
    
    """ 
    Build a 1-D convolutional classifier: three Conv1D/BatchNorm/ReLU/MaxPool
    stages followed by a regularized dense layer and a softmax output.

    Arguments:
    input_shape -- shape of one input window, (window length, number of features)
    num_classes -- width of the softmax output; when omitted, the notebook-level
                   variable `classes` is used, as before

    Returns: 
    model -- a Sequential() instance in Keras (not compiled)
    """
    
    if num_classes is None:
        # Backward-compatible default: fall back to the global set while
        # preparing the labels.
        num_classes = classes
    
    # (filters, kernel_size) of the successive convolutional stages.
    conv_stages = [(18, 5), (36, 7), (72, 7)]
    
    model = Sequential()
    for position, (n_filters, kernel_width) in enumerate(conv_stages):
        conv_kwargs = dict(filters=n_filters,
                           kernel_size=kernel_width,
                           strides=1,
                           padding='same')
        if position == 0:
            # Only the first layer of a Sequential model declares the input shape.
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv1D(**conv_kwargs))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling1D(pool_size=2,
                               strides=2,
                               padding='same'))
        if position == 1:
            # Light dropout between the second and the third stage.
            model.add(Dropout(0.2))
    
    model.add(Flatten())
    
    model.add(Dense(64, kernel_regularizer=regularizers.l2(0.01)))
    model.add(Activation('relu'))
    
    model.add(Dropout(0.4))

    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    
    return model

In [None]:
# Take the input shape from the training windows instead of hard-coding (10, 113),
# so the network keeps matching the data when the window size or the number of
# features changes.
model_unidim = Model1D(cube.shape[1:])

opt = Adam(lr=0.01)
model_unidim.compile(optimizer = opt, loss = "categorical_crossentropy", metrics = ["accuracy"])

# NOTE(review): the test windows are used as validation data here, so the
# validation scores shown during training are test-set scores.
model_unidim.fit(x = cube, y = YOH_train_final, epochs = 5, batch_size = 128, validation_data=(cube_test,YOH_test_final))

Train on 34314 samples, validate on 12615 samples
Epoch 1/5


In [None]:
y_pred = model_unidim.predict(cube_test) # predictions expressed in terms of probabilities
y_pred_hard = np.argmax(y_pred, axis=1) # "hard" predictions inferred from previous max probability
# One-hot encode the hard predictions with one column per network output.
# Indexing an identity matrix keeps a column for every class, even one the
# model never predicts; re-fitting the encoder on the predictions would drop
# such columns and misalign them with the one-hot test labels.
YOH_pred = np.eye(y_pred.shape[1])[y_pred_hard]

In [None]:
def AUC(y_true, y_pred):
    """
    Compute the area under the ROC curve separately for every class.

    Arguments:
    y_true -- 2-D array (samples, classes) with the one-hot true labels
    y_pred -- 2-D array (samples, classes) with the scores or one-hot predictions

    Returns:
    roc_auc -- dict mapping the class column index to its AUC value
    """
    roc_auc = dict()
    # Iterate over the columns of the arguments rather than over a notebook-level
    # global, so the function only depends on what it is given.
    for i in range(y_true.shape[1]):
        fpr, tpr, _ = roc_curve(y_true[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr, tpr)

    return roc_auc

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix on the current matplotlib figure.
    Normalization (each row divided by its sum) can be applied by setting `normalize=True`.

    Arguments:
    cm -- 2-D confusion matrix, rows are true labels and columns predicted labels
    classes -- tick labels, one per row/column of `cm`
    normalize -- when True, show row-normalized fractions instead of counts
    title -- title of the plot
    cmap -- matplotlib colormap used for the cells
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    # np.ndindex walks over every (row, column) pair; it replaces
    # itertools.product, which this notebook never imported.
    for i, j in np.ndindex(cm.shape[0], cm.shape[1]):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
print("F1-measure: ", f1_score(YOH_test_final, YOH_pred, average='weighted'))
# NOTE(review): the AUC is computed on hard one-hot predictions; passing the
# predicted probabilities (y_pred) would give a proper ROC curve - confirm intent.
print("AUC w.r. to each class: ", AUC(YOH_test_final, YOH_pred))

# Original label values, in the sorted order used for the one-hot columns.
# They replace the hard-coded [1,2,4,5], which had four entries for five classes.
class_names = np.unique(Y_train).astype(int)

# Compute and plot confusion matrix
y_test_hard = np.argmax(YOH_test_final, axis=1) # reverse the one-hot encoding
# `labels` pins the matrix to one row/column per class, so it always lines up
# with the tick labels even if a class is absent from the test windows.
cnf_matrix = confusion_matrix(y_test_hard, y_pred_hard, labels=np.arange(len(class_names)))
np.set_printoptions(precision=2)

plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')