# HDA - Project 3: TASK A

Rows labelled with category 0 have been dropped, so we do not consider the _idle_ state of the user.
### In this context, we first dropped those rows and then interpolated the missing values using splines

In [None]:
#import of all modules

import numpy as np
import scipy.io
import matplotlib.pyplot as plt
import itertools

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import f1_score, roc_curve, auc, confusion_matrix

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)
    
    from keras import regularizers
    from keras.layers import Conv1D, Conv2D, BatchNormalization, Dropout, LeakyReLU, Flatten, Activation, Dense, MaxPooling1D, MaxPooling2D
    from keras.models import Model, Sequential
    from keras.optimizers import Adam
    import keras.backend as K
    
K.set_image_data_format('channels_last')

%matplotlib inline

In the following cell we define all the helper functions that we need for **preprocessing** and **results visualization**.

In [None]:
def groupSamples():
    """Cut the training data into fixed-length windows with one label each.

    The function takes no arguments and works entirely on notebook-level
    variables, all of which must already exist when it is called:

    * read:    ``X_train``, ``YOH_train``, ``windows``, ``stride``,
               ``window_size``, ``num_features``
    * written: ``cube`` and ``lab_cum`` (filled in place, row ``w`` per window)

    For window ``w`` the rows ``[w * stride, w * stride + window_size)`` of
    ``X_train`` are copied into ``cube[w]`` and the index of the most frequent
    one-hot column of ``YOH_train`` over those rows is stored in ``lab_cum[w]``.

    Returns:
        None. Results are available through ``cube`` and ``lab_cum``.
    """
    for w in range(windows):
        index = int(w * stride)
        # The reshape doubles as a sanity check: it fails if the slice ran past
        # the end of X_train and is therefore shorter than window_size.
        cube[w,:,:] = X_train[index:index+window_size, :].reshape((window_size,num_features))
        window_labels = YOH_train[index:index+window_size,:]  # (window_size, n_classes)
        # Column-wise sum of one-hot rows = per-class vote count; argmax picks
        # the majority class (lowest index wins on ties).
        lab_cum[w] = np.argmax(np.sum(window_labels, axis=0))

This preprocessing step is meant to be temporary: we still need to extend these operations to the larger set of samples.

In [None]:
# Load the six recording sessions of subject 1. Every file is expected to
# contain a 'filled_features' matrix and a 'labels' matrix.
# loadmat's `mdict` argument is only a dict that the loaded variables get
# inserted into, so passing placeholder strings through it has no useful
# effect; the plain call returns the same variables.
x1 = scipy.io.loadmat("prep/A_nonzero/S1-ADL1")
x2 = scipy.io.loadmat("prep/A_nonzero/S1-ADL2")
x3 = scipy.io.loadmat("prep/A_nonzero/S1-ADL3")
x4 = scipy.io.loadmat("prep/A_nonzero/S1-ADL4")
x5 = scipy.io.loadmat("prep/A_nonzero/S1-ADL5")
x6 = scipy.io.loadmat("prep/A_nonzero/S1-Drill")

# Train on ADL1-3 plus the Drill session, evaluate on ADL4-5.
train_sessions = (x1, x2, x3, x6)
test_sessions = (x4, x5)

print("Session shapes:\n")
for tag, session in (("ADL1:  ", x1), ("ADL2:  ", x2), ("ADL3:  ", x3),
                     ("ADL4:  ", x4), ("ADL5:  ", x5), ("Drill: ", x6)):
    print(tag, session['filled_features'].shape)
print("\nTraining set: ADL1 + ADL2 + ADL3 + Drill = ", sum(s['filled_features'].shape[0] for s in train_sessions))
print("Test set: ADL4 + ADL5 = ", sum(s['filled_features'].shape[0] for s in test_sessions))

# We split the data into train and test set.

# features
X_train = np.concatenate([s['filled_features'] for s in train_sessions], axis=0)
X_test = np.concatenate([s['filled_features'] for s in test_sessions], axis=0)

# labels (locomotion activity): only the first label column is used, because
# this notebook addresses the first task only.
Y_train = np.concatenate([s['labels'][:, 0] for s in train_sessions], axis=0)
Y_test = np.concatenate([s['labels'][:, 0] for s in test_sessions], axis=0)

# Decision to overcome the problem of entire missing columns: every remaining
# NaN is replaced by 0 (np.nan_to_num also maps +/-inf to large finite values).
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)

Once we have both the training and the test set, we normalize the features and then define the classes using the one-hot encoding procedure.

In [None]:
# Feature standardisation: the per-column statistics are estimated on the
# training set only, then the very same scaling is applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

We decided to use a one-hot encoder for the labels.

# One-hot encode the per-sample labels. The encoder is fitted on the training
# labels only and merely *applied* to the test labels: re-fitting it on the
# test set could yield a different column <-> class mapping (or a different
# number of columns) whenever the two sets do not contain the same classes.
onehot_encoder = OneHotEncoder(sparse=False)
YOH_train = onehot_encoder.fit_transform(Y_train.reshape(-1, 1))
YOH_test = onehot_encoder.transform(Y_test.reshape(-1, 1))

print("YOH_train shape: ", YOH_train.shape)
print("YOH_test shape: ", YOH_test.shape)

# Windowing parameters: stride == window_size, i.e. non-overlapping windows.
window_size = 15
stride = int(window_size)
num_features = X_train.shape[1]
samples, classes = YOH_train.shape
print("Number of samples: ", samples, "\nNumber of classes: ", classes)

# NOTE(review): with stride == window_size this count leaves the last complete
# window of the set unused - confirm whether that is intended.
windows = int(samples // stride) - 1
print("Number of windows: ", windows)

num_features_test = X_test.shape[1]
samples_test, classes_test = YOH_test.shape
print("Number of samples: ", samples_test, "\nNumber of classes: ", classes_test)

windows_test = int(samples_test // stride) - 1
print("Number of windows: ", windows_test)

# Pre-allocate the windowed feature tensors: (window, time step, feature).
cube = np.zeros([windows, window_size, num_features])
cube.shape

cube_test = np.zeros([windows_test, window_size, num_features])
cube_test.shape

# Pre-allocate one (majority) class index per window.
lab_cum = np.zeros([windows])
lab_cum.shape

lab_cum_test = np.zeros([windows_test])
lab_cum_test.shape

def _fill_windows(features, onehot_labels, out_cube, out_labels, step):
    """Fill ``out_cube`` / ``out_labels`` in place with windows and majority labels.

    Args:
        features: 2-D array (samples, features) to cut into windows.
        onehot_labels: 2-D one-hot array with one row per row of ``features``.
        out_cube: pre-allocated array (n_windows, window_len, n_features).
        out_labels: pre-allocated array (n_windows,) receiving the index of
            the most frequent one-hot column of each window.
        step: offset in samples between the starts of consecutive windows.
    """
    n_windows, window_len, n_feat = out_cube.shape
    for w in range(n_windows):
        start = int(w * step)
        stop = start + window_len
        # The reshape fails loudly if the slice is shorter than window_len.
        out_cube[w, :, :] = features[start:stop, :].reshape((window_len, n_feat))
        votes = np.sum(onehot_labels[start:stop, :], axis=0)  # per-class counts
        out_labels[w] = np.argmax(votes)  # lowest index wins on ties


_fill_windows(X_train, YOH_train, cube, lab_cum, stride)
_fill_windows(X_test, YOH_test, cube_test, lab_cum_test, stride)

# One-hot encode the per-window class indices with a fixed width of `classes`
# columns. Re-fitting an encoder separately on each set would drop the column
# of any class that never wins a window in that set, so train and test targets
# could end up with different shapes / column meanings.
class_rows = np.eye(classes)
YOH_train_final = class_rows[lab_cum.astype(int)]
print(YOH_train_final)

YOH_test_final = class_rows[lab_cum_test.astype(int)]
print(YOH_test_final)