# Keras Implementation - Human Data Analytics Project 

Importing all modules that we need for our elaboration

In [1]:
import numpy as np
import scipy.io

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)
    
    from keras import regularizers
    from keras.layers import Conv1D, BatchNormalization, Dropout, LeakyReLU, Flatten, Activation, Dense, MaxPooling1D
    from keras.models import Model, Sequential
    from keras.optimizers import Adam
    from sklearn.preprocessing import OneHotEncoder, StandardScaler
    from sklearn.metrics import f1_score
    import keras.backend as K
    
K.set_image_data_format('channels_last')

%matplotlib inline

Using TensorFlow backend.


# Preprocessing Section

In [2]:
x1 = scipy.io.loadmat("data_temp/S1-ADL1", mdict={'filled_features':'features', 'labels':'labels'})
x2 = scipy.io.loadmat("data_temp/S1-ADL2", mdict={'filled_features':'features', 'labels':'labels'})
x3 = scipy.io.loadmat("data_temp/S1-ADL3", mdict={'filled_features':'features', 'labels':'labels'})
x4 = scipy.io.loadmat("data_temp/S1-ADL4", mdict={'filled_features':'features', 'labels':'labels'})
x5 = scipy.io.loadmat("data_temp/S1-ADL5", mdict={'filled_features':'features', 'labels':'labels'})
x6 = scipy.io.loadmat("data_temp/S1-Drill", mdict={'filled_features':'features', 'labels':'labels'})

We split the data into train and test set.

In [3]:
X_train = np.concatenate((x1['filled_features'],x2['filled_features'],x3['filled_features'],x6['filled_features']),axis=0)
X_test = np.concatenate((x4['filled_features'],x5['filled_features']),axis=0)

Y_train = np.concatenate((x1['labels'][:,0],x2['labels'][:,0],x3['labels'][:,0],x6['labels'][:,0]),axis=0) # here we take just the first column because we want to fulfill the first task
Y_test = np.concatenate((x4['labels'][:,0],x5['labels'][:,0]))
#Y_test = np.concatenate((x4['labels'][:,0],x5['labels'][:,0]),axis=0)

# Initial decision to overcome the problem of entire missing columns
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)


X_train shape:  (159939, 113)
X_test shape:  (57536, 113)


The dataset needs to be normalized in order to improve performances.

In [4]:
# fatures normalization
scaler = StandardScaler().fit(X_train)
X_train =scaler.transform(X_train)
X_test = scaler.transform(X_test)

We decided to use a onehot encoder for the labels

In [8]:
onehot_encoder = OneHotEncoder(n_values=5, sparse=False)
YOH_train = onehot_encoder.fit_transform(Y_train.reshape(-1, 1))
YOH_test = onehot_encoder.fit_transform(Y_test.reshape(-1, 1))

print("YOH_train shape: ", YOH_train.shape)
print("YOH_test shape: ", YOH_test.shape)

ValueError: Feature out of bounds for n_values=5

In [None]:
window_size = 10
stride = int(window_size / 2)
num_features = X_train.shape[1]
samples, classes = YOH_train.shape
print("Number of samples: ", samples, "\nNumber of classes: ", classes)

windows = int(samples // stride) - 1
print("Number of windows: ", windows)

In [None]:
num_features_test = X_test.shape[1]
samples_test, classes_test = YOH_test.shape
print("Number of samples: ", samples_test, "\nNumber of classes: ", classes_test)

windows_test = int(samples_test // stride) - 1
print("Number of windows: ", windows_test)

In [None]:
cube = np.zeros([windows, window_size, num_features])
cube.shape

In [None]:
cube_test = np.zeros([windows_test, window_size, num_features])
cube_test.shape

In [None]:
lab_cum = np.zeros([windows])
lab_cum.shape

In [None]:
lab_cum_test = np.zeros([windows_test])
lab_cum_test.shape

In [None]:
for w in range(windows):
    index = int(w * stride)
    cube[w,:,:] = X_train[index:index+window_size, :].reshape((window_size,num_features))
    l = YOH_train[index:index+window_size,:] # shape 50 x 5
    #print(np.sum(l, axis=0),np.argmax(np.sum(l, axis=0)))
    lab_cum[w] = np.argmax(np.sum(l, axis=0))

In [None]:
for w in range(windows_test):
    index = int(w * stride)
    cube_test[w,:,:] = X_test[index:index+window_size, :].reshape((window_size,num_features))
    l = YOH_test[index:index+window_size,:] # shape 50 x 5
    #print(np.sum(l, axis=0),np.argmax(np.sum(l, axis=0)))
    lab_cum_test[w] = np.argmax(np.sum(l, axis=0))

In [None]:
#print('cumulative labels: ', lab_cum.shape, type(lab_cum), "\n", lab_cum)
YOH_train_final = onehot_encoder.fit_transform(lab_cum.reshape(-1, 1))
print(YOH_train_final)

In [None]:
YOH_test_final = onehot_encoder.fit_transform(lab_cum_test.reshape(-1, 1))
print(YOH_test_final)

In [None]:
def TestModel(input_shape):
    """ 
    Arguments:
    input_shape -- shape of the images of the dataset

    Returns: 
    model -- a Model() instance in Keras
    """
    
    model = Sequential()
    model.add(Conv1D(filters = 18,
                    kernel_size=5,
                    strides=1,
                    padding='same',
                    input_shape = input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2,
                          strides=2,
                          padding='same'))
    
    model.add(Conv1D(filters = 36,
                    kernel_size=7,
                    strides=1,
                    padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2,
                          strides=2,
                          padding='same'))
    
    model.add(Dropout(0.2))
    
    model.add(Conv1D(filters = 72,
                    kernel_size=7,
                    strides=1,
                    padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2,
                          strides=2,
                          padding='same'))
    
    #model.add(Conv1D(filters = 144,
    #                kernel_size=7,
    #                strides=1,
    #                padding='same'))
    #model.add(BatchNormalization())
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=2,
    #                      strides=2,
    #                      padding='same'))
    
    model.add(Flatten())
    
    model.add(Dense(64, kernel_regularizer=regularizers.l2(0.01)))
    model.add(Activation('relu'))
    
    model.add(Dropout(0.4))

    model.add(Dense(5))
    model.add(Activation('softmax'))
    
    #model.summary()
    
    return model

In [None]:
model_test = TestModel((10,113))

In [None]:
opt = Adam(lr=0.01)
model_test.compile(optimizer = opt, loss = "categorical_crossentropy", metrics = ["accuracy"])

In [None]:
model_test.fit(x = cube, y = YOH_train_final, epochs = 5, batch_size = 128, validation_data=(cube_test,YOH_test_final))

In [None]:
y_pred = model_test.predict(cube_test)
print("y_pred:\n", y_pred)

y_pred_hard = np.argmax(y_pred, axis=1)
print("\ny_pred_hard:\n", y_pred_hard)

YOH_pred = onehot_encoder.fit_transform(y_pred_hard.reshape(-1, 1))
print("\nYOH_pred:\n", YOH_pred)

In [None]:
print(f1_score(YOH_test_final, YOH_pred, average='weighted'))

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(classes):
    fpr[i], tpr[i], _ = roc_curve(YOH_test_final[:, i], YOH_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

print(roc_auc)