# Keras Implementation - Human Data Analytics Project 

Importing all modules that we need for our elaboration

In [1]:
import numpy as np
import scipy.io
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import f1_score, roc_curve, auc
import matplotlib as plt

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)
    
    from keras import regularizers
    from keras.layers import Conv1D, BatchNormalization, Dropout, LeakyReLU, Flatten, Activation, Dense, MaxPooling1D
    from keras.models import Model, Sequential
    from keras.optimizers import Adam
    import keras.backend as K
    
K.set_image_data_format('channels_last')

%matplotlib inline

Using TensorFlow backend.


# Preprocessing Section
Importing data sessions for subject 1

In [2]:
x1 = scipy.io.loadmat("prep/A_nonzero/S1-ADL1", mdict={'filled_features':'features', 'labels':'labels'})
x2 = scipy.io.loadmat("prep/A_nonzero/S1-ADL2", mdict={'filled_features':'features', 'labels':'labels'})
x3 = scipy.io.loadmat("prep/A_nonzero/S1-ADL3", mdict={'filled_features':'features', 'labels':'labels'})
x4 = scipy.io.loadmat("prep/A_nonzero/S1-ADL4", mdict={'filled_features':'features', 'labels':'labels'})
x5 = scipy.io.loadmat("prep/A_nonzero/S1-ADL5", mdict={'filled_features':'features', 'labels':'labels'})
x6 = scipy.io.loadmat("prep/A_nonzero/S1-Drill", mdict={'filled_features':'features', 'labels':'labels'})

Check the number of recorded samples for each session

In [3]:
print("Session shapes:\n")
print("ADL1:  ", x1['filled_features'].shape)
print("ADL2:  ", x2['filled_features'].shape)
print("ADL3:  ", x3['filled_features'].shape)
print("ADL4:  ", x4['filled_features'].shape)
print("ADL5:  ", x5['filled_features'].shape)
print("Drill: ", x6['filled_features'].shape)
print("\nTraining set: ADL1 + ADL2 + ADL3 + Drill = ", x1['filled_features'].shape[0] + x2['filled_features'].shape[0] + x3['filled_features'].shape[0] + x6['filled_features'].shape[0])
print("Test set: ADL4 + ADL5 = ", x4['filled_features'].shape[0] + x5['filled_features'].shape[0])

Session shapes:

ADL1:   (37507, 113)
ADL2:   (24510, 113)
ADL3:   (25305, 113)
ADL4:   (24851, 113)
ADL5:   (22440, 113)
Drill:  (52105, 113)

Training set: ADL1 + ADL2 + ADL3 + Drill =  139427
Test set: ADL4 + ADL5 =  47291


We split the data into train and test set.

In [4]:
# features
X_train = np.concatenate((x1['filled_features'],x2['filled_features'],x3['filled_features'],x6['filled_features']),axis=0)
X_test = np.concatenate((x4['filled_features'],x5['filled_features']),axis=0)

# labels (locomotion activity)
Y_train = np.concatenate((x1['labels'][:,0],x2['labels'][:,0],x3['labels'][:,0],x6['labels'][:,0]),axis=0) # here we take just the first column because we want to fulfill the first task
Y_test = np.concatenate((x4['labels'][:,0],x5['labels'][:,0]))
#Y_test = np.concatenate((x4['labels'][:,0],x5['labels'][:,0]),axis=0)

# Initial decision to overcome the problem of entire missing columns
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)


X_train shape:  (139427, 113)
X_test shape:  (47291, 113)


The dataset needs to be normalized in order to improve performances.

In [5]:
# fatures normalization
scaler = StandardScaler().fit(X_train)
X_train =scaler.transform(X_train)
X_test = scaler.transform(X_test)

We decided to use a onehot encoder for the labels

In [6]:
label_max = np.max(Y_train)
print("The higest label value is: ", label_max)
onehot_encoder = OneHotEncoder(n_values=label_max+1, sparse=False)
YOH_train = onehot_encoder.fit_transform(Y_train.reshape(-1, 1))
YOH_test = onehot_encoder.fit_transform(Y_test.reshape(-1, 1))

print("YOH_train shape: ", YOH_train.shape)
print("YOH_test shape: ", YOH_test.shape)

The higest label value is:  5
YOH_train shape:  (139427, 6)
YOH_test shape:  (47291, 6)


In [7]:
window_size = 10
stride = int(window_size / 2)
num_features = X_train.shape[1]
samples, classes = YOH_train.shape
print("Number of samples: ", samples, "\nNumber of classes: ", classes)

windows = int(samples // stride) - 1
print("Number of windows: ", windows)

Number of samples:  139427 
Number of classes:  6
Number of windows:  27884


In [8]:
num_features_test = X_test.shape[1]
samples_test, classes_test = YOH_test.shape
print("Number of samples: ", samples_test, "\nNumber of classes: ", classes_test)

windows_test = int(samples_test // stride) - 1
print("Number of windows: ", windows_test)

Number of samples:  47291 
Number of classes:  6
Number of windows:  9457


In [9]:
cube = np.zeros([windows, window_size, num_features])
cube.shape

(27884, 10, 113)

In [10]:
cube_test = np.zeros([windows_test, window_size, num_features])
cube_test.shape

(9457, 10, 113)

In [11]:
lab_cum = np.zeros([windows])
lab_cum.shape

(27884,)

In [12]:
lab_cum_test = np.zeros([windows_test])
lab_cum_test.shape

(9457,)

In [13]:
for w in range(windows):
    index = int(w * stride)
    cube[w,:,:] = X_train[index:index+window_size, :].reshape((window_size,num_features))
    l = YOH_train[index:index+window_size,:] # shape 50 x 5
    #print(np.sum(l, axis=0),np.argmax(np.sum(l, axis=0)))
    lab_cum[w] = np.argmax(np.sum(l, axis=0))

In [14]:
for w in range(windows_test):
    index = int(w * stride)
    cube_test[w,:,:] = X_test[index:index+window_size, :].reshape((window_size,num_features))
    l = YOH_test[index:index+window_size,:] # shape 50 x 5
    #print(np.sum(l, axis=0),np.argmax(np.sum(l, axis=0)))
    lab_cum_test[w] = np.argmax(np.sum(l, axis=0))

In [15]:
#print('cumulative labels: ', lab_cum.shape, type(lab_cum), "\n", lab_cum)
YOH_train_final = onehot_encoder.fit_transform(lab_cum.reshape(-1, 1))
print(YOH_train_final)

[[0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 ...
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]]


In [16]:
YOH_test_final = onehot_encoder.fit_transform(lab_cum_test.reshape(-1, 1))
print(YOH_test_final)

[[0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 ...
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]]


In [17]:
def TestModel(input_shape):
    """ 
    Arguments:
    input_shape -- shape of the images of the dataset

    Returns: 
    model -- a Model() instance in Keras
    """
    
    model = Sequential()
    model.add(Conv1D(filters = 18,
                    kernel_size=5,
                    strides=1,
                    padding='same',
                    input_shape = input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2,
                          strides=2,
                          padding='same'))
    
    model.add(Conv1D(filters = 36,
                    kernel_size=7,
                    strides=1,
                    padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2,
                          strides=2,
                          padding='same'))
    
    model.add(Dropout(0.2))
    
    model.add(Conv1D(filters = 72,
                    kernel_size=7,
                    strides=1,
                    padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2,
                          strides=2,
                          padding='same'))
    
    #model.add(Conv1D(filters = 144,
    #                kernel_size=7,
    #                strides=1,
    #                padding='same'))
    #model.add(BatchNormalization())
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=2,
    #                      strides=2,
    #                      padding='same'))
    
    model.add(Flatten())
    
    model.add(Dense(64, kernel_regularizer=regularizers.l2(0.01)))
    model.add(Activation('relu'))
    
    model.add(Dropout(0.4))

    model.add(Dense(classes))
    model.add(Activation('softmax'))
    
    #model.summary()
    
    return model

In [18]:
model_test = TestModel((10,113))

In [19]:
opt = Adam(lr=0.01)
model_test.compile(optimizer = opt, loss = "categorical_crossentropy", metrics = ["accuracy"])

In [20]:
model_test.fit(x = cube, y = YOH_train_final, epochs = 50, batch_size = 128, validation_data=(cube_test,YOH_test_final))

Train on 27884 samples, validate on 9457 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1f000e2ad30>

In [21]:
y_pred = model_test.predict(cube_test)
print("y_pred:\n", y_pred)

y_pred_hard = np.argmax(y_pred, axis=1)
print("\ny_pred_hard:\n", y_pred_hard)

YOH_pred = onehot_encoder.fit_transform(y_pred_hard.reshape(-1, 1))
print("\nYOH_pred:\n", YOH_pred)

y_pred:
 [[1.3969077e-24 9.9998748e-01 1.2481253e-05 7.8678078e-26 7.0251398e-32
  9.6364861e-20]
 [1.8353447e-24 9.9998665e-01 1.3386027e-05 1.0546289e-25 1.1443007e-31
  1.2776442e-19]
 [4.2821142e-22 9.9994445e-01 5.5555483e-05 3.4438199e-23 9.8919007e-28
  1.3877968e-17]
 ...
 [0.0000000e+00 0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00]]

y_pred_hard:
 [1 1 1 ... 2 2 2]

YOH_pred:
 [[0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 ...
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]]


In [22]:
print("F1-measure: ", f1_score(YOH_test_final, YOH_pred, average='weighted'))

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(classes):
    fpr[i], tpr[i], _ = roc_curve(YOH_test_final[:, i], YOH_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

print("ROC-AUC measure: ", roc_auc)

F1-measure:  0.9171759300164628
ROC-AUC measure:  {0: nan, 1: 0.9210150991613577, 2: 0.9000555068992679, 3: nan, 4: 0.9970149687441026, 5: 0.9253393665158371}


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [23]:
plt.

SyntaxError: invalid syntax (<ipython-input-23-efb1a9b93a17>, line 1)