# HDA - Project 3

In [1]:
import utils
import deeplearning
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

from keras import regularizers
from keras.activations import relu
from keras.layers import Conv2D, BatchNormalization, Dropout, LeakyReLU, Flatten, Activation, Dense, MaxPooling2D, LSTM, Reshape
from keras.models import Model, Sequential
from keras.optimizers import Adam

Using TensorFlow backend.


This notebook trains the chosen neural network on the complete set of users. Because of the huge amount of data, even for a single user, I fear that training and classification must be split into two separate sections, which means the dataset of each user is imported twice (once per section). I searched for a way to keep the data of each user in its own variable, created at each iteration of the loop, but found no suitable solution (as said above, I fear that storing everything, e.g. in a dictionary, would exhaust the available memory).

The idea is to compare the performance obtained when the network is trained only on the user of interest with the performance obtained when the network is trained on all users.

In [2]:
# --- Data location and subject selection ---------------------------------
folder = "./data/full/"
# When running on FloydHub use instead: folder = "/floyd/input/hdadataset/full/"
subjects = [1, 2, 3, 4]   # subjects iterated over by the training / classification loops
sbj = 1                   # single reference subject

# --- Preprocessing options (all forwarded to utils.preprocessing) --------
label = 0                 # default for task A
window_size = 64          # first axis of every sample fed to the network
stride = 3                # presumably the step between consecutive windows - TODO confirm in utils
null_class = True         # presumably keeps the "null" class among the targets - TODO confirm in utils

# --- Training hyper-parameters (forwarded to model.fit) ------------------
epochs = 25
batch_size = 300

In [None]:
# Neural network definition
#
# Builds and compiles the single model that is then trained on every subject.

n_features = 110  # number of features taken into consideration for the solution of the problem

# `n_classes` is needed to build the model but is otherwise only produced by
# utils.preprocessing (its last return value), which the training cell calls
# later. Query it here for the reference subject so that this cell also works
# on a fresh kernel (Restart & Run All). Only the last element is kept, so the
# loaded arrays are released immediately.
# NOTE(review): assumes the number of classes is the same for every subject
# given the same `label` / `null_class` - confirm in utils.preprocessing.
n_classes = utils.preprocessing(sbj,
                                folder,
                                label,
                                window_size,
                                stride,
                                null_class)[-1]

# Each input sample has shape (window_size, n_features, 1).
model = deeplearning.Hybrid((window_size, n_features, 1), n_classes)
model.summary()  # model visualization

model.compile(optimizer=Adam(lr=0.01),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [None]:
# TRAINING
#
# Fits the shared `model` on each subject in turn; the data of one subject is
# loaded, used, and then overwritten by the next iteration.

for sbj in subjects:

    # '%d' is required: without a conversion specifier the % operator raises
    # "TypeError: not all arguments converted during string formatting".
    print('Training for SUBJECT %d' % sbj)

    x_train, y_train, x_test, y_test, n_classes = utils.preprocessing(sbj,
                                                                      folder,
                                                                      label,
                                                                      window_size,
                                                                      stride,
                                                                      null_class)

    # Add a trailing singleton axis so that every sample matches the input
    # shape the model was built with: (window_size, n_features, 1).
    input_train = x_train.reshape(x_train.shape[0], window_size, n_features, 1)
    input_test = x_test.reshape(x_test.shape[0], window_size, n_features, 1)

    # The model defined above is bound to `model` (there is no `model_hyb`).
    model.fit(x=input_train,
              y=y_train,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1,
              validation_data=(input_test, y_test))

In [None]:
# CLASSIFICATION
#
# For each subject: reload its data, predict with the trained network, then
# classify the features extracted from the network with an SVM and report
# accuracy / weighted F1 for both approaches.

# Value(s) handed to deeplearning.SVMLayer; constant across subjects, so it is
# defined once outside the loop.
# NOTE(review): presumably the SVM regularisation parameter(s) - confirm in deeplearning.SVMLayer.
C = [2**(-6)]

for sbj in subjects:

    # '%d' is required: without a conversion specifier the % operator raises
    # "TypeError: not all arguments converted during string formatting".
    print('Classification for SUBJECT %d' % sbj)

    x_train, y_train, x_test, y_test, n_classes = utils.preprocessing(sbj,
                                                                      folder,
                                                                      label,
                                                                      window_size,
                                                                      stride,
                                                                      null_class)

    # Add a trailing singleton axis so that every sample matches the input
    # shape the model was built with: (window_size, n_features, 1).
    input_train = x_train.reshape(x_train.shape[0], window_size, n_features, 1)
    input_test = x_test.reshape(x_test.shape[0], window_size, n_features, 1)

    # The fourth argument is the width of the model's last layer output.
    # The batch size comes from the shared `batch_size` setting instead of a
    # second hard-coded 300.
    trainingFeatures, testingFeatures = deeplearning.extractFeatures(model,
                                                                     input_train,
                                                                     input_test,
                                                                     model.layers[-1].output_shape[1],
                                                                     batchSize=batch_size)

    prediction_encoded = model.predict(input_test)  # prediction on the test set using the trained model

    # For training the Support Vector Machine and for evaluating the model we
    # must switch from the one-hot encoded version to the original labels.
    output_train = np.argmax(y_train, axis=1)
    output_test = np.argmax(y_test, axis=1)
    prediction = np.argmax(prediction_encoded, axis=1)

    prediction_svm = deeplearning.SVMLayer(C,
                                           output_train,
                                           trainingFeatures,
                                           testingFeatures)

    print("\nBEFORE SVM:")
    print("Accuracy: ", accuracy_score(output_test, prediction))
    print("F1-measure: ", utils.f1_score(output_test, prediction, average='weighted'))

    print("\nAFTER SVM:")
    print("Accuracy: ", accuracy_score(output_test, prediction_svm))
    print("F1-measure: ", utils.f1_score(output_test, prediction_svm, average='weighted'))
    