# Classifiers
Exploring different classifiers with different autoencoders.

#### Table of contents:  

autoencoders:  
[Undercomplete Autoencoder](#Undercomplete-Autoencoder)  
[Sparse Autoencoder](#Sparse-Autoencoder)  
[Deep Autoencoder](#Deep-Autoencoder)  
[Contractive Autoencoder](#Contractive-Autoencoder)  

classifiers:  
[Simple dense layer](#Simple-dense-layer)  
[LSTM-based classifier](#LSTM-based-classifier)  
[kNN](#kNN)  
[SVC](#SVC)  
[Random Forest](#Random-Forest)  
[XGBoost](#XGBoost)  

In [1]:
import datareader # made by the previous author for reading the collected data
import dataextractor # same as above
import pandas
import numpy as np
import tensorflow as tf
# need to disable eager execution for .get_weights() in contractive autoencoder loss to work
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
# required for the contractive autoencoder
import tensorflow.keras.backend as K
import json
from datetime import datetime

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV

import talos
from talos.utils import lr_normalizer

from tensorflow import keras
from tensorflow.keras import layers, regularizers
import matplotlib.pyplot as plt

tf.keras.backend.set_floatx('float32') # call this, to set keras to use float32 to avoid a warning message
metrics = ['accuracy']#,
#            keras.metrics.TruePositives(),
#            keras.metrics.FalsePositives(),
#            keras.metrics.TrueNegatives(),
#            keras.metrics.FalseNegatives()]

In [2]:
# from https://github.com/ageron/handson-ml/blob/master/extra_tensorflow_reproducibility.ipynb
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1)

with tf.compat.v1.Session(config=config) as sess:
    #... this will run single threaded
    pass

In [3]:
import random

random.seed(1)
np.random.seed(4)
tf.random.set_seed(2)

In [4]:
# Start the notebook in the terminal with "PYTHONHASHSEED=0 jupyter notebook" 
# or in anaconda "set PYTHONHASHSEED=0" then start jupyter notebook
import os
if os.environ.get("PYTHONHASHSEED") != "0":
    raise Exception("You must set PYTHONHASHSEED=0 when starting the Jupyter server to get reproducible results.")

This is the original author's code, just copied into separate cells of this jupyter notebook:

In [5]:
def get_busy_vs_relax_timeframes(path, ident, seconds):
    """Returns raw data from either 'on task' or 'relax' time frames and their class (0 or 1).
    TODO: join functions"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### task versus relax (1 sample each)
        dataextract = dataextractor.DataExtractor(data[0][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  data[1][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  samp_rate)

        dataextract_relax = dataextractor.DataExtractor(data[0][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        data[1][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        samp_rate)
        try:
            tasks_data = np.vstack((tasks_data, dataextract.y[-samp_rate * seconds:]))
            tasks_y = np.vstack((tasks_y, 1))
            tasks_data = np.vstack((tasks_data, dataextract_relax.y[-samp_rate * seconds:]))
            tasks_y = np.vstack((tasks_y, 0))
        except ValueError:
            continue
#             print(ident)  # ignore short windows

    return tasks_data, tasks_y


In [6]:
def get_engagement_increase_vs_decrease_timeframes(path, ident, seconds):
    """Returns raw data from either engagement 'increase' or 'decrease' time frames and their class (0 or 1).
    TODO: join functions"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds * samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### engagement increase / decrease
        if task_num_table == 0:
            continue
        mid = int((relax_n[task_num_table][0] + relax_n[task_num_table][1])/2)
        length = int(samp_rate*30)
        for j in range(10):
            new_end = int(mid-j*samp_rate)

            new_start2 = int(mid+j*samp_rate)

            dataextract_decrease = dataextractor.DataExtractor(data[0][new_end - length:new_end],
                                                               data[1][new_end-length:new_end],
                                                               samp_rate)

            dataextract_increase = dataextractor.DataExtractor(data[0][new_start2: new_start2 + length],
                                                               data[1][new_start2: new_start2 + length], samp_rate)

            try:
                tasks_data = np.vstack((tasks_data, dataextract_increase.y))
                tasks_y = np.vstack((tasks_y, 1))
                tasks_data = np.vstack((tasks_data, dataextract_decrease.y))
                tasks_y = np.vstack((tasks_y, 0))
            except ValueError:
                print(ident)  # ignore short windows

    return tasks_data, tasks_y


In [7]:
def get_task_complexities_timeframes(path, ident, seconds):
    """Returns raw data along with task complexity class.
    TODO: join functions. Add parameter to choose different task types and complexities"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### task complexity classification
        if cog_res['task_complexity'][task_num_table] == 'medium':
            continue
        # if cog_res['task_label'][task_num_table] == 'FA' or cog_res['task_label'][task_num_table] == 'HP':
        #     continue
        if cog_res['task_label'][task_num_table] != 'NC':
            continue
        map_compl = {
            'low': 0,
            'medium': 2,
            'high': 1
        }
        for j in range(10):
            new_end = int(busy_n[task_num_table][1] - j * samp_rate)
            new_start = int(new_end - samp_rate*30)
            dataextract = dataextractor.DataExtractor(data[0][new_start:new_end],
                                                      data[1][new_start:new_end], samp_rate)
            try:
                tasks_data = np.vstack((tasks_data, dataextract.y))
                tasks_y = np.vstack((tasks_y, map_compl.get(cog_res['task_complexity'][task_num_table])))
            except ValueError:
                print(ident)

    return tasks_data, tasks_y


In [8]:
def get_TLX_timeframes(path, ident, seconds):
    """Returns raw data along with task load index class.
    TODO: join functions. Add parameter to choose different task types and complexities"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### task load index
        if cog_res['task_complexity'][task_num_table] == 'medium' or cog_res['task_label'][task_num_table] != 'PT':
            continue
        for j in range(10):
            new_end = int(busy_n[task_num_table][1] - j * samp_rate)
            new_start = int(new_end - samp_rate*30)
            dataextract = dataextractor.DataExtractor(data[0][new_start:new_end],
                                                      data[1][new_start:new_end], samp_rate)
            try:
                tasks_data = np.vstack((tasks_data, dataextract.y))
                tasks_y = np.vstack((tasks_y, cog_res['task_load_index'][task_num_table]))
            except ValueError:
                print(ident)

    return tasks_data, tasks_y


In [9]:
def get_data_from_idents(path, idents, seconds):
    """Go through all user data and take out windows of only <seconds> long time frames,
    along with the given class (from 'divide_each_task' function).
    """
    samp_rate = 43  # hard-coded sample rate
    data, ys = np.empty((0, samp_rate*seconds)), np.empty((0, 1))
    for i in idents:
        x, y = get_busy_vs_relax_timeframes(path, i, seconds) # either 'get_busy_vs_relax_timeframes',
        # get_engagement_increase_vs_decrease_timeframes, get_task_complexities_timeframes or get_TLX_timeframes
        # TODO: ^ modify, so that different functions can be accessible by parameter
        data = np.vstack((data, x))
        ys = np.vstack((ys, y))
    return data, ys


In [10]:
def model_train(model, x_train, y_train, batch_size, epochs, x_valid, y_valid, x_test, y_test):
    """Train model with the given training, validation, and test set, with appropriate batch size and # epochs."""
    epoch_data = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_valid, y_valid), verbose=0)
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc = score[1]
    score = score[0]
    return score, acc, epoch_data


In [11]:
def sequence_padding(x, maxlen):
    """Pad sequences (all have to be same length)."""
    print('Pad sequences (samples x time)')
    return sequence.pad_sequences(x, maxlen=maxlen, dtype=np.float)


## Autoencoders

#### Undercomplete Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [12]:
def undercomplete_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout")(input_data)
    # "encoded" is the encoded representation of the input
    encoded = Dense(encoding_dim, activation='relu', name="encoded")(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    autoencoder = Model(input_data, decoded)
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Sparse Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [13]:
def sparse_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout") (input_data)
    # "encoded" is the encoded representation of the input
    # add a sparsity constraint
    encoded = Dense(encoding_dim, activation='relu', name="encoded",
                    activity_regularizer=regularizers.l1(10e-5))(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    # this model maps an input to its reconstruction
    autoencoder = Model(input_data, decoded, name="sparse_ae")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Deep Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [14]:
def deep_ae(x, enc_layers=[512,256], encoding_dim=64, dec_layers=[256,512], encoded_as_model=False):
    # From https://www.tensorflow.org/guide/keras/functional#use_the_same_graph_of_layers_to_define_multiple_models
    input_data = keras.Input(shape=x[0].shape, name="normalized_signal")
    model = Dropout(0.25, name="dropout", autocast=False)(input_data)
    for i in enumerate(enc_layers):
        model = Dense(i[1], activation="relu", name="dense_enc_" + str(i[0]+1))(model)
    encoded_output = Dense(encoding_dim, activation="relu", name="encoded_signal")(model)

    encoded = encoded_output

    model = layers.Dense(dec_layers[0], activation="sigmoid", name="dense_dec_1")(encoded_output)
    for i in enumerate(dec_layers[1:]):
        model = Dense(i[1], activation="sigmoid", name="dense_dec_" + str(i[0]+2))(model)
    decoded_output = Dense(x[0].shape[0], activation="sigmoid", name="reconstructed_signal")(model)
    
    autoencoder = Model(input_data, decoded_output, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)

    return autoencoder, encoded

#### Contractive Autoencoder
From: https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

In [15]:
# define a function to be able to access the autoencoder in the loss funciton
def loss_with_params(autoencoder):
    # loss function from https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/
    def contractive_loss(y_pred, y_true):

        lam = 1e-4
        mse = K.mean(K.square(y_true - y_pred), axis=1)

        W = K.variable(value=autoencoder.get_layer('encoded').get_weights()[0])  # N x N_hidden
        W = K.transpose(W)  # N_hidden x N
        h = autoencoder.get_layer('encoded').output
        dh = h * (1 - h)  # N_batch x N_hidden

        # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
        contractive = lam * K.sum(dh**2 * K.sum(W**2, axis=1), axis=1)

        return mse + contractive
    return contractive_loss

In [16]:
def contractive_ae(x, encoding_dim=64, encoded_as_model=False):
    # From https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

    input_data = Input(shape=x[0].shape, name="input")
    encoded = Dense(encoding_dim, activation='sigmoid', name='encoded')(input_data)
    outputs = Dense(x[0].shape[0], activation='linear', name="output")(encoded)

    autoencoder = Model(input_data, outputs, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss=loss_with_params(autoencoder), metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

## Classifiers

Initialize variables:

In [17]:
# initialize a dictionary to store accuracies for comparison
accuracies = {}

# used for reading the data into an array
seconds = 30  # time window length
idents = ['2gu87', 'iz2ps', '1mpau', '7dwjy', '7swyk', '94mnx', 'bd47a', 'c24ur', 'ctsax', 'dkhty', 'e4gay',
              'ef5rq', 'f1gjp', 'hpbxa', 'pmyfl', 'r89k1', 'tn4vl', 'td5pr', 'gyqu9', 'fzchw', 'l53hg', '3n2f9',
              '62i9y']
path = '../../../StudyData/'

# change to len(idents) at the end to use all the data
n = 5 #len(idents)

#### Simple dense layer

Define the classifier:

In [18]:
def dense_classifier(model, params):
    
    model = Dropout(params['dropout'], name='dropout_cl')(model)
    model = Dense(params['hidden_size'], activation=params['activation'], name='dense_cl1')(model)
    model = Dense(1, activation=params['last_activation'], name='dense_cl2')(model)

    return model

In [19]:
def dense_classifier_base():
    model = Sequential()
    model.add(Dropout(0))
    model.add(Dense(32))
    model.add(Activation('sigmoid'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=metrics)
    return model

In [20]:
params = {
    'dropout': 0.24,
    'optimizer': 'Adam',
    'hidden_size': 64,
    'loss': 'binary_crossentropy',
    'last_activation': 'sigmoid',
    'activation': 'softmax',
    'batch_size': 256,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [21]:
# set the variables in the dictionary
accuracies['simple_dense'] = {}
accs = accuracies['simple_dense']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = dense_classifier_base()
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512])
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.010040
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
iteration: 2 of 5 ; time elapsed: 0:00:55.032695
iteration: 3 of 5 ; time elapsed: 0:01:55.831813
iteration: 4 of 5 ; time elapsed: 0:03:00.506752
iteration: 5 of 5 ; time elapsed: 0:04:11.716696
Completed! Time elapsed: 0:05:28.376478


In [24]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,0.642857,0.857143,0.857143,0.571429,0.785714
1,0.676471,0.705882,0.764706,0.529412,0.558824
2,0.694444,0.666667,0.694444,0.638889,0.611111
3,0.411765,0.558824,0.617647,0.5,0.470588
4,0.5,0.6,0.8,0.5,0.7


In [25]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.4117647
- max: 0.6944444
- mean: 0.5851073
- median: 0.64285713

undercomplete accuracies:
- min: 0.5588235
- max: 0.85714287
- mean: 0.6777031
- median: 0.6666667

sparse accuracies:
- min: 0.61764705
- max: 0.85714287
- mean: 0.7467881
- median: 0.7647059

deep accuracies:
- min: 0.5
- max: 0.6388889
- mean: 0.54794586
- median: 0.5294118

contractive accuracies:
- min: 0.47058824
- max: 0.78571427
- mean: 0.6252474
- median: 0.6111111



#### LSTM-based classifier  
based on the original author's code

Optimize hyperparameters with talos:

In [26]:
def LSTM_classifier(model, params):

    model = layers.Reshape((-1, 1), input_shape=(model.shape), name='reshape_cl') (model)

    model = layers.Dropout(params['dropout'], name='dropout_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl1') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl2') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl2') (model)
    
    model = layers.Dropout(params['dropout'], name='dropout_cl2') (model)

    model = LSTM(params['lstm_output_size'], activation='sigmoid', name='lstm_cl') (model)

    model = Dense(1, activation=params['last_activation'], name='dense_cl') (model)
    return model

In [27]:
def LSTM_classifier_base(params):
    
    model = Sequential()
    model.add(Dropout(params['dropout']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))

    model.add(MaxPooling1D(pool_size=params['pool_size']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))
    model.add(MaxPooling1D(pool_size=params['pool_size']))

    model.add(Dropout(params['dropout']))
    model.add(LSTM(params['lstm_output_size']))
    model.add(Dense(1))
    model.add(Activation(params['last_activation']))

    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=['acc'])
    
    return model

In [28]:
params_base = {
    'kernel_size': 32,
    'strides': 4,
    'pool_size': 2,
    'filters': 8,
    'lstm_output_size': 236,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [29]:
params = {
    'kernel_size': 8,
    'filters': 3,
    'strides': 2,
    'pool_size': 2,
    'dropout': 0.09,
    'optimizer': 'Nadam',
    'loss': 'binary_crossentropy',
    'activation': 'relu',
    'last_activation': 'sigmoid',
    'lstm_output_size': 256,
    'batch_size': 64,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [30]:
# set the variables in the dictionary
accuracies['LSTM'] = {}
accs = accuracies['LSTM']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = LSTM_classifier_base(params_base)
        # reshape data for he base classifier
        xtt_train = xt_train.reshape(-1, xt_train[0].shape[0], 1)
        xtt_valid = xt_valid.reshape(-1, xt_valid[0].shape[0], 1)
        xtt_test = xt_test.reshape(-1, xt_test[0].shape[0], 1)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xtt_train, y_train, params['batch_size'], params['epochs'],
                                               xtt_valid, y_valid, xtt_test, y_test)
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512])
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.008949
iteration: 2 of 5 ; time elapsed: 0:03:16.818735
iteration: 3 of 5 ; time elapsed: 0:07:05.243815
iteration: 4 of 5 ; time elapsed: 0:11:07.659965
iteration: 5 of 5 ; time elapsed: 0:15:28.744721
Completed! Time elapsed: 0:20:33.367144


In [31]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,0.785714,0.75,0.857143,0.75,0.892857
1,0.764706,0.676471,0.705882,0.764706,0.735294
2,0.777778,0.666667,0.694444,0.5,0.666667
3,0.676471,0.588235,0.588235,0.5,0.647059
4,0.666667,0.533333,0.633333,0.633333,0.666667


In [32]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.6666667
- max: 0.78571427
- mean: 0.73426706
- median: 0.7647059

undercomplete accuracies:
- min: 0.53333336
- max: 0.75
- mean: 0.6429412
- median: 0.6666667

sparse accuracies:
- min: 0.5882353
- max: 0.85714287
- mean: 0.69580764
- median: 0.6944444

deep accuracies:
- min: 0.5
- max: 0.7647059
- mean: 0.62960786
- median: 0.6333333

contractive accuracies:
- min: 0.64705884
- max: 0.89285713
- mean: 0.7217087
- median: 0.6666667



#### kNN

In [33]:
from sklearn.neighbors import KNeighborsClassifier

def KNN_classifier():
    model = KNeighborsClassifier(p=3, n_neighbors=7, metric='cosine')
    return model

Combine the autoencoders with the classifier: 

In [34]:
# set the variables in the dictionary
accuracies['kNN'] = {}
accs = accuracies['kNN']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = KNN_classifier()
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xt_train, y_train)
        curr_acc = np.sum(model.predict(xt_test) == y_test.T) / y_test.shape[0]
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = KNN_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = KNN_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = KNN_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = KNN_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.007980
iteration: 2 of 5 ; time elapsed: 0:01:42.610682
iteration: 3 of 5 ; time elapsed: 0:03:26.699434
iteration: 4 of 5 ; time elapsed: 0:05:32.739096
iteration: 5 of 5 ; time elapsed: 0:07:43.499664
Completed! Time elapsed: 0:09:57.965626


In [35]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,0.607143,0.75,0.642857,0.714286,0.571429
1,0.588235,0.705882,0.647059,0.735294,0.735294
2,0.611111,0.694444,0.666667,0.555556,0.75
3,0.588235,0.588235,0.558824,0.5,0.588235
4,0.766667,0.6,0.666667,0.733333,0.5


In [36]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.5882352941176471
- max: 0.7666666666666667
- mean: 0.6322782446311858
- median: 0.6071428571428571

undercomplete accuracies:
- min: 0.5882352941176471
- max: 0.75
- mean: 0.6677124183006538
- median: 0.6944444444444444

sparse accuracies:
- min: 0.5588235294117647
- max: 0.6666666666666666
- mean: 0.6364145658263305
- median: 0.6470588235294118

deep accuracies:
- min: 0.5
- max: 0.7352941176470589
- mean: 0.6476937441643325
- median: 0.7142857142857143

contractive accuracies:
- min: 0.5
- max: 0.75
- mean: 0.6289915966386554
- median: 0.5882352941176471



####  SVC

In [37]:
from sklearn.svm import SVC

def SVC_classifier():
    model = SVC(kernel='rbf', C=1.5)
    return model

Combine the autoencoders with the classifier: 

In [38]:
# set the variables in the dictionary
accuracies['SVC'] = {}
accs = accuracies['SVC']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = SVC_classifier()
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xt_train, y_train)
        curr_acc = np.sum(model.predict(xt_test) == y_test.T) / y_test.shape[0]
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = SVC_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = SVC_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = SVC_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = SVC_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.007978
iteration: 2 of 5 ; time elapsed: 0:01:57.303690
iteration: 3 of 5 ; time elapsed: 0:03:53.245221
iteration: 4 of 5 ; time elapsed: 0:05:56.133007
iteration: 5 of 5 ; time elapsed: 0:08:10.454109
Completed! Time elapsed: 0:10:35.452079


In [39]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,0.75,0.714286,0.642857,0.607143,0.642857
1,0.705882,0.588235,0.676471,0.529412,0.588235
2,0.666667,0.527778,0.555556,0.611111,0.583333
3,0.470588,0.5,0.5,0.470588,0.5
4,0.6,0.6,0.533333,0.666667,0.6


In [40]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.47058823529411764
- max: 0.75
- mean: 0.6386274509803922
- median: 0.6666666666666666

undercomplete accuracies:
- min: 0.5
- max: 0.7142857142857143
- mean: 0.5860597572362278
- median: 0.5882352941176471

sparse accuracies:
- min: 0.5
- max: 0.6764705882352942
- mean: 0.5816433239962653
- median: 0.5555555555555556

deep accuracies:
- min: 0.47058823529411764
- max: 0.6666666666666666
- mean: 0.5769841269841269
- median: 0.6071428571428571

contractive accuracies:
- min: 0.5
- max: 0.6428571428571429
- mean: 0.5828851540616247
- median: 0.5882352941176471



#### Random Forest

In [41]:
from sklearn.ensemble import RandomForestClassifier
def random_forest_classifier():
    model = RandomForestClassifier(n_estimators = 250,
                                     min_samples_split = 10,
                                     min_samples_leaf = 4,
                                     max_features = 'auto',
                                     max_depth = 90,
                                     bootstrap = True)
    return model

Combine the autoencoders with the classifier: 

In [42]:
# set the variables in the dictionary
accuracies['random_forest'] = {}
accs = accuracies['random_forest']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = random_forest_classifier()
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xt_train, y_train)
        curr_acc = np.sum(model.predict(xt_test) == y_test.T) / y_test.shape[0]
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = random_forest_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = random_forest_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = random_forest_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = random_forest_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.004987
iteration: 2 of 5 ; time elapsed: 0:02:09.820573
iteration: 3 of 5 ; time elapsed: 0:04:27.855148
iteration: 4 of 5 ; time elapsed: 0:06:45.959286
iteration: 5 of 5 ; time elapsed: 0:09:09.360952
Completed! Time elapsed: 0:11:51.139776


In [43]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,1.0,1.0,0.928571,0.857143,1.0
1,0.647059,0.705882,0.735294,0.647059,0.735294
2,0.777778,0.805556,0.833333,0.777778,0.805556
3,0.705882,0.617647,0.705882,0.411765,0.588235
4,0.566667,0.5,0.566667,0.566667,0.733333


In [44]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.5666666666666667
- max: 1.0
- mean: 0.7394771241830066
- median: 0.7058823529411765

undercomplete accuracies:
- min: 0.5
- max: 1.0
- mean: 0.7258169934640523
- median: 0.7058823529411765

sparse accuracies:
- min: 0.5666666666666667
- max: 0.9285714285714286
- mean: 0.7539495798319328
- median: 0.7352941176470589

deep accuracies:
- min: 0.4117647058823529
- max: 0.8571428571428571
- mean: 0.6520821661998133
- median: 0.6470588235294118

contractive accuracies:
- min: 0.5882352941176471
- max: 1.0
- mean: 0.772483660130719
- median: 0.7352941176470589



#### Naive Bayesian

In [45]:
from sklearn.naive_bayes import ComplementNB

def naive_bayesian_classifier():
    model = ComplementNB()
    return model

Combine the autoencoders with the classifier: 

In [46]:
# set the variables in the dictionary
accuracies['naive_bayesian'] = {}
accs = accuracies['naive_bayesian']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = naive_bayesian_classifier()
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xt_train, y_train)
        curr_acc = np.sum(model.predict(xt_test) == y_test.T) / y_test.shape[0]
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = naive_bayesian_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = naive_bayesian_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = naive_bayesian_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = naive_bayesian_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.005958
iteration: 2 of 5 ; time elapsed: 0:02:10.568915
iteration: 3 of 5 ; time elapsed: 0:04:29.500474
iteration: 4 of 5 ; time elapsed: 0:06:52.040741
iteration: 5 of 5 ; time elapsed: 0:09:30.329344
Completed! Time elapsed: 0:12:25.619719


In [47]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,0.5,0.571429,0.571429,0.535714,0.535714
1,0.382353,0.5,0.558824,0.5,0.5
2,0.361111,0.5,0.5,0.555556,0.472222
3,0.588235,0.529412,0.529412,0.529412,0.470588
4,0.433333,0.566667,0.566667,0.533333,0.466667


In [48]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.3611111111111111
- max: 0.5882352941176471
- mean: 0.4530065359477124
- median: 0.43333333333333335

undercomplete accuracies:
- min: 0.5
- max: 0.5714285714285714
- mean: 0.5335014005602241
- median: 0.5294117647058824

sparse accuracies:
- min: 0.5
- max: 0.5714285714285714
- mean: 0.545266106442577
- median: 0.5588235294117647

deep accuracies:
- min: 0.5
- max: 0.5555555555555556
- mean: 0.5308029878618113
- median: 0.5333333333333333

contractive accuracies:
- min: 0.4666666666666667
- max: 0.5357142857142857
- mean: 0.4890382819794585
- median: 0.4722222222222222



#### XGBoost

In [49]:
from xgboost import XGBClassifier

def XGBoost_classifier():
    model = XGBClassifier(n_estimators = 83)
    return model

Combine the autoencoders with the classifier: 

In [50]:
# set the variables in the dictionary
accuracies['XGBoost'] = {}
accs = accuracies['XGBoost']
accs['base'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # Load data
        xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
        xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
        xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler()
        mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)

        # Base classifier
        model = XGBoost_classifier()
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xt_train, y_train)
        curr_acc = np.sum(model.predict(xt_test) == y_test.T) / y_test.shape[0]
        accs['base'].append(curr_acc)

        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
    #     print("undercomplete AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = XGBoost_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("undercomplete CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
    #     print("sparse AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = XGBoost_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("sparse CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
    #     print("deep AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = XGBoost_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("deep CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
    #     print("contractive AUTOENCODER TRAINING: ", ident)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = XGBoost_classifier()
        xtt_train = encoded.predict(xt_train)
        xtt_test = encoded.predict(xt_test)
    #     print("contractive CLASSIFICATION TRAINING: ", ident)
        model.fit(xtt_train, y_train)
        curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.006981
iteration: 2 of 5 ; time elapsed: 0:02:24.570867
iteration: 3 of 5 ; time elapsed: 0:04:54.471335
iteration: 4 of 5 ; time elapsed: 0:07:31.323163
iteration: 5 of 5 ; time elapsed: 0:10:13.582053
Completed! Time elapsed: 0:13:16.091685


In [51]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,base,undercomplete,sparse,deep,contractive
0,1.0,1.0,1.0,1.0,1.0
1,0.647059,0.735294,0.794118,0.617647,0.617647
2,0.666667,0.722222,0.777778,0.666667,0.583333
3,0.735294,0.617647,0.588235,0.617647,0.529412
4,0.6,0.566667,0.566667,0.566667,0.566667


In [52]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

base accuracies:
- min: 0.6
- max: 1.0
- mean: 0.7298039215686274
- median: 0.6666666666666666

undercomplete accuracies:
- min: 0.5666666666666667
- max: 1.0
- mean: 0.7283660130718955
- median: 0.7222222222222222

sparse accuracies:
- min: 0.5666666666666667
- max: 1.0
- mean: 0.745359477124183
- median: 0.7777777777777778

deep accuracies:
- min: 0.5666666666666667
- max: 1.0
- mean: 0.6937254901960784
- median: 0.6176470588235294

contractive accuracies:
- min: 0.5294117647058824
- max: 1.0
- mean: 0.6594117647058824
- median: 0.5833333333333334



###  Compare Accuracies

Print min, max, mean, median for each clasifier/autoencoder combination:

In [53]:
for classifier in accuracies:
    print(classifier + ":")
    for key in accuracies[classifier]:
        print("  " + key, "accuracies:")
        print("   - min:", np.min(accs[key]))
        print("   - max:", np.max(accs[key]))
        print("   - mean:", np.mean(accs[key]))
        print("   - median:", np.median(accs[key]))
        print("")
    print("\n")

simple_dense:
  base accuracies:
   - min: 0.6
   - max: 1.0
   - mean: 0.7298039215686274
   - median: 0.6666666666666666

  undercomplete accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.7283660130718955
   - median: 0.7222222222222222

  sparse accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.745359477124183
   - median: 0.7777777777777778

  deep accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.6937254901960784
   - median: 0.6176470588235294

  contractive accuracies:
   - min: 0.5294117647058824
   - max: 1.0
   - mean: 0.6594117647058824
   - median: 0.5833333333333334



LSTM:
  base accuracies:
   - min: 0.6
   - max: 1.0
   - mean: 0.7298039215686274
   - median: 0.6666666666666666

  undercomplete accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.7283660130718955
   - median: 0.7222222222222222

  sparse accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.745359477124183
   - median: 0

Print all accuracies in table form:

In [54]:
for classifier in accuracies:
    print(classifier + ":")
    print(pandas.DataFrame.from_dict(accuracies[classifier]))
    print("\n")

simple_dense:
       base  undercomplete    sparse      deep  contractive
0  0.642857       0.857143  0.857143  0.571429     0.785714
1  0.676471       0.705882  0.764706  0.529412     0.558824
2  0.694444       0.666667  0.694444  0.638889     0.611111
3  0.411765       0.558824  0.617647  0.500000     0.470588
4  0.500000       0.600000  0.800000  0.500000     0.700000


LSTM:
       base  undercomplete    sparse      deep  contractive
0  0.785714       0.750000  0.857143  0.750000     0.892857
1  0.764706       0.676471  0.705882  0.764706     0.735294
2  0.777778       0.666667  0.694444  0.500000     0.666667
3  0.676471       0.588235  0.588235  0.500000     0.647059
4  0.666667       0.533333  0.633333  0.633333     0.666667


kNN:
       base  undercomplete    sparse      deep  contractive
0  0.607143       0.750000  0.642857  0.714286     0.571429
1  0.588235       0.705882  0.647059  0.735294     0.735294
2  0.611111       0.694444  0.666667  0.555556     0.750000
3  0.588235