# Classifiers
Exploring different classifiers with different autoencoders.

#### Table of contents:  

autoencoders:  
[Undercomplete Autoencoder](#Undercomplete-Autoencoder)  
[Sparse Autoencoder](#Sparse-Autoencoder)  
[Deep Autoencoder](#Deep-Autoencoder)  
[Contractive Autoencoder](#Contractive-Autoencoder)  

classifiers:  
[Simple dense layer](#Simple-dense-layer)  
[LSTM-based classifier](#LSTM-based-classifier)  
[kNN](#kNN)  
[SVC](#SVC)  
[Random Forest](#Random-Forest)  
[XGBoost](#XGBoost)  

In [1]:
import datareader # made by the previous author for reading the collected data
import dataextractor # same as above
import pandas
import numpy as np
import tensorflow as tf
# need to disable eager execution for .get_weights() in contractive autoencoder loss to work
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
# required for the contractive autoencoder
import tensorflow.keras.backend as K
import json

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV

import talos
from talos.utils import lr_normalizer

from tensorflow import keras
from tensorflow.keras import layers, regularizers
import matplotlib.pyplot as plt

tf.keras.backend.set_floatx('float32') # call this, to set keras to use float32 to avoid a warning message
metrics = ['accuracy',
           keras.metrics.TruePositives(),
           keras.metrics.FalsePositives(),
           keras.metrics.TrueNegatives(),
           keras.metrics.FalseNegatives()]

In [2]:
np.random.seed(4)
tf.random.set_seed(2)

This is the original author's code, just copied into separate cells of this jupyter notebook:

In [3]:
def get_busy_vs_relax_timeframes(path, ident, seconds):
    """Returns raw data from either 'on task' or 'relax' time frames and their class (0 or 1).
    TODO: join functions"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### task versus relax (1 sample each)
        dataextract = dataextractor.DataExtractor(data[0][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  data[1][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  samp_rate)

        dataextract_relax = dataextractor.DataExtractor(data[0][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        data[1][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        samp_rate)
        try:
            tasks_data = np.vstack((tasks_data, dataextract.y[-samp_rate * seconds:]))
            tasks_y = np.vstack((tasks_y, 1))
            tasks_data = np.vstack((tasks_data, dataextract_relax.y[-samp_rate * seconds:]))
            tasks_y = np.vstack((tasks_y, 0))
        except ValueError:
            continue
#             print(ident)  # ignore short windows

    return tasks_data, tasks_y


In [4]:
def get_engagement_increase_vs_decrease_timeframes(path, ident, seconds):
    """Returns raw data from either engagement 'increase' or 'decrease' time frames and their class (0 or 1).
    TODO: join functions"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds * samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### engagement increase / decrease
        if task_num_table == 0:
            continue
        mid = int((relax_n[task_num_table][0] + relax_n[task_num_table][1])/2)
        length = int(samp_rate*30)
        for j in range(10):
            new_end = int(mid-j*samp_rate)

            new_start2 = int(mid+j*samp_rate)

            dataextract_decrease = dataextractor.DataExtractor(data[0][new_end - length:new_end],
                                                               data[1][new_end-length:new_end],
                                                               samp_rate)

            dataextract_increase = dataextractor.DataExtractor(data[0][new_start2: new_start2 + length],
                                                               data[1][new_start2: new_start2 + length], samp_rate)

            try:
                tasks_data = np.vstack((tasks_data, dataextract_increase.y))
                tasks_y = np.vstack((tasks_y, 1))
                tasks_data = np.vstack((tasks_data, dataextract_decrease.y))
                tasks_y = np.vstack((tasks_y, 0))
            except ValueError:
                print(ident)  # ignore short windows

    return tasks_data, tasks_y


In [5]:
def get_task_complexities_timeframes(path, ident, seconds):
    """Returns raw data along with task complexity class.
    TODO: join functions. Add parameter to choose different task types and complexities"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### task complexity classification
        if cog_res['task_complexity'][task_num_table] == 'medium':
            continue
        # if cog_res['task_label'][task_num_table] == 'FA' or cog_res['task_label'][task_num_table] == 'HP':
        #     continue
        if cog_res['task_label'][task_num_table] != 'NC':
            continue
        map_compl = {
            'low': 0,
            'medium': 2,
            'high': 1
        }
        for j in range(10):
            new_end = int(busy_n[task_num_table][1] - j * samp_rate)
            new_start = int(new_end - samp_rate*30)
            dataextract = dataextractor.DataExtractor(data[0][new_start:new_end],
                                                      data[1][new_start:new_end], samp_rate)
            try:
                tasks_data = np.vstack((tasks_data, dataextract.y))
                tasks_y = np.vstack((tasks_y, map_compl.get(cog_res['task_complexity'][task_num_table])))
            except ValueError:
                print(ident)

    return tasks_data, tasks_y


In [6]:
def get_TLX_timeframes(path, ident, seconds):
    """Returns raw data along with task load index class.
    TODO: join functions. Add parameter to choose different task types and complexities"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17

        ### task load index
        if cog_res['task_complexity'][task_num_table] == 'medium' or cog_res['task_label'][task_num_table] != 'PT':
            continue
        for j in range(10):
            new_end = int(busy_n[task_num_table][1] - j * samp_rate)
            new_start = int(new_end - samp_rate*30)
            dataextract = dataextractor.DataExtractor(data[0][new_start:new_end],
                                                      data[1][new_start:new_end], samp_rate)
            try:
                tasks_data = np.vstack((tasks_data, dataextract.y))
                tasks_y = np.vstack((tasks_y, cog_res['task_load_index'][task_num_table]))
            except ValueError:
                print(ident)

    return tasks_data, tasks_y


In [7]:
def get_data_from_idents(path, idents, seconds):
    """Go through all user data and take out windows of only <seconds> long time frames,
    along with the given class (from 'divide_each_task' function).
    """
    samp_rate = 43  # hard-coded sample rate
    data, ys = np.empty((0, samp_rate*seconds)), np.empty((0, 1))
    for i in idents:
        x, y = get_busy_vs_relax_timeframes(path, i, seconds) # either 'get_busy_vs_relax_timeframes',
        # get_engagement_increase_vs_decrease_timeframes, get_task_complexities_timeframes or get_TLX_timeframes
        # TODO: ^ modify, so that different functions can be accessible by parameter
        data = np.vstack((data, x))
        ys = np.vstack((ys, y))
    return data, ys


In [8]:
def model_train(model, x_train, y_train, batch_size, epochs, x_valid, y_valid, x_test, y_test):
    """Train model with the given training, validation, and test set, with appropriate batch size and # epochs."""
    epoch_data = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_valid, y_valid), verbose=0)
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc = score[1]
    score = score[0]
    return score, acc, epoch_data


In [9]:
def sequence_padding(x, maxlen):
    """Pad sequences (all have to be same length)."""
    print('Pad sequences (samples x time)')
    return sequence.pad_sequences(x, maxlen=maxlen, dtype=np.float)


## Autoencoders

#### Undercomplete Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [10]:
def undercomplete_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout")(input_data)
    # "encoded" is the encoded representation of the input
    encoded = Dense(encoding_dim, activation='relu', name="encoded")(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    autoencoder = Model(input_data, decoded)
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Sparse Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [11]:
def sparse_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout") (input_data)
    # "encoded" is the encoded representation of the input
    # add a sparsity constraint
    encoded = Dense(encoding_dim, activation='relu', name="encoded",
                    activity_regularizer=regularizers.l1(10e-5))(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    # this model maps an input to its reconstruction
    autoencoder = Model(input_data, decoded, name="sparse_ae")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Deep Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [12]:
def deep_ae(x, enc_layers=[512,256], encoding_dim=64, dec_layers=[256,512], encoded_as_model=False):
    # From https://www.tensorflow.org/guide/keras/functional#use_the_same_graph_of_layers_to_define_multiple_models
    input_data = keras.Input(shape=x[0].shape, name="normalized_signal")
    model = Dropout(0.25, name="dropout", autocast=False)(input_data)
    for i in enumerate(enc_layers):
        model = Dense(i[1], activation="relu", name="dense_enc_" + str(i[0]+1))(model)
    encoded_output = Dense(encoding_dim, activation="relu", name="encoded_signal")(model)

    encoded = encoded_output

    model = layers.Dense(dec_layers[0], activation="sigmoid", name="dense_dec_1")(encoded_output)
    for i in enumerate(dec_layers[1:]):
        model = Dense(i[1], activation="sigmoid", name="dense_dec_" + str(i[0]+2))(model)
    decoded_output = Dense(x[0].shape[0], activation="sigmoid", name="reconstructed_signal")(model)
    
    autoencoder = Model(input_data, decoded_output, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)

    return autoencoder, encoded

#### Contractive Autoencoder
From: https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

In [13]:
# define a function to be able to access the autoencoder in the loss funciton
def loss_with_params(autoencoder):
    # loss function from https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/
    def contractive_loss(y_pred, y_true):

        lam = 1e-4
        mse = K.mean(K.square(y_true - y_pred), axis=1)

        W = K.variable(value=autoencoder.get_layer('encoded').get_weights()[0])  # N x N_hidden
        W = K.transpose(W)  # N_hidden x N
        h = autoencoder.get_layer('encoded').output
        dh = h * (1 - h)  # N_batch x N_hidden

        # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
        contractive = lam * K.sum(dh**2 * K.sum(W**2, axis=1), axis=1)

        return mse + contractive
    return contractive_loss

In [14]:
def contractive_ae(x, encoding_dim=64, encoded_as_model=False):
    # From https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

    input_data = Input(shape=x[0].shape, name="input")
    encoded = Dense(encoding_dim, activation='sigmoid', name='encoded')(input_data)
    outputs = Dense(x[0].shape[0], activation='linear', name="output")(encoded)

    autoencoder = Model(input_data, outputs, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss=loss_with_params(autoencoder), metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

## Classifiers

Initialize variables:

In [15]:
# initialize a dictionary to store accuracies for comparison
accuracies = {}

# used for reading the data into an array
seconds = 30  # time window length
idents = ['2gu87', 'iz2ps', '1mpau', '7dwjy', '7swyk', '94mnx', 'bd47a', 'c24ur', 'ctsax', 'dkhty', 'e4gay',
              'ef5rq', 'f1gjp', 'hpbxa', 'pmyfl', 'r89k1', 'tn4vl', 'td5pr', 'gyqu9', 'fzchw', 'l53hg', '3n2f9',
              '62i9y']
path = '../../../StudyData/'

# change to len(idents) at the end to use all the data
n = 5 #len(idents)

#### Simple dense layer

Define the classifier:

In [16]:
def dense_classifier(model, params):
    
    model = Dropout(params['dropout'], name='dropout_cl')(model)
    model = Dense(params['hidden_size'], activation=params['activation'], name='dense_cl1')(model)
    model = Dense(1, activation=params['last_activation'], name='dense_cl2')(model)

    return model

In [17]:
params = {
    'dropout': 0.24,
    'optimizer': 'Adam',
    'hidden_size': 64,
    'loss': 'binary_crossentropy',
    'last_activation': 'sigmoid',
    'activation': 'softmax',
    'batch_size': 256,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [18]:
# set the variables in the dictionary
accuracies['simple_dense'] = {}
accs = accuracies['simple_dense']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = dense_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = dense_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512])
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = dense_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = dense_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [19]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,0.821429,0.821429,0.785714,0.785714
1,0.735294,0.735294,0.676471,0.588235
2,0.638889,0.638889,0.638889,0.472222
3,0.558824,0.588235,0.588235,0.647059
4,0.566667,0.6,0.5,0.566667


In [20]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.5588235
- max: 0.8214286
- mean: 0.66422033
- median: 0.6388889

sparse accuracies:
- min: 0.5882353
- max: 0.8214286
- mean: 0.6767694
- median: 0.6388889

deep accuracies:
- min: 0.5
- max: 0.78571427
- mean: 0.63786185
- median: 0.6388889

contractive accuracies:
- min: 0.4722222
- max: 0.78571427
- mean: 0.6119794
- median: 0.5882353



#### LSTM-based classifier  
based on the original author's code

Optimize hyperparameters with talos:

In [21]:
def LSTM_classifier(model, params):

    model = layers.Reshape((-1, 1), input_shape=(model.shape), name='reshape_cl') (model)

    model = layers.Dropout(params['dropout'], name='dropout_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl1') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl2') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl2') (model)
    
    model = layers.Dropout(params['dropout'], name='dropout_cl2') (model)

    model = LSTM(params['lstm_output_size'], activation='sigmoid', name='lstm_cl') (model)

    model = Dense(1, activation=params['last_activation'], name='dense_cl') (model)
    return model

In [22]:
params = {
    'kernel_size': 8,
    'filters': 3,
    'strides': 2,
    'pool_size': 2,
    'dropout': 0.09,
    'optimizer': 'Nadam',
    'loss': 'binary_crossentropy',
    'activation': 'relu',
    'last_activation': 'sigmoid',
    'lstm_output_size': 256,
    'batch_size': 64,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [23]:
# set the variables in the dictionary
accuracies['LSTM'] = {}
accs = accuracies['LSTM']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = LSTM_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = LSTM_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512])
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = LSTM_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = LSTM_classifier(encoded, params)
    model = Model(inputs=autoencoder.inputs, outputs=model)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=metrics)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                           xt_valid, y_valid, xt_test, y_test)
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [24]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,0.785714,0.892857,0.857143,0.857143
1,0.558824,0.735294,0.529412,0.764706
2,0.694444,0.694444,0.694444,0.75
3,0.588235,0.529412,0.5,0.588235
4,0.666667,0.633333,0.5,0.6


In [25]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.5588235
- max: 0.78571427
- mean: 0.6587769
- median: 0.6666667

sparse accuracies:
- min: 0.5294118
- max: 0.89285713
- mean: 0.6970681
- median: 0.6944444

deep accuracies:
- min: 0.5
- max: 0.85714287
- mean: 0.61619985
- median: 0.5294118

contractive accuracies:
- min: 0.5882353
- max: 0.85714287
- mean: 0.7120169
- median: 0.75



#### kNN

In [26]:
from sklearn.neighbors import KNeighborsClassifier

def KNN_classifier():
    model = KNeighborsClassifier(p=3, n_neighbors=7, metric='cosine')
    return model

Combine the autoencoders with the classifier: 

In [27]:
# set the variables in the dictionary
accuracies['kNN'] = {}
accs = accuracies['kNN']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = KNN_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = KNN_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = KNN_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = KNN_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [28]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,0.714286,0.821429,0.714286,0.607143
1,0.676471,0.676471,0.676471,0.764706
2,0.694444,0.638889,0.611111,0.666667
3,0.588235,0.588235,0.382353,0.617647
4,0.733333,0.7,0.566667,0.633333


In [29]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.5882352941176471
- max: 0.7333333333333333
- mean: 0.6813538748832866
- median: 0.6944444444444444

sparse accuracies:
- min: 0.5882352941176471
- max: 0.8214285714285714
- mean: 0.6850046685340804
- median: 0.6764705882352942

deep accuracies:
- min: 0.38235294117647056
- max: 0.7142857142857143
- mean: 0.5901774042950514
- median: 0.6111111111111112

contractive accuracies:
- min: 0.6071428571428571
- max: 0.7647058823529411
- mean: 0.6578991596638655
- median: 0.6333333333333333



####  SVC

In [30]:
from sklearn.svm import SVC

def SVC_classifier():
    model = SVC(kernel='rbf', C=1.5)
    return model

Combine the autoencoders with the classifier: 

In [31]:
# set the variables in the dictionary
accuracies['SVC'] = {}
accs = accuracies['SVC']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = SVC_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = SVC_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = SVC_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = SVC_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [32]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,0.642857,0.678571,0.607143,0.714286
1,0.529412,0.529412,0.558824,0.617647
2,0.5,0.694444,0.611111,0.666667
3,0.5,0.5,0.441176,0.5
4,0.5,0.733333,0.6,0.6


In [33]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.5
- max: 0.6428571428571429
- mean: 0.5344537815126051
- median: 0.5

sparse accuracies:
- min: 0.5
- max: 0.7333333333333333
- mean: 0.6271521942110179
- median: 0.6785714285714286

deep accuracies:
- min: 0.4411764705882353
- max: 0.6111111111111112
- mean: 0.5636507936507937
- median: 0.6

contractive accuracies:
- min: 0.5
- max: 0.7142857142857143
- mean: 0.6197198879551821
- median: 0.6176470588235294



#### Random Forest

In [34]:
from sklearn.ensemble import RandomForestClassifier
def random_forest_classifier():
    model = RandomForestClassifier(n_estimators = 250,
                                     min_samples_split = 10,
                                     min_samples_leaf = 4,
                                     max_features = 'auto',
                                     max_depth = 90,
                                     bootstrap = True)
    return model

Combine the autoencoders with the classifier: 

In [35]:
# set the variables in the dictionary
accuracies['random_forest'] = {}
accs = accuracies['random_forest']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = random_forest_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = random_forest_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = random_forest_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = random_forest_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [36]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,1.0,0.964286,0.964286,1.0
1,0.676471,0.794118,0.5,0.647059
2,0.833333,0.722222,0.722222,0.777778
3,0.5,0.558824,0.5,0.529412
4,0.5,0.566667,0.433333,0.6


In [37]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.5
- max: 1.0
- mean: 0.7019607843137254
- median: 0.6764705882352942

sparse accuracies:
- min: 0.5588235294117647
- max: 0.9642857142857143
- mean: 0.7212231559290382
- median: 0.7222222222222222

deep accuracies:
- min: 0.43333333333333335
- max: 0.9642857142857143
- mean: 0.6239682539682541
- median: 0.5

contractive accuracies:
- min: 0.5294117647058824
- max: 1.0
- mean: 0.7108496732026144
- median: 0.6470588235294118



#### Naive Bayesian

In [38]:
from sklearn.naive_bayes import ComplementNB

def naive_bayesian_classifier():
    model = ComplementNB()
    return model

Combine the autoencoders with the classifier: 

In [39]:
# set the variables in the dictionary
accuracies['naive_bayesian'] = {}
accs = accuracies['naive_bayesian']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = naive_bayesian_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = naive_bayesian_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = naive_bayesian_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = naive_bayesian_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [40]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,0.571429,0.464286,0.5,0.535714
1,0.529412,0.470588,0.5,0.5
2,0.472222,0.527778,0.472222,0.472222
3,0.5,0.5,0.5,0.5
4,0.5,0.5,0.533333,0.5


In [41]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.4722222222222222
- max: 0.5714285714285714
- mean: 0.5146125116713353
- median: 0.5

sparse accuracies:
- min: 0.4642857142857143
- max: 0.5277777777777778
- mean: 0.49253034547152197
- median: 0.5

deep accuracies:
- min: 0.4722222222222222
- max: 0.5333333333333333
- mean: 0.5011111111111111
- median: 0.5

contractive accuracies:
- min: 0.4722222222222222
- max: 0.5357142857142857
- mean: 0.5015873015873016
- median: 0.5



#### XGBoost

In [42]:
from xgboost import XGBClassifier

def XGBoost_classifier():
    model = XGBClassifier(n_estimators = 83)
    return model

Combine the autoencoders with the classifier: 

In [43]:
# set the variables in the dictionary
accuracies['XGBoost'] = {}
accs = accuracies['XGBoost']
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []

# leave out person out validation
for ident in range(n):
    
    print("iteration:", ident+1, "of", n)
    
    train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
    validation_idents = [idents[ident]]
    test_idents = [idents[ident-1]]

    # Load data
    xt_train, y_train = get_data_from_idents(path, train_idents, seconds)
    xt_valid, y_valid = get_data_from_idents(path, validation_idents, seconds)
    xt_test, y_test = get_data_from_idents(path, test_idents, seconds)

    # Scale with standard scaler
    sscaler = StandardScaler()
    sscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = sscaler.transform(xt_train)
    xt_valid = sscaler.transform(xt_valid)
    xt_test = sscaler.transform(xt_test)

    # Scale with MinMax to range [0,1]
    mmscaler = MinMaxScaler()
    mmscaler.fit(np.vstack((xt_train, xt_test, xt_valid)))
    xt_train = mmscaler.transform(xt_train)
    xt_valid = mmscaler.transform(xt_valid)
    xt_test = mmscaler.transform(xt_test)

    # AE Training params
    batch_size = 256
    epochs = 100

    # undercomplete AE
    autoencoder, encoded = undercomplete_ae(xt_train, 60, encoded_as_model=True)
#     print("undercomplete AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = XGBoost_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("undercomplete CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['undercomplete'].append(curr_acc)
    
    # sparse AE
    autoencoder, encoded = sparse_ae(xt_train, 60, encoded_as_model=True)
#     print("sparse AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = XGBoost_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("sparse CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['sparse'].append(curr_acc)

    # deep AE
    autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=60, dec_layers=[256,512], encoded_as_model=True)
#     print("deep AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = XGBoost_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("deep CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['deep'].append(curr_acc)
    
    # contractive AE
    autoencoder, encoded = contractive_ae(xt_train, 60, encoded_as_model=True)
#     print("contractive AUTOENCODER TRAINING: ", ident)
    sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                           xt_valid, xt_valid, xt_test, xt_test)
    model = XGBoost_classifier()
    xtt_train = encoded.predict(xt_train)
    xtt_test = encoded.predict(xt_test)
#     print("contractive CLASSIFICATION TRAINING: ", ident)
    model.fit(xtt_train, y_train)
    curr_acc = np.sum(model.predict(xtt_test) == y_test.T) / y_test.shape[0]
    accs['contractive'].append(curr_acc)

iteration: 1 of 5
iteration: 2 of 5
iteration: 3 of 5
iteration: 4 of 5
iteration: 5 of 5


In [44]:
# print some statistics
pandas.DataFrame.from_dict(accs)

Unnamed: 0,undercomplete,sparse,deep,contractive
0,1.0,1.0,1.0,1.0
1,0.676471,0.676471,0.588235,0.676471
2,0.888889,0.722222,0.722222,0.694444
3,0.647059,0.735294,0.558824,0.647059
4,0.766667,0.566667,0.666667,0.533333


In [45]:
for key in accs:
    print(key, "accuracies:")
    print("- min:", np.min(accs[key]))
    print("- max:", np.max(accs[key]))
    print("- mean:", np.mean(accs[key]))
    print("- median:", np.median(accs[key]))
    print("")

undercomplete accuracies:
- min: 0.6470588235294118
- max: 1.0
- mean: 0.7958169934640522
- median: 0.7666666666666667

sparse accuracies:
- min: 0.5666666666666667
- max: 1.0
- mean: 0.7401307189542484
- median: 0.7222222222222222

deep accuracies:
- min: 0.5588235294117647
- max: 1.0
- mean: 0.7071895424836602
- median: 0.6666666666666666

contractive accuracies:
- min: 0.5333333333333333
- max: 1.0
- mean: 0.7102614379084967
- median: 0.6764705882352942



###  Compare Accuracies

Print min, max, mean, median for each clasifier/autoencoder combination:

In [46]:
for classifier in accuracies:
    print(classifier + ":")
    for key in accuracies[classifier]:
        print("  " + key, "accuracies:")
        print("   - min:", np.min(accs[key]))
        print("   - max:", np.max(accs[key]))
        print("   - mean:", np.mean(accs[key]))
        print("   - median:", np.median(accs[key]))
        print("")
    print("\n")

simple_dense:
  undercomplete accuracies:
   - min: 0.6470588235294118
   - max: 1.0
   - mean: 0.7958169934640522
   - median: 0.7666666666666667

  sparse accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.7401307189542484
   - median: 0.7222222222222222

  deep accuracies:
   - min: 0.5588235294117647
   - max: 1.0
   - mean: 0.7071895424836602
   - median: 0.6666666666666666

  contractive accuracies:
   - min: 0.5333333333333333
   - max: 1.0
   - mean: 0.7102614379084967
   - median: 0.6764705882352942



LSTM:
  undercomplete accuracies:
   - min: 0.6470588235294118
   - max: 1.0
   - mean: 0.7958169934640522
   - median: 0.7666666666666667

  sparse accuracies:
   - min: 0.5666666666666667
   - max: 1.0
   - mean: 0.7401307189542484
   - median: 0.7222222222222222

  deep accuracies:
   - min: 0.5588235294117647
   - max: 1.0
   - mean: 0.7071895424836602
   - median: 0.6666666666666666

  contractive accuracies:
   - min: 0.5333333333333333
   - max: 1.0
   - 

Print all accuracies in table form:

In [47]:
for classifier in accuracies:
    print(classifier + ":")
    print(pandas.DataFrame.from_dict(accuracies[classifier]))
    print("\n")

simple_dense:
   undercomplete    sparse      deep  contractive
0       0.821429  0.821429  0.785714     0.785714
1       0.735294  0.735294  0.676471     0.588235
2       0.638889  0.638889  0.638889     0.472222
3       0.558824  0.588235  0.588235     0.647059
4       0.566667  0.600000  0.500000     0.566667


LSTM:
   undercomplete    sparse      deep  contractive
0       0.785714  0.892857  0.857143     0.857143
1       0.558824  0.735294  0.529412     0.764706
2       0.694444  0.694444  0.694444     0.750000
3       0.588235  0.529412  0.500000     0.588235
4       0.666667  0.633333  0.500000     0.600000


kNN:
   undercomplete    sparse      deep  contractive
0       0.714286  0.821429  0.714286     0.607143
1       0.676471  0.676471  0.676471     0.764706
2       0.694444  0.638889  0.611111     0.666667
3       0.588235  0.588235  0.382353     0.617647
4       0.733333  0.700000  0.566667     0.633333


SVC:
   undercomplete    sparse      deep  contractive
0       0.6428