# Classifiers - busy vs relaxed
Exploring different classifiers with different autoencoders.

#### Table of contents:  

autoencoders:  
[Undercomplete Autoencoder](#Undercomplete-Autoencoder)  
[Sparse Autoencoder](#Sparse-Autoencoder)  
[Deep Autoencoder](#Deep-Autoencoder)  
[Contractive Autoencoder](#Contractive-Autoencoder)  

classifiers:  
[Simple dense layer](#Simple-dense-layer)  
[LSTM-based classifier](#LSTM-based-classifier)  
[kNN](#kNN)  
[SVC](#SVC)  
[Random Forest](#Random-Forest)  
[XGBoost](#XGBoost)  

In [1]:
import datareader # made by the previous author for reading the collected data
import dataextractor # same as above
import pandas
import numpy as np
import tensorflow as tf
# need to disable eager execution for .get_weights() in contractive autoencoder loss to work
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
# required for the contractive autoencoder
import tensorflow.keras.backend as K
import json
from datetime import datetime

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV

import warnings

import talos
from talos.utils import lr_normalizer

from tensorflow import keras
from tensorflow.keras import layers, regularizers
import matplotlib.pyplot as plt

tf.keras.backend.set_floatx('float32') # call this, to set keras to use float32 to avoid a warning message
metrics = ['accuracy']#,
#            keras.metrics.TruePositives(),
#            keras.metrics.FalsePositives(),
#            keras.metrics.TrueNegatives(),
#            keras.metrics.FalseNegatives()]

In [2]:
# from https://github.com/ageron/handson-ml/blob/master/extra_tensorflow_reproducibility.ipynb
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1)

with tf.compat.v1.Session(config=config) as sess:
    #... this will run single threaded
    pass

In [3]:
import random

random.seed(1)
np.random.seed(4)
tf.random.set_seed(2)

In [4]:
# Start the notebook in the terminal with "PYTHONHASHSEED=0 jupyter notebook" 
# or in anaconda "set PYTHONHASHSEED=0" then start jupyter notebook
import os
if os.environ.get("PYTHONHASHSEED") != "0":
    raise Exception("You must set PYTHONHASHSEED=0 when starting the Jupyter server to get reproducible results.")

This is modfied original author's code for reading data:

In [5]:
def model_train(model, x_train, y_train, batch_size, epochs, x_valid, y_valid, x_test, y_test):
    """Train model with the given training, validation, and test set, with appropriate batch size and # epochs."""
    epoch_data = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_valid, y_valid), verbose=0)
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc = score[1]
    score = score[0]
    return score, acc, epoch_data


In [6]:
def get_busy_vs_relax_timeframes_br_hb(path, ident, seconds):
    """Returns raw data from either 'on task' or 'relax' time frames and their class (0 or 1)."""
    
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))
    breathing = np.empty((0,12))
    heartbeat = np.empty((0,10))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17
        tmp_tasks_data = np.empty((0, seconds*samp_rate))
        tmp_tasks_y = np.empty((0, 1))
        tmp_breathing = np.empty((0,12))
        tmp_heartbeat = np.empty((0,10))

        ### task versus relax (1 sample each)
        dataextract = dataextractor.DataExtractor(data[0][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  data[1][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  samp_rate)

        dataextract_relax = dataextractor.DataExtractor(data[0][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        data[1][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        samp_rate)

        try:

            # get extracted features for breathing
            tmpBR_busy = dataextract.extract_from_breathing_time(dataextract.t[-samp_rate*seconds:],
                                                                 dataextract.y[-samp_rate*seconds:])
            tmpBR_relax = dataextract_relax.extract_from_breathing_time(dataextract_relax.t[-samp_rate*seconds:],
                                                                 dataextract_relax.y[-samp_rate*seconds:])
            #get extracted features for heartbeat
            tmpHB_busy = dataextract.extract_from_heartbeat_time(dataextract.t[-samp_rate*seconds:],
                                                                 dataextract.y[-samp_rate*seconds:])
            tmpHB_relax = dataextract.extract_from_heartbeat_time(dataextract_relax.t[-samp_rate*seconds:],
                                                                 dataextract_relax.y[-samp_rate*seconds:])

            tmp_tasks_data = np.vstack((tmp_tasks_data, dataextract.y[-samp_rate * seconds:]))
            tmp_tasks_y = np.vstack((tasks_y, 1))
            tmp_tasks_data = np.vstack((tmp_tasks_data, dataextract_relax.y[-samp_rate * seconds:]))
            tmp_tasks_y = np.vstack((tmp_tasks_y, 0))

            # put busy frames then relaxed frames under the previous frames
            tmp_breathing = np.vstack((tmp_breathing, tmpBR_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            tmp_breathing = np.vstack((tmp_breathing, tmpBR_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

            tmp_heartbeat = np.vstack((tmp_heartbeat, tmpHB_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            tmp_heartbeat = np.vstack((tmp_heartbeat, tmpHB_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

        except ValueError:
#             print(ident)  # ignore short windows
            continue

        # put busy frames then relaxed frames under the previous frames
        tasks_data = np.vstack((tasks_data, dataextract.y[-samp_rate * seconds:]))
        tasks_y = np.vstack((tasks_y, 1))
        tasks_data = np.vstack((tasks_data, dataextract_relax.y[-samp_rate * seconds:]))
        tasks_y = np.vstack((tasks_y, 0))

        breathing = np.vstack((breathing, tmpBR_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
        breathing = np.vstack((breathing, tmpBR_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

        heartbeat = np.vstack((heartbeat, tmpHB_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
        heartbeat = np.vstack((heartbeat, tmpHB_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

    return tasks_data, tasks_y, breathing, heartbeat

In [7]:
def get_data_from_idents_br_hb(path, idents, seconds):
    """Go through all user data and take out windows of only <seconds> long time frames,
    along with the given class (from 'divide_each_task' function).
    """
    samp_rate = 43  # hard-coded sample rate
    data, ys = np.empty((0, samp_rate*seconds)), np.empty((0, 1))
    brs = np.empty((0,12))
    hbs = np.empty((0,10))
    combined = np.empty((0,22))
    
    # was gettign some weird warnings; stack overflow said to ignore them
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for i in idents:
            x, y, br, hb = get_busy_vs_relax_timeframes_br_hb(path, i, seconds) # either 'get_busy_vs_relax_timeframes',
            # get_engagement_increase_vs_decrease_timeframes, get_task_complexities_timeframes or get_TLX_timeframes

            data = np.vstack((data, x))
            ys = np.vstack((ys, y))
            brs = np.vstack((brs, br))
            hbs = np.vstack((hbs, hb))
        combined = np.hstack((brs,hbs))
    
    return data, ys, brs, hbs, combined

In [8]:
def scale_data(x, standardScaler=True, minMaxScaler=True):
    
    if standardScaler:
        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(x)
        x = sscaler.transform(x)

    if minMaxScaler:
        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler((0,1))
        mmscaler.fit(x)
        x = mmscaler.transform(x)
    
    return x

In [9]:
# Accs is a dictionary which holds 1d arrays of accuracies in each key
# except the key 'test id' which holds strings of the id which yielded the coresponding accuracies
def print_accs_stats(accs):
    
    printDict = {}
    # loop over each key
    for key in accs:
    
        if (key == 'test id'):
            # skip calculating ids
            continue
        printDict[key] = {}
        tmpDict = printDict[key]
        # calculate and print some statistics
        tmpDict['min'] = np.min(accs[key])
        tmpDict['max'] = np.max(accs[key])
        tmpDict['mean'] = np.mean(accs[key])
        tmpDict['median'] = np.median(accs[key])
    
    print(pandas.DataFrame.from_dict(printDict).to_string())

In [10]:
def set_random_seeds():
    # clear session and set seeds again
    # cannot clear session due to tf.compat.v1 graphs, but add tf.compat.v1.set_random_seed
#     K.clear_session()
    tf.compat.v1.set_random_seed(2)
    random.seed(1)
    np.random.seed(4)
    tf.random.set_seed(2)

## Autoencoders

#### Undercomplete Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [11]:
def undercomplete_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout")(input_data)
    # "encoded" is the encoded representation of the input
    encoded = Dense(encoding_dim, activation='relu', name="encoded")(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    autoencoder = Model(input_data, decoded)
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Sparse Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [12]:
def sparse_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout") (input_data)
    # "encoded" is the encoded representation of the input
    # add a sparsity constraint
    encoded = Dense(encoding_dim, activation='relu', name="encoded",
                    activity_regularizer=regularizers.l1(10e-5))(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    # this model maps an input to its reconstruction
    autoencoder = Model(input_data, decoded, name="sparse_ae")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Deep Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [13]:
def deep_ae(x, enc_layers=[512,256], encoding_dim=64, dec_layers=[256,512], encoded_as_model=False):
    # From https://www.tensorflow.org/guide/keras/functional#use_the_same_graph_of_layers_to_define_multiple_models
    input_data = keras.Input(shape=x[0].shape, name="normalized_signal")
    model = Dropout(0.25, name="dropout", autocast=False)(input_data)
    for i in enumerate(enc_layers):
        model = Dense(i[1], activation="relu", name="dense_enc_" + str(i[0]+1))(model)
    encoded_output = Dense(encoding_dim, activation="relu", name="encoded_signal")(model)

    encoded = encoded_output

    model = layers.Dense(dec_layers[0], activation="sigmoid", name="dense_dec_1")(encoded_output)
    for i in enumerate(dec_layers[1:]):
        model = Dense(i[1], activation="sigmoid", name="dense_dec_" + str(i[0]+2))(model)
    decoded_output = Dense(x[0].shape[0], activation="sigmoid", name="reconstructed_signal")(model)
    
    autoencoder = Model(input_data, decoded_output, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)

    return autoencoder, encoded

#### Contractive Autoencoder
From: https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

In [14]:
# define a function to be able to access the autoencoder in the loss funciton
def loss_with_params(autoencoder):
    # loss function from https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/
    def contractive_loss(y_pred, y_true):

        lam = 1e-4
        mse = K.mean(K.square(y_true - y_pred), axis=1)

        W = K.variable(value=autoencoder.get_layer('encoded').get_weights()[0])  # N x N_hidden
        W = K.transpose(W)  # N_hidden x N
        h = autoencoder.get_layer('encoded').output
        dh = h * (1 - h)  # N_batch x N_hidden

        # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
        contractive = lam * K.sum(dh**2 * K.sum(W**2, axis=1), axis=1)

        return mse + contractive
    return contractive_loss

In [15]:
def contractive_ae(x, encoding_dim=64, encoded_as_model=False):
    # From https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

    input_data = Input(shape=x[0].shape, name="input")
    encoded = Dense(encoding_dim, activation='sigmoid', name='encoded')(input_data)
    outputs = Dense(x[0].shape[0], activation='linear', name="output")(encoded)

    autoencoder = Model(input_data, outputs, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss=loss_with_params(autoencoder), metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

## Classifiers

Initialize variables:

In [16]:
# initialize a dictionary to store accuracies for comparison
accuracies = {}

# used for reading the data into an array
seconds = 30  # time window length
idents = ['2gu87', 'iz2ps', '1mpau', '7dwjy', '7swyk', '94mnx', 'bd47a', 'c24ur', 'ctsax', 'dkhty', 'e4gay',
              'ef5rq', 'f1gjp', 'hpbxa', 'pmyfl', 'r89k1', 'tn4vl', 'td5pr', 'gyqu9', 'fzchw', 'l53hg', '3n2f9',
              '62i9y']
path = '../../../StudyData/'

# change to len(idents) at the end to use all the data
n = 10 #len(idents)

#### Simple dense layer

Define the classifier:

In [17]:
def dense_classifier(model, params):
    
    model = Dropout(params['dropout'], name='dropout_cl')(model)
    model = Dense(params['hidden_size'], activation=params['activation'], name='dense_cl1')(model)
    model = Dense(1, activation=params['last_activation'], name='dense_cl2')(model)

    return model

In [18]:
def dense_classifier_base():
    model = Sequential()
    model.add(Dropout(0))
    model.add(Dense(16))
    model.add(Activation('sigmoid'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=metrics)
    return model

In [19]:
params = {
    'dropout': 0.24,
    'optimizer': 'Adam',
    'hidden_size': 32,
    'loss': 'binary_crossentropy',
    'last_activation': 'sigmoid',
    'activation': 'softmax',
    'batch_size': 256,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [20]:
# set the variables in the dictionary
accuracies['simple_dense'] = {}
accs = accuracies['simple_dense']
accs['phase'] = []
accs['breathing'] = []
accs['heartbeat'] = []
accs['combined br hb'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
accs['test id'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for i in range(n):

        # print current iteration and time elapsed from start
        print("iteration:", i+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        ## ----- Data preparation:
        validation_idents = [idents[i]]
        test_idents = [idents[i-1]]

        train_idents = []
        for ident in idents:
            if (ident not in test_idents) and (ident not in validation_idents):
                train_idents.append(ident)
        
        # save test id to see which id yielded which accuracies
        accs['test id'].append(test_idents[0])

        # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
        xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
        xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
        xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

        # Scale data with standard scaler then MinMax scaler
        # Raw Phase data:
        xt_train = scale_data(xt_train, standardScaler=True, minMaxScaler=True)
        xt_valid = scale_data(xt_valid, standardScaler=True, minMaxScaler=True)
        xt_test = scale_data(xt_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted breathing data:
        br_train = scale_data(br_train, standardScaler=True, minMaxScaler=True)
        br_valid = scale_data(br_valid, standardScaler=True, minMaxScaler=True)
        br_test = scale_data(br_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted Heartbeat data:
        hb_train = scale_data(hb_train, standardScaler=True, minMaxScaler=True)
        hb_valid = scale_data(hb_valid, standardScaler=True, minMaxScaler=True)
        hb_test = scale_data(hb_test, standardScaler=True, minMaxScaler=True)
        # Combined breathing and heartbeat data (joined together into one matrix)
        cmb_train = scale_data(cmb_train, standardScaler=True, minMaxScaler=True)
        cmb_valid = scale_data(cmb_valid, standardScaler=True, minMaxScaler=True)
        cmb_test = scale_data(cmb_test, standardScaler=True, minMaxScaler=True)
        
        
        
        ## ----- Classify without autoencoders:
        # Phase classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['phase'].append(curr_acc)

        # Breathing classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, br_train, y_train, params['batch_size'], params['epochs'],
                                               br_valid, y_valid, br_test, y_test)
        accs['breathing'].append(curr_acc)

        # Heartbeat classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, hb_train, y_train, params['batch_size'], params['epochs'],
                                               hb_valid, y_valid, hb_test, y_test)
        accs['heartbeat'].append(curr_acc)

        # Combined classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, cmb_train, y_train, params['batch_size'], params['epochs'],
                                               cmb_valid, y_valid, cmb_test, y_test)
        accs['combined br hb'].append(curr_acc)
        
        
        
        ## ----- Classify with autoencoders:
        # AE Training params
        batch_size = 256
        epochs = 100
        encoding_dim = 64

        # Undercomplete AE:
        autoencoder, encoded = undercomplete_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # Sparse AE:
        autoencoder, encoded = sparse_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # Deep AE:
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=encoding_dim, dec_layers=[256,512])
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # Contractive AE:
        autoencoder, encoded = contractive_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

# Print total time required to run this
end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 10 ; time elapsed: 0:00:00.007983
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
iteration: 2 of 10 ; time elapsed: 0:01:02.815492
iteration: 3 of 10 ; time elapsed: 0:02:13.373106
iteration: 4 of 10 ; time elapsed: 0:03:31.528102
iteration: 5 of 10 ; time elapsed: 0:04:58.975050
iteration: 6 of 10 ; time elapsed: 0:06:34.209059
iteration: 7 of 10 ; time elapsed: 0:08:18.948272
iteration: 8 of 10 ; time elapsed: 0:10:14.032609
iteration: 9 of 10 ; time elapsed: 0:12:20.634588
iteration: 10 of 10 ; time elapsed: 0:14:37.126364
Completed! Time elapsed: 0:17:11.856668


In [21]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.416667,0.5,0.583333,0.375,0.458333,0.458333,0.416667,0.375,62i9y
1,0.794118,0.735294,0.382353,0.588235,0.764706,0.735294,0.735294,0.588235,2gu87
2,0.611111,0.722222,0.416667,0.805556,0.611111,0.694444,0.611111,0.555556,iz2ps
3,0.558824,0.735294,0.529412,0.764706,0.558824,0.676471,0.529412,0.588235,1mpau
4,0.5,0.566667,0.566667,0.466667,0.6,0.6,0.466667,0.7,7dwjy
5,0.59375,0.90625,0.5625,0.875,0.6875,0.8125,0.5,0.6875,7swyk
6,0.59375,0.6875,0.625,0.65625,0.5,0.59375,0.59375,0.5,94mnx
7,0.533333,0.7,0.6,0.666667,0.666667,0.633333,0.633333,0.633333,bd47a
8,0.533333,0.8,0.7,0.766667,0.666667,0.566667,0.6,0.466667,c24ur
9,0.6,0.833333,0.6,0.833333,0.6,0.666667,0.633333,0.6,ctsax


In [22]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.416667   0.500000   0.382353        0.375000       0.458333  0.458333  0.416667     0.375000
max     0.794118   0.906250   0.700000        0.875000       0.764706  0.812500  0.735294     0.700000
mean    0.573489   0.718656   0.556593        0.679808       0.611381  0.643746  0.571957     0.569453
median  0.576287   0.728758   0.575000        0.715686       0.605556  0.650000  0.596875     0.588235


#### LSTM-based classifier  
based on the original author's code

Optimize hyperparameters with talos:

In [23]:
def LSTM_classifier(model, params):

    model = layers.Reshape((-1, 1), input_shape=(model.shape), name='reshape_cl') (model)

    model = layers.Dropout(params['dropout'], name='dropout_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl1') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl2') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl2') (model)
    
    model = layers.Dropout(params['dropout'], name='dropout_cl2') (model)

    model = LSTM(params['lstm_output_size'], activation='sigmoid', name='lstm_cl') (model)

    model = Dense(1, activation=params['last_activation'], name='dense_cl') (model)
    return model

In [24]:
def LSTM_classifier_base(params):
    
    model = Sequential()
    model.add(Dropout(params['dropout']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))

    model.add(MaxPooling1D(pool_size=params['pool_size']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))
    model.add(MaxPooling1D(pool_size=params['pool_size']))

    model.add(Dropout(params['dropout']))
    model.add(LSTM(params['lstm_output_size']))
    model.add(Dense(1))
    model.add(Activation(params['last_activation']))

    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=['acc'])
    
    return model

In [25]:
params_phase = {
    'kernel_size': 32,
    'strides': 4,
    'pool_size': 2,
    'filters': 8,
    'lstm_output_size': 236,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [26]:
params_br_hb = {
    'kernel_size': 2,
    'strides': 1,
    'pool_size': 1,
    'filters': 2,
    'lstm_output_size': 4,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [29]:
params = {
    'kernel_size': 4,
    'filters': 2,
    'strides': 2,
    'pool_size': 2,
    'dropout': 0.09,
    'optimizer': 'Nadam',
    'loss': 'binary_crossentropy',
    'activation': 'relu',
    'last_activation': 'sigmoid',
    'lstm_output_size': 32,
    'batch_size': 64,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [30]:
# set the variables in the dictionary
accuracies['LSTM'] = {}
accs = accuracies['LSTM']
accs['phase'] = []
accs['breathing'] = []
accs['heartbeat'] = []
accs['combined br hb'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
accs['test id'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for i in range(5): ##### <--------------------------------- TODO: change to range(n)

        # print current iteration and time elapsed from start
        print("iteration:", i+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        ## ----- Data preparation:
        validation_idents = [idents[i]]
        test_idents = [idents[i-1]]

        train_idents = []
        for ident in idents:
            if (ident not in test_idents) and (ident not in validation_idents):
                train_idents.append(ident)

        # save test id to see which id yielded which accuracies
        accs['test id'].append(test_idents[0])
        
        # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
        xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
        xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
        xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

        # Scale data with standard scaler then MinMax scaler
        # Raw Phase data:
        xt_train = scale_data(xt_train, standardScaler=True, minMaxScaler=True)
        xt_valid = scale_data(xt_valid, standardScaler=True, minMaxScaler=True)
        xt_test = scale_data(xt_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted breathing data:
        br_train = scale_data(br_train, standardScaler=True, minMaxScaler=True)
        br_valid = scale_data(br_valid, standardScaler=True, minMaxScaler=True)
        br_test = scale_data(br_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted Heartbeat data:
        hb_train = scale_data(hb_train, standardScaler=True, minMaxScaler=True)
        hb_valid = scale_data(hb_valid, standardScaler=True, minMaxScaler=True)
        hb_test = scale_data(hb_test, standardScaler=True, minMaxScaler=True)
        # Combined breathing and heartbeat data (joined together into one matrix)
        cmb_train = scale_data(cmb_train, standardScaler=True, minMaxScaler=True)
        cmb_valid = scale_data(cmb_valid, standardScaler=True, minMaxScaler=True)
        cmb_test = scale_data(cmb_test, standardScaler=True, minMaxScaler=True)
        
        
        
        ## ----- Classify without autoencoders:
        # Phase classifier:
        model = LSTM_classifier_base(params_phase)
        # reshape data for the classifier
        xtt_train = xt_train.reshape(-1, xt_train[0].shape[0], 1)
        xtt_valid = xt_valid.reshape(-1, xt_valid[0].shape[0], 1)
        xtt_test = xt_test.reshape(-1, xt_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, xtt_train, y_train, params['batch_size'], params['epochs'],
                                               xtt_valid, y_valid, xtt_test, y_test)
        accs['phase'].append(curr_acc)

        # Breathing classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        brt_train = br_train.reshape(-1, br_train[0].shape[0], 1)
        brt_valid = br_valid.reshape(-1, br_valid[0].shape[0], 1)
        brt_test = br_test.reshape(-1, br_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, brt_train, y_train, params['batch_size'], params['epochs'],
                                               brt_valid, y_valid, brt_test, y_test)
        accs['breathing'].append(curr_acc)

        # Heartbeat classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        hbt_train = hb_train.reshape(-1, hb_train[0].shape[0], 1)
        hbt_valid = hb_valid.reshape(-1, hb_valid[0].shape[0], 1)
        hbt_test = hb_test.reshape(-1, hb_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, hbt_train, y_train, params['batch_size'], params['epochs'],
                                               hbt_valid, y_valid, hbt_test, y_test)
        accs['heartbeat'].append(curr_acc)

        # Combined classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        cmbt_train = cmb_train.reshape(-1, cmb_train[0].shape[0], 1)
        cmbt_valid = cmb_valid.reshape(-1, cmb_valid[0].shape[0], 1)
        cmbt_test = cmb_test.reshape(-1, cmb_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, cmbt_train, y_train, params['batch_size'], params['epochs'],
                                               cmbt_valid, y_valid, cmbt_test, y_test)
        accs['combined br hb'].append(curr_acc)

        
        
        ## ----- Classify with autoencoders:
        # AE Training params
        batch_size = 256
        epochs = 100
        encoding_dim = 64

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=encoding_dim, dec_layers=[256,512])
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 10 ; time elapsed: 0:00:00.007980
iteration: 2 of 10 ; time elapsed: 0:07:32.942588
iteration: 3 of 10 ; time elapsed: 0:16:17.983277
iteration: 4 of 10 ; time elapsed: 0:26:27.489231
iteration: 5 of 10 ; time elapsed: 0:38:16.004422
Completed! Time elapsed: 0:52:39.377632


In [31]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.541667,0.541667,0.5,0.333333,0.416667,0.416667,0.666667,0.416667,62i9y
1,0.735294,0.294118,0.294118,0.470588,0.794118,0.735294,0.529412,0.852941,2gu87
2,0.833333,0.666667,0.333333,0.388889,0.638889,0.722222,0.75,0.583333,iz2ps
3,0.588235,0.705882,0.5,0.705882,0.558824,0.705882,0.529412,0.676471,1mpau
4,0.766667,0.6,0.333333,0.7,0.633333,0.5,0.6,0.566667,7dwjy


In [32]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.541667   0.294118   0.294118        0.333333       0.416667  0.416667  0.529412     0.416667
max     0.833333   0.705882   0.500000        0.705882       0.794118  0.735294  0.750000     0.852941
mean    0.693039   0.561667   0.392157        0.519739       0.608366  0.616013  0.615098     0.619216
median  0.735294   0.600000   0.333333        0.470588       0.633333  0.705882  0.600000     0.583333


### Helper loop function definition

In [33]:
# a helper loop funciton for the sklearn and XGBoost classifiers
def helper_loop(classifier_function, idents, n=5):
    #returns a dictionary with accuracies

    # set the variables in the dictionary
    accs = {}
    accs['phase'] = []
    accs['breathing'] = []
    accs['heartbeat'] = []
    accs['combined br hb'] = []
    accs['undercomplete'] = []
    accs['sparse'] = []
    accs['deep'] = []
    accs['contractive'] = []
    accs['test id'] = []
    start_time = datetime.now()

    with tf.compat.v1.Session(config=config) as sess:
        # leave out person out validation
        for i in range(n):

            # print current iteration and time elapsed from start
            print("iteration:", i+1, "of", n, "; time elapsed:", datetime.now()-start_time)

            ## ----- Data preparation:
            validation_idents = [idents[i]]
            test_idents = [idents[i-1]]

            train_idents = []
            for ident in idents:
                if (ident not in test_idents) and (ident not in validation_idents):
                    train_idents.append(ident)

            # save test id to see which id yielded which accuracies
            accs['test id'].append(test_idents[0])

            # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
            xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
            xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
            xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

            # change the y arrays to flat 1d arrays
            y_train = y_train.ravel()
            y_valid = y_valid.ravel()
            y_test = y_test.ravel()
            
            # Scale data with standard scaler then MinMax scaler
            # Raw Phase data:
            xt_train = scale_data(xt_train, standardScaler=True, minMaxScaler=True)
            xt_valid = scale_data(xt_valid, standardScaler=True, minMaxScaler=True)
            xt_test = scale_data(xt_test, standardScaler=True, minMaxScaler=True)
            # Hand extracted breathing data:
            br_train = scale_data(br_train, standardScaler=True, minMaxScaler=True)
            br_valid = scale_data(br_valid, standardScaler=True, minMaxScaler=True)
            br_test = scale_data(br_test, standardScaler=True, minMaxScaler=True)
            # Hand extracted Heartbeat data:
            hb_train = scale_data(hb_train, standardScaler=True, minMaxScaler=True)
            hb_valid = scale_data(hb_valid, standardScaler=True, minMaxScaler=True)
            hb_test = scale_data(hb_test, standardScaler=True, minMaxScaler=True)
            # Combined breathing and heartbeat data (joined together into one matrix)
            cmb_train = scale_data(cmb_train, standardScaler=True, minMaxScaler=True)
            cmb_valid = scale_data(cmb_valid, standardScaler=True, minMaxScaler=True)
            cmb_test = scale_data(cmb_test, standardScaler=True, minMaxScaler=True)

            

            ## ----- Classify without autoencoders:
            # Phase classifier:
            set_random_seeds()
            model = classifier_function()
            model.fit(xt_train, y_train)
            curr_acc = np.sum(model.predict(xt_test) == y_test) / len(y_test)
            accs['phase'].append(curr_acc)
            
            # Breathing classifier:
            set_random_seeds()
            base_model = classifier_function()
            base_model.fit(br_train, y_train)
            curr_acc = np.sum(base_model.predict(br_test) == y_test) / len(y_test)
            accs['breathing'].append(curr_acc)

            # Heartbeat classifier:
            set_random_seeds()
            base_model = classifier_function()
            base_model.fit(hb_train, y_train)
            curr_acc = np.sum(base_model.predict(hb_test) == y_test) / len(y_test)
            accs['heartbeat'].append(curr_acc)

            # Combined classifier:
            set_random_seeds()
            base_model = classifier_function()
            base_model.fit(cmb_train, y_train)
            curr_acc = np.sum(base_model.predict(cmb_test) == y_test) / len(y_test)
            accs['combined br hb'].append(curr_acc)



            ## ----- Classify with autoencoders:
            # AE Training params
            batch_size = 256
            epochs = 100
            encoding_dim = 64

            # undercomplete AE
            set_random_seeds()
            autoencoder, encoded = undercomplete_ae(xt_train, encoding_dim, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['undercomplete'].append(curr_acc)

            # sparse AE
            set_random_seeds()
            autoencoder, encoded = sparse_ae(xt_train, encoding_dim, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['sparse'].append(curr_acc)

            # deep AE
            set_random_seeds()
            autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=encoding_dim, dec_layers=[256,512], encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['deep'].append(curr_acc)

            # contractive AE
            set_random_seeds()
            autoencoder, encoded = contractive_ae(xt_train, encoding_dim, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['contractive'].append(curr_acc)

    # Print total time required to run this
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    print("Completed!", "Time elapsed:", elapsed_time)
    
    return accs

#### kNN

In [34]:
from sklearn.neighbors import KNeighborsClassifier

def KNN_classifier():
    model = KNeighborsClassifier(p=3, n_neighbors=7, metric='cosine')
    return model

Combine the autoencoders with the classifier: 

In [35]:
accs = helper_loop(KNN_classifier, idents, n)

iteration: 1 of 10 ; time elapsed: 0:00:00.006868
iteration: 2 of 10 ; time elapsed: 0:03:34.286554
iteration: 3 of 10 ; time elapsed: 0:07:26.667358
iteration: 4 of 10 ; time elapsed: 0:11:41.780649
iteration: 5 of 10 ; time elapsed: 0:16:14.355082
iteration: 6 of 10 ; time elapsed: 0:21:11.670588
iteration: 7 of 10 ; time elapsed: 0:26:25.721028
iteration: 8 of 10 ; time elapsed: 0:31:45.418098
iteration: 9 of 10 ; time elapsed: 0:37:04.764994
iteration: 10 of 10 ; time elapsed: 0:42:40.801805
Completed! Time elapsed: 0:49:50.872476


In [36]:
accuracies['kNN'] = accs

In [37]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.5,0.458333,0.333333,0.416667,0.5,0.5,0.416667,0.625,62i9y
1,0.588235,0.647059,0.352941,0.676471,0.823529,0.558824,0.647059,0.617647,2gu87
2,0.638889,0.638889,0.472222,0.75,0.611111,0.638889,0.611111,0.611111,iz2ps
3,0.676471,0.676471,0.441176,0.617647,0.588235,0.647059,0.588235,0.441176,1mpau
4,0.7,0.6,0.4,0.6,0.533333,0.6,0.666667,0.633333,7dwjy
5,0.8125,0.8125,0.4375,0.78125,0.59375,0.625,0.53125,0.5625,7swyk
6,0.6875,0.53125,0.5625,0.53125,0.65625,0.5,0.5625,0.59375,94mnx
7,0.6,0.633333,0.6,0.7,0.7,0.633333,0.6,0.5,bd47a
8,0.7,0.7,0.533333,0.7,0.6,0.5,0.6,0.6,c24ur
9,0.666667,0.833333,0.466667,0.733333,0.7,0.8,0.5,0.666667,ctsax


In [38]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete   sparse      deep  contractive
min     0.500000   0.458333   0.333333        0.416667       0.500000  0.50000  0.416667     0.441176
max     0.812500   0.833333   0.600000        0.781250       0.823529  0.80000  0.666667     0.666667
mean    0.657026   0.653117   0.459967        0.650662       0.630621  0.60031  0.572349     0.585118
median  0.671569   0.642974   0.453922        0.688235       0.605556  0.61250  0.594118     0.605556


####  SVC

In [39]:
from sklearn.svm import SVC

def SVC_classifier():
    model = SVC(kernel='rbf', C=1.5)
    return model

Combine the autoencoders with the classifier: 

In [40]:
accs = helper_loop(SVC_classifier, idents, n)

iteration: 1 of 10 ; time elapsed: 0:00:00
iteration: 2 of 10 ; time elapsed: 0:03:49.998369
iteration: 3 of 10 ; time elapsed: 0:08:00.404254
iteration: 4 of 10 ; time elapsed: 0:12:37.615514
iteration: 5 of 10 ; time elapsed: 0:17:43.294081
iteration: 6 of 10 ; time elapsed: 0:23:23.011519
iteration: 7 of 10 ; time elapsed: 0:29:06.546522
iteration: 8 of 10 ; time elapsed: 0:35:25.707640
iteration: 9 of 10 ; time elapsed: 0:42:20.816218
iteration: 10 of 10 ; time elapsed: 0:49:47.072801
Completed! Time elapsed: 0:58:52.162650


In [41]:
accuracies['SVC'] = accs

In [42]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.416667,0.5,0.5,0.5,0.333333,0.5,0.458333,0.416667,62i9y
1,0.676471,0.705882,0.352941,0.735294,0.588235,0.705882,0.588235,0.735294,2gu87
2,0.777778,0.666667,0.333333,0.75,0.527778,0.694444,0.555556,0.638889,iz2ps
3,0.529412,0.764706,0.588235,0.764706,0.441176,0.5,0.441176,0.529412,1mpau
4,0.666667,0.533333,0.466667,0.566667,0.6,0.566667,0.633333,0.566667,7dwjy
5,0.46875,0.65625,0.65625,0.78125,0.46875,0.59375,0.625,0.46875,7swyk
6,0.65625,0.625,0.65625,0.625,0.59375,0.46875,0.6875,0.625,94mnx
7,0.633333,0.633333,0.633333,0.566667,0.566667,0.566667,0.5,0.6,bd47a
8,0.733333,0.733333,0.566667,0.766667,0.566667,0.633333,0.533333,0.6,c24ur
9,0.633333,0.866667,0.566667,0.7,0.5,0.6,0.566667,0.533333,ctsax


In [43]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.416667   0.500000   0.333333        0.500000       0.333333  0.468750  0.441176     0.416667
max     0.777778   0.866667   0.656250        0.781250       0.600000  0.705882  0.687500     0.735294
mean    0.619199   0.668517   0.532034        0.675625       0.518636  0.582949  0.558913     0.571401
median  0.644792   0.661458   0.566667        0.717647       0.547222  0.580208  0.561111     0.583333


#### Random Forest

In [44]:
from sklearn.ensemble import RandomForestClassifier
def random_forest_classifier():
    model = RandomForestClassifier(n_estimators = 250,
                                     min_samples_split = 10,
                                     min_samples_leaf = 4,
                                     max_features = 'auto',
                                     max_depth = 90,
                                     bootstrap = True)
    return model

Combine the autoencoders with the classifier: 

In [45]:
accs = helper_loop(random_forest_classifier, idents, n)

iteration: 1 of 10 ; time elapsed: 0:00:00
iteration: 2 of 10 ; time elapsed: 0:04:48.687027
iteration: 3 of 10 ; time elapsed: 0:10:06.150430
iteration: 4 of 10 ; time elapsed: 0:16:05.568345
iteration: 5 of 10 ; time elapsed: 0:22:59.438858
iteration: 6 of 10 ; time elapsed: 0:30:21.932782
iteration: 7 of 10 ; time elapsed: 0:38:20.992617
iteration: 8 of 10 ; time elapsed: 0:46:57.970766
iteration: 9 of 10 ; time elapsed: 0:56:04.322792
iteration: 10 of 10 ; time elapsed: 1:05:36.305051
Completed! Time elapsed: 1:17:44.588450


In [46]:
accuracies['random_forest'] = accs

In [47]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.458333,0.458333,0.541667,0.5,0.458333,0.458333,0.375,0.458333,62i9y
1,0.617647,0.647059,0.264706,0.647059,0.676471,0.764706,0.647059,0.705882,2gu87
2,0.638889,0.777778,0.444444,0.722222,0.75,0.694444,0.583333,0.888889,iz2ps
3,0.558824,0.764706,0.529412,0.647059,0.5,0.529412,0.617647,0.588235,1mpau
4,0.5,0.566667,0.433333,0.533333,0.566667,0.533333,0.566667,0.533333,7dwjy
5,0.5625,0.71875,0.5625,0.71875,0.59375,0.625,0.5625,0.5,7swyk
6,0.59375,0.65625,0.75,0.59375,0.59375,0.4375,0.625,0.5625,94mnx
7,0.533333,0.633333,0.533333,0.633333,0.566667,0.733333,0.533333,0.5,bd47a
8,0.533333,0.666667,0.633333,0.633333,0.433333,0.433333,0.5,0.5,c24ur
9,0.733333,0.733333,0.566667,0.733333,0.666667,0.7,0.7,0.666667,ctsax


In [48]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.458333   0.458333   0.264706        0.500000       0.433333  0.433333  0.375000     0.458333
max     0.733333   0.777778   0.750000        0.733333       0.750000  0.764706  0.700000     0.888889
mean    0.572994   0.662288   0.525940        0.636217       0.580564  0.590940  0.571054     0.590384
median  0.560662   0.661458   0.537500        0.640196       0.580208  0.579167  0.575000     0.547917


#### Naive Bayesian

In [49]:
from sklearn.naive_bayes import ComplementNB

def naive_bayesian_classifier():
    model = ComplementNB()
    return model

Combine the autoencoders with the classifier: 

In [50]:
accs = helper_loop(naive_bayesian_classifier, idents, n)

iteration: 1 of 10 ; time elapsed: 0:00:00.046641
iteration: 2 of 10 ; time elapsed: 0:05:12.614349
iteration: 3 of 10 ; time elapsed: 0:11:35.920935
iteration: 4 of 10 ; time elapsed: 0:19:42.404757
iteration: 5 of 10 ; time elapsed: 0:29:02.720470
iteration: 6 of 10 ; time elapsed: 0:40:30.272203
iteration: 7 of 10 ; time elapsed: 0:54:05.043907
iteration: 8 of 10 ; time elapsed: 1:07:20.009039
iteration: 9 of 10 ; time elapsed: 1:21:26.170789
iteration: 10 of 10 ; time elapsed: 1:35:48.511020
Completed! Time elapsed: 1:54:42.948537


In [51]:
accuracies['naive_bayesian'] = accs

In [52]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.625,0.458333,0.375,0.458333,0.75,0.833333,0.625,0.625,62i9y
1,0.382353,0.676471,0.294118,0.705882,0.5,0.529412,0.470588,0.5,2gu87
2,0.305556,0.75,0.388889,0.75,0.5,0.472222,0.444444,0.472222,iz2ps
3,0.5,0.823529,0.441176,0.823529,0.441176,0.5,0.5,0.470588,1mpau
4,0.433333,0.466667,0.466667,0.466667,0.533333,0.533333,0.5,0.466667,7dwjy
5,0.6875,0.875,0.40625,0.875,0.71875,0.84375,0.6875,0.65625,7swyk
6,0.5,0.6875,0.53125,0.6875,0.53125,0.5625,0.5,0.53125,94mnx
7,0.4,0.666667,0.466667,0.7,0.533333,0.533333,0.6,0.566667,bd47a
8,0.466667,0.8,0.666667,0.866667,0.433333,0.433333,0.633333,0.433333,c24ur
9,0.666667,0.933333,0.566667,0.933333,0.566667,0.566667,0.666667,0.533333,ctsax


In [53]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.305556   0.458333   0.294118        0.458333       0.433333  0.433333  0.444444     0.433333
max     0.687500   0.933333   0.666667        0.933333       0.750000  0.843750  0.687500     0.656250
mean    0.496708   0.713750   0.460335        0.726691       0.550784  0.580788  0.562753     0.525531
median  0.483333   0.718750   0.453922        0.727941       0.532292  0.533333  0.550000     0.515625


#### XGBoost

In [54]:
from xgboost import XGBClassifier

def XGBoost_classifier():
    model = XGBClassifier(n_estimators = 83)
    return model

Combine the autoencoders with the classifier: 

In [55]:
accs = helper_loop(XGBoost_classifier, idents, n)

iteration: 1 of 10 ; time elapsed: 0:00:00.042596
iteration: 2 of 10 ; time elapsed: 0:06:48.364817
iteration: 3 of 10 ; time elapsed: 0:13:59.915302
iteration: 4 of 10 ; time elapsed: 0:22:30.225411
iteration: 5 of 10 ; time elapsed: 0:32:56.642506
iteration: 6 of 10 ; time elapsed: 0:45:21.155916
iteration: 7 of 10 ; time elapsed: 1:00:02.503103
iteration: 8 of 10 ; time elapsed: 1:16:48.544250
iteration: 9 of 10 ; time elapsed: 1:32:40.011866
iteration: 10 of 10 ; time elapsed: 1:50:52.378628
Completed! Time elapsed: 2:10:52.144726


In [56]:
accuracies['XGBoost'] = accs

In [57]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.375,0.541667,0.5,0.5,0.458333,0.458333,0.458333,0.416667,62i9y
1,0.705882,0.676471,0.441176,0.617647,0.764706,0.588235,0.617647,0.558824,2gu87
2,0.583333,0.722222,0.416667,0.666667,0.694444,0.722222,0.555556,0.694444,iz2ps
3,0.588235,0.705882,0.647059,0.705882,0.529412,0.647059,0.647059,0.529412,1mpau
4,0.566667,0.6,0.5,0.533333,0.6,0.466667,0.6,0.6,7dwjy
5,0.6875,0.84375,0.40625,0.5625,0.59375,0.6875,0.5625,0.59375,7swyk
6,0.59375,0.6875,0.59375,0.59375,0.5,0.59375,0.5625,0.53125,94mnx
7,0.633333,0.666667,0.566667,0.7,0.6,0.666667,0.566667,0.6,bd47a
8,0.633333,0.8,0.633333,0.733333,0.533333,0.533333,0.566667,0.633333,c24ur
9,0.633333,0.666667,0.466667,0.666667,0.566667,0.5,0.6,0.6,ctsax


In [58]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.375000   0.541667   0.406250        0.500000       0.458333  0.458333  0.458333     0.416667
max     0.705882   0.843750   0.647059        0.733333       0.764706  0.722222  0.647059     0.694444
mean    0.600037   0.691083   0.517157        0.627978       0.584065  0.586377  0.573693     0.575768
median  0.613542   0.681985   0.500000        0.642157       0.580208  0.590993  0.566667     0.596875


###  Compare Accuracies

Print min, max, mean, median for each clasifier/autoencoder combination:

In [59]:
for classifier in accuracies:
    print("-----------", classifier + ":", "-----------")
    accs = accuracies[classifier]
    print_accs_stats(accs)
    print("\n")

----------- simple_dense: -----------
           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.416667   0.500000   0.382353        0.375000       0.458333  0.458333  0.416667     0.375000
max     0.794118   0.906250   0.700000        0.875000       0.764706  0.812500  0.735294     0.700000
mean    0.573489   0.718656   0.556593        0.679808       0.611381  0.643746  0.571957     0.569453
median  0.576287   0.728758   0.575000        0.715686       0.605556  0.650000  0.596875     0.588235


----------- LSTM: -----------
           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.541667   0.294118   0.294118        0.333333       0.416667  0.416667  0.529412     0.416667
max     0.833333   0.705882   0.500000        0.705882       0.794118  0.735294  0.750000     0.852941
mean    0.693039   0.561667   0.392157        0.519739       0.608366  0.616013  0.615098     0.619216
med

Print all accuracies in table form:

In [60]:
for classifier in accuracies:
    print(classifier + ":")
#     print(pandas.DataFrame.from_dict(accuracies[classifier]))
    # Using .to_string() gives nicer loooking results (doesn't split into new line)
    print(pandas.DataFrame.from_dict(accuracies[classifier]).to_string())
    print("\n")

simple_dense:
      phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive test id
0  0.416667   0.500000   0.583333        0.375000       0.458333  0.458333  0.416667     0.375000   62i9y
1  0.794118   0.735294   0.382353        0.588235       0.764706  0.735294  0.735294     0.588235   2gu87
2  0.611111   0.722222   0.416667        0.805556       0.611111  0.694444  0.611111     0.555556   iz2ps
3  0.558824   0.735294   0.529412        0.764706       0.558824  0.676471  0.529412     0.588235   1mpau
4  0.500000   0.566667   0.566667        0.466667       0.600000  0.600000  0.466667     0.700000   7dwjy
5  0.593750   0.906250   0.562500        0.875000       0.687500  0.812500  0.500000     0.687500   7swyk
6  0.593750   0.687500   0.625000        0.656250       0.500000  0.593750  0.593750     0.500000   94mnx
7  0.533333   0.700000   0.600000        0.666667       0.666667  0.633333  0.633333     0.633333   bd47a
8  0.533333   0.800000   0.70000