# Classifiers - busy vs relaxed
Exploring different classifiers with different autoencoders.

#### Table of contents:  

autoencoders:  
[Undercomplete Autoencoder](#Undercomplete-Autoencoder)  
[Sparse Autoencoder](#Sparse-Autoencoder)  
[Deep Autoencoder](#Deep-Autoencoder)  
[Contractive Autoencoder](#Contractive-Autoencoder)  

classifiers:  
[Simple dense layer](#Simple-dense-layer)  
[LSTM-based classifier](#LSTM-based-classifier)  
[kNN](#kNN)  
[SVC](#SVC)  
[Random Forest](#Random-Forest)  
[XGBoost](#XGBoost)  

In [1]:
import datareader # made by the previous author for reading the collected data
import dataextractor # same as above
import pandas
import numpy as np
import tensorflow as tf
# need to disable eager execution for .get_weights() in contractive autoencoder loss to work
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
# required for the contractive autoencoder
import tensorflow.keras.backend as K
import json
from datetime import datetime

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV

import warnings

import talos
from talos.utils import lr_normalizer

from tensorflow import keras
from tensorflow.keras import layers, regularizers
import matplotlib.pyplot as plt

tf.keras.backend.set_floatx('float32') # call this, to set keras to use float32 to avoid a warning message
metrics = ['accuracy']#,
#            keras.metrics.TruePositives(),
#            keras.metrics.FalsePositives(),
#            keras.metrics.TrueNegatives(),
#            keras.metrics.FalseNegatives()]

In [2]:
# from https://github.com/ageron/handson-ml/blob/master/extra_tensorflow_reproducibility.ipynb
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1)

with tf.compat.v1.Session(config=config) as sess:
    #... this will run single threaded
    pass

In [3]:
import random

random.seed(1)
np.random.seed(4)
tf.random.set_seed(2)

In [4]:
# Start the notebook in the terminal with "PYTHONHASHSEED=0 jupyter notebook" 
# or in anaconda "set PYTHONHASHSEED=0" then start jupyter notebook
import os
if os.environ.get("PYTHONHASHSEED") != "0":
    raise Exception("You must set PYTHONHASHSEED=0 when starting the Jupyter server to get reproducible results.")

This is modfied original author's code for reading data:

In [5]:
def model_train(model, x_train, y_train, batch_size, epochs, x_valid, y_valid, x_test, y_test):
    """Train model with the given training, validation, and test set, with appropriate batch size and # epochs."""
    epoch_data = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_valid, y_valid), verbose=0)
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc = score[1]
    score = score[0]
    return score, acc, epoch_data


In [6]:
def get_busy_vs_relax_timeframes_br_hb(path, ident, seconds):
    """Returns raw data from either 'on task' or 'relax' time frames and their class (0 or 1)."""
    
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))
    breathing = np.empty((0,12))
    heartbeat = np.empty((0,10))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17
        tmp_tasks_data = np.empty((0, seconds*samp_rate))
        tmp_tasks_y = np.empty((0, 1))
        tmp_breathing = np.empty((0,12))
        tmp_heartbeat = np.empty((0,10))

        ### task versus relax (1 sample each)
        dataextract = dataextractor.DataExtractor(data[0][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  data[1][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                  samp_rate)

        dataextract_relax = dataextractor.DataExtractor(data[0][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        data[1][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                        samp_rate)

        try:

            # get extracted features for breathing
            tmpBR_busy = dataextract.extract_from_breathing_time(dataextract.t[-samp_rate*seconds:],
                                                                 dataextract.y[-samp_rate*seconds:])
            tmpBR_relax = dataextract_relax.extract_from_breathing_time(dataextract_relax.t[-samp_rate*seconds:],
                                                                 dataextract_relax.y[-samp_rate*seconds:])
            #get extracted features for heartbeat
            tmpHB_busy = dataextract.extract_from_heartbeat_time(dataextract.t[-samp_rate*seconds:],
                                                                 dataextract.y[-samp_rate*seconds:])
            tmpHB_relax = dataextract.extract_from_heartbeat_time(dataextract_relax.t[-samp_rate*seconds:],
                                                                 dataextract_relax.y[-samp_rate*seconds:])

            tmp_tasks_data = np.vstack((tmp_tasks_data, dataextract.y[-samp_rate * seconds:]))
            tmp_tasks_y = np.vstack((tasks_y, 1))
            tmp_tasks_data = np.vstack((tmp_tasks_data, dataextract_relax.y[-samp_rate * seconds:]))
            tmp_tasks_y = np.vstack((tmp_tasks_y, 0))

            # put busy frames then relaxed frames under the previous frames
            tmp_breathing = np.vstack((tmp_breathing, tmpBR_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            tmp_breathing = np.vstack((tmp_breathing, tmpBR_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

            tmp_heartbeat = np.vstack((tmp_heartbeat, tmpHB_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            tmp_heartbeat = np.vstack((tmp_heartbeat, tmpHB_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

        except ValueError:
#             print(ident)  # ignore short windows
            continue

        # put busy frames then relaxed frames under the previous frames
        tasks_data = np.vstack((tasks_data, dataextract.y[-samp_rate * seconds:]))
        tasks_y = np.vstack((tasks_y, 1))
        tasks_data = np.vstack((tasks_data, dataextract_relax.y[-samp_rate * seconds:]))
        tasks_y = np.vstack((tasks_y, 0))

        breathing = np.vstack((breathing, tmpBR_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
        breathing = np.vstack((breathing, tmpBR_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

        heartbeat = np.vstack((heartbeat, tmpHB_busy.to_numpy(dtype='float64', na_value=0)[0][:-1]))
        heartbeat = np.vstack((heartbeat, tmpHB_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))

    return tasks_data, tasks_y, breathing, heartbeat

In [7]:
def get_data_from_idents_br_hb(path, idents, seconds):
    """Go through all user data and take out windows of only <seconds> long time frames,
    along with the given class (from 'divide_each_task' function).
    """
    samp_rate = 43  # hard-coded sample rate
    data, ys = np.empty((0, samp_rate*seconds)), np.empty((0, 1))
    brs = np.empty((0,12))
    hbs = np.empty((0,10))
    combined = np.empty((0,22))
    
    # was gettign some weird warnings; stack overflow said to ignore them
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for i in idents:
            x, y, br, hb = get_busy_vs_relax_timeframes_br_hb(path, i, seconds) # either 'get_busy_vs_relax_timeframes',
            # get_engagement_increase_vs_decrease_timeframes, get_task_complexities_timeframes or get_TLX_timeframes

            data = np.vstack((data, x))
            ys = np.vstack((ys, y))
            brs = np.vstack((brs, br))
            hbs = np.vstack((hbs, hb))
        combined = np.hstack((brs,hbs))
    
    return data, ys, brs, hbs, combined

In [8]:
def scale_data(x_train, x_valid, x_test, standardScaler=True, minMaxScaler=True):
    
    # copy data variables
    xt_train = x_train
    xt_valid = x_valid
    xt_test = x_test
    
    if standardScaler:
        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(np.vstack((xt_train, xt_valid, xt_test)))
        xt_train = sscaler.transform(xt_train)
        xt_valid = sscaler.transform(xt_valid)
        xt_test = sscaler.transform(xt_test)

    if minMaxScaler:
        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler((0,1))
        mmscaler.fit(np.vstack((xt_train, xt_valid, xt_test)))
        xt_train = mmscaler.transform(xt_train)
        xt_valid = mmscaler.transform(xt_valid)
        xt_test = mmscaler.transform(xt_test)
    
    return xt_train, xt_valid, xt_test

In [9]:
# Accs is a dictionary which holds 1d arrays of accuracies in each key
# except the key 'test id' which holds strings of the id which yielded the coresponding accuracies
def print_accs_stats(accs):
    # loop over each key
    for key in accs:
    
        if (key == 'test id'):
            # skip calculating ids
            continue

        # calculate and print some statistics
        print(key, "accuracies:")
        print("- min:", np.min(accs[key]))
        print("- max:", np.max(accs[key]))
        print("- mean:", np.mean(accs[key]))
        print("- median:", np.median(accs[key]))
        print("")

## Autoencoders

#### Undercomplete Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [10]:
def undercomplete_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout")(input_data)
    # "encoded" is the encoded representation of the input
    encoded = Dense(encoding_dim, activation='relu', name="encoded")(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    autoencoder = Model(input_data, decoded)
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Sparse Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [11]:
def sparse_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout") (input_data)
    # "encoded" is the encoded representation of the input
    # add a sparsity constraint
    encoded = Dense(encoding_dim, activation='relu', name="encoded",
                    activity_regularizer=regularizers.l1(10e-5))(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    # this model maps an input to its reconstruction
    autoencoder = Model(input_data, decoded, name="sparse_ae")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Deep Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [12]:
def deep_ae(x, enc_layers=[512,256], encoding_dim=64, dec_layers=[256,512], encoded_as_model=False):
    # From https://www.tensorflow.org/guide/keras/functional#use_the_same_graph_of_layers_to_define_multiple_models
    input_data = keras.Input(shape=x[0].shape, name="normalized_signal")
    model = Dropout(0.25, name="dropout", autocast=False)(input_data)
    for i in enumerate(enc_layers):
        model = Dense(i[1], activation="relu", name="dense_enc_" + str(i[0]+1))(model)
    encoded_output = Dense(encoding_dim, activation="relu", name="encoded_signal")(model)

    encoded = encoded_output

    model = layers.Dense(dec_layers[0], activation="sigmoid", name="dense_dec_1")(encoded_output)
    for i in enumerate(dec_layers[1:]):
        model = Dense(i[1], activation="sigmoid", name="dense_dec_" + str(i[0]+2))(model)
    decoded_output = Dense(x[0].shape[0], activation="sigmoid", name="reconstructed_signal")(model)
    
    autoencoder = Model(input_data, decoded_output, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)

    return autoencoder, encoded

#### Contractive Autoencoder
From: https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

In [13]:
# define a function to be able to access the autoencoder in the loss funciton
def loss_with_params(autoencoder):
    # loss function from https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/
    def contractive_loss(y_pred, y_true):

        lam = 1e-4
        mse = K.mean(K.square(y_true - y_pred), axis=1)

        W = K.variable(value=autoencoder.get_layer('encoded').get_weights()[0])  # N x N_hidden
        W = K.transpose(W)  # N_hidden x N
        h = autoencoder.get_layer('encoded').output
        dh = h * (1 - h)  # N_batch x N_hidden

        # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
        contractive = lam * K.sum(dh**2 * K.sum(W**2, axis=1), axis=1)

        return mse + contractive
    return contractive_loss

In [14]:
def contractive_ae(x, encoding_dim=64, encoded_as_model=False):
    # From https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

    input_data = Input(shape=x[0].shape, name="input")
    encoded = Dense(encoding_dim, activation='sigmoid', name='encoded')(input_data)
    outputs = Dense(x[0].shape[0], activation='linear', name="output")(encoded)

    autoencoder = Model(input_data, outputs, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss=loss_with_params(autoencoder), metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

## Classifiers

Initialize variables:

In [15]:
# initialize a dictionary to store accuracies for comparison
accuracies = {}

# used for reading the data into an array
seconds = 30  # time window length
idents = ['2gu87', 'iz2ps', '1mpau', '7dwjy', '7swyk', '94mnx', 'bd47a', 'c24ur', 'ctsax', 'dkhty', 'e4gay',
              'ef5rq', 'f1gjp', 'hpbxa', 'pmyfl', 'r89k1', 'tn4vl', 'td5pr', 'gyqu9', 'fzchw', 'l53hg', '3n2f9',
              '62i9y']
path = '../../../StudyData/'

# change to len(idents) at the end to use all the data
n = 5 #len(idents)

#### Simple dense layer

Define the classifier:

In [16]:
def dense_classifier(model, params):
    
    model = Dropout(params['dropout'], name='dropout_cl')(model)
    model = Dense(params['hidden_size'], activation=params['activation'], name='dense_cl1')(model)
    model = Dense(1, activation=params['last_activation'], name='dense_cl2')(model)

    return model

In [17]:
def dense_classifier_base():
    model = Sequential()
    model.add(Dropout(0))
    model.add(Dense(32))
    model.add(Activation('sigmoid'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=metrics)
    return model

In [18]:
params = {
    'dropout': 0.24,
    'optimizer': 'Adam',
    'hidden_size': 64,
    'loss': 'binary_crossentropy',
    'last_activation': 'sigmoid',
    'activation': 'softmax',
    'batch_size': 256,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [19]:
# set the variables in the dictionary
accuracies['simple_dense'] = {}
accs = accuracies['simple_dense']
accs['phase'] = []
accs['breathing'] = []
accs['heartbeat'] = []
accs['combined br hb'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
accs['test id'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        # print current iteration and time elapsed from start
        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        ## ----- Data preparation:
        # Split the data
        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]
        
        # save test id to see which id yielded which accuracies
        accs['test id'].append(test_idents[0])

        # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
        xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
        xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
        xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

        # Scale data with standard scaler then MinMax scaler
        # Raw Phase data:
        xt_train, xt_valid, xt_test = scale_data(xt_train, xt_valid, xt_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted breathing data:
        br_train, br_valid, br_test = scale_data(br_train, br_valid, br_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted Heartbeat data:
        hb_train, hb_valid, hb_test = scale_data(hb_train, hb_valid, hb_test, standardScaler=True, minMaxScaler=True)
        # Combined breathing and heartbeat data (joined together into one matrix)
        cmb_train, cmb_valid, cmb_test = scale_data(cmb_train, cmb_valid, cmb_test, standardScaler=True, minMaxScaler=True)
        
        
        
        ## ----- Classify without autoencoders:
        # Phase classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['phase'].append(curr_acc)

        # Breathing classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, br_train, y_train, params['batch_size'], params['epochs'],
                                               br_valid, y_valid, br_test, y_test)
        accs['breathing'].append(curr_acc)

        # Heartbeat classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, hb_train, y_train, params['batch_size'], params['epochs'],
                                               hb_valid, y_valid, hb_test, y_test)
        accs['heartbeat'].append(curr_acc)

        # Combined classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, cmb_train, y_train, params['batch_size'], params['epochs'],
                                               cmb_valid, y_valid, cmb_test, y_test)
        accs['combined br hb'].append(curr_acc)
        
        
        
        ## ----- Classify with autoencoders:
        # AE Training params
        batch_size = 256
        epochs = 100

        # Undercomplete AE:
        autoencoder, encoded = undercomplete_ae(xt_train, 40)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # Sparse AE:
        autoencoder, encoded = sparse_ae(xt_train, 40)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # Deep AE:
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=40, dec_layers=[256,512])
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # Contractive AE:
        autoencoder, encoded = contractive_ae(xt_train, 40)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

# Print total time required to run this
end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.006667
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
iteration: 2 of 5 ; time elapsed: 0:01:04.699263
iteration: 3 of 5 ; time elapsed: 0:02:17.470242
iteration: 4 of 5 ; time elapsed: 0:03:39.764664
iteration: 5 of 5 ; time elapsed: 0:05:10.010630
Completed! Time elapsed: 0:06:45.058357


In [20]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.791667,0.416667,0.666667,0.458333,0.791667,0.833333,0.666667,0.833333,62i9y
1,0.647059,0.676471,0.235294,0.617647,0.794118,0.764706,0.647059,0.558824,2gu87
2,0.666667,0.861111,0.361111,0.888889,0.638889,0.583333,0.611111,0.472222,iz2ps
3,0.5,0.558824,0.352941,0.617647,0.588235,0.617647,0.529412,0.470588,1mpau
4,0.566667,0.566667,0.4,0.566667,0.666667,0.666667,0.533333,0.5,7dwjy


In [21]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.5
- max: 0.7916667
- mean: 0.63441175
- median: 0.64705884

breathing accuracies:
- min: 0.41666666
- max: 0.8611111
- mean: 0.6159477
- median: 0.56666666

heartbeat accuracies:
- min: 0.23529412
- max: 0.6666667
- mean: 0.40320262
- median: 0.3611111

combined br hb accuracies:
- min: 0.45833334
- max: 0.8888889
- mean: 0.62983656
- median: 0.61764705

undercomplete accuracies:
- min: 0.5882353
- max: 0.7941176
- mean: 0.6959151
- median: 0.6666667

sparse accuracies:
- min: 0.5833333
- max: 0.8333333
- mean: 0.6931372
- median: 0.6666667

deep accuracies:
- min: 0.5294118
- max: 0.6666667
- mean: 0.59751636
- median: 0.6111111

contractive accuracies:
- min: 0.47058824
- max: 0.8333333
- mean: 0.5669935
- median: 0.5



#### LSTM-based classifier  
based on the original author's code

Optimize hyperparameters with talos:

In [22]:
def LSTM_classifier(model, params):

    model = layers.Reshape((-1, 1), input_shape=(model.shape), name='reshape_cl') (model)

    model = layers.Dropout(params['dropout'], name='dropout_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl1') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl2') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl2') (model)
    
    model = layers.Dropout(params['dropout'], name='dropout_cl2') (model)

    model = LSTM(params['lstm_output_size'], activation='sigmoid', name='lstm_cl') (model)

    model = Dense(1, activation=params['last_activation'], name='dense_cl') (model)
    return model

In [23]:
def LSTM_classifier_base(params):
    
    model = Sequential()
    model.add(Dropout(params['dropout']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))

    model.add(MaxPooling1D(pool_size=params['pool_size']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))
    model.add(MaxPooling1D(pool_size=params['pool_size']))

    model.add(Dropout(params['dropout']))
    model.add(LSTM(params['lstm_output_size']))
    model.add(Dense(1))
    model.add(Activation(params['last_activation']))

    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=['acc'])
    
    return model

In [24]:
params_phase = {
    'kernel_size': 32,
    'strides': 4,
    'pool_size': 2,
    'filters': 8,
    'lstm_output_size': 236,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [25]:
params_br_hb = {
    'kernel_size': 2,
    'strides': 1,
    'pool_size': 1,
    'filters': 2,
    'lstm_output_size': 4,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [26]:
params = {
    'kernel_size': 4,
    'filters': 2,
    'strides': 2,
    'pool_size': 2,
    'dropout': 0.09,
    'optimizer': 'Nadam',
    'loss': 'binary_crossentropy',
    'activation': 'relu',
    'last_activation': 'sigmoid',
    'lstm_output_size': 256,
    'batch_size': 64,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [27]:
# set the variables in the dictionary
accuracies['LSTM'] = {}
accs = accuracies['LSTM']
accs['phase'] = []
accs['breathing'] = []
accs['heartbeat'] = []
accs['combined br hb'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
accs['test id'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for ident in range(n):

        # print current iteration and time elapsed from start
        print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        ## ----- Data preparation:
        # Split the data
        train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
        validation_idents = [idents[ident]]
        test_idents = [idents[ident-1]]

        # save test id to see which id yielded which accuracies
        accs['test id'].append(test_idents[0])
        
        # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
        xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
        xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
        xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

        # Scale data with standard scaler then MinMax scaler
        # Raw Phase data:
        xt_train, xt_valid, xt_test = scale_data(xt_train, xt_valid, xt_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted breathing data:
        br_train, br_valid, br_test = scale_data(br_train, br_valid, br_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted Heartbeat data:
        hb_train, hb_valid, hb_test = scale_data(hb_train, hb_valid, hb_test, standardScaler=True, minMaxScaler=True)
        # Combined breathing and heartbeat data (joined together into one matrix)
        cmb_train, cmb_valid, cmb_test = scale_data(cmb_train, cmb_valid, cmb_test, standardScaler=True, minMaxScaler=True)
        
        
        
        ## ----- Classify without autoencoders:
        # Phase classifier:
        model = LSTM_classifier_base(params_phase)
        # reshape data for the classifier
        xtt_train = xt_train.reshape(-1, xt_train[0].shape[0], 1)
        xtt_valid = xt_valid.reshape(-1, xt_valid[0].shape[0], 1)
        xtt_test = xt_test.reshape(-1, xt_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, xtt_train, y_train, params['batch_size'], params['epochs'],
                                               xtt_valid, y_valid, xtt_test, y_test)
        accs['phase'].append(curr_acc)

        # Breathing classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        brt_train = br_train.reshape(-1, br_train[0].shape[0], 1)
        brt_valid = br_valid.reshape(-1, br_valid[0].shape[0], 1)
        brt_test = br_test.reshape(-1, br_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, brt_train, y_train, params['batch_size'], params['epochs'],
                                               brt_valid, y_valid, brt_test, y_test)
        accs['breathing'].append(curr_acc)

        # Heartbeat classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        hbt_train = hb_train.reshape(-1, hb_train[0].shape[0], 1)
        hbt_valid = hb_valid.reshape(-1, hb_valid[0].shape[0], 1)
        hbt_test = hb_test.reshape(-1, hb_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, hbt_train, y_train, params['batch_size'], params['epochs'],
                                               hbt_valid, y_valid, hbt_test, y_test)
        accs['heartbeat'].append(curr_acc)

        # Combined classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        cmbt_train = cmb_train.reshape(-1, cmb_train[0].shape[0], 1)
        cmbt_valid = cmb_valid.reshape(-1, cmb_valid[0].shape[0], 1)
        cmbt_test = cmb_test.reshape(-1, cmb_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, cmbt_train, y_train, params['batch_size'], params['epochs'],
                                               cmbt_valid, y_valid, cmbt_test, y_test)
        accs['combined br hb'].append(curr_acc)

        
        
        ## ----- Classify with autoencoders:
        # AE Training params
        batch_size = 256
        epochs = 100

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, 40)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, 40)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=40, dec_layers=[256,512])
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, 40)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 5 ; time elapsed: 0:00:00.005956
iteration: 2 of 5 ; time elapsed: 0:04:02.561549
iteration: 3 of 5 ; time elapsed: 0:08:54.164319
iteration: 4 of 5 ; time elapsed: 0:14:26.771113
iteration: 5 of 5 ; time elapsed: 0:20:42.618662
Completed! Time elapsed: 0:28:30.138938


In [28]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.875,0.541667,0.458333,0.458333,0.833333,0.75,0.708333,0.791667,62i9y
1,0.764706,0.529412,0.352941,0.588235,0.5,0.735294,0.529412,0.735294,2gu87
2,0.888889,0.5,0.5,0.555556,0.638889,0.722222,0.722222,0.666667,iz2ps
3,0.735294,0.647059,0.323529,0.294118,0.676471,0.588235,0.5,0.647059,1mpau
4,0.7,0.7,0.5,0.533333,0.7,0.666667,0.566667,0.6,7dwjy


In [29]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.7
- max: 0.8888889
- mean: 0.7927778
- median: 0.7647059

breathing accuracies:
- min: 0.5
- max: 0.7
- mean: 0.58362746
- median: 0.5416667

heartbeat accuracies:
- min: 0.32352942
- max: 0.5
- mean: 0.4269608
- median: 0.45833334

combined br hb accuracies:
- min: 0.29411766
- max: 0.5882353
- mean: 0.48591504
- median: 0.53333336

undercomplete accuracies:
- min: 0.5
- max: 0.8333333
- mean: 0.66973853
- median: 0.6764706

sparse accuracies:
- min: 0.5882353
- max: 0.75
- mean: 0.69248366
- median: 0.7222222

deep accuracies:
- min: 0.5
- max: 0.7222222
- mean: 0.6053268
- median: 0.56666666

contractive accuracies:
- min: 0.6
- max: 0.7916667
- mean: 0.68813723
- median: 0.6666667



### Helper loop function definition

In [30]:
# a helper loop funciton for the sklearn and XGBoost classifiers
def helper_loop(classifier_function, idents, n=5):
    #returns a dictionary with accuracies

    # set the variables in the dictionary
    accs = {}
    accs['phase'] = []
    accs['breathing'] = []
    accs['heartbeat'] = []
    accs['combined br hb'] = []
    accs['undercomplete'] = []
    accs['sparse'] = []
    accs['deep'] = []
    accs['contractive'] = []
    accs['test id'] = []
    start_time = datetime.now()

    with tf.compat.v1.Session(config=config) as sess:
        # leave out person out validation
        for ident in range(n):

            # print current iteration and time elapsed from start
            print("iteration:", ident+1, "of", n, "; time elapsed:", datetime.now()-start_time)

            ## ----- Data preparation:
            # Split the data
            train_idents = [x for i, x in enumerate(idents) if (i != ident and i != (n-1+ident)%n)]
            validation_idents = [idents[ident]]
            test_idents = [idents[ident-1]]

            # save test id to see which id yielded which accuracies
            accs['test id'].append(test_idents[0])

            # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
            xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
            xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
            xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

            # change the y arrays to flat 1d arrays
            y_train = y_train.ravel()
            y_valid = y_valid.ravel()
            y_test = y_test.ravel()
            
            # Scale data with standard scaler then MinMax scaler
            # Raw Phase data:
            xt_train, xt_valid, xt_test = scale_data(xt_train, xt_valid, xt_test, standardScaler=True, minMaxScaler=True)
            # Hand extracted breathing data:
            br_train, br_valid, br_test = scale_data(br_train, br_valid, br_test, standardScaler=True, minMaxScaler=True)
            # Hand extracted Heartbeat data:
            hb_train, hb_valid, hb_test = scale_data(hb_train, hb_valid, hb_test, standardScaler=True, minMaxScaler=True)
            # Combined breathing and heartbeat data (joined together into one matrix)
            cmb_train, cmb_valid, cmb_test = scale_data(cmb_train, cmb_valid, cmb_test, standardScaler=True, minMaxScaler=True)



            ## ----- Classify without autoencoders:
            # Phase classifier:
            model = classifier_function()
            model.fit(xt_train, y_train)
            curr_acc = np.sum(model.predict(xt_test) == y_test) / y_test.shape[0]
            accs['phase'].append(curr_acc)

            # Breathing classifier:
            base_model = classifier_function()
            base_model.fit(br_train, y_train)
            curr_acc = np.sum(base_model.predict(br_valid) == y_valid) / y_test.shape[0]
            accs['breathing'].append(curr_acc)

            # Heartbeat classifier:
            base_model = classifier_function()
            base_model.fit(hb_train, y_train)
            curr_acc = np.sum(base_model.predict(hb_valid) == y_valid) / y_test.shape[0]
            accs['heartbeat'].append(curr_acc)

            # Combined classifier:
            base_model = classifier_function()
            base_model.fit(cmb_train, y_train)
            curr_acc = np.sum(base_model.predict(cmb_valid) == y_valid) / y_test.shape[0]
            accs['combined br hb'].append(curr_acc)



            ## ----- Classify with autoencoders:
            # AE Training params
            batch_size = 256
            epochs = 100

            # undercomplete AE
            autoencoder, encoded = undercomplete_ae(xt_train, 40, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / y_test.shape[0]
            accs['undercomplete'].append(curr_acc)

            # sparse AE
            autoencoder, encoded = sparse_ae(xt_train, 40, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / y_test.shape[0]
            accs['sparse'].append(curr_acc)

            # deep AE
            autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=40, dec_layers=[256,512], encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / y_test.shape[0]
            accs['deep'].append(curr_acc)

            # contractive AE
            autoencoder, encoded = contractive_ae(xt_train, 40, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / y_test.shape[0]
            accs['contractive'].append(curr_acc)

    # Print total time required to run this
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    print("Completed!", "Time elapsed:", elapsed_time)
    
    return accs

#### kNN

In [31]:
from sklearn.neighbors import KNeighborsClassifier

def KNN_classifier():
    model = KNeighborsClassifier(p=3, n_neighbors=7, metric='cosine')
    return model

Combine the autoencoders with the classifier: 

In [32]:
accs = helper_loop(KNN_classifier, idents, n)

iteration: 1 of 5 ; time elapsed: 0:00:00.007785
iteration: 2 of 5 ; time elapsed: 0:02:26.157877
iteration: 3 of 5 ; time elapsed: 0:05:05.572012
iteration: 4 of 5 ; time elapsed: 0:07:56.657989
iteration: 5 of 5 ; time elapsed: 0:10:53.323731
Completed! Time elapsed: 0:14:10.583202


In [33]:
accuracies['kNN'] = accs

In [34]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.583333,0.875,0.583333,0.916667,0.666667,0.75,0.708333,0.666667,62i9y
1,0.588235,0.970588,0.676471,0.941176,0.705882,0.676471,0.529412,0.647059,2gu87
2,0.611111,0.666667,0.583333,0.638889,0.694444,0.638889,0.5,0.722222,iz2ps
3,0.588235,0.529412,0.294118,0.411765,0.5,0.558824,0.558824,0.558824,1mpau
4,0.766667,0.9,0.533333,0.7,0.666667,0.633333,0.766667,0.5,7dwjy


In [35]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.5833333333333334
- max: 0.7666666666666667
- mean: 0.627516339869281
- median: 0.5882352941176471

breathing accuracies:
- min: 0.5294117647058824
- max: 0.9705882352941176
- mean: 0.7883333333333333
- median: 0.875

heartbeat accuracies:
- min: 0.29411764705882354
- max: 0.6764705882352942
- mean: 0.5341176470588235
- median: 0.5833333333333334

combined br hb accuracies:
- min: 0.4117647058823529
- max: 0.9411764705882353
- mean: 0.7216993464052287
- median: 0.7

undercomplete accuracies:
- min: 0.5
- max: 0.7058823529411765
- mean: 0.6467320261437908
- median: 0.6666666666666666

sparse accuracies:
- min: 0.5588235294117647
- max: 0.75
- mean: 0.6515032679738562
- median: 0.6388888888888888

deep accuracies:
- min: 0.5
- max: 0.7666666666666667
- mean: 0.6126470588235294
- median: 0.5588235294117647

contractive accuracies:
- min: 0.5
- max: 0.7222222222222222
- mean: 0.618954248366013
- median: 0.6470588235294118



####  SVC

In [36]:
from sklearn.svm import SVC

def SVC_classifier():
    model = SVC(kernel='rbf', C=1.5)
    return model

Combine the autoencoders with the classifier: 

In [37]:
accs = helper_loop(SVC_classifier, idents, n)

iteration: 1 of 5 ; time elapsed: 0:00:00.006981
iteration: 2 of 5 ; time elapsed: 0:02:36.242425
iteration: 3 of 5 ; time elapsed: 0:05:21.212615
iteration: 4 of 5 ; time elapsed: 0:08:17.344705
iteration: 5 of 5 ; time elapsed: 0:11:19.241802
Completed! Time elapsed: 0:14:47.668711


In [38]:
accuracies['SVC'] = accs

In [39]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.75,0.916667,0.833333,0.958333,0.666667,0.666667,0.583333,0.666667,62i9y
1,0.705882,0.911765,0.470588,0.941176,0.617647,0.588235,0.558824,0.617647,2gu87
2,0.694444,0.555556,0.555556,0.666667,0.527778,0.583333,0.638889,0.666667,iz2ps
3,0.470588,0.529412,0.588235,0.529412,0.5,0.5,0.411765,0.441176,1mpau
4,0.6,0.833333,0.333333,0.866667,0.733333,0.7,0.633333,0.566667,7dwjy


In [40]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.47058823529411764
- max: 0.75
- mean: 0.6441830065359478
- median: 0.6944444444444444

breathing accuracies:
- min: 0.5294117647058824
- max: 0.9166666666666666
- mean: 0.7493464052287582
- median: 0.8333333333333334

heartbeat accuracies:
- min: 0.3333333333333333
- max: 0.8333333333333334
- mean: 0.5562091503267974
- median: 0.5555555555555556

combined br hb accuracies:
- min: 0.5294117647058824
- max: 0.9583333333333334
- mean: 0.7924509803921568
- median: 0.8666666666666667

undercomplete accuracies:
- min: 0.5
- max: 0.7333333333333333
- mean: 0.6090849673202614
- median: 0.6176470588235294

sparse accuracies:
- min: 0.5
- max: 0.7
- mean: 0.6076470588235294
- median: 0.5882352941176471

deep accuracies:
- min: 0.4117647058823529
- max: 0.6388888888888888
- mean: 0.5652287581699346
- median: 0.5833333333333334

contractive accuracies:
- min: 0.4411764705882353
- max: 0.6666666666666666
- mean: 0.5917647058823527
- median: 0.6176470588235294



#### Random Forest

In [41]:
from sklearn.ensemble import RandomForestClassifier
def random_forest_classifier():
    model = RandomForestClassifier(n_estimators = 250,
                                     min_samples_split = 10,
                                     min_samples_leaf = 4,
                                     max_features = 'auto',
                                     max_depth = 90,
                                     bootstrap = True)
    return model

Combine the autoencoders with the classifier: 

In [42]:
accs = helper_loop(random_forest_classifier, idents, n)

iteration: 1 of 5 ; time elapsed: 0:00:00.006981
iteration: 2 of 5 ; time elapsed: 0:02:47.474198
iteration: 3 of 5 ; time elapsed: 0:05:44.092931
iteration: 4 of 5 ; time elapsed: 0:08:54.958436
iteration: 5 of 5 ; time elapsed: 0:12:14.097375
Completed! Time elapsed: 0:16:11.639597


In [43]:
accuracies['random_forest'] = accs

In [44]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,1.0,1.041667,0.833333,1.041667,0.958333,0.958333,1.0,1.0,62i9y
1,0.558824,0.941176,0.470588,0.941176,0.705882,0.676471,0.676471,0.705882,2gu87
2,0.666667,0.611111,0.611111,0.666667,0.861111,0.777778,0.75,0.861111,iz2ps
3,0.705882,0.588235,0.411765,0.5,0.735294,0.588235,0.470588,0.588235,1mpau
4,0.566667,0.866667,0.433333,0.766667,0.633333,0.633333,0.566667,0.566667,7dwjy


In [45]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.5588235294117647
- max: 1.0
- mean: 0.6996078431372549
- median: 0.6666666666666666

breathing accuracies:
- min: 0.5882352941176471
- max: 1.0416666666666667
- mean: 0.8097712418300654
- median: 0.8666666666666667

heartbeat accuracies:
- min: 0.4117647058823529
- max: 0.8333333333333334
- mean: 0.5520261437908497
- median: 0.47058823529411764

combined br hb accuracies:
- min: 0.5
- max: 1.0416666666666667
- mean: 0.783235294117647
- median: 0.7666666666666667

undercomplete accuracies:
- min: 0.6333333333333333
- max: 0.9583333333333334
- mean: 0.7787908496732027
- median: 0.7352941176470589

sparse accuracies:
- min: 0.5882352941176471
- max: 0.9583333333333334
- mean: 0.7268300653594771
- median: 0.6764705882352942

deep accuracies:
- min: 0.47058823529411764
- max: 1.0
- mean: 0.6927450980392157
- median: 0.6764705882352942

contractive accuracies:
- min: 0.5666666666666667
- max: 1.0
- mean: 0.7443790849673203
- median: 0.7058823529411765



#### Naive Bayesian

In [46]:
from sklearn.naive_bayes import ComplementNB

def naive_bayesian_classifier():
    model = ComplementNB()
    return model

Combine the autoencoders with the classifier: 

In [47]:
accs = helper_loop(naive_bayesian_classifier, idents, n)

iteration: 1 of 5 ; time elapsed: 0:00:00.007350
iteration: 2 of 5 ; time elapsed: 0:03:01.096305
iteration: 3 of 5 ; time elapsed: 0:06:09.902065
iteration: 4 of 5 ; time elapsed: 0:09:25.510734
iteration: 5 of 5 ; time elapsed: 0:12:50.099557
Completed! Time elapsed: 0:16:56.380130


In [48]:
accuracies['naive_bayesian'] = accs

In [49]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.583333,0.916667,0.541667,0.958333,0.708333,0.708333,0.708333,0.583333,62i9y
1,0.382353,0.911765,0.441176,0.941176,0.5,0.529412,0.558824,0.5,2gu87
2,0.388889,0.5,0.388889,0.555556,0.5,0.527778,0.5,0.472222,iz2ps
3,0.588235,0.470588,0.352941,0.441176,0.5,0.5,0.558824,0.441176,1mpau
4,0.433333,0.866667,0.533333,0.9,0.533333,0.5,0.433333,0.5,7dwjy


In [50]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.38235294117647056
- max: 0.5882352941176471
- mean: 0.4752287581699347
- median: 0.43333333333333335

breathing accuracies:
- min: 0.47058823529411764
- max: 0.9166666666666666
- mean: 0.7331372549019608
- median: 0.8666666666666667

heartbeat accuracies:
- min: 0.35294117647058826
- max: 0.5416666666666666
- mean: 0.4516013071895425
- median: 0.4411764705882353

combined br hb accuracies:
- min: 0.4411764705882353
- max: 0.9583333333333334
- mean: 0.7592483660130719
- median: 0.9

undercomplete accuracies:
- min: 0.5
- max: 0.7083333333333334
- mean: 0.5483333333333333
- median: 0.5

sparse accuracies:
- min: 0.5
- max: 0.7083333333333334
- mean: 0.5531045751633987
- median: 0.5277777777777778

deep accuracies:
- min: 0.43333333333333335
- max: 0.7083333333333334
- mean: 0.5518627450980393
- median: 0.5588235294117647

contractive accuracies:
- min: 0.4411764705882353
- max: 0.5833333333333334
- mean: 0.4993464052287582
- median: 0.5



#### XGBoost

In [51]:
from xgboost import XGBClassifier

def XGBoost_classifier():
    model = XGBClassifier(n_estimators = 83)
    return model

Combine the autoencoders with the classifier: 

In [52]:
accs = helper_loop(XGBoost_classifier, idents, n)

iteration: 1 of 5 ; time elapsed: 0:00:00.010971
iteration: 2 of 5 ; time elapsed: 0:03:09.365335
iteration: 3 of 5 ; time elapsed: 0:06:25.005927
iteration: 4 of 5 ; time elapsed: 0:09:54.151359
iteration: 5 of 5 ; time elapsed: 0:13:22.651073
Completed! Time elapsed: 0:17:23.868358


In [53]:
accuracies['XGBoost'] = accs

In [54]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,1.0,1.0,0.791667,1.041667,1.0,1.0,1.0,1.0,62i9y
1,0.617647,0.911765,0.558824,0.911765,0.735294,0.558824,0.470588,0.676471,2gu87
2,0.666667,0.638889,0.583333,0.666667,0.888889,0.722222,0.638889,0.777778,iz2ps
3,0.735294,0.588235,0.441176,0.588235,0.529412,0.558824,0.529412,0.529412,1mpau
4,0.6,0.7,0.5,0.766667,0.633333,0.566667,0.533333,0.633333,7dwjy


In [55]:
# print some statistics for each method
print_accs_stats(accs)

phase accuracies:
- min: 0.6
- max: 1.0
- mean: 0.7239215686274509
- median: 0.6666666666666666

breathing accuracies:
- min: 0.5882352941176471
- max: 1.0
- mean: 0.7677777777777777
- median: 0.7

heartbeat accuracies:
- min: 0.4411764705882353
- max: 0.7916666666666666
- mean: 0.575
- median: 0.5588235294117647

combined br hb accuracies:
- min: 0.5882352941176471
- max: 1.0416666666666667
- mean: 0.795
- median: 0.7666666666666667

undercomplete accuracies:
- min: 0.5294117647058824
- max: 1.0
- mean: 0.7573856209150327
- median: 0.7352941176470589

sparse accuracies:
- min: 0.5588235294117647
- max: 1.0
- mean: 0.6813071895424836
- median: 0.5666666666666667

deep accuracies:
- min: 0.47058823529411764
- max: 1.0
- mean: 0.6344444444444444
- median: 0.5333333333333333

contractive accuracies:
- min: 0.5294117647058824
- max: 1.0
- mean: 0.7233986928104575
- median: 0.6764705882352942



###  Compare Accuracies

Print min, max, mean, median for each clasifier/autoencoder combination:

In [56]:
for classifier in accuracies:
    print("-----------", classifier + ":", "-----------")
    accs = accuracies[classifier]
    print_accs_stats(accs)
    print("\n")

----------- simple_dense: -----------
phase accuracies:
- min: 0.5
- max: 0.7916667
- mean: 0.63441175
- median: 0.64705884

breathing accuracies:
- min: 0.41666666
- max: 0.8611111
- mean: 0.6159477
- median: 0.56666666

heartbeat accuracies:
- min: 0.23529412
- max: 0.6666667
- mean: 0.40320262
- median: 0.3611111

combined br hb accuracies:
- min: 0.45833334
- max: 0.8888889
- mean: 0.62983656
- median: 0.61764705

undercomplete accuracies:
- min: 0.5882353
- max: 0.7941176
- mean: 0.6959151
- median: 0.6666667

sparse accuracies:
- min: 0.5833333
- max: 0.8333333
- mean: 0.6931372
- median: 0.6666667

deep accuracies:
- min: 0.5294118
- max: 0.6666667
- mean: 0.59751636
- median: 0.6111111

contractive accuracies:
- min: 0.47058824
- max: 0.8333333
- mean: 0.5669935
- median: 0.5



----------- LSTM: -----------
phase accuracies:
- min: 0.7
- max: 0.8888889
- mean: 0.7927778
- median: 0.7647059

breathing accuracies:
- min: 0.5
- max: 0.7
- mean: 0.58362746
- median: 0.5416667

hea

Print all accuracies in table form:

In [57]:
for classifier in accuracies:
    print(classifier + ":")
#     print(pandas.DataFrame.from_dict(accuracies[classifier]))
    # Using .to_string() gives nicer loooking results (doesn't split into new line)
    print(pandas.DataFrame.from_dict(accuracies[classifier]).to_string())
    print("\n")

simple_dense:
      phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive test id
0  0.791667   0.416667   0.666667        0.458333       0.791667  0.833333  0.666667     0.833333   62i9y
1  0.647059   0.676471   0.235294        0.617647       0.794118  0.764706  0.647059     0.558824   2gu87
2  0.666667   0.861111   0.361111        0.888889       0.638889  0.583333  0.611111     0.472222   iz2ps
3  0.500000   0.558824   0.352941        0.617647       0.588235  0.617647  0.529412     0.470588   1mpau
4  0.566667   0.566667   0.400000        0.566667       0.666667  0.666667  0.533333     0.500000   7dwjy


LSTM:
      phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive test id
0  0.875000   0.541667   0.458333        0.458333       0.833333  0.750000  0.708333     0.791667   62i9y
1  0.764706   0.529412   0.352941        0.588235       0.500000  0.735294  0.529412     0.735294   2gu87
2  0.888889   0.500000  