# Classifiers - low vs high
Exploring different classifiers with different autoencoders.

#### Table of contents:  

autoencoders:  
[Undercomplete Autoencoder](#Undercomplete-Autoencoder)  
[Sparse Autoencoder](#Sparse-Autoencoder)  
[Deep Autoencoder](#Deep-Autoencoder)  
[Contractive Autoencoder](#Contractive-Autoencoder)  

classifiers:  
[Simple dense layer](#Simple-dense-layer)  
[LSTM-based classifier](#LSTM-based-classifier)  
[kNN](#kNN)  
[SVC](#SVC)  
[Random Forest](#Random-Forest)  
[XGBoost](#XGBoost)  

In [1]:
import datareader # made by the previous author for reading the collected data
import dataextractor # same as above
import pandas
import numpy as np
import tensorflow as tf
# need to disable eager execution for .get_weights() in contractive autoencoder loss to work
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
# required for the contractive autoencoder
import tensorflow.keras.backend as K
import json
from datetime import datetime

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV

import warnings

import talos
from talos.utils import lr_normalizer

from tensorflow import keras
from tensorflow.keras import layers, regularizers
import matplotlib.pyplot as plt

tf.keras.backend.set_floatx('float32') # call this, to set keras to use float32 to avoid a warning message
metrics = ['accuracy']#,
#            keras.metrics.TruePositives(),
#            keras.metrics.FalsePositives(),
#            keras.metrics.TrueNegatives(),
#            keras.metrics.FalseNegatives()]

In [2]:
# from https://github.com/ageron/handson-ml/blob/master/extra_tensorflow_reproducibility.ipynb
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1)

with tf.compat.v1.Session(config=config) as sess:
    #... this will run single threaded
    pass

In [3]:
import random

random.seed(1)
np.random.seed(4)
tf.random.set_seed(2)

In [4]:
# Start the notebook in the terminal with "PYTHONHASHSEED=0 jupyter notebook" 
# or in anaconda "set PYTHONHASHSEED=0" then start jupyter notebook
import os
if os.environ.get("PYTHONHASHSEED") != "0":
    raise Exception("You must set PYTHONHASHSEED=0 when starting the Jupyter server to get reproducible results.")

This is modfied original author's code for reading data:

In [5]:
def model_train(model, x_train, y_train, batch_size, epochs, x_valid, y_valid, x_test, y_test):
    """Train model with the given training, validation, and test set, with appropriate batch size and # epochs."""
    epoch_data = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_valid, y_valid), verbose=0)
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc = score[1]
    score = score[0]
    return score, acc, epoch_data


In [6]:
def get_task_complexities_timeframes_br_hb(path, ident, seconds):
    """Returns raw data along with task complexity class.
    TODO: join functions. Add parameter to choose different task types and complexities"""

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds*samp_rate))
    tasks_y = np.empty((0, 1))
    breathing = np.empty((0,12))
    heartbeat = np.empty((0,10))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    
    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17
        tmp_tasks_data = np.empty((0, seconds*samp_rate))
        tmp_tasks_y = np.empty((0, 1))
        tmp_breathing = np.empty((0,12))
        tmp_heartbeat = np.empty((0,10))
        
        ### task complexity classification
        if cog_res['task_complexity'][task_num_table] == 'medium':
            continue
        # if cog_res['task_label'][task_num_table] == 'FA' or cog_res['task_label'][task_num_table] == 'HP':
        #     continue
#         if cog_res['task_label'][task_num_table] != 'NC':
#             continue
            
        map_compl = {
            'low': 0,
            'medium': 2,
            'high': 1
        }
        for j in range(10):
            new_end = int(busy_n[task_num_table][1] - j * samp_rate)
            new_start = int(new_end - samp_rate*30)
            dataextract = dataextractor.DataExtractor(data[0][new_start:new_end],
                                                      data[1][new_start:new_end], samp_rate)
            # get extracted features for breathing
            tmpBR = dataextract.extract_from_breathing_time(data[0][new_start:new_end],
                                                                 data[1][new_start:new_end])
            #get extracted features for heartbeat
            tmpHB = dataextract.extract_from_heartbeat_time(data[0][new_start:new_end],
                                                                 data[1][new_start:new_end])
            
            try:
                
                tmp_tasks_data = np.vstack((tmp_tasks_data, dataextract.y[-samp_rate * seconds:]))
                tmp_tasks_y = np.vstack((tmp_tasks_y, map_compl.get(cog_res['task_complexity'][task_num_table])))

                tmp_breathing = np.vstack((tmp_breathing, tmpBR.to_numpy(dtype='float64', na_value=0)[0][:-1]))
                tmp_heartbeat = np.vstack((tmp_heartbeat, tmpHB.to_numpy(dtype='float64', na_value=0)[0][:-1]))
                
            except ValueError:
#                 print(ident)
                continue

            tasks_data = np.vstack((tasks_data, dataextract.y))
            tasks_y = np.vstack((tasks_y, map_compl.get(cog_res['task_complexity'][task_num_table])))
            breathing = np.vstack((breathing, tmpBR.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            heartbeat = np.vstack((heartbeat, tmpHB.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            
    return tasks_data, tasks_y, breathing, heartbeat

In [7]:
def get_data_from_idents_br_hb(path, idents, seconds):
    """Go through all user data and take out windows of only <seconds> long time frames,
    along with the given class (from 'divide_each_task' function).
    """
    samp_rate = 43  # hard-coded sample rate
    data, ys = np.empty((0, samp_rate*seconds)), np.empty((0, 1))
    brs = np.empty((0,12))
    hbs = np.empty((0,10))
    combined = np.empty((0,22))
    
    # was gettign some weird warnings; stack overflow said to ignore them
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for i in idents:
            #x, y, br, hb = get_busy_vs_relax_timeframes_br_hb(path, i, seconds) # either 'get_busy_vs_relax_timeframes',
            # get_engagement_increase_vs_decrease_timeframes, get_task_complexities_timeframes or get_TLX_timeframes
            x, y, br, hb = get_task_complexities_timeframes_br_hb(path, i, seconds)
            
            data = np.vstack((data, x))
            ys = np.vstack((ys, y))
            brs = np.vstack((brs, br))
            hbs = np.vstack((hbs, hb))
        combined = np.hstack((brs,hbs))
    
    return data, ys, brs, hbs, combined

In [8]:
def scale_data(x, standardScaler=True, minMaxScaler=True):
    
    if standardScaler:
        # Scale with standard scaler
        sscaler = StandardScaler()
        sscaler.fit(x)
        x = sscaler.transform(x)

    if minMaxScaler:
        # Scale with MinMax to range [0,1]
        mmscaler = MinMaxScaler((0,1))
        mmscaler.fit(x)
        x = mmscaler.transform(x)
    
    return x

In [9]:
# Accs is a dictionary which holds 1d arrays of accuracies in each key
# except the key 'test id' which holds strings of the id which yielded the coresponding accuracies
def print_accs_stats(accs):
    
    printDict = {}
    # loop over each key
    for key in accs:
    
        if (key == 'test id'):
            # skip calculating ids
            continue
        printDict[key] = {}
        tmpDict = printDict[key]
        # calculate and print some statistics
        tmpDict['min'] = np.min(accs[key])
        tmpDict['max'] = np.max(accs[key])
        tmpDict['mean'] = np.mean(accs[key])
        tmpDict['median'] = np.median(accs[key])
    
    print(pandas.DataFrame.from_dict(printDict).to_string())

In [10]:
def set_random_seeds():
    # clear session and set seeds again
    # cannot clear session due to tf.compat.v1 graphs, but add tf.compat.v1.set_random_seed
#     K.clear_session()
    tf.compat.v1.set_random_seed(2)
    random.seed(1)
    np.random.seed(4)
    tf.random.set_seed(2)

## Autoencoders

#### Undercomplete Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [11]:
def undercomplete_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout")(input_data)
    # "encoded" is the encoded representation of the input
    encoded = Dense(encoding_dim, activation='relu', name="encoded")(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    autoencoder = Model(input_data, decoded)
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Sparse Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [12]:
def sparse_ae(x, encoding_dim=64, encoded_as_model=False):
    # Simplest possible autoencoder from https://blog.keras.io/building-autoencoders-in-keras.html

    # this is our input placeholder
    input_data = Input(shape=x[0].shape, name="input")
    dropout = Dropout(0.25, name="dropout") (input_data)
    # "encoded" is the encoded representation of the input
    # add a sparsity constraint
    encoded = Dense(encoding_dim, activation='relu', name="encoded",
                    activity_regularizer=regularizers.l1(10e-5))(dropout)
    
    # "decoded" is the lossy reconstruction of the input
    decoded = Dense(x[0].shape[0], activation='sigmoid', name="decoded")(encoded)

    # this model maps an input to its reconstruction
    autoencoder = Model(input_data, decoded, name="sparse_ae")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

#### Deep Autoencoder  
from https://blog.keras.io/building-autoencoders-in-keras.html

In [13]:
def deep_ae(x, enc_layers=[512,256], encoding_dim=64, dec_layers=[256,512], encoded_as_model=False):
    # From https://www.tensorflow.org/guide/keras/functional#use_the_same_graph_of_layers_to_define_multiple_models
    input_data = keras.Input(shape=x[0].shape, name="normalized_signal")
    model = Dropout(0.25, name="dropout", autocast=False)(input_data)
    for i in enumerate(enc_layers):
        model = Dense(i[1], activation="relu", name="dense_enc_" + str(i[0]+1))(model)
    encoded_output = Dense(encoding_dim, activation="relu", name="encoded_signal")(model)

    encoded = encoded_output

    model = layers.Dense(dec_layers[0], activation="sigmoid", name="dense_dec_1")(encoded_output)
    for i in enumerate(dec_layers[1:]):
        model = Dense(i[1], activation="sigmoid", name="dense_dec_" + str(i[0]+2))(model)
    decoded_output = Dense(x[0].shape[0], activation="sigmoid", name="reconstructed_signal")(model)
    
    autoencoder = Model(input_data, decoded_output, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)

    return autoencoder, encoded

#### Contractive Autoencoder
From: https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

In [14]:
# define a function to be able to access the autoencoder in the loss funciton
def loss_with_params(autoencoder):
    # loss function from https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/
    def contractive_loss(y_pred, y_true):

        lam = 1e-4
        mse = K.mean(K.square(y_true - y_pred), axis=1)

        W = K.variable(value=autoencoder.get_layer('encoded').get_weights()[0])  # N x N_hidden
        W = K.transpose(W)  # N_hidden x N
        h = autoencoder.get_layer('encoded').output
        dh = h * (1 - h)  # N_batch x N_hidden

        # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
        contractive = lam * K.sum(dh**2 * K.sum(W**2, axis=1), axis=1)

        return mse + contractive
    return contractive_loss

In [15]:
def contractive_ae(x, encoding_dim=64, encoded_as_model=False):
    # From https://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder/

    input_data = Input(shape=x[0].shape, name="input")
    encoded = Dense(encoding_dim, activation='sigmoid', name='encoded')(input_data)
    outputs = Dense(x[0].shape[0], activation='linear', name="output")(encoded)

    autoencoder = Model(input_data, outputs, name="autoencoder")
    
    # compile the model
    autoencoder.compile(optimizer='adam', loss=loss_with_params(autoencoder), metrics=metrics)
    
    # if return encoder in the encoded variable
    if encoded_as_model:
        encoded = Model(input_data, encoded)
    
    return autoencoder, encoded

## Classifiers

Initialize variables:

In [16]:
# initialize a dictionary to store accuracies for comparison
accuracies = {}

# used for reading the data into an array
seconds = 30  # time window length
idents = ['2gu87', 'iz2ps', '1mpau', '7dwjy', '7swyk', '94mnx', 'bd47a', 'c24ur', 'ctsax', 'dkhty', 'e4gay',
              'ef5rq', 'f1gjp', 'hpbxa', 'pmyfl', 'r89k1', 'tn4vl', 'td5pr', 'gyqu9', 'fzchw', 'l53hg', '3n2f9',
              '62i9y']
path = '../../../StudyData/'

# change to len(idents) at the end to use all the data
n = 7 #len(idents)

### Helper loop function definition

In [17]:
# a helper loop funciton for the sklearn and XGBoost classifiers
def helper_loop(classifier_function, idents, n=5):
    #returns a dictionary with accuracies

    # set the variables in the dictionary
    accs = {}
    accs['phase'] = []
    accs['breathing'] = []
    accs['heartbeat'] = []
    accs['combined br hb'] = []
    accs['undercomplete'] = []
    accs['sparse'] = []
    accs['deep'] = []
    accs['contractive'] = []
    accs['test id'] = []
    start_time = datetime.now()

    with tf.compat.v1.Session(config=config) as sess:
        # leave out person out validation
        for i in range(n):

            # print current iteration and time elapsed from start
            print("iteration:", i+1, "of", n, "; time elapsed:", datetime.now()-start_time)

            ## ----- Data preparation:
            validation_idents = [idents[i]]
            test_idents = [idents[i-1]]

            train_idents = []
            for ident in idents:
                if (ident not in test_idents) and (ident not in validation_idents):
                    train_idents.append(ident)

            # save test id to see which id yielded which accuracies
            accs['test id'].append(test_idents[0])

            # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
            xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
            xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
            xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

            # change the y arrays to flat 1d arrays
            y_train = y_train.ravel()
            y_valid = y_valid.ravel()
            y_test = y_test.ravel()
            
            # Scale data with standard scaler then MinMax scaler
            # Raw Phase data:
            xt_train = scale_data(xt_train, standardScaler=True, minMaxScaler=True)
            xt_valid = scale_data(xt_valid, standardScaler=True, minMaxScaler=True)
            xt_test = scale_data(xt_test, standardScaler=True, minMaxScaler=True)
            # Hand extracted breathing data:
            br_train = scale_data(br_train, standardScaler=True, minMaxScaler=True)
            br_valid = scale_data(br_valid, standardScaler=True, minMaxScaler=True)
            br_test = scale_data(br_test, standardScaler=True, minMaxScaler=True)
            # Hand extracted Heartbeat data:
            hb_train = scale_data(hb_train, standardScaler=True, minMaxScaler=True)
            hb_valid = scale_data(hb_valid, standardScaler=True, minMaxScaler=True)
            hb_test = scale_data(hb_test, standardScaler=True, minMaxScaler=True)
            # Combined breathing and heartbeat data (joined together into one matrix)
            cmb_train = scale_data(cmb_train, standardScaler=True, minMaxScaler=True)
            cmb_valid = scale_data(cmb_valid, standardScaler=True, minMaxScaler=True)
            cmb_test = scale_data(cmb_test, standardScaler=True, minMaxScaler=True)

            

            ## ----- Classify without autoencoders:
            # Phase classifier:
            set_random_seeds()
            model = classifier_function()
            model.fit(xt_train, y_train)
            curr_acc = np.sum(model.predict(xt_test) == y_test) / len(y_test)
            accs['phase'].append(curr_acc)
            
            # Breathing classifier:
            set_random_seeds()
            base_model = classifier_function()
            base_model.fit(br_train, y_train)
            curr_acc = np.sum(base_model.predict(br_test) == y_test) / len(y_test)
            accs['breathing'].append(curr_acc)

            # Heartbeat classifier:
            set_random_seeds()
            base_model = classifier_function()
            base_model.fit(hb_train, y_train)
            curr_acc = np.sum(base_model.predict(hb_test) == y_test) / len(y_test)
            accs['heartbeat'].append(curr_acc)

            # Combined classifier:
            set_random_seeds()
            base_model = classifier_function()
            base_model.fit(cmb_train, y_train)
            curr_acc = np.sum(base_model.predict(cmb_test) == y_test) / len(y_test)
            accs['combined br hb'].append(curr_acc)



            ## ----- Classify with autoencoders:
            # AE Training params
            batch_size = 256
            epochs = 100
            encoding_dim = 64

            # undercomplete AE
            set_random_seeds()
            autoencoder, encoded = undercomplete_ae(xt_train, encoding_dim, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['undercomplete'].append(curr_acc)

            # sparse AE
            set_random_seeds()
            autoencoder, encoded = sparse_ae(xt_train, encoding_dim, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['sparse'].append(curr_acc)

            # deep AE
            set_random_seeds()
            autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=encoding_dim, dec_layers=[256,512], encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['deep'].append(curr_acc)

            # contractive AE
            set_random_seeds()
            autoencoder, encoded = contractive_ae(xt_train, encoding_dim, encoded_as_model=True)
            sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                                   xt_valid, xt_valid, xt_test, xt_test)
            model = classifier_function()
            xtt_train = encoded.predict(xt_train)
            xtt_test = encoded.predict(xt_test)
            model.fit(xtt_train, y_train)
            curr_acc = np.sum(model.predict(xtt_test) == y_test) / len(y_test)
            accs['contractive'].append(curr_acc)

    # Print total time required to run this
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    print("Completed!", "Time elapsed:", elapsed_time)
    
    return accs

#### kNN

In [18]:
from sklearn.neighbors import KNeighborsClassifier

def KNN_classifier():
    model = KNeighborsClassifier(p=3, n_neighbors=7, metric='cosine')
    return model

Combine the autoencoders with the classifier: 

In [19]:
accs = helper_loop(KNN_classifier, idents, n)

iteration: 1 of 7 ; time elapsed: 0:00:00
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
iteration: 2 of 7 ; time elapsed: 0:02:13.982688
iteration: 3 of 7 ; time elapsed: 0:04:21.032716
iteration: 4 of 7 ; time elapsed: 0:06:37.722013
iteration: 5 of 7 ; time elapsed: 0:09:00.387753
iteration: 6 of 7 ; time elapsed: 0:11:33.011195
iteration: 7 of 7 ; time elapsed: 0:14:21.043605
Completed! Time elapsed: 0:17:12.994278


In [20]:
accuracies['kNN'] = accs

In [21]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.458333,0.408333,0.575,0.525,0.55,0.391667,0.475,0.525,62i9y
1,0.658333,0.4,0.583333,0.458333,0.541667,0.525,0.575,0.558333,2gu87
2,0.575,0.441667,0.583333,0.475,0.558333,0.466667,0.516667,0.583333,iz2ps
3,0.541667,0.608333,0.491667,0.541667,0.541667,0.508333,0.466667,0.483333,1mpau
4,0.558333,0.55,0.541667,0.683333,0.525,0.491667,0.416667,0.433333,7dwjy
5,0.391667,0.541667,0.575,0.425,0.508333,0.475,0.641667,0.475,7swyk
6,0.7,0.575,0.458333,0.416667,0.683333,0.608333,0.608333,0.691667,94mnx


In [22]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.391667   0.400000   0.458333        0.416667       0.508333  0.391667  0.416667     0.433333
max     0.700000   0.608333   0.583333        0.683333       0.683333  0.608333  0.641667     0.691667
mean    0.554762   0.503571   0.544048        0.503571       0.558333  0.495238  0.528571     0.535714
median  0.558333   0.541667   0.575000        0.475000       0.541667  0.491667  0.516667     0.525000


####  SVC

In [23]:
from sklearn.svm import SVC

def SVC_classifier():
    model = SVC(kernel='rbf', C=1.5)
    return model

Combine the autoencoders with the classifier: 

In [24]:
accs = helper_loop(SVC_classifier, idents, n)

iteration: 1 of 7 ; time elapsed: 0:00:00.005983
iteration: 2 of 7 ; time elapsed: 0:03:01.461545
iteration: 3 of 7 ; time elapsed: 0:06:04.339267
iteration: 4 of 7 ; time elapsed: 0:09:11.219051
iteration: 5 of 7 ; time elapsed: 0:12:22.639876
iteration: 6 of 7 ; time elapsed: 0:15:35.612219
iteration: 7 of 7 ; time elapsed: 0:18:47.217661
Completed! Time elapsed: 0:22:08.072677


In [25]:
accuracies['SVC'] = accs

In [26]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.3,0.491667,0.5,0.441667,0.416667,0.491667,0.375,0.466667,62i9y
1,0.433333,0.45,0.516667,0.516667,0.508333,0.491667,0.508333,0.541667,2gu87
2,0.533333,0.308333,0.5,0.408333,0.5,0.483333,0.55,0.558333,iz2ps
3,0.616667,0.516667,0.5,0.458333,0.483333,0.516667,0.516667,0.533333,1mpau
4,0.475,0.441667,0.5,0.533333,0.425,0.491667,0.466667,0.433333,7dwjy
5,0.558333,0.608333,0.5,0.5,0.583333,0.55,0.616667,0.533333,7swyk
6,0.575,0.65,0.441667,0.633333,0.625,0.625,0.616667,0.691667,94mnx


In [27]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.300000   0.308333   0.441667        0.408333       0.416667  0.483333  0.375000     0.433333
max     0.616667   0.650000   0.516667        0.633333       0.625000  0.625000  0.616667     0.691667
mean    0.498810   0.495238   0.494048        0.498810       0.505952  0.521429  0.521429     0.536905
median  0.533333   0.491667   0.500000        0.500000       0.500000  0.491667  0.516667     0.533333


#### Random Forest

In [28]:
from sklearn.ensemble import RandomForestClassifier
def random_forest_classifier():
    model = RandomForestClassifier(n_estimators = 250,
                                     min_samples_split = 10,
                                     min_samples_leaf = 4,
                                     max_features = 'auto',
                                     max_depth = 90,
                                     bootstrap = True)
    return model

Combine the autoencoders with the classifier: 

In [29]:
accs = helper_loop(random_forest_classifier, idents, n)

iteration: 1 of 7 ; time elapsed: 0:00:00.005985
iteration: 2 of 7 ; time elapsed: 0:03:26.966782
iteration: 3 of 7 ; time elapsed: 0:06:55.113636
iteration: 4 of 7 ; time elapsed: 0:10:20.153873
iteration: 5 of 7 ; time elapsed: 0:13:48.013152
iteration: 6 of 7 ; time elapsed: 0:17:18.545559
iteration: 7 of 7 ; time elapsed: 0:20:56.114613
Completed! Time elapsed: 0:24:36.162203


In [30]:
accuracies['random_forest'] = accs

In [31]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.391667,0.45,0.491667,0.441667,0.3,0.391667,0.333333,0.325,62i9y
1,0.4,0.483333,0.433333,0.591667,0.416667,0.45,0.508333,0.383333,2gu87
2,0.35,0.525,0.425,0.6,0.441667,0.466667,0.533333,0.55,iz2ps
3,0.5,0.325,0.375,0.208333,0.458333,0.533333,0.466667,0.466667,1mpau
4,0.35,0.7,0.466667,0.575,0.408333,0.541667,0.358333,0.45,7dwjy
5,0.625,0.45,0.516667,0.575,0.633333,0.458333,0.516667,0.45,7swyk
6,0.516667,0.575,0.558333,0.466667,0.558333,0.525,0.558333,0.566667,94mnx


In [32]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.350000   0.325000   0.375000        0.208333       0.300000  0.391667  0.333333     0.325000
max     0.625000   0.700000   0.558333        0.600000       0.633333  0.541667  0.558333     0.566667
mean    0.447619   0.501190   0.466667        0.494048       0.459524  0.480952  0.467857     0.455952
median  0.400000   0.483333   0.466667        0.575000       0.441667  0.466667  0.508333     0.450000


#### Naive Bayesian

In [33]:
from sklearn.naive_bayes import ComplementNB

def naive_bayesian_classifier():
    model = ComplementNB()
    return model

Combine the autoencoders with the classifier: 

In [34]:
accs = helper_loop(naive_bayesian_classifier, idents, n)

iteration: 1 of 7 ; time elapsed: 0:00:00.004987
iteration: 2 of 7 ; time elapsed: 0:03:21.865217
iteration: 3 of 7 ; time elapsed: 0:06:46.331717
iteration: 4 of 7 ; time elapsed: 0:10:09.513734
iteration: 5 of 7 ; time elapsed: 0:13:42.550339
iteration: 6 of 7 ; time elapsed: 0:17:17.386563
iteration: 7 of 7 ; time elapsed: 0:21:05.037112
Completed! Time elapsed: 0:24:49.152310


In [35]:
accuracies['naive_bayesian'] = accs

In [36]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.475,0.558333,0.491667,0.5,0.416667,0.391667,0.466667,0.375,62i9y
1,0.6,0.633333,0.6,0.616667,0.566667,0.633333,0.608333,0.533333,2gu87
2,0.516667,0.358333,0.425,0.375,0.6,0.541667,0.516667,0.5,iz2ps
3,0.641667,0.15,0.541667,0.225,0.45,0.433333,0.55,0.491667,1mpau
4,0.566667,0.466667,0.358333,0.466667,0.558333,0.416667,0.516667,0.541667,7dwjy
5,0.525,0.383333,0.5,0.4,0.566667,0.541667,0.416667,0.416667,7swyk
6,0.725,0.475,0.541667,0.483333,0.7,0.725,0.75,0.733333,94mnx


In [37]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.475000   0.150000   0.358333        0.225000       0.416667  0.391667  0.416667     0.375000
max     0.725000   0.633333   0.600000        0.616667       0.700000  0.725000  0.750000     0.733333
mean    0.578571   0.432143   0.494048        0.438095       0.551190  0.526190  0.546429     0.513095
median  0.566667   0.466667   0.500000        0.466667       0.566667  0.541667  0.516667     0.500000


#### XGBoost

In [38]:
from xgboost import XGBClassifier

def XGBoost_classifier():
    model = XGBClassifier(n_estimators = 83)
    return model

Combine the autoencoders with the classifier: 

In [39]:
accs = helper_loop(XGBoost_classifier, idents, n)

iteration: 1 of 7 ; time elapsed: 0:00:00.003991
iteration: 2 of 7 ; time elapsed: 0:03:43.053825
iteration: 3 of 7 ; time elapsed: 0:07:31.586255
iteration: 4 of 7 ; time elapsed: 0:11:29.039808
iteration: 5 of 7 ; time elapsed: 0:15:32.849651
iteration: 6 of 7 ; time elapsed: 0:19:43.020015
iteration: 7 of 7 ; time elapsed: 0:23:58.714797
Completed! Time elapsed: 0:28:18.182285


In [40]:
accuracies['XGBoost'] = accs

In [41]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.383333,0.558333,0.458333,0.5,0.383333,0.325,0.575,0.375,62i9y
1,0.45,0.558333,0.541667,0.583333,0.408333,0.416667,0.491667,0.483333,2gu87
2,0.416667,0.408333,0.55,0.491667,0.541667,0.5,0.475,0.425,iz2ps
3,0.516667,0.4,0.441667,0.125,0.475,0.5,0.483333,0.55,1mpau
4,0.416667,0.65,0.5,0.408333,0.408333,0.425,0.416667,0.408333,7dwjy
5,0.533333,0.516667,0.55,0.525,0.55,0.591667,0.616667,0.425,7swyk
6,0.533333,0.633333,0.391667,0.616667,0.583333,0.566667,0.508333,0.625,94mnx


In [42]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.383333   0.400000   0.391667        0.125000       0.383333  0.325000  0.416667     0.375000
max     0.533333   0.650000   0.550000        0.616667       0.583333  0.591667  0.616667     0.625000
mean    0.464286   0.532143   0.490476        0.464286       0.478571  0.475000  0.509524     0.470238
median  0.450000   0.558333   0.500000        0.500000       0.475000  0.500000  0.491667     0.425000


#### Simple dense layer

Define the classifier:

In [43]:
def dense_classifier(model, params):
    
    model = Dropout(params['dropout'], name='dropout_cl')(model)
    model = Dense(params['hidden_size'], activation=params['activation'], name='dense_cl1')(model)
    model = Dense(1, activation=params['last_activation'], name='dense_cl2')(model)

    return model

In [44]:
def dense_classifier_base():
    model = Sequential()
    model.add(Dropout(0))
    model.add(Dense(16))
    model.add(Activation('sigmoid'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=metrics)
    return model

In [45]:
params = {
    'dropout': 0.24,
    'optimizer': 'Adam',
    'hidden_size': 32,
    'loss': 'binary_crossentropy',
    'last_activation': 'sigmoid',
    'activation': 'softmax',
    'batch_size': 256,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [46]:
# set the variables in the dictionary
accuracies['simple_dense'] = {}
accs = accuracies['simple_dense']
accs['phase'] = []
accs['breathing'] = []
accs['heartbeat'] = []
accs['combined br hb'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
accs['test id'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for i in range(n):

        # print current iteration and time elapsed from start
        print("iteration:", i+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        ## ----- Data preparation:
        validation_idents = [idents[i]]
        test_idents = [idents[i-1]]

        train_idents = []
        for ident in idents:
            if (ident not in test_idents) and (ident not in validation_idents):
                train_idents.append(ident)
        
        # save test id to see which id yielded which accuracies
        accs['test id'].append(test_idents[0])

        # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
        xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
        xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
        xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

        # Scale data with standard scaler then MinMax scaler
        # Raw Phase data:
        xt_train = scale_data(xt_train, standardScaler=True, minMaxScaler=True)
        xt_valid = scale_data(xt_valid, standardScaler=True, minMaxScaler=True)
        xt_test = scale_data(xt_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted breathing data:
        br_train = scale_data(br_train, standardScaler=True, minMaxScaler=True)
        br_valid = scale_data(br_valid, standardScaler=True, minMaxScaler=True)
        br_test = scale_data(br_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted Heartbeat data:
        hb_train = scale_data(hb_train, standardScaler=True, minMaxScaler=True)
        hb_valid = scale_data(hb_valid, standardScaler=True, minMaxScaler=True)
        hb_test = scale_data(hb_test, standardScaler=True, minMaxScaler=True)
        # Combined breathing and heartbeat data (joined together into one matrix)
        cmb_train = scale_data(cmb_train, standardScaler=True, minMaxScaler=True)
        cmb_valid = scale_data(cmb_valid, standardScaler=True, minMaxScaler=True)
        cmb_test = scale_data(cmb_test, standardScaler=True, minMaxScaler=True)
        
        
        
        ## ----- Classify without autoencoders:
        # Phase classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['phase'].append(curr_acc)

        # Breathing classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, br_train, y_train, params['batch_size'], params['epochs'],
                                               br_valid, y_valid, br_test, y_test)
        accs['breathing'].append(curr_acc)

        # Heartbeat classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, hb_train, y_train, params['batch_size'], params['epochs'],
                                               hb_valid, y_valid, hb_test, y_test)
        accs['heartbeat'].append(curr_acc)

        # Combined classifier:
        model = dense_classifier_base()
        sc, curr_acc, epoch_data = model_train(model, cmb_train, y_train, params['batch_size'], params['epochs'],
                                               cmb_valid, y_valid, cmb_test, y_test)
        accs['combined br hb'].append(curr_acc)
        
        
        
        ## ----- Classify with autoencoders:
        # AE Training params
        batch_size = 256
        epochs = 100
        encoding_dim = 64

        # Undercomplete AE:
        autoencoder, encoded = undercomplete_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # Sparse AE:
        autoencoder, encoded = sparse_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # Deep AE:
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=encoding_dim, dec_layers=[256,512])
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # Contractive AE:
        autoencoder, encoded = contractive_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = dense_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

# Print total time required to run this
end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 7 ; time elapsed: 0:00:00.006955
iteration: 2 of 7 ; time elapsed: 0:06:49.061160
iteration: 3 of 7 ; time elapsed: 0:14:07.304875
iteration: 4 of 7 ; time elapsed: 0:21:44.615280
iteration: 5 of 7 ; time elapsed: 0:29:45.350013
iteration: 6 of 7 ; time elapsed: 0:38:13.286252
iteration: 7 of 7 ; time elapsed: 0:47:02.862003
Completed! Time elapsed: 0:56:22.299823


In [47]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.425,0.625,0.5,0.516667,0.516667,0.466667,0.3,0.525,62i9y
1,0.641667,0.658333,0.541667,0.616667,0.516667,0.541667,0.616667,0.591667,2gu87
2,0.5,0.316667,0.391667,0.433333,0.483333,0.475,0.541667,0.483333,iz2ps
3,0.408333,0.225,0.491667,0.458333,0.583333,0.608333,0.558333,0.575,1mpau
4,0.558333,0.525,0.5,0.466667,0.625,0.433333,0.583333,0.55,7dwjy
5,0.425,0.241667,0.558333,0.366667,0.441667,0.358333,0.475,0.5,7swyk
6,0.633333,0.566667,0.5,0.566667,0.608333,0.65,0.658333,0.666667,94mnx


In [48]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.408333   0.225000   0.391667        0.366667       0.441667  0.358333  0.300000     0.483333
max     0.641667   0.658333   0.558333        0.616667       0.625000  0.650000  0.658333     0.666667
mean    0.513095   0.451190   0.497619        0.489286       0.539286  0.504762  0.533333     0.555952
median  0.500000   0.525000   0.500000        0.466667       0.516667  0.475000  0.558333     0.550000


#### LSTM-based classifier  
based on the original author's code

Optimize hyperparameters with talos:

In [49]:
def LSTM_classifier(model, params):

    model = layers.Reshape((-1, 1), input_shape=(model.shape), name='reshape_cl') (model)

    model = layers.Dropout(params['dropout'], name='dropout_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl1') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl1') (model)
    
    model = Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides'],
                     name='conv1d_cl2') (model)
    
    model = MaxPooling1D(pool_size=params['pool_size'], name='maxpool_cl2') (model)
    
    model = layers.Dropout(params['dropout'], name='dropout_cl2') (model)

    model = LSTM(params['lstm_output_size'], activation='sigmoid', name='lstm_cl') (model)

    model = Dense(1, activation=params['last_activation'], name='dense_cl') (model)
    return model

In [50]:
def LSTM_classifier_base(params):
    
    model = Sequential()
    model.add(Dropout(params['dropout']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))

    model.add(MaxPooling1D(pool_size=params['pool_size']))
    model.add(Conv1D(params['filters'],
                     params['kernel_size'],
                     padding='valid',
                     activation=params['activation'],
                     strides=params['strides']))
    model.add(MaxPooling1D(pool_size=params['pool_size']))

    model.add(Dropout(params['dropout']))
    model.add(LSTM(params['lstm_output_size']))
    model.add(Dense(1))
    model.add(Activation(params['last_activation']))

    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=['acc'])
    
    return model

In [51]:
params_phase = {
    'kernel_size': 32,
    'strides': 4,
    'pool_size': 2,
    'filters': 8,
    'lstm_output_size': 236,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [52]:
params_br_hb = {
    'kernel_size': 2,
    'strides': 1,
    'pool_size': 1,
    'filters': 2,
    'lstm_output_size': 4,
    'loss': 'binary_crossentropy',
    'dropout': 0.09,
    'activation': 'relu',
    'optimizer': 'Nadam',
    'last_activation': 'sigmoid'
}

In [53]:
params = {
    'kernel_size': 4,
    'filters': 2,
    'strides': 2,
    'pool_size': 2,
    'dropout': 0.09,
    'optimizer': 'Nadam',
    'loss': 'binary_crossentropy',
    'activation': 'relu',
    'last_activation': 'sigmoid',
    'lstm_output_size': 256,
    'batch_size': 64,
    'epochs': 100
}

Combine the autoencoders with the classifier: 

In [54]:
# set the variables in the dictionary
accuracies['LSTM'] = {}
accs = accuracies['LSTM']
accs['phase'] = []
accs['breathing'] = []
accs['heartbeat'] = []
accs['combined br hb'] = []
accs['undercomplete'] = []
accs['sparse'] = []
accs['deep'] = []
accs['contractive'] = []
accs['test id'] = []
start_time = datetime.now()

with tf.compat.v1.Session(config=config) as sess:
    # leave out person out validation
    for i in range(5): ##### <--------------------------------- TODO: change to range(n)

        # print current iteration and time elapsed from start
        print("iteration:", i+1, "of", n, "; time elapsed:", datetime.now()-start_time)

        ## ----- Data preparation:
        validation_idents = [idents[i]]
        test_idents = [idents[i-1]]

        train_idents = []
        for ident in idents:
            if (ident not in test_idents) and (ident not in validation_idents):
                train_idents.append(ident)

        # save test id to see which id yielded which accuracies
        accs['test id'].append(test_idents[0])
        
        # Load data (xt-raw phase data, y-class, br-breathing data, hb-heartbeat data, cmb-combined [br,hb])
        xt_train, y_train, br_train, hb_train, cmb_train = get_data_from_idents_br_hb(path, train_idents, seconds)
        xt_valid, y_valid, br_valid, hb_valid, cmb_valid = get_data_from_idents_br_hb(path, validation_idents, seconds)
        xt_test, y_test, br_test, hb_test, cmb_test = get_data_from_idents_br_hb(path, test_idents, seconds)

        # Scale data with standard scaler then MinMax scaler
        # Raw Phase data:
        xt_train = scale_data(xt_train, standardScaler=True, minMaxScaler=True)
        xt_valid = scale_data(xt_valid, standardScaler=True, minMaxScaler=True)
        xt_test = scale_data(xt_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted breathing data:
        br_train = scale_data(br_train, standardScaler=True, minMaxScaler=True)
        br_valid = scale_data(br_valid, standardScaler=True, minMaxScaler=True)
        br_test = scale_data(br_test, standardScaler=True, minMaxScaler=True)
        # Hand extracted Heartbeat data:
        hb_train = scale_data(hb_train, standardScaler=True, minMaxScaler=True)
        hb_valid = scale_data(hb_valid, standardScaler=True, minMaxScaler=True)
        hb_test = scale_data(hb_test, standardScaler=True, minMaxScaler=True)
        # Combined breathing and heartbeat data (joined together into one matrix)
        cmb_train = scale_data(cmb_train, standardScaler=True, minMaxScaler=True)
        cmb_valid = scale_data(cmb_valid, standardScaler=True, minMaxScaler=True)
        cmb_test = scale_data(cmb_test, standardScaler=True, minMaxScaler=True)
        
        
        
        ## ----- Classify without autoencoders:
        # Phase classifier:
        model = LSTM_classifier_base(params_phase)
        # reshape data for the classifier
        xtt_train = xt_train.reshape(-1, xt_train[0].shape[0], 1)
        xtt_valid = xt_valid.reshape(-1, xt_valid[0].shape[0], 1)
        xtt_test = xt_test.reshape(-1, xt_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, xtt_train, y_train, params['batch_size'], params['epochs'],
                                               xtt_valid, y_valid, xtt_test, y_test)
        accs['phase'].append(curr_acc)

        # Breathing classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        brt_train = br_train.reshape(-1, br_train[0].shape[0], 1)
        brt_valid = br_valid.reshape(-1, br_valid[0].shape[0], 1)
        brt_test = br_test.reshape(-1, br_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, brt_train, y_train, params['batch_size'], params['epochs'],
                                               brt_valid, y_valid, brt_test, y_test)
        accs['breathing'].append(curr_acc)

        # Heartbeat classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        hbt_train = hb_train.reshape(-1, hb_train[0].shape[0], 1)
        hbt_valid = hb_valid.reshape(-1, hb_valid[0].shape[0], 1)
        hbt_test = hb_test.reshape(-1, hb_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, hbt_train, y_train, params['batch_size'], params['epochs'],
                                               hbt_valid, y_valid, hbt_test, y_test)
        accs['heartbeat'].append(curr_acc)

        # Combined classifier:
        model = LSTM_classifier_base(params_br_hb)
        # reshape data for the classifier
        cmbt_train = cmb_train.reshape(-1, cmb_train[0].shape[0], 1)
        cmbt_valid = cmb_valid.reshape(-1, cmb_valid[0].shape[0], 1)
        cmbt_test = cmb_test.reshape(-1, cmb_test[0].shape[0], 1)
        # train and evaluate
        sc, curr_acc, epoch_data = model_train(model, cmbt_train, y_train, params['batch_size'], params['epochs'],
                                               cmbt_valid, y_valid, cmbt_test, y_test)
        accs['combined br hb'].append(curr_acc)

        
        
        ## ----- Classify with autoencoders:
        # AE Training params
        batch_size = 256
        epochs = 100
        encoding_dim = 64

        # undercomplete AE
        autoencoder, encoded = undercomplete_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['undercomplete'].append(curr_acc)

        # sparse AE
        autoencoder, encoded = sparse_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['sparse'].append(curr_acc)

        # deep AE
        autoencoder, encoded = deep_ae(xt_train, enc_layers=[512,256], encoding_dim=encoding_dim, dec_layers=[256,512])
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['deep'].append(curr_acc)

        # contractive AE
        autoencoder, encoded = contractive_ae(xt_train, encoding_dim)
        sc, curr_acc, epoch_data = model_train(autoencoder, xt_train, xt_train, batch_size, epochs,
                                               xt_valid, xt_valid, xt_test, xt_test)
        model = LSTM_classifier(encoded, params)
        model = Model(inputs=autoencoder.inputs, outputs=model)
        model.compile(loss=params['loss'],
                      optimizer=params['optimizer'],
                      metrics=metrics)
        sc, curr_acc, epoch_data = model_train(model, xt_train, y_train, params['batch_size'], params['epochs'],
                                               xt_valid, y_valid, xt_test, y_test)
        accs['contractive'].append(curr_acc)

end_time = datetime.now()
elapsed_time = end_time - start_time
print("Completed!", "Time elapsed:", elapsed_time)

iteration: 1 of 7 ; time elapsed: 0:00:00.008976
iteration: 2 of 7 ; time elapsed: 0:36:29.165944
iteration: 3 of 7 ; time elapsed: 1:22:22.883035
iteration: 4 of 7 ; time elapsed: 2:17:20.406974
iteration: 5 of 7 ; time elapsed: 3:07:58.224104
Completed! Time elapsed: 4:08:35.281540


In [55]:
# print accuracies of each method and corresponding id which yielded that accuracy (same row)
pandas.DataFrame.from_dict(accs)

Unnamed: 0,phase,breathing,heartbeat,combined br hb,undercomplete,sparse,deep,contractive,test id
0,0.45,0.5,0.625,0.658333,0.475,0.383333,0.541667,0.533333,62i9y
1,0.591667,0.641667,0.541667,0.566667,0.633333,0.566667,0.641667,0.566667,2gu87
2,0.616667,0.433333,0.441667,0.466667,0.5,0.475,0.583333,0.525,iz2ps
3,0.258333,0.458333,0.491667,0.466667,0.591667,0.575,0.525,0.508333,1mpau
4,0.583333,0.416667,0.5,0.5,0.516667,0.5,0.425,0.483333,7dwjy


In [56]:
# print some statistics for each method
print_accs_stats(accs)

           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.258333   0.416667   0.441667        0.466667       0.475000  0.383333  0.425000     0.483333
max     0.616667   0.641667   0.625000        0.658333       0.633333  0.575000  0.641667     0.566667
mean    0.500000   0.490000   0.520000        0.531667       0.543333  0.500000  0.543333     0.523333
median  0.583333   0.458333   0.500000        0.500000       0.516667  0.500000  0.541667     0.525000


###  Compare Accuracies

Print min, max, mean, median for each clasifier/autoencoder combination:

In [57]:
for classifier in accuracies:
    print("-----------", classifier + ":", "-----------")
    accs = accuracies[classifier]
    print_accs_stats(accs)
    print("\n")

----------- kNN: -----------
           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.391667   0.400000   0.458333        0.416667       0.508333  0.391667  0.416667     0.433333
max     0.700000   0.608333   0.583333        0.683333       0.683333  0.608333  0.641667     0.691667
mean    0.554762   0.503571   0.544048        0.503571       0.558333  0.495238  0.528571     0.535714
median  0.558333   0.541667   0.575000        0.475000       0.541667  0.491667  0.516667     0.525000


----------- SVC: -----------
           phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive
min     0.300000   0.308333   0.441667        0.408333       0.416667  0.483333  0.375000     0.433333
max     0.616667   0.650000   0.516667        0.633333       0.625000  0.625000  0.616667     0.691667
mean    0.498810   0.495238   0.494048        0.498810       0.505952  0.521429  0.521429     0.536905
median  0.533

Print all accuracies in table form:

In [58]:
for classifier in accuracies:
    print(classifier + ":")
#     print(pandas.DataFrame.from_dict(accuracies[classifier]))
    # Using .to_string() gives nicer loooking results (doesn't split into new line)
    print(pandas.DataFrame.from_dict(accuracies[classifier]).to_string())
    print("\n")

kNN:
      phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive test id
0  0.458333   0.408333   0.575000        0.525000       0.550000  0.391667  0.475000     0.525000   62i9y
1  0.658333   0.400000   0.583333        0.458333       0.541667  0.525000  0.575000     0.558333   2gu87
2  0.575000   0.441667   0.583333        0.475000       0.558333  0.466667  0.516667     0.583333   iz2ps
3  0.541667   0.608333   0.491667        0.541667       0.541667  0.508333  0.466667     0.483333   1mpau
4  0.558333   0.550000   0.541667        0.683333       0.525000  0.491667  0.416667     0.433333   7dwjy
5  0.391667   0.541667   0.575000        0.425000       0.508333  0.475000  0.641667     0.475000   7swyk
6  0.700000   0.575000   0.458333        0.416667       0.683333  0.608333  0.608333     0.691667   94mnx


SVC:
      phase  breathing  heartbeat  combined br hb  undercomplete    sparse      deep  contractive test id
0  0.300000   0.491667   0.500000 