In [1]:
import datareader # made by the previous author for reading the collected data
import dataextractor # same as above
import pandas
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import Adam, Nadam
import tensorflow.keras.backend as K
tf.keras.backend.set_floatx('float32') # make float32 the default Keras float type (avoids a warning message)
# Metric names kept in one place. NOTE(review): presumably passed to
# model.compile(metrics=...) in a later cell - not used in the cells shown here.
metrics = ['accuracy']

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import json
from datetime import datetime
import warnings
import matplotlib.pyplot as plt

In [2]:
import random

# Seed each random number generator used by the notebook (Python's `random`,
# NumPy's legacy global generator and TensorFlow) with a fixed value.
random.seed(1)
np.random.seed(4)
tf.random.set_seed(2)

This is a modified version of the original author's code for reading the data:

In [3]:
def model_train(model, x_train, y_train, batch_size, epochs, x_valid, y_valid, x_test, y_test):
    """Fit ``model`` on the training data, then evaluate it on the test data.

    Both ``model.fit`` and ``model.evaluate`` are run silently (``verbose=0``)
    and with the same ``batch_size``. The validation set is only handed to
    ``fit`` as ``validation_data``.

    Returns
    -------
    tuple
        ``(loss, acc, history)`` where ``loss`` and ``acc`` are the first and
        second entries of the ``model.evaluate`` result (the second entry is
        the accuracy when the model was compiled with ``metrics=['accuracy']``)
        and ``history`` is whatever ``model.fit`` returned.
    """
    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        verbose=0,
    )
    test_metrics = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    test_loss = test_metrics[0]
    test_acc = test_metrics[1]
    return test_loss, test_acc, history


In [4]:
def get_busy_vs_relax_timeframes_br_hb(path, ident, seconds, checkIfValid=True):
    """Collect paired 'on task' / 'relax' windows and their features for one participant.

    For every task number listed in the participant's cognitive-load file, the
    last ``seconds`` seconds of the task recording and of the matching relax
    recording are cut out, breathing and heartbeat features are extracted from
    both, and the pair is appended to the result (task row first, relax row
    second).

    A pair is skipped when

    * feature extraction or row assembly raises ``ValueError`` (for example
      because a window holds fewer than ``seconds * samp_rate`` samples), or
    * ``checkIfValid`` is true and any of the four ``br_ok`` / ``hr_ok`` flags
      reported by the extractor is false.

    Parameters
    ----------
    path : passed to ``datareader.DataReader`` (location of the study data).
    ident : participant identifier; also used to build the name of the
        ``<ident>-primary-extract.txt`` file.
    seconds : window length in seconds.
    checkIfValid : when true, keep only pairs whose quality flags are all set.

    Returns
    -------
    tasks_data : array, shape (n, seconds * samp_rate)
        Raw signal windows, passed through ``np.unwrap`` row by row.
    tasks_y : array, shape (n, 1)
        Class labels: 1 for task rows, 0 for relax rows.
    breathing : array, shape (n, 12)
        Breathing features (extractor output without its last column).
    heartbeat : array, shape (n, 10)
        Heartbeat features (extractor output without its last column).
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    # sample rate estimated as (number of samples) / (largest time stamp)
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')
    window_len = seconds * samp_rate

    tasks_data = np.empty((0, window_len))
    tasks_y = np.empty((0, 1))
    breathing = np.empty((0, 12))
    heartbeat = np.empty((0, 10))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)
    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    for task_number in cog_res['task_number']:
        task_idx = task_number - 225  # 0 - 17
        busy_start, busy_stop = busy_n[task_idx][0], busy_n[task_idx][1]
        relax_start, relax_stop = relax_n[task_idx][0], relax_n[task_idx][1]

        ### task versus relax (1 sample each)
        dataextract = dataextractor.DataExtractor(data[0][busy_start:busy_stop],
                                                  data[1][busy_start:busy_stop],
                                                  samp_rate)
        dataextract_relax = dataextractor.DataExtractor(data[0][relax_start:relax_stop],
                                                        data[1][relax_start:relax_stop],
                                                        samp_rate)

        try:
            # last `seconds` seconds of each recording
            busy_t, busy_y = dataextract.t[-window_len:], dataextract.y[-window_len:]
            relax_t, relax_y = dataextract_relax.t[-window_len:], dataextract_relax.y[-window_len:]

            # extracted features for breathing
            br_busy = dataextract.extract_from_breathing_time(busy_t, busy_y)
            br_relax = dataextract_relax.extract_from_breathing_time(relax_t, relax_y)
            # extracted features for heartbeat; the relax window goes through
            # the relax extractor, matching the breathing call above (the
            # original routed it through the busy extractor)
            hb_busy = dataextract.extract_from_heartbeat_time(busy_t, busy_y)
            hb_relax = dataextract_relax.extract_from_heartbeat_time(relax_t, relax_y)

            if checkIfValid and not (br_busy['br_ok'][0] and br_relax['br_ok'][0]
                                     and hb_busy['hr_ok'][0] and hb_relax['hr_ok'][0]):
                # at least one quality flag is false: drop this pair
                continue

            # Assemble the busy/relax pair against correctly shaped empty
            # templates: np.vstack raises ValueError when a row has the wrong
            # length, which is how short windows get rejected.
            pair_signal = np.vstack((np.empty((0, window_len)), busy_y, relax_y))
            pair_breathing = np.vstack((np.empty((0, 12)),
                                        br_busy.to_numpy(dtype='float64', na_value=0)[0][:-1],
                                        br_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))
            pair_heartbeat = np.vstack((np.empty((0, 10)),
                                        hb_busy.to_numpy(dtype='float64', na_value=0)[0][:-1],
                                        hb_relax.to_numpy(dtype='float64', na_value=0)[0][:-1]))
        except ValueError:
            # ignore short windows
            continue

        # put the busy row then the relax row under the previous rows
        tasks_data = np.vstack((tasks_data, pair_signal))
        tasks_y = np.vstack((tasks_y, [[1], [0]]))
        breathing = np.vstack((breathing, pair_breathing))
        heartbeat = np.vstack((heartbeat, pair_heartbeat))

    # Unwrap each row once at the end instead of re-unwrapping the whole
    # accumulated matrix after every task; left untouched when nothing was kept.
    if tasks_data.shape[0]:
        tasks_data = np.unwrap(tasks_data)

    return tasks_data, tasks_y, breathing, heartbeat

In [5]:
def get_data_from_idents_br_hb(path, idents, seconds, checkIfValid=True):
    """Stack the windows, labels and features of every participant in ``idents``.

    Each participant is processed with ``get_busy_vs_relax_timeframes_br_hb``
    and the four arrays it returns are appended row-wise to the running totals.
    The signal matrix is created with ``43 * seconds`` columns (hard-coded
    sample rate), so the per-participant windows must have that width.

    Returns
    -------
    tuple
        ``(data, ys, brs, hbs, combined)``: raw windows, labels, the 12
        breathing features, the 10 heartbeat features, and the breathing and
        heartbeat features joined column-wise (22 columns).
    """
    hard_coded_rate = 43  # hard-coded sample rate
    # running totals, in the order: signal windows, labels, breathing, heartbeat
    stacks = [
        np.empty((0, hard_coded_rate * seconds)),
        np.empty((0, 1)),
        np.empty((0, 12)),
        np.empty((0, 10)),
    ]

    # RuntimeWarnings raised while processing the data are silenced on purpose
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for ident in idents:
            x, y, br, hb = get_busy_vs_relax_timeframes_br_hb(path, ident, seconds, checkIfValid)
            stacks = [np.vstack(pair) for pair in zip(stacks, (x, y, br, hb))]
        data, ys, brs, hbs = stacks
        combined = np.hstack((brs, hbs))

    return data, ys, brs, hbs, combined

## Prepare data

Initialize variables:

In [6]:
# dictionary for storing accuracies so that they can be compared
# (empty here; NOTE(review): presumably filled by later cells)
accuracies = {}

# settings used for reading the data into arrays
seconds = 30  # time window length, in seconds
samp_rate = 43  # hard-coded sample rate (samples per second)
# Empty, correctly shaped templates (0 rows) for the signal windows, labels and
# feature matrices. NOTE(review): not used in the cells visible here.
phase_shape = np.empty((0, samp_rate*seconds))
y_shape = np.empty((0, 1))
breathing_shape = np.empty((0,12))
heartbeat_shape = np.empty((0,10))
combined_shape = np.empty((0,22))
# participant identifiers, passed to the data reader
idents = ['2gu87', 'iz2ps', '1mpau', '7dwjy', '7swyk', '94mnx', 'bd47a', 'c24ur', 'ctsax', 'dkhty', 'e4gay',
              'ef5rq', 'f1gjp', 'hpbxa', 'pmyfl', 'r89k1', 'tn4vl', 'td5pr', 'gyqu9', 'fzchw', 'l53hg', '3n2f9',
              '62i9y']
# location of the study data, relative to this notebook
path = '../../../StudyData/'


# number of participants; already set to len(idents), i.e. all of the data.
# NOTE(review): n is not used in the cells visible here.
n = len(idents)

# testing busy vs relax

In [7]:
# Count, per participant, how many task/relax pairs pass the breathing ('br_ok')
# and heartbeat ('hr_ok') quality checks reported by the feature extractor.
# The loop variables (tmpBR_busy, tmpHB_relax, ...) are deliberately left at
# top level because the following cells display them.
isOk_dict = {}
checkIfValid = True  # same setting as the default of get_busy_vs_relax_timeframes_br_hb
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    for ident in idents:
        # one counter row per participant (existing counts are kept if an ident repeats)
        id_dict = isOk_dict.setdefault(ident, dict.fromkeys(
            ('total', 'br_busy', 'br_relax', 'br_both', 'hr_busy', 'hr_relax', 'hr_both', 'rowOk'), 0))

        dataread = datareader.DataReader(path, ident)  # initialize path to data
        data = dataread.read_grc_data()  # read from files
        # sample rate estimated as (number of samples) / (largest time stamp)
        samp_rate = int(round(len(data[1]) / max(data[0])))
        cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

        busy_n = dataread.get_data_task_timestamps(return_indexes=True)
        relax_n = dataread.get_relax_timestamps(return_indexes=True)

        for i in cog_res['task_number']:
            task_num_table = i - 225  # 0 - 17

            ### task versus relax (1 sample each)
            dataextract = dataextractor.DataExtractor(data[0][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                      data[1][busy_n[task_num_table][0]:busy_n[task_num_table][1]],
                                                      samp_rate)

            dataextract_relax = dataextractor.DataExtractor(data[0][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                            data[1][relax_n[task_num_table][0]:relax_n[task_num_table][1]],
                                                            samp_rate)

            id_dict['total'] += 1
            try:
                # get extracted features for breathing
                tmpBR_busy = dataextract.extract_from_breathing_time(dataextract.t[-samp_rate*seconds:],
                                                                     dataextract.y[-samp_rate*seconds:])
                tmpBR_relax = dataextract_relax.extract_from_breathing_time(dataextract_relax.t[-samp_rate*seconds:],
                                                                            dataextract_relax.y[-samp_rate*seconds:])
                # get extracted features for heartbeat; the relax window goes
                # through the relax extractor, matching the breathing call above
                # (the original routed it through the busy extractor)
                tmpHB_busy = dataextract.extract_from_heartbeat_time(dataextract.t[-samp_rate*seconds:],
                                                                     dataextract.y[-samp_rate*seconds:])
                tmpHB_relax = dataextract_relax.extract_from_heartbeat_time(dataextract_relax.t[-samp_rate*seconds:],
                                                                            dataextract_relax.y[-samp_rate*seconds:])

                br_busy_ok = bool(tmpBR_busy['br_ok'][0])
                br_relax_ok = bool(tmpBR_relax['br_ok'][0])
                hr_busy_ok = bool(tmpHB_busy['hr_ok'][0])
                hr_relax_ok = bool(tmpHB_relax['hr_ok'][0])

                id_dict['br_busy'] += int(br_busy_ok)
                id_dict['br_relax'] += int(br_relax_ok)
                id_dict['br_both'] += int(br_busy_ok and br_relax_ok)

                id_dict['hr_busy'] += int(hr_busy_ok)
                id_dict['hr_relax'] += int(hr_relax_ok)
                id_dict['hr_both'] += int(hr_busy_ok and hr_relax_ok)

                if checkIfValid and not (br_busy_ok and br_relax_ok and hr_busy_ok and hr_relax_ok):
                    # this pair would not be added to the returned data
                    continue

                id_dict['rowOk'] += 1

            except ValueError:
                # ignore windows for which feature extraction fails
                continue



In [8]:
# Inspect the most recent task-window breathing feature frame left over from the counting loop
tmpBR_busy

Unnamed: 0,br_rate,br_change_in_rate_start_end,br_freq_6_12,br_freq_12_18,br_freq_18_24,br_freq_24_30,br_IBI_mean,br_IBI_std,br_raw_mean,br_raw_median,br_raw_std,br_raw_rms,br_ok
0,8.822667,12.609971,3.448499,2.622062,1.437031,0.77395,3.412493,1.551748,-0.005893,0.016639,0.708765,0.70879,False


In [9]:
# Inspect the most recent relax-window heartbeat feature frame left over from the counting loop
tmpHB_relax

Unnamed: 0,hr_rate,hr_change_in_rate_start_end,hr_RR_mean,hr_SDNN,hr_RMSSD,hr_pNN50,hr_pNN70,hr_HRV_lf,hr_HRV_hf,hr_HRV_lf_hf,hr_ok
0,124.777723,25.219941,0.892642,0.15852,0.906608,1.0,0.842105,0.662263,4.158368,0.15926,False


In [10]:
# One row per participant, one column per counter
pandas.DataFrame.from_dict(isOk_dict).T
# br_both means breathing busy and relax were both ok in the same iteration
# hr_both means heart rate busy and relax were both ok in the same iteration
# rowOk means br_both and hr_both were both ok in the same iteration

Unnamed: 0,total,br_busy,br_relax,br_both,hr_busy,hr_relax,hr_both,rowOk
2gu87,18,13,14,10,9,7,3,2
iz2ps,18,14,14,10,11,10,7,5
1mpau,18,13,16,12,6,9,4,3
7dwjy,18,12,10,6,10,3,2,1
7swyk,18,18,8,8,11,2,2,2
94mnx,18,12,6,6,4,7,2,0
bd47a,18,11,8,5,7,7,3,1
c24ur,18,15,14,12,8,14,7,6
ctsax,18,11,9,4,10,6,2,0
dkhty,18,16,9,8,8,8,4,3


In [11]:
# Sum every counter over all participants
pandas.DataFrame.from_dict(isOk_dict).T.sum()

total       414
br_busy     298
br_relax    255
br_both     189
hr_busy     193
hr_relax    169
hr_both      78
rowOk        54
dtype: int64

In [12]:
# Build the data set from all participants; checkIfValid keeps its default (True),
# so only pairs whose quality flags are all set are included.
phase, y, breathing, heartbeat, combined = get_data_from_idents_br_hb(path, idents, seconds)
breathing.shape

(102, 12)

In [13]:
# Heartbeat feature matrix: one row per kept window, 10 feature columns
heartbeat.shape

(102, 10)