## Feature and model imports

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
module_path = os.path.abspath(os.path.join("../care_for_me"))
sys.path.insert(0, module_path)
module_path = os.path.abspath(os.path.join(".."))
sys.path.insert(0, module_path)

import biosppy as bp
import care_for_me
import heartpy as hp
import neurokit2 as nk
import numpy as np
import pandas as pd
import pyhrv
import pyhrv.time_domain as td
import scipy

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

from sklearn.svm import SVC
# from lightgbm import LightGBM
# from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier


In [2]:
# Statistical/common feature extraction methods

# Window length and overlap between consecutive windows. Both are multiplied by
# the sampling rate ``fs`` to obtain sample counts, i.e. they are expressed in
# seconds when ``fs`` is given in Hz.
WINDOW_SIZE = 60
OVERLAP = 30


def _iter_windows(data, fs):
    """
    Yield consecutive, overlapping windows of ``data``.

    Windows are ``int(WINDOW_SIZE*fs)`` samples long and each new window starts
    ``int(OVERLAP*fs)`` samples before the previous one ended. If the signal is
    not longer than one window, ``data`` itself is yielded once. The final
    window may be shorter than the others because slicing is truncated at the
    end of the signal.

    :param data: 1-D signal exposing ``.size`` and slice indexing.
    :param fs: Sampling rate used to convert the window constants to samples.
    :raises ValueError: If the overlap is not smaller than the window, which
        would otherwise make the window never advance.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)
    n = data.size

    if window_size >= n:
        yield data
        return

    if window_size - overlap <= 0:
        raise ValueError(
            f"Window ({window_size} samples) must be longer than the overlap ({overlap} samples)."
        )

    start = 0
    while True:
        stop = start + window_size
        yield data[start:stop]
        if stop >= n:
            break
        start = stop - overlap


def _windowed_feature(data, fs, func):
    """Apply ``func`` to every window of ``data`` and return the results as a list."""
    return [func(segment) for segment in _iter_windows(data, fs)]


def _value_range(segment):
    """Difference between the largest and smallest value of ``segment``."""
    return np.max(segment) - np.min(segment)


def extract_mean(data, fs):
    """Return the mean of each window of ``data`` as a list (one value per window)."""
    return _windowed_feature(data, fs, np.mean)


def extract_med(data, fs):
    """Return the median of each window of ``data`` as a list (one value per window)."""
    return _windowed_feature(data, fs, np.median)


def extract_std(data, fs):
    """Return the standard deviation (``np.std``) of each window of ``data`` as a list."""
    return _windowed_feature(data, fs, np.std)


def extract_var(data, fs):
    """Return the variance (``np.var``) of each window of ``data`` as a list."""
    return _windowed_feature(data, fs, np.var)


def extract_range(data, fs):
    """Return ``max - min`` of each window of ``data`` as a list (one value per window)."""
    return _windowed_feature(data, fs, _value_range)


def extract_peak(data, fs):
    """Return the maximum of each window of ``data`` as a list (one value per window)."""
    return _windowed_feature(data, fs, np.max)


def extract_slope(data, fs):
    """
    Return ``np.gradient`` of each window of ``data`` as a list.

    NOTE(review): unlike the other extractors, each list element is an array
    with one gradient value per sample of the window, not a single scalar.
    """
    return _windowed_feature(data, fs, np.gradient)

In [3]:
# Extract power from frequency bands

def bandpower(x, fs, fmin, fmax):
    """
    Integrate the periodogram of ``x`` between ``fmin`` and ``fmax``.

    The integration runs from the last frequency bin that is ``<= fmin`` up to
    (but excluding) the last bin that is ``<= fmax``, using the trapezoidal rule.

    :param x: Input signal.
    :param fs: Sampling rate passed to ``scipy.signal.periodogram``.
    :param fmin: Lower band edge.
    :param fmax: Upper band edge.
    :return: Integrated spectral density over the band; 0 when the band lies
        entirely above the highest frequency bin.
    """
    import scipy.signal  # make sure the submodule is loaded, not just the top-level package

    f, Pxx = scipy.signal.periodogram(x, fs=fs)

    # Index of the last bin <= band edge. The previous ``argmax(f > edge) - 1``
    # form produced -1 when the edge was below the first bin, which turned the
    # slice start into "last element" and silently returned 0 power.
    # assumes f is sorted ascending (one-sided spectrum of a real signal) — TODO confirm for complex input
    ind_min = max(int(np.searchsorted(f, fmin, side="right")) - 1, 0)
    ind_max = max(int(np.searchsorted(f, fmax, side="right")) - 1, 0)

    # np.trapezoid only exists in NumPy >= 2.0; fall back to np.trapz on older versions.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    return trapezoid(Pxx[ind_min: ind_max], f[ind_min: ind_max])

def extract_freq_power(data, fs, low, high):
    """
    Compute the band power between ``low`` and ``high`` for each window of ``data``.

    Windows are ``int(WINDOW_SIZE*fs)`` samples long and consecutive windows
    share ``int(OVERLAP*fs)`` samples. A signal that is not longer than one
    window is processed as a single segment.

    :param data: 1-D signal exposing ``.size`` and slice indexing.
    :param fs: Sampling rate of ``data``.
    :param low: Lower band edge passed to ``bandpower``.
    :param high: Upper band edge passed to ``bandpower``.
    :return: List with one band-power value per window.
    """
    samples_per_window = int(WINDOW_SIZE*fs)
    shared_samples = int(OVERLAP*fs)
    total = data.size

    # Short signal: a single segment covering everything.
    if samples_per_window >= total:
        return [bandpower(data, fs, low, high)]

    powers = []
    begin = 0
    while True:
        end = begin + samples_per_window
        powers.append(bandpower(data[begin:end], fs, low, high))
        if end >= total:
            # This window reached (or ran past) the end of the signal.
            break
        begin = end - shared_samples
    return powers

In [4]:
# ECG feature extraction methods

def extract_ecg_features_pyhrv(data, fs):
    """
    Compute heart rate, RMSSD and SDNN for each window of an ECG signal.

    R-peaks and heart rate come from ``biosppy``; RMSSD and SDNN come from
    ``pyhrv.time_domain`` applied to the R-peak times.

    :param data: 1-D ECG signal exposing ``.size`` and slice indexing.
    :param fs: Sampling rate of ``data``.
    :return: Tuple ``(hr, rmssd, sdnn)`` of lists with one entry per window, or
        ``None`` if ``data`` is empty.
    """
    n = data.size
    if n == 0:
        print("ECG signal has length 0, returning None")
        return None

    hr = []
    rmssd = []
    sdnn = []

    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)

    if window_size >= n:
        # Signal not longer than one window: process it as a single segment.
        # Unlike the windowed path below, the signal is not cleaned with
        # neurokit first and failures are not converted to NaN.
        t, _, rpeaks, _, _, _, bpm = bp.signals.ecg.ecg(signal=data, sampling_rate=fs, show=False)
        bpm = np.mean(bpm)
        rmssd_segment = td.rmssd(rpeaks=t[rpeaks])["rmssd"]
        sdnn_segment = td.sdnn(rpeaks=t[rpeaks])["sdnn"]

        hr.append(bpm)
        rmssd.append(rmssd_segment)
        sdnn.append(sdnn_segment)
        return hr, rmssd, sdnn

    start = 0
    while True:
        stop = start + window_size
        cleaned, _ = nk.ecg_process(data[start:stop], sampling_rate=fs)
        t, _, rpeaks, _, _, _, bpm = bp.signals.ecg.ecg(
            signal=cleaned["ECG_Clean"], sampling_rate=fs, show=False
        )
        try:
            bpm = np.mean(bpm)
            rmssd_segment = td.rmssd(rpeaks=t[rpeaks])["rmssd"]
            sdnn_segment = td.sdnn(rpeaks=t[rpeaks])["sdnn"]
        except Exception:
            # Best effort: a window whose HRV metrics cannot be computed
            # contributes NaN to all three features so the lists stay aligned.
            bpm = np.nan
            rmssd_segment = np.nan
            sdnn_segment = np.nan
        hr.append(bpm)
        rmssd.append(rmssd_segment)
        sdnn.append(sdnn_segment)

        if stop >= n:
            # This window reached (or ran past) the end of the signal.
            break
        start = stop - overlap
    return hr, rmssd, sdnn
    
def extract_hr(data, fs):
    """
    Return the per-window mean heart rate of an ECG signal.

    :param data: 1-D ECG signal.
    :param fs: Sampling rate of ``data``.
    :return: List with one heart-rate value per window, or ``None`` when
        ``extract_ecg_features_pyhrv`` returns ``None`` (empty signal).
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        # Unpacking None would raise TypeError; pass the "no data" result through.
        return None
    hr, _, _ = features
    return hr

def extract_rmssd(data, fs):
    """
    Return the per-window RMSSD of an ECG signal.

    :param data: 1-D ECG signal.
    :param fs: Sampling rate of ``data``.
    :return: List with one RMSSD value per window, or ``None`` when
        ``extract_ecg_features_pyhrv`` returns ``None`` (empty signal).
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        # Unpacking None would raise TypeError; pass the "no data" result through.
        return None
    _, rmssd, _ = features
    return rmssd

def extract_sdnn(data, fs):
    """
    Return the per-window SDNN of an ECG signal.

    :param data: 1-D ECG signal.
    :param fs: Sampling rate of ``data``.
    :return: List with one SDNN value per window, or ``None`` when
        ``extract_ecg_features_pyhrv`` returns ``None`` (empty signal).
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        # Unpacking None would raise TypeError; pass the "no data" result through.
        return None
    _, _, sdnn = features
    return sdnn

def extract_hr_mean(data, fs):
    """
    Return the mean heart rate of an ECG signal, averaged over all windows.

    The heart rate is computed with ``biosppy`` for each window and the window
    means are then averaged into a single value.

    :param data: 1-D ECG signal exposing ``.size`` and slice indexing.
    :param fs: Sampling rate of ``data``.
    :return: Single-element list holding the mean of the per-window heart rates.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)
    n = data.size
    out = []

    if window_size >= n:
        # Signal not longer than one window: single segment, no neurokit cleaning.
        _, _, _, _, _, _, bpm = bp.signals.ecg.ecg(signal=data, sampling_rate=fs, show=False)
        out.append(np.mean(bpm))
    else:
        start = 0
        while True:
            stop = start + window_size
            cleaned, _ = nk.ecg_process(data[start:stop], sampling_rate=fs)
            _, _, _, _, _, _, bpm = bp.signals.ecg.ecg(
                signal=cleaned["ECG_Clean"], sampling_rate=fs, show=False
            )
            try:
                bpm = np.mean(bpm)
            except Exception:
                # Best effort: a window without a usable heart rate becomes NaN.
                bpm = np.nan
            out.append(bpm)
            if stop >= n:
                # This window reached (or ran past) the end of the signal.
                break
            start = stop - overlap

    # NOTE(review): a single NaN window makes the overall mean NaN; np.nanmean
    # may be what is intended here — confirm before changing.
    out = [np.mean(out)]
    return out

def extract_tinn(data, fs):
    """
    Extract the ``"tinn"`` entry of ``pyhrv.hrv.hrv`` for each window of ``data``.

    Windows are ``int(WINDOW_SIZE*fs)`` samples long with ``int(OVERLAP*fs)``
    shared samples; a signal not longer than one window is processed whole.

    :param data: 1-D ECG signal exposing ``.size`` and slice indexing.
    :param fs: Sampling rate of ``data``.
    :return: List with one ``"tinn"`` value per window.
    """
    def _tinn(segment):
        # NOTE(review): the keyword is spelled ``plot_Tachogram`` here; verify
        # against the pyhrv.hrv.hrv signature.
        return pyhrv.hrv.hrv(
            signal=segment, sampling_rate=fs, plot_ecg=False, plot_Tachogram=False, show=False
        )["tinn"]

    samples_per_window = int(WINDOW_SIZE*fs)
    shared_samples = int(OVERLAP*fs)
    total = data.size

    if samples_per_window >= total:
        return [_tinn(data)]

    values = []
    begin = 0
    while True:
        end = begin + samples_per_window
        values.append(_tinn(data[begin:end]))
        if end >= total:
            break
        begin = end - shared_samples
    return values

def extract_nn50(data, fs):
    """
    Extract the ``"nn50"`` entry of ``pyhrv.hrv.hrv`` for each window of ``data``.

    Windows are ``int(WINDOW_SIZE*fs)`` samples long with ``int(OVERLAP*fs)``
    shared samples; a signal not longer than one window is processed whole.

    :param data: 1-D ECG signal exposing ``.size`` and slice indexing.
    :param fs: Sampling rate of ``data``.
    :return: List with one ``"nn50"`` value per window.
    """
    def _nn50(segment):
        # NOTE(review): the keyword is spelled ``plot_Tachogram`` here; verify
        # against the pyhrv.hrv.hrv signature.
        return pyhrv.hrv.hrv(
            signal=segment, sampling_rate=fs, plot_ecg=False, plot_Tachogram=False, show=False
        )["nn50"]

    samples_per_window = int(WINDOW_SIZE*fs)
    shared_samples = int(OVERLAP*fs)
    total = data.size

    if samples_per_window >= total:
        return [_nn50(data)]

    values = []
    begin = 0
    while True:
        end = begin + samples_per_window
        values.append(_nn50(data[begin:end]))
        if end >= total:
            break
        begin = end - shared_samples
    return values

def extract_ulf(data, fs):
    """Per-window band power of ``data`` in the 0 to 0.03 band (see ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 0, 0.03)

def extract_lf(data, fs):
    """
    Per-window band power of ``data`` in the 0.03 to 0.5 band (see ``extract_freq_power``).

    NOTE(review): this band fully contains the 0.12 to 0.488 band used by
    ``extract_hf``; confirm the upper edge is intended.
    """
    return extract_freq_power(data, fs, 0.03, 0.5)

def extract_hf(data, fs):
    """Per-window band power of ``data`` in the 0.12 to 0.488 band (see ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 0.12, 0.488)

def extract_uhf(data, fs):
    """
    Per-window band power of ``data`` in the 150 to 250 band (see ``extract_freq_power``).

    NOTE(review): a periodogram only covers frequencies up to fs/2, so this
    band is presumably empty unless ``fs`` exceeds 300 — confirm the intended
    sampling rate.
    """
    return extract_freq_power(data, fs, 150, 250)

def extract_lf_hf_ratio(data, fs):
    """
    Per-window ratio of LF to HF band power.

    :param data: 1-D signal.
    :param fs: Sampling rate of ``data``.
    :return: NumPy array (not a list) with ``extract_lf / extract_hf`` per
        window. Windows with zero HF power are not special-cased.
    """
    return np.divide(extract_lf(data, fs), extract_hf(data, fs))

In [12]:
# EDA feature extraction

# Minimum threshold by which to exclude SCRs (peaks) as relative to the largest amplitude in the signal (from neurokit documentation)
MIN_AMP = 0.3
def extract_eda_features_nk(signal, fs):
    """
    Split an EDA signal into a tonic component and an SCR peak indicator.

    The signal is cast to double, scaled with ``heartpy.scale_data``, smoothed
    with a median filter of ``int(fs)`` samples and decomposed with
    ``neurokit2.eda_process``. Peaks are then detected on the phasic component
    with ``amplitude_min=MIN_AMP``.

    :param signal: EDA signal exposing ``.astype``.
    :param fs: Sampling rate of ``signal``.
    :return: Tuple ``(tonic, peaks)`` where ``peaks`` is an array with 1 at
        every detected SCR peak index and 0 elsewhere, and ``tonic`` is the
        tonic component minus ``peaks``.
    """
    signal = signal.astype(np.double)
    signal = hp.scale_data(signal)
    signal = scipy.ndimage.median_filter(signal, int(fs))
    signals, _ = nk.eda_process(signal, sampling_rate=fs)
    phasic = signals["EDA_Phasic"].to_numpy()
    tonic = signals["EDA_Tonic"].to_numpy()

    peak_info = nk.eda_findpeaks(phasic, fs, amplitude_min=MIN_AMP)
    peak_idx = peak_info["SCR_Peaks"].astype(int)

    # Binary indicator: 1 at each SCR peak sample, 0 everywhere else.
    peaks = np.zeros(phasic.shape)
    np.put(peaks, peak_idx, [1])

    # NOTE(review): this subtracts the 0/1 peak indicator (not the peak
    # amplitudes) from the tonic level; confirm that this is intended.
    tonic = tonic - peaks

    return tonic, peaks

def extract_mean_scl(data, fs):
    """
    Mean of the tonic EDA component for each window of ``data``.

    The tonic component comes from ``extract_eda_features_nk``. Windows are
    ``int(WINDOW_SIZE*fs)`` samples long with ``int(OVERLAP*fs)`` shared
    samples; a signal not longer than one window is processed whole.

    :param data: EDA signal.
    :param fs: Sampling rate of ``data``.
    :return: List with one mean value per window, or ``None`` if no tonic
        component is available.
    """
    samples_per_window = int(WINDOW_SIZE*fs)
    shared_samples = int(OVERLAP*fs)

    tonic, _ = extract_eda_features_nk(data, fs)
    if tonic is None:
        print("mean SCL is None")
        return None

    total = tonic.size
    if samples_per_window >= total:
        return [np.mean(tonic)]

    mean_scl = []
    begin = 0
    while True:
        end = begin + samples_per_window
        mean_scl.append(np.mean(tonic[begin:end]))
        if end >= total:
            break
        begin = end - shared_samples
    return mean_scl

def extract_scr_rate(data, fs):
    """
    Count the SCR peaks in each window of ``data``.

    The peak indicator comes from ``extract_eda_features_nk``. Windows are
    ``int(WINDOW_SIZE*fs)`` samples long with ``int(OVERLAP*fs)`` shared
    samples; a signal not longer than one window is processed whole.

    Despite the name, the value is the number of peaks per window; it is not
    divided by the window duration.

    :param data: EDA signal.
    :param fs: Sampling rate of ``data``.
    :return: List with one peak count per window, or ``None`` if no peak
        indicator is available.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)

    _, peaks = extract_eda_features_nk(data, fs)

    if peaks is None:
        print("SCR rate is None")
        return None

    n = peaks.size

    if window_size >= n:
        return [np.sum(peaks)]

    scr_rate = []
    start = 0
    while True:
        stop = start + window_size
        # np.sum instead of the builtin sum: avoids iterating the array
        # element by element in Python.
        scr_rate.append(np.sum(peaks[start:stop]))
        if stop >= n:
            # This window reached (or ran past) the end of the signal.
            break
        start = stop - overlap
    return scr_rate


In [163]:
# Set up parameters
from care_for_me.signals import Signals, Features


# Signals passed to SignalAcquisition in the pipeline cells below.
signal_types = [
    Signals.ECG,
    Signals.EDA
]
# Features of interest. This list is not referenced by the other cells shown in
# this notebook; the features that are actually computed are the keys of
# feature_extraction_methods below.
# NOTE(review): Features.ULF is listed here but has no extractor registered in
# feature_extraction_methods (extract_ulf is defined but never wired in).
feature_names = [
    Features.ECG_MEAN, Features.ECG_MEDIAN, Features.ECG_STD, Features.ECG_VAR,
    Features.HR, Features.RMSSD, Features.SDNN,
    Features.ULF, Features.LF, Features.HF, Features.LF_HF,
    Features.EDA_MEAN, Features.EDA_MEDIAN, Features.EDA_STD, Features.EDA_VAR,
    Features.MEAN_SCL, Features.SCR_RATE
]

# Uses default preprocessing methods

# Maps a signal name to {feature: extractor}. Every extractor defined in this
# notebook takes (data, fs) and returns one value per window.
# NOTE(review): the outer keys are the plain strings "ECG"/"EDA" while
# signal_types uses the Signals enum; presumably FeatureExtractor expects these
# strings — verify against care_for_me.
feature_extraction_methods = {
    "ECG": {
        Features.ECG_MEAN: extract_mean,
        Features.ECG_MEDIAN: extract_med,
        Features.ECG_STD: extract_std,
        Features.ECG_VAR: extract_var,
        Features.HR: extract_hr, Features.RMSSD: extract_rmssd, Features.SDNN: extract_sdnn,
        Features.LF: extract_lf, Features.HF: extract_hf, Features.LF_HF: extract_lf_hf_ratio,
    },
    "EDA": {
        Features.EDA_MEAN: extract_mean,
        Features.EDA_MEDIAN: extract_med,
        Features.EDA_STD: extract_std,
        Features.EDA_VAR: extract_var,
        Features.MEAN_SCL: extract_mean_scl, Features.SCR_RATE: extract_scr_rate
    }
}

## APD

#### NOTE:
The following subjects did not complete the speech exposure phase and were removed:
- 57
- 93
- 16
- 87
- 8
- 21
- 88
- 84
- 23

The following subjects did not complete the bug exposure task and were removed: 
- 4

In [88]:
import os
import sys
module_path = os.path.abspath(os.path.join("../care_for_me"))
sys.path.insert(0, module_path)
module_path = os.path.abspath(os.path.join(".."))
sys.path.insert(0, module_path)

import apd

# Locations of the APD dataset.
# NOTE(review): ROOT_DIR is an absolute path on one machine and has to be edited
# before the notebook can run elsewhere.
ROOT_DIR = "/Users/emilyzhou/Desktop/Research/CAREForMe/"
DATA_DIR = os.path.join(ROOT_DIR, "data")
APD_PATH = os.path.join(ROOT_DIR, "data", "APD")
SOURCE_FOLDER = os.path.join(ROOT_DIR, "data", "APD", "formatted")
METRICS = os.path.join(ROOT_DIR, "data", "metrics", "APD")

# Names of the participant groups.
ALL, HA, LA = "all", "high_anxiety_group", "low_anxiety_group"

# Subject IDs (as strings) of the high-anxiety group.
ha_participant_indices = [
    '4', '6', '7', '8', '10', '12', '15', '16', '18', '22', '26', '27', '29', '31', '32', '33', '35', '42', '45', '47', '48', '49', '54', '55', '66', '69'
]

# Subject IDs (as strings) of the low-anxiety group.
la_participant_indices = [
    '14', '21', '23', '25', '34', '39', '43', '46', '51', '57', '71', '72', '77', '78', '79', '80', '82', '83', '84', '85', '87', '88', '89', '91', '92', '93'
]

# NOTE(review): the markdown note above this cell says that subjects 57, 93, 16,
# 87, 8, 21, 88, 84, 23 and 4 were removed, yet they still appear in these
# lists — confirm whether they should be dropped here as well.

# All subjects, high-anxiety group first. Concatenate instead of calling
# list.extend(): extend() returns None (so SUBJECTS was None) and it also
# appended the low-anxiety IDs to ha_participant_indices in place.
SUBJECTS = ha_participant_indices + la_participant_indices

In [None]:
# Rename "Grove sensor reading" to "EDA"
def get_data_files(source_folder, signal_types):
    """
    Collect the data files of every subject subdirectory in ``source_folder``.

    The subject index is taken from the part of a file name before the first
    underscore. Hidden files (names starting with ".", e.g. ``.DS_Store``) are
    ignored, and subdirectories without any files are skipped instead of
    raising ``IndexError``.

    :param source_folder: Directory containing one subdirectory per subject.
    :param signal_types: Substrings; a file is kept if its name contains any of them.
    :return: Dict mapping subject index (str) to the sorted list of matching
        file paths. The list is empty if a subject has files but none match.
    """
    files_dict = {}
    # Sorted so that the result does not depend on the file-system listing order.
    for entry in sorted(os.listdir(source_folder)):
        path = os.path.join(source_folder, entry)
        if not os.path.isdir(path):
            print(f"Path {path} corresponds to a file, expecting a subdirectory.")
            continue

        file_names = sorted(f for f in os.listdir(path) if not f.startswith("."))
        if not file_names:
            print(f"Subdirectory {path} contains no data files, skipping.")
            continue

        subject = file_names[0].split("_")[0]  # Get subject index from file
        files_dict[subject] = [
            os.path.join(path, f)
            for f in file_names
            if any(signal in f for signal in signal_types)
        ]
    return files_dict

# Rewrites the formatted EDA files in place so that the sensor column is named "EDA".
data_files = get_data_files(SOURCE_FOLDER, ["EDA"])
for subject, files in data_files.items():
    for file in files:
        df = pd.read_csv(file)
        if "Grove sensor reading" not in df.columns:
            # Already renamed (or never had the column). Skipping keeps the
            # cell idempotent: rewriting would add another index column to the
            # file on every run.
            continue
        df = df.rename(columns={"Grove sensor reading": "EDA"})
        # NOTE(review): index=True writes the row index as an extra unnamed
        # first column; confirm downstream readers expect that layout.
        df.to_csv(file, index=True)

In [162]:
# Generate APD labels
labels = apd.get_suds_labels(APD_PATH)

def generate_labels(data):
    """
    Generate binary labels for APD based on the SUDS questionnaire and the input data format.

    For every row of ``data`` the label is read from the annotation row of that
    subject, in the column named after the row's phase.

    Parameters
    --------------------
    :param data: Features to generate labels for. Must include subject ID and phase columns.
    :type data: pd.DataFrame

    Returns
    --------------------
    Generated labels and the unmodified input data.

    Raises
    --------------------
    ValueError if a subject does not have exactly one annotation row.
    """
    annotations = apd.get_suds_labels(APD_PATH)
    labels = []
    for i in range(data.shape[0]):
        subject = int(data["subject"].iloc[i])
        phase = data["Phase"].iloc[i]
        label_row = annotations.loc[(annotations["subject"] == subject)]
        if len(label_row) != 1:
            # Zero or several matches would produce a ragged or over-long label
            # array that no longer lines up with the rows of ``data``.
            raise ValueError(
                f"Expected exactly one annotation row for subject {subject}, found {len(label_row)}."
            )
        labels.append(label_row[phase].iloc[0])
    labels = np.array(labels).ravel()
    return labels, data

### Run pipeline

In [None]:
from care_for_me.signal_acquisition.signal_acquisition import SignalAcquisition
from care_for_me.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from care_for_me.feature_extractor.feature_extractor import FeatureExtractor
from care_for_me.label_generator.label_generator import LabelGenerator
from care_for_me.classification.estimator import Estimator
from care_for_me.pipeline.pipeline import Pipeline


# Pipeline layers for APD; features are kept per window (calculate_mean=False).
label_gen = generate_labels
signal_acq = SignalAcquisition(signal_types=signal_types, source_folder=SOURCE_FOLDER)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=250)
feature_extractor = FeatureExtractor(feature_extraction_methods=feature_extraction_methods, calculate_mean=False)
label_generator = LabelGenerator(label_generation_method=label_gen)

# Classifiers to evaluate, keyed by display name.
models = {"SVM": SVC(), "RF": RandomForestClassifier()}

# One result list per model; each pipeline run appends one entry.
accs = {name: [] for name in models}
aucs = {name: [] for name in models}
true = {name: [] for name in models}
preds = {name: [] for name in models}

estimator_train_val_test = Estimator(2, models, name="Classification: train-val-test", random_seed=36)

pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [signal_acq, signal_preprocessor, feature_extractor, label_generator, estimator_train_val_test]
)

# We leave it up to the user to handle the final output of the pipeline.
out = pipeline.run()

# Results: out[1] is used as the ground truth and out[2] as the per-model
# predictions (out[0] is presumably the fitted models; unused here).
y_true, y_preds = out[1], out[2]

for model_name, model in models.items():
    acc = accuracy_score(y_true, y_preds[model_name])
    auc = roc_auc_score(y_true, y_preds[model_name])

    true[model_name].append(y_true)
    preds[model_name].append(y_preds[model_name])
    accs[model_name].append(acc)
    aucs[model_name].append(auc)

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 13.96 s
Running node Feature Extractor...


  2%|▏         | 1/42 [00:36<24:40, 36.10s/it]

In [None]:
# Ensemble, average voting (not implemented yet; draft kept for reference):
# ensemble_preds = np.mean(list(preds.keys()), axis=0)
# ensemble_acc = accuracy_score(true.values()[0], ensemble_preds)
# ensemble_auc = roc_auc_score(true.values()[0], ensemble_preds)

# Mean and standard deviation of the accuracy and AUC collected for each model.
for model_name in models:
    print(f"{model_name} " + "-"*50)
    acc_runs = accs[model_name]
    print(f"\nMean accuracy: {np.mean(acc_runs)}")
    print(f"STD accuracy: {np.std(acc_runs)}")
    auc_runs = aucs[model_name]
    print(f"Mean AUC score: {np.mean(auc_runs)}")
    print(f"STD AUC score: {np.std(auc_runs)}")

# Header only: no ensemble metrics are computed yet.
print("Ensemble " + "-"*50)

## WESAD

In [39]:
import os
import sys
module_path = os.path.abspath(os.path.join("../care_for_me"))
sys.path.insert(0, module_path)
module_path = os.path.abspath(os.path.join(".."))
sys.path.insert(0, module_path)

import wesad

# Subject IDs 2 to 17, skipping 12.
subject_indices = [i for i in range(2, 18) if i != 12]
SUBJECTS = list(map(str, subject_indices))

# NOTE: Change ROOT_DIR according to your own file structure. This will be the only place you will need to do this.
ROOT_DIR = "/Users/emilyzhou/Desktop/Research/CAREForMe/"
DATA_DIR = os.path.join(ROOT_DIR, "data")
WESAD_PATH = os.path.join(ROOT_DIR, "data", "WESAD")
SOURCE_FOLDER = os.path.join(ROOT_DIR, "data", "WESAD", "formatted")
ANNOTATIONS_PATH = os.path.join(ROOT_DIR, "data", "WESAD", "annotations")
METRICS = os.path.join(ROOT_DIR, "data", "metrics", "WESAD")

In [8]:
# Generate WESAD labels
labels = wesad.generate_labels(ANNOTATIONS_PATH, threshold="dynamic")

# NOTE(review): this redefines generate_labels and replaces the APD version
# defined earlier in the notebook; the cell run last decides which one is used.
def generate_labels(data):
    """
    Generate binary labels for WESAD based on the STAI questionnaire and the input data format.

    For every row of ``data`` the label is read from the annotation row of that
    subject, in the column named after the row's phase.

    Parameters
    --------------------
    :param data: Features to generate labels for. Must include subject ID and phase columns.
    :type data: pd.DataFrame

    Returns
    --------------------
    Generated labels and the unmodified input data.

    Raises
    --------------------
    ValueError if a subject does not have exactly one annotation row.
    """
    annotations = wesad.generate_labels(ANNOTATIONS_PATH, threshold="dynamic")
    labels = []
    for i in range(data.shape[0]):
        subject = int(data["subject"].iloc[i])
        phase = data["Phase"].iloc[i]
        label_row = annotations.loc[(annotations["subject"] == subject)]
        if len(label_row) != 1:
            # Zero or several matches would produce a ragged or over-long label
            # array that no longer lines up with the rows of ``data``.
            raise ValueError(
                f"Expected exactly one annotation row for subject {subject}, found {len(label_row)}."
            )
        labels.append(label_row[phase].iloc[0])
    labels = np.array(labels).ravel()
    return labels, data

### Run pipeline

In [None]:
from care_for_me.signal_acquisition.signal_acquisition import SignalAcquisition
from care_for_me.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from care_for_me.feature_extractor.feature_extractor import FeatureExtractor
from care_for_me.label_generator.label_generator import LabelGenerator
from care_for_me.classification.estimator import Estimator
from care_for_me.pipeline.pipeline import Pipeline


# Pipeline layers for WESAD; the feature extractor is run with calculate_mean=True.
label_gen = generate_labels
signal_acq = SignalAcquisition(signal_types=signal_types, source_folder=SOURCE_FOLDER)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=250)
feature_extractor = FeatureExtractor(feature_extraction_methods=feature_extraction_methods, calculate_mean=True)
label_generator = LabelGenerator(label_generation_method=label_gen)

# Classifiers to evaluate, keyed by display name.
models = {"SVM": SVC(C=1, gamma=0.01), "RF": RandomForestClassifier(n_estimators=30)}

# One result list per model; each pipeline run appends one entry.
accs = {name: [] for name in models}
aucs = {name: [] for name in models}
true = {name: [] for name in models}
preds = {name: [] for name in models}

estimator_train_val_test = Estimator(2, models, name="Classification: train-val-test", random_seed=36)

pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [signal_acq, signal_preprocessor, feature_extractor, label_generator, estimator_train_val_test]
)

# We leave it up to the user to handle the final output of the pipeline.
out = pipeline.run()

# Results: out[1] is used as the ground truth and out[2] as the per-model
# predictions (out[0] is presumably the fitted models; unused here).
y_true, y_preds = out[1], out[2]

for model_name, model in models.items():
    acc = accuracy_score(y_true, y_preds[model_name])
    auc = roc_auc_score(y_true, y_preds[model_name])

    true[model_name].append(y_true)
    preds[model_name].append(y_preds[model_name])
    accs[model_name].append(acc)
    aucs[model_name].append(auc)

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.403 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.18it/s]


- Elapsed time: 2.431 s
Running node Label Generator...
- Elapsed time: 0.04 s
Running node Classification: train-val-test...
Cross-validation scores: [0.66666667 0.58333333 0.58333333 0.58333333 0.58333333]
- Elapsed time: 0.008 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.37 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.21it/s]


- Elapsed time: 2.419 s
Running node Label Generator...
- Elapsed time: 0.037 s
Running node Classification: train-val-test...
Cross-validation scores: [0.91666667 0.95833333 0.83333333 0.70833333 0.91666667]
- Elapsed time: 0.009 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.358 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.02it/s]


- Elapsed time: 2.494 s
Running node Label Generator...
- Elapsed time: 0.045 s
Running node Classification: train-val-test...
Cross-validation scores: [0.94444444 0.91666667 0.91666667 0.94444444 0.97222222]
- Elapsed time: 0.01 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.364 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.20it/s]


- Elapsed time: 2.424 s
Running node Label Generator...
- Elapsed time: 0.046 s
Running node Classification: train-val-test...
Cross-validation scores: [1. 1. 1. 1. 1.]
- Elapsed time: 0.01 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.344 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  5.93it/s]


- Elapsed time: 2.535 s
Running node Label Generator...
- Elapsed time: 0.056 s
Running node Classification: train-val-test...
Cross-validation scores: [1.   1.   1.   0.95 1.  ]
- Elapsed time: 0.012 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.4 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.46it/s]


- Elapsed time: 2.326 s
Running node Label Generator...
- Elapsed time: 0.058 s
Running node Classification: train-val-test...
Cross-validation scores: [1.         0.95833333 1.         1.         1.        ]
- Elapsed time: 0.1 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.347 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.51it/s]


- Elapsed time: 2.31 s
Running node Label Generator...
- Elapsed time: 0.11 s
Running node Classification: train-val-test...
Cross-validation scores: [1.         0.95238095 1.         1.         1.        ]
- Elapsed time: 0.102 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.345 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.56it/s]


- Elapsed time: 2.294 s
Running node Label Generator...
- Elapsed time: 0.068 s
Running node Classification: train-val-test...
Cross-validation scores: [1. 1. 1. 1. 1.]
- Elapsed time: 0.103 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.343 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.53it/s]


- Elapsed time: 2.303 s
Running node Label Generator...
- Elapsed time: 0.075 s
Running node Classification: train-val-test...
Cross-validation scores: [1. 1. 1. 1. 1.]
- Elapsed time: 0.105 s
Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 0.345 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:02<00:00,  6.37it/s]


- Elapsed time: 2.362 s
Running node Label Generator...
- Elapsed time: 0.085 s
Running node Classification: train-val-test...
Cross-validation scores: [1. 1. 1. 1. 1.]
- Elapsed time: 0.108 s


In [46]:
# Ensemble, average voting (not implemented yet; draft kept for reference):
# ensemble_preds = np.mean(list(preds.keys()), axis=0)
# ensemble_acc = accuracy_score(true.values()[0], ensemble_preds)
# ensemble_auc = roc_auc_score(true.values()[0], ensemble_preds)

# Mean and standard deviation of the accuracy and AUC collected for each model.
for model_name in models:
    print(f"{model_name} " + "-"*50)
    print(f"Mean accuracy: {np.mean(accs[model_name])}")
    print(f"STD accuracy: {np.std(accs[model_name])}")
    print(f"Mean AUC score: {np.mean(aucs[model_name])}")
    print(f"STD AUC score: {np.std(aucs[model_name])}")
    print()

# Iterate over the runs that were actually recorded for every model. The
# previous hard-coded range(5) raised IndexError when fewer runs are stored
# (the pipeline cell above appends a single run per execution).
n_runs = min((len(preds[name]) for name in models), default=0)
for i in range(n_runs):
    # Predictions of all models for run i, collected for a future ensemble vote.
    ensemble_preds = []
    for model_name in models:
        pred = preds[model_name][i]
        print(len(pred))
        ensemble_preds.append(pred)

# Header only: no ensemble metrics are computed yet.
print("Ensemble " + "-"*50)

SVM --------------------------------------------------
Mean accuracy: 0.8933333333333333
STD accuracy: 0.155492050529208
Mean AUC score: 0.8484848484848484
STD AUC score: 0.2289834604522735

RF --------------------------------------------------
Mean accuracy: 1.0
STD accuracy: 0.0
Mean AUC score: 1.0
STD AUC score: 0.0

15
90
30
105
45
120
60
135
75
150
Ensemble --------------------------------------------------


In [52]:
# Inspect the raw chest ECG recording of WESAD subject S2.
ROOT_DIR = "/Users/emilyzhou/Desktop/Research/CAREForMe/"
DATA_DIR = os.path.join(ROOT_DIR, "data")
WESAD_PATH = os.path.join(DATA_DIR, "WESAD")
# Separate name on purpose: reassigning SOURCE_FOLDER here would silently point
# the pipeline cells above at the raw data if they are re-run afterwards.
WESAD_RAW_FOLDER = os.path.join(WESAD_PATH, "original")

s2_file = os.path.join(WESAD_RAW_FOLDER, "S2", "S2.pkl")
# NOTE: unpickling executes code embedded in the file; only load trusted data.
s2_data = pd.read_pickle(s2_file)
s2_labels = np.asarray(s2_data["label"])

PHASE_ID = 1 # baseline
# PHASE_ID = 2 # stress
# PHASE_ID = 3 # amusement

# Sample positions whose label equals the selected phase, found with a
# vectorized comparison instead of Python-level loops over every sample.
indices = np.flatnonzero(s2_labels == PHASE_ID)
s2_ecg = s2_data["signal"]["chest"]["ECG"]
phase_ecg = np.asarray(s2_ecg)[indices]
# print(indices[0:500])
print(len(s2_ecg))

4255300
