In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Put the sibling `affecteval` package directory and then the repository
# root at the front of the import search path (the root ends up first).
module_path = os.path.abspath("../affecteval")
sys.path.insert(0, module_path)
module_path = os.path.abspath("..")
sys.path.insert(0, module_path)

import biosppy as bp
import heartpy as hp
import neurokit2 as nk
import numpy as np
import pandas as pd
import pyhrv
import pyhrv.time_domain as td
import scipy

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier

from affecteval.signals import Features, Signals


In [None]:
import os
import sys
# Same import-path setup as the first cell, repeated here: the package
# directory and the repository root are pushed to the front of sys.path.
module_path = os.path.abspath("../affecteval")
sys.path.insert(0, module_path)
module_path = os.path.abspath("..")
sys.path.insert(0, module_path)

import wesad

# Subject IDs 2-11 and 13-17 (the two ranges below skip 12), as strings.
subject_indices = list(range(2, 12)) + list(range(13, 18))
SUBJECTS = [str(i) for i in subject_indices]

# NOTE: ROOT_DIR is the only machine-specific setting in this notebook.
# Either export the CAREFORME_ROOT environment variable before starting the
# kernel, or edit the fallback path below to match your own file structure.
ROOT_DIR = os.environ.get("CAREFORME_ROOT", "/Users/emilyzhou/Desktop/Research/CAREForMe/")

# Every other location is derived from ROOT_DIR.
DATA_DIR = os.path.join(ROOT_DIR, "data")
WESAD_PATH = os.path.join(DATA_DIR, "WESAD")
SOURCE_FOLDER = os.path.join(WESAD_PATH, "formatted")
ANNOTATIONS_PATH = os.path.join(WESAD_PATH, "annotations")
METRICS = os.path.join(DATA_DIR, "metrics", "WESAD")

In [3]:
# Format data to be compatible with CAREforMe pipeline
# Only needs to be run once locally 
# Status: COMPLETE, do not re-run

# wesad.reformat_and_save_data(WESAD_PATH)

# Rename Medi_1 and Medi_2 files to Medi1 and Medi2
# files_to_rename = glob.glob(os.path.join(SOURCE_FOLDER, "*/*Medi_*.csv"))
# for file in files_to_rename:
#     file_name = file.split("_")
#     new_name = f"{file_name[0]}_{file_name[1]}{file_name[2]}_{file_name[3]}"
#     os.rename(file, new_name)

### Generate WESAD labels

In [4]:
def generate_labels_3_class(data):
    """Map the ``Phase`` column to three integer classes.

    ``"Base"`` -> 0 (baseline), ``"TSST"`` -> 1 (stress), ``"Fun"`` -> 2
    (amusement).

    Rows with any other phase get no label. They are also removed from the
    returned frame so that ``labels[i]`` always describes row ``i`` of the
    returned data (previously the frame was returned unfiltered, leaving it
    longer than ``labels`` whenever such rows were present).

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table containing a ``"Phase"`` column.

    Returns
    -------
    labels : numpy.ndarray
        1-D integer array with one label per returned row.
    data : pandas.DataFrame
        The input frame itself when every row was labelled, otherwise the
        subset of rows that received a label (original order preserved).
    """
    phase_to_label = {"Base": 0, "TSST": 1, "Fun": 2}

    # Debugging aid kept from the original cell: shows the incoming table size.
    print(data.shape)

    known = data["Phase"].isin(list(phase_to_label)).to_numpy()
    if not known.all():
        data = data.loc[known]

    labels = data["Phase"].map(phase_to_label).to_numpy(dtype=int).ravel()
    return labels, data

def generate_labels_binary(data):
    """Map the ``Phase`` column to binary stress labels.

    ``"Base"`` and ``"Fun"`` -> 0 (non-stress), ``"TSST"`` -> 1 (stress).

    Rows with any other phase get no label. They are also removed from the
    returned frame so that ``labels[i]`` always describes row ``i`` of the
    returned data (previously the frame was returned unfiltered, leaving it
    longer than ``labels`` whenever such rows were present).

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table containing a ``"Phase"`` column.

    Returns
    -------
    labels : numpy.ndarray
        1-D integer array with one label per returned row.
    data : pandas.DataFrame
        The input frame itself when every row was labelled, otherwise the
        subset of rows that received a label (original order preserved).
    """
    phase_to_label = {"Base": 0, "Fun": 0, "TSST": 1}

    known = data["Phase"].isin(list(phase_to_label)).to_numpy()
    if not known.all():
        data = data.loc[known]

    labels = data["Phase"].map(phase_to_label).to_numpy(dtype=int).ravel()
    return labels, data

In [5]:
from scipy.signal import butter, lfilter

# Preprocessing methods
# ECG ------------------------------
def preprocess_ecg(data, fs):
    """Placeholder ECG preprocessing step: returns ``data`` unchanged.

    ``fs`` is accepted so the signature matches the other ``preprocess_*``
    functions, but it is not used.
    """
    return data

# EDA ------------------------------
def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    ``cutoff`` is given in the same units as the sampling rate ``fs``.
    Returns the ``(b, a)`` transfer-function coefficients from
    ``scipy.signal.butter``.
    """
    coefficients = butter(N=order, Wn=cutoff, btype='low', analog=False, fs=fs)
    return coefficients

def butter_lowpass_filter(data, cutoff, fs, order=5):
    """Low-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_lowpass``; the filter is applied
    once, forward only, with ``scipy.signal.lfilter``.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order=order)
    return lfilter(numerator, denominator, data)

def preprocess_eda(data, fs):
    """Preprocess an EDA signal by low-pass filtering it.

    Uses ``butter_lowpass_filter`` with a cutoff of 5 (same units as ``fs``)
    and that helper's default filter order.
    """
    cutoff = 5
    filtered = butter_lowpass_filter(data, cutoff, fs)
    return filtered

# EMG ------------------------------
def preprocess_emg(data, fs):
    """Placeholder EMG preprocessing step: returns ``data`` unchanged.

    ``fs`` is accepted so the signature matches the other ``preprocess_*``
    functions, but it is not used.
    """
    return data

# RESP ------------------------------
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a digital Butterworth band-pass filter.

    The band edges are normalised by the Nyquist frequency (``fs / 2``)
    before being handed to ``scipy.signal.butter``. Returns the ``(b, a)``
    transfer-function coefficients.
    """
    nyquist = 0.5 * fs
    normalised_band = [lowcut / nyquist, highcut / nyquist]
    return butter(order, normalised_band, btype='band')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` with a Butterworth filter.

    The filter is designed and applied as second-order sections. The
    previous ``(b, a)`` transfer-function form is numerically
    ill-conditioned for a narrow, low band at a high sampling rate (such as
    the 0.1-0.35 band used by ``preprocess_resp``), which can make the
    filtered output diverge.

    Parameters
    ----------
    data : array_like
        Signal to filter.
    lowcut, highcut : float
        Band edges, in the same units as ``fs``.
    fs : float
        Sampling rate.
    order : int, optional
        Butterworth order passed to ``scipy.signal.butter``.

    Returns
    -------
    numpy.ndarray
        Filtered signal (single forward pass), same shape as ``data``.
    """
    # Local import: only `butter` and `lfilter` are imported at file level.
    from scipy.signal import sosfilt

    sos = butter(order, [lowcut, highcut], btype='band', fs=fs, output='sos')
    return sosfilt(sos, data)

def preprocess_resp(data, fs):
    """Preprocess a respiration signal by band-pass filtering it.

    Uses ``butter_bandpass_filter`` with band edges 0.1 and 0.35 (same units
    as ``fs``) and that helper's default filter order.
    """
    lowcut, highcut = 0.1, 0.35
    filtered = butter_bandpass_filter(data, lowcut, highcut, fs)
    return filtered

# TEMP ------------------------------
def preprocess_temp(data, fs):
    """Placeholder temperature preprocessing step: returns ``data`` unchanged.

    ``fs`` is accepted so the signature matches the other ``preprocess_*``
    functions, but it is not used.
    """
    return data


In [6]:
# FEATURE EXTRACTION TEMPLATE

# Both constants are multiplied by the sampling rate `fs` to obtain sample
# counts, so they are durations (seconds if `fs` is in Hz). OVERLAP is
# therefore a duration as well, not a fraction of the window.
WINDOW_SIZE = 60
OVERLAP = 0.25

def extract_(data, fs):
    """Copy-paste template for a windowed feature extractor.

    NOTE: not callable as written. `feature` is never assigned; each copy
    must compute it where the "extract features" comments are.

    Windowing scheme shared by the extractors in this notebook:
    - if one window covers the whole signal, a single feature is computed
      from all of `data`;
    - otherwise windows of `window_size` samples are taken, each starting
      `overlap` samples before the previous one ended. The last window may
      be shorter because slicing past the end of `data` truncates it.

    Returns a list with one feature per window.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)

    start = 0
    stop = start + window_size
    n = data.size
    out = []

    # Signal no longer than one window: use it whole.
    if stop >= n:
        segment = data
        # extract features
        out.append(feature)

    # `stop` still holds the previous window's end when the condition is
    # tested, so one extra (possibly truncated) window is produced at the end.
    while stop < n:
        stop = start + window_size
        segment = data[start:stop]
        # extract features
        out.append(feature)
        start = stop - overlap

    out = list(out)
    return out

In [7]:
# Statistical/common feature extraction methods

# Both constants are multiplied by the sampling rate `fs` to obtain sample
# counts, so they are durations (seconds if `fs` is in Hz). OVERLAP is
# therefore a duration as well, not a fraction of the window.
WINDOW_SIZE = 60
OVERLAP = 0.25


def _windowed_feature(data, fs, reducer):
    """Apply ``reducer`` to successive windows of ``data``.

    Shared implementation of the statistical extractors below (previously
    copy-pasted into each of them).

    - If one window covers the whole signal, ``reducer`` is applied once to
      all of ``data``.
    - Otherwise windows of ``int(WINDOW_SIZE*fs)`` samples are taken, each
      starting ``int(OVERLAP*fs)`` samples before the previous one ended.
      The last window may be shorter because slicing past the end of
      ``data`` truncates it.

    ``WINDOW_SIZE`` and ``OVERLAP`` are read at call time, so later cells
    that reassign them still take effect.

    Returns a list with one reduced value per window.

    Raises
    ------
    ValueError
        If the signal needs several windows but the window would not advance
        (window length not larger than the overlap); the original loop never
        terminated in that situation.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)
    n = data.size

    if window_size >= n:
        return [reducer(data)]

    if window_size - overlap <= 0:
        raise ValueError(
            f"window of {window_size} samples does not advance with an overlap of {overlap} samples"
        )

    out = []
    start = 0
    stop = window_size
    # `stop` still holds the previous window's end when the condition is
    # tested, which is what yields the final, possibly truncated, window.
    while stop < n:
        stop = start + window_size
        out.append(reducer(data[start:stop]))
        start = stop - overlap
    return out


def _value_range(segment):
    """Difference between the largest and smallest value of ``segment``."""
    return np.max(segment) - np.min(segment)


def _mean_gradient(segment):
    """Mean of ``np.gradient(segment)``; NaN when fewer than two samples.

    ``np.gradient`` raises on a single sample, which a truncated final
    window can produce.
    """
    if len(segment) < 2:
        return np.nan
    return np.mean(np.gradient(segment))


def extract_min(data, fs):
    """Per-window minimum of ``data``; returns a list, one value per window."""
    return _windowed_feature(data, fs, np.min)


def extract_max(data, fs):
    """Per-window maximum of ``data``; returns a list, one value per window."""
    return _windowed_feature(data, fs, np.max)


def extract_mean(data, fs):
    """Per-window mean of ``data``; returns a list, one value per window."""
    return _windowed_feature(data, fs, np.mean)


def extract_med(data, fs):
    """Per-window median of ``data``; returns a list, one value per window."""
    return _windowed_feature(data, fs, np.median)


def extract_std(data, fs):
    """Per-window standard deviation (``np.std``) of ``data``."""
    return _windowed_feature(data, fs, np.std)


def extract_var(data, fs):
    """Per-window variance (``np.var``) of ``data``."""
    return _windowed_feature(data, fs, np.var)


def extract_range(data, fs):
    """Per-window range (max minus min) of ``data``."""
    return _windowed_feature(data, fs, _value_range)


def extract_peak(data, fs):
    """Per-window peak value of ``data``.

    Computed as the window maximum, i.e. identical to ``extract_max``.
    """
    return _windowed_feature(data, fs, np.max)


def extract_slope(data, fs):
    """Per-window slope of ``data``, taken as the mean of ``np.gradient``.

    A window with fewer than two samples yields NaN.
    """
    return _windowed_feature(data, fs, _mean_gradient)

In [8]:
# Extract power from frequency bands

def bandpower(x, fs, fmin, fmax):
    """Integrate the periodogram of ``x`` over a frequency band.

    The power spectral density comes from ``scipy.signal.periodogram`` and
    is integrated with the trapezoidal rule. The band runs from the last
    frequency bin not above ``fmin`` up to, but excluding, the last bin not
    above ``fmax``.

    Bounds outside the available frequency range are clamped explicitly. The
    previous ``argmax(...) - 1`` indexing produced a wrapped-around index of
    -1 there, which happened to work for ``fmax`` above the last bin but
    silently returned 0 for any ``fmin`` below the first bin.

    Parameters
    ----------
    x : array_like
        Real-valued signal.
    fs : float
        Sampling rate.
    fmin, fmax : float
        Band edges, in the same units as ``fs``.

    Returns
    -------
    float
        Integrated power; 0 when the band selects no bins.
    """
    f, Pxx = scipy.signal.periodogram(x, fs=fs)

    # Index of the last bin <= bound, clamped to the first bin.
    ind_min = max(int(np.searchsorted(f, fmin, side="right")) - 1, 0)
    ind_max = max(int(np.searchsorted(f, fmax, side="right")) - 1, 0)

    # np.trapezoid only exists on NumPy >= 2.0; fall back to np.trapz before that.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(Pxx[ind_min: ind_max], f[ind_min: ind_max])

def extract_freq_power(data, fs, low, high):
    """Per-window band power of ``data`` between ``low`` and ``high``.

    Each window is passed to ``bandpower``. Windowing follows the notebook's
    usual scheme: a single value for a signal no longer than one window,
    otherwise windows of ``int(WINDOW_SIZE*fs)`` samples that each start
    ``int(OVERLAP*fs)`` samples before the previous one ended (the last one
    may be truncated).

    Returns a list with one power value per window.
    """
    samples_per_window = int(WINDOW_SIZE*fs)
    samples_overlap = int(OVERLAP*fs)
    total = data.size

    # Short signal: one value computed from everything.
    if samples_per_window >= total:
        return [bandpower(data, fs, low, high)]

    powers = []
    begin, end = 0, samples_per_window
    while end < total:
        end = begin + samples_per_window
        powers.append(bandpower(data[begin:end], fs, low, high))
        begin = end - samples_overlap
    return powers

In [9]:
# ECG feature extraction methods
from pyhrv.hrv import hrv

# Window length and overlap; both are multiplied by `fs` to get sample counts.
WINDOW_SIZE = 60
OVERLAP = 0.25

def extract_ecg_features_pyhrv(data, fs):
    """Compute windowed heart-rate and time-domain HRV features from ECG.

    R-peaks and instantaneous heart rate come from ``biosppy``; RMSSD, SDNN,
    TINN and NN50 come from ``pyhrv.time_domain``.

    - A signal no longer than one window is processed whole, without the
      NeuroKit cleaning step.
    - Otherwise each window is first cleaned with ``nk.ecg_process``
      (``"ECG_Clean"`` column). If any pyhrv computation fails for a window,
      all five features of that window are recorded as NaN.
    - A final window shorter than ``4*fs`` samples is dropped, so this
      function can return one value fewer than the statistical extractors
      for the same signal.

    Parameters
    ----------
    data : array-like with ``.size`` and slice support
        Raw ECG signal.
    fs : float
        Sampling rate.

    Returns
    -------
    tuple of five lists ``(hr, rmssd, sdnn, tinn, nn50)``, one entry per
    window, or ``None`` when ``data`` is empty.
    """
    n = data.size
    if n == 0:
        print("ECG signal has length 0, returning None")
        return None

    hr = []
    rmssd = []
    sdnn = []
    tinn = []
    nn50 = []

    start = 0
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)
    stop = start + window_size
    if stop >= n:
        t, _, rpeaks, _, _, _, bpm = bp.signals.ecg.ecg(signal=data, sampling_rate=fs, show=False)
        peak_times = t[rpeaks]
        bpm = np.mean(bpm)
        rmssd_segment = td.rmssd(rpeaks=peak_times)["rmssd"]
        sdnn_segment = td.sdnn(rpeaks=peak_times)["sdnn"]
        # plot=False matches the windowed branch; without it pyhrv draws a figure.
        tinn_segment = td.tinn(rpeaks=peak_times, plot=False)["tinn"]
        nn50_segment = td.nnXX(rpeaks=peak_times, threshold=50)["nn50"]

        hr.append(bpm)
        rmssd.append(rmssd_segment)
        sdnn.append(sdnn_segment)
        tinn.append(tinn_segment)
        nn50.append(nn50_segment)
    else:
        while stop < n:
            stop = start + window_size
            segment = data[start:stop]
            if len(segment) < fs*4:
                # Too short to analyse. Only a truncated final window gets
                # here, so stop. The previous `continue` relied on the loop
                # condition to exit and would spin forever if it stayed true.
                break
            cleaned, _ = nk.ecg_process(segment, sampling_rate=fs)
            t, _, rpeaks, _, _, _, bpm = bp.signals.ecg.ecg(
                signal=cleaned["ECG_Clean"], sampling_rate=fs, show=False
            )
            try:
                peak_times = t[rpeaks]
                bpm = np.mean(bpm)
                rmssd_segment = np.mean(td.rmssd(rpeaks=peak_times)["rmssd"])
                sdnn_segment = td.sdnn(rpeaks=peak_times)["sdnn"]
                tinn_segment = td.tinn(rpeaks=peak_times, plot=False)["tinn"]
                nn50_segment = td.nnXX(rpeaks=peak_times, threshold=50)["nn50"]
            except Exception:
                # Deliberate best effort: a window pyhrv cannot handle is
                # kept as NaN so the window count is unchanged.
                bpm = np.nan
                rmssd_segment = np.nan
                sdnn_segment = np.nan
                tinn_segment = np.nan
                nn50_segment = np.nan
            hr.append(bpm)
            rmssd.append(rmssd_segment)
            sdnn.append(sdnn_segment)
            tinn.append(tinn_segment)
            nn50.append(nn50_segment)

            start = stop - overlap
    return hr, rmssd, sdnn, tinn, nn50
    
def extract_hr(data, fs):
    """Per-window mean heart rate from ``extract_ecg_features_pyhrv``.

    Returns ``None`` when that function returns ``None`` (empty signal);
    previously this raised ``TypeError`` while unpacking.
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        return None
    return features[0]

def extract_rmssd(data, fs):
    """Per-window RMSSD from ``extract_ecg_features_pyhrv``.

    Returns ``None`` when that function returns ``None`` (empty signal);
    previously this raised ``TypeError`` while unpacking.
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        return None
    return features[1]

def extract_sdnn(data, fs):
    """Per-window SDNN from ``extract_ecg_features_pyhrv``.

    Returns ``None`` when that function returns ``None`` (empty signal);
    previously this raised ``TypeError`` while unpacking.
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        return None
    return features[2]

def extract_hr_mean(data, fs):
    """Mean heart rate over the whole ECG signal, as a one-element list.

    The heart rate is computed per window with ``biosppy`` (after cleaning
    each window with ``nk.ecg_process``; a signal no longer than one window
    is processed whole and uncleaned). The per-window means are then
    averaged into a single value. A final window shorter than ``4*fs``
    samples is dropped.

    Returns
    -------
    list
        ``[mean_hr]``.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)

    start = 0
    stop = start + window_size
    n = data.size
    out = []

    if stop >= n:
        _, _, _, _, _, _, bpm = bp.signals.ecg.ecg(signal=data, sampling_rate=fs, show=False)
        out.append(np.mean(bpm))
    else:
        while stop < n:
            stop = start + window_size
            segment = data[start:stop]
            if len(segment) < fs*4:
                # Too short to analyse. Only a truncated final window gets
                # here, so stop. The previous `continue` relied on the loop
                # condition to exit and would spin forever if it stayed true.
                break
            cleaned, _ = nk.ecg_process(segment, sampling_rate=fs)
            _, _, _, _, _, _, bpm = bp.signals.ecg.ecg(
                signal=cleaned["ECG_Clean"], sampling_rate=fs, show=False
            )
            try:
                bpm = np.mean(bpm)
            except Exception:
                # Best effort: keep the window as NaN instead of failing.
                bpm = np.nan
            out.append(bpm)
            start = stop - overlap
    return [np.mean(out)]

def extract_tinn(data, fs):
    """Per-window TINN from ``extract_ecg_features_pyhrv``.

    Returns ``None`` when that function returns ``None`` (empty signal);
    previously this raised ``TypeError`` while unpacking.
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        return None
    return features[3]

def extract_nn50(data, fs):
    """Per-window NN50 from ``extract_ecg_features_pyhrv``.

    Returns ``None`` when that function returns ``None`` (empty signal);
    previously this raised ``TypeError`` while unpacking.
    """
    features = extract_ecg_features_pyhrv(data, fs)
    if features is None:
        return None
    return features[4]

def extract_ulf(data, fs):
    """Per-window power of ``data`` in the 0-0.03 band (ULF).

    Thin wrapper around ``extract_freq_power``; band edges are in the same
    units as ``fs``.
    """
    return extract_freq_power(data, fs, 0, 0.03)

def extract_lf(data, fs):
    """Per-window power of ``data`` in the 0.03-0.5 band (LF).

    Thin wrapper around ``extract_freq_power``; band edges are in the same
    units as ``fs``.

    NOTE(review): this band fully contains the 0.12-0.488 band used by
    ``extract_hf`` — confirm the overlap is intended.
    """
    return extract_freq_power(data, fs, 0.03, 0.5)

def extract_hf(data, fs):
    """Per-window power of ``data`` in the 0.12-0.488 band (HF).

    Thin wrapper around ``extract_freq_power``; band edges are in the same
    units as ``fs``.
    """
    return extract_freq_power(data, fs, 0.12, 0.488)

def extract_uhf(data, fs):
    """Per-window power of ``data`` in the 150-250 band (UHF).

    Thin wrapper around ``extract_freq_power``; band edges are in the same
    units as ``fs``.
    """
    return extract_freq_power(data, fs, 150, 250)

def extract_lf_hf_ratio(data, fs):
    """Per-window ratio of LF power to HF power.

    Divides the output of ``extract_lf`` by that of ``extract_hf``
    element-wise with ``np.divide``, so the result is a NumPy array rather
    than a list.
    """
    return np.divide(extract_lf(data, fs), extract_hf(data, fs))

In [10]:
# EDA feature extraction

# Minimum SCR amplitude, relative to the largest amplitude in the signal;
# peaks below it are excluded (see the NeuroKit2 documentation).
MIN_AMP = 0.3
def extract_eda_features_nk(signal, fs):
    """Split an EDA signal into a tonic component and an SCR-peak indicator.

    Steps: cast to double, scale with ``hp.scale_data``, median-filter over
    ``int(fs)`` samples to reject outliers, decompose with
    ``nk.eda_process``, then locate SCR peaks on the phasic component with
    ``nk.eda_findpeaks`` using ``MIN_AMP``.

    Returns
    -------
    tonic : numpy.ndarray
        ``EDA_Tonic`` with 1 subtracted at every detected peak position.
        NOTE(review): subtracting the 0/1 indicator from the tonic level
        looks unusual — confirm it is intended.
    peaks : numpy.ndarray
        Same shape as the phasic component; 1 at peak indices, 0 elsewhere.
    """
    scaled = hp.scale_data(signal.astype(np.double))
    smoothed = scipy.ndimage.median_filter(scaled, int(fs))  # Median smoothing to reject outliers
    decomposed, _ = nk.eda_process(smoothed, sampling_rate=fs)
    phasic = decomposed["EDA_Phasic"].to_numpy()
    tonic = decomposed["EDA_Tonic"].to_numpy()

    peak_info = nk.eda_findpeaks(phasic, fs, amplitude_min=MIN_AMP)
    peak_positions = peak_info["SCR_Peaks"].astype(int)

    # Binary indicator: 1 where a peak was found.
    peaks = np.zeros(phasic.shape)
    np.put(peaks, peak_positions, [1])

    return tonic - peaks, peaks

def extract_mean_scl(data, fs):
    """Per-window mean of the tonic EDA component.

    The tonic component comes from ``extract_eda_features_nk`` and is
    windowed with the notebook's usual scheme (single value for a signal no
    longer than one window; otherwise overlapping windows, the last one
    possibly truncated).

    Returns a list with one mean per window, or ``None`` if no tonic
    component was produced.
    """
    samples_per_window = int(WINDOW_SIZE*fs)
    samples_overlap = int(OVERLAP*fs)

    tonic, _ = extract_eda_features_nk(data, fs)
    if tonic is None:
        print("mean SCL is None")
        return None

    total = tonic.size
    if samples_per_window >= total:
        return [np.mean(tonic)]

    means = []
    begin, end = 0, samples_per_window
    while end < total:
        end = begin + samples_per_window
        means.append(np.mean(tonic[begin:end]))
        begin = end - samples_overlap
    return means

def extract_scr_rate(data, fs):
    """Per-window number of detected SCR peaks.

    The 0/1 peak indicator from ``extract_eda_features_nk`` is summed within
    each window, so every value is a peak count per window and is not
    normalised by the window duration.

    Windowing follows the notebook's usual scheme (single value for a signal
    no longer than one window; otherwise overlapping windows, the last one
    possibly truncated).

    Returns a list with one count per window, or ``None`` if no peak
    indicator was produced.
    """
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)

    _, peaks = extract_eda_features_nk(data, fs)

    if peaks is None:
        print("SCR rate is None")
        return None

    n = peaks.size

    # np.sum replaces the builtin sum(), which walks the array one element
    # at a time in Python; the totals are identical for a 0/1 indicator.
    if window_size >= n:
        return [np.sum(peaks)]

    out = []
    start = 0
    stop = window_size
    while stop < n:
        stop = start + window_size
        out.append(np.sum(peaks[start:stop]))
        start = stop - overlap
    return out


In [11]:
# EMG feature extraction

# Window length and overlap; both are multiplied by `fs` to get sample counts.
WINDOW_SIZE = 60
OVERLAP = 0.25

def extract_emg_psd_1(data, fs):
    """Per-window EMG power in the 0-50 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 0, 50)
    
def extract_emg_psd_2(data, fs):
    """Per-window EMG power in the 50-100 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 50, 100)
    
def extract_emg_psd_3(data, fs):
    """Per-window EMG power in the 100-150 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 100, 150)
    
def extract_emg_psd_4(data, fs):
    """Per-window EMG power in the 150-200 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 150, 200)
    
def extract_emg_psd_5(data, fs):
    """Per-window EMG power in the 200-250 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 200, 250)
    
def extract_emg_psd_6(data, fs):
    """Per-window EMG power in the 250-300 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 250, 300)
    
def extract_emg_psd_7(data, fs):
    """Per-window EMG power in the 300-350 band (via ``extract_freq_power``)."""
    return extract_freq_power(data, fs, 300, 350)

In [12]:
# RESP feature extraction

# Window length and overlap; both are multiplied by `fs` to get sample counts.
WINDOW_SIZE = 60
OVERLAP = 0.25

def extract_resp(data, fs):
    """Run NeuroKit's respiration pipeline; returns whatever ``nk.rsp_process`` returns."""
    return nk.rsp_process(data, fs)


def _resp_signals(data, fs):
    """Return only the per-sample signals table produced by ``extract_resp``.

    ``nk.rsp_process`` returns a ``(signals, info)`` pair. The extractors
    below previously treated that pair as the table itself and failed on
    ``.size`` and slicing.
    """
    processed = extract_resp(data, fs)
    if isinstance(processed, tuple):
        processed = processed[0]
    return processed


def _windowed_resp_feature(data, fs, reducer):
    """Apply ``reducer`` to successive row windows of the processed respiration table.

    Windowing follows the notebook's usual scheme: a single call on the
    whole table when it is no longer than one window, otherwise windows of
    ``int(WINDOW_SIZE*fs)`` rows that each start ``int(OVERLAP*fs)`` rows
    before the previous one ended (the last one may be truncated).

    The table length is taken as its number of rows. ``DataFrame.size``
    would count rows times columns.

    Returns a list with one reduced value per window.
    """
    signals = _resp_signals(data, fs)
    window_size = int(WINDOW_SIZE*fs)
    overlap = int(OVERLAP*fs)
    n = len(signals)

    if window_size >= n:
        return [reducer(signals)]

    out = []
    start = 0
    stop = window_size
    while stop < n:
        stop = start + window_size
        out.append(reducer(signals.iloc[start:stop]))
        start = stop - overlap
    return out


def _count_phase(segment, phase_value):
    """Number of samples in ``segment`` whose ``RSP_Phase`` equals ``phase_value`` (0 if none)."""
    return int((segment["RSP_Phase"] == phase_value).sum())


def _inh_exh_ratio(segment):
    """Inhalation-to-exhalation sample-count ratio; NaN when there are no exhalation samples."""
    inh = _count_phase(segment, 1)
    exh = _count_phase(segment, 0)
    return inh / exh if exh else np.nan


def extract_resp_mean_inh(data, fs):
    """Per-window count of samples with ``RSP_Phase == 1`` (treated here as inhalation).

    NOTE(review): despite the name, this is a sample count, not a mean.
    """
    return _windowed_resp_feature(data, fs, lambda segment: _count_phase(segment, 1))


def extract_resp_mean_exh(data, fs):
    """Per-window count of samples with ``RSP_Phase == 0`` (treated here as exhalation).

    Windows without such samples yield 0 (previously ``None``).

    NOTE(review): despite the name, this is a sample count, not a mean.
    """
    return _windowed_resp_feature(data, fs, lambda segment: _count_phase(segment, 0))


def extract_resp_std_inh(data, fs):
    """Per-window ``RSP_Phase`` column.

    TODO: no standard deviation is computed yet. Each list entry is the raw
    ``RSP_Phase`` series of the window, exactly as in the original cell.
    """
    return _windowed_resp_feature(data, fs, lambda segment: segment["RSP_Phase"])


def extract_resp_std_exh(data, fs):
    """Per-window ``RSP_Phase`` column.

    TODO: no standard deviation is computed yet. Each list entry is the raw
    ``RSP_Phase`` series of the window, exactly as in the original cell.
    """
    return _windowed_resp_feature(data, fs, lambda segment: segment["RSP_Phase"])


def extract_resp_inh_exh(data, fs):
    """Per-window ratio of inhalation samples to exhalation samples.

    A window without exhalation samples yields NaN (previously this raised
    because the exhalation count was ``None``).
    """
    return _windowed_resp_feature(data, fs, _inh_exh_ratio)


def extract_resp_vol(data, fs):
    """Per-window mean of the ``RSP_RVT`` column."""
    return _windowed_resp_feature(data, fs, lambda segment: np.mean(segment["RSP_RVT"]))


def extract_resp_rate(data, fs):
    """Per-window mean of the ``RSP_Rate`` column."""
    return _windowed_resp_feature(data, fs, lambda segment: np.mean(segment["RSP_Rate"]))


def extract_resp_duration(data, fs):
    """Not implemented yet; returns ``None``."""
    pass



In [None]:
# Set up parameters
from affecteval.signal_acquisition.signal_acquisition import SignalAcquisition
from affecteval.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from affecteval.feature_extractor.feature_extractor import FeatureExtractor
from affecteval.label_generator.label_generator import LabelGenerator
from affecteval.classification.estimator import Estimator
from affecteval.pipeline.pipeline import Pipeline


# Signal modalities handed to SignalAcquisition in the classification cells.
signal_types = [
    Signals.ECG,
    Signals.EDA,
    Signals.EMG,
    Signals.RESP,
    Signals.TEMP,
]
# Full catalogue of feature identifiers, grouped by modality (ECG, EDA, EMG,
# RESP, TEMP). NOTE(review): nothing visible in this notebook reads this
# list; only `feature_extraction_methods` below reaches the pipeline.
feature_names = [
    Features.ECG_MEAN, Features.ECG_MEDIAN, Features.ECG_STD, Features.ECG_VAR,
    Features.HR, Features.HR_MEAN, Features.RMSSD, Features.RMSSD_MEAN, Features.SDNN, Features.NN50, Features.TINN,
    Features.ULF, Features.LF, Features.HF, Features.UHF, Features.LF_NORM, Features.HF_NORM, Features.LF_HF,
    Features.EDA_MEAN, Features.EDA_MEDIAN, Features.EDA_STD, Features.EDA_VAR, Features.EDA_SLOPE, Features.EDA_RANGE,
    Features.MEAN_SCL, Features.SCR_RATE,
    Features.EMG_MEAN, Features.EMG_MEDIAN, Features.EMG_STD, Features.EMG_VAR, Features.EMG_10, Features.EMG_90, Features.EMG_NUM_PEAKS, Features.EMG_PEAK_MEAN, Features.EMG_PEAK_STD, Features.EMG_PEAK_AMP,
    Features.EMG_F_MEAN, Features.EMG_F_MED, Features.EMG_F_PEAK, Features.EMG_PSD_1, Features.EMG_PSD_2, Features.EMG_PSD_3, Features.EMG_PSD_4, Features.EMG_PSD_5, Features.EMG_PSD_6, Features.EMG_PSD_7,
    Features.RESP_MEAN_INH, Features.RESP_MEAN_EXH, Features.RESP_STD_INH, Features.RESP_STD_EXH, Features.RESP_INH_EXH, Features.RESP_RANGE, Features.RESP_VOL, Features.RESP_RATE, Features.RESP_DURATION,
    Features.TEMP_MEAN, Features.TEMP_STD, Features.TEMP_MIN, Features.TEMP_MAX, Features.TEMP_SLOPE, Features.TEMP_RANGE
]

# Per-modality preprocessing functions defined earlier in the notebook.
# NOTE(review): the classification cells below build SignalPreprocessor
# without passing this mapping — confirm whether it should be wired in.
preprocessing_methods = {
    "ECG": preprocess_ecg,
    "EDA": preprocess_eda,
    "EMG": preprocess_emg,
    "RESP": preprocess_resp,
    "TEMP": preprocess_temp
}

# Modality -> {feature identifier: extractor function}. This is what gets
# passed to FeatureExtractor. Commented-out entries are currently disabled,
# and "RESP" has no active extractors.
feature_extraction_methods = {
    "ECG": {
        Features.ECG_MEAN: extract_mean,
        Features.ECG_MEDIAN: extract_med,
        Features.ECG_STD: extract_std,
        Features.ECG_VAR: extract_var,
        # Features.HR: extract_hr, Features.HR_MEAN: extract_hr_mean,
        # Features.RMSSD: extract_rmssd, Features.RMSSD_MEAN: extract_rmssd,
        # Features.SDNN: extract_sdnn, Features.NN50: extract_nn50, Features.TINN: extract_tinn,
        # Features.ULF: extract_ulf, Features.LF: extract_lf, Features.HF: extract_hf, Features.UHF: extract_uhf,
        # Features.LF_NORM: extract_lf, Features.HF_NORM: extract_hf,
        # Features.LF_HF: extract_lf_hf_ratio,
    },
    "EDA": {
        Features.EDA_MEAN: extract_mean,
        Features.EDA_MEDIAN: extract_med,
        Features.EDA_STD: extract_std,
        Features.EDA_VAR: extract_var,
        # Features.EDA_SLOPE: extract_slope,
        Features.EDA_RANGE: extract_range,
        # Features.MEAN_SCL: extract_mean_scl, Features.SCR_RATE: extract_scr_rate,
    },
    "EMG": {
        Features.EMG_MEAN: extract_mean,
        Features.EMG_MEDIAN: extract_med,
        Features.EMG_STD: extract_std,
        Features.EMG_VAR: extract_var,

    },
    "RESP": {

    },
    "TEMP": {
        Features.TEMP_MEAN: extract_mean,
        Features.TEMP_STD: extract_std,
        Features.TEMP_MIN: extract_min,
        Features.TEMP_MAX: extract_max,
        Features.TEMP_SLOPE: extract_slope
    }
}

### Three-class classification

In [None]:
from affecteval.signal_acquisition.signal_acquisition import SignalAcquisition
from affecteval.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from affecteval.feature_extractor.feature_extractor import FeatureExtractor
from affecteval.label_generator.label_generator import LabelGenerator
from affecteval.classification.estimator import Estimator
from affecteval.pipeline.pipeline import Pipeline

import warnings
warnings.filterwarnings("ignore")


# --- Pipeline layers for the three-class task --------------------------------
label_gen = generate_labels_3_class
signal_acq = SignalAcquisition(source_folder=SOURCE_FOLDER, signal_types=signal_types)
signal_preprocessor = SignalPreprocessor(resample_rate=250, skip=True)
feature_extractor = FeatureExtractor(calculate_mean=False, feature_extraction_methods=feature_extraction_methods)
label_generator = LabelGenerator(label_generation_method=label_gen)

# Classifiers under comparison, keyed by the short name used in the results.
models = {
    "DT": DecisionTreeClassifier(criterion="entropy"),
    "RF": RandomForestClassifier(criterion="entropy", n_estimators=100),
    "AB": AdaBoostClassifier(n_estimators=100),
    "LDA": LinearDiscriminantAnalysis(),
    "KNN": KNeighborsClassifier(n_neighbors=9)
}

# One result list per model, in the same key order as `models`.
accs = {model_name: [] for model_name in models}
f1s = {model_name: [] for model_name in models}
true = {model_name: [] for model_name in models}
preds = {model_name: [] for model_name in models}

estimator_train_val_test = Estimator(2, models, name="Classification: train-val-test", random_seed=36)

pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [signal_acq, signal_preprocessor, feature_extractor, label_generator, estimator_train_val_test]
)

# We leave it up to the user to handle the final output of the pipeline.
out = pipeline.run()

# Results
# fitted_models = out[0]
y_true, y_preds = out[1], out[2]

# NOTE(review): with average='micro' on single-label predictions the F1
# score is numerically the same as accuracy — confirm that is intended.
for model_name, model in models.items():
    model_predictions = y_preds[model_name]
    acc = accuracy_score(y_true, model_predictions)
    f1 = f1_score(y_true, model_predictions, average='micro')

    true[model_name].append(y_true)
    preds[model_name].append(model_predictions)
    accs[model_name].append(acc)
    f1s[model_name].append(f1)

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 30.721 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:00<00:00, 22.94it/s]


- Elapsed time: 0.658 s
Running node Label Generator...
(581, 16)
- Elapsed time: 0.003 s
Running node Classification: train-val-test...
DT --------------------------------------------------
Cross-validation acc: [0.87096774 0.8172043  0.79569892 0.77419355 0.82608696]
Cross-validation mean acc: 0.8168302945301542
Cross-validation std acc: 0.03249958705871656
Cross-validation f1: [0.86522444 0.81145471 0.77259818 0.74947138 0.84819653]
Cross-validation mean f1: 0.8093890495286399
Cross-validation std f1: 0.04375288042202031
RF --------------------------------------------------
Cross-validation acc: [0.91397849 0.90322581 0.93548387 0.84946237 0.92391304]
Cross-validation mean acc: 0.905212716222534
Cross-validation std acc: 0.029849399746023637
Cross-validation f1: [0.92325269 0.90164413 0.92364643 0.872429   0.93307954]
Cross-validation mean f1: 0.910810358922606
Cross-validation std f1: 0.02178366790866064
AB --------------------------------------------------
Cross-validation acc: [0

### Binary classification

In [None]:
from affecteval.signal_acquisition.signal_acquisition import SignalAcquisition
from affecteval.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from affecteval.feature_extractor.feature_extractor import FeatureExtractor
from affecteval.label_generator.label_generator import LabelGenerator
from affecteval.feature_selector.feature_selector import FeatureSelector
from affecteval.classification.estimator import Estimator
from affecteval.pipeline.pipeline import Pipeline

import warnings
warnings.filterwarnings("ignore")


# --- Pipeline layers for the binary (stress vs. non-stress) task -------------
label_gen = generate_labels_binary
signal_acq = SignalAcquisition(source_folder=SOURCE_FOLDER, signal_types=signal_types)
signal_preprocessor = SignalPreprocessor(resample_rate=250, skip=True)
feature_extractor = FeatureExtractor(calculate_mean=False, feature_extraction_methods=feature_extraction_methods)
label_generator = LabelGenerator(label_generation_method=label_gen)

# Classifiers under comparison, keyed by the short name used in the results.
models = {
    "DT": DecisionTreeClassifier(criterion="entropy"),
    "RF": RandomForestClassifier(criterion="entropy", n_estimators=100),
    "AB": AdaBoostClassifier(n_estimators=100),
    "LDA": LinearDiscriminantAnalysis(),
    "KNN": KNeighborsClassifier(n_neighbors=9)
}

# One result list per model, in the same key order as `models`.
accs = {model_name: [] for model_name in models}
f1s = {model_name: [] for model_name in models}
true = {model_name: [] for model_name in models}
preds = {model_name: [] for model_name in models}

estimator_train_val_test = Estimator(2, models, name="Classification: train-val-test", random_seed=36)

pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [signal_acq, signal_preprocessor, feature_extractor, label_generator, estimator_train_val_test]
)

# We leave it up to the user to handle the final output of the pipeline.
out = pipeline.run()

# Results
# fitted_models = out[0]
y_true, y_preds = out[1], out[2]

# NOTE(review): with average='micro' on single-label predictions the F1
# score is numerically the same as accuracy — confirm that is intended.
for model_name, model in models.items():
    model_predictions = y_preds[model_name]
    acc = accuracy_score(y_true, model_predictions)
    f1 = f1_score(y_true, model_predictions, average='micro')

    true[model_name].append(y_true)
    preds[model_name].append(model_predictions)
    accs[model_name].append(acc)
    f1s[model_name].append(f1)

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 30.564 s
Running node Feature Extractor...


100%|██████████| 15/15 [00:00<00:00, 23.31it/s]


- Elapsed time: 0.647 s
Running node Label Generator...
- Elapsed time: 0.003 s
Running node Classification: train-val-test...
DT --------------------------------------------------
Cross-validation acc: [0.96774194 0.93548387 0.93548387 0.96774194 0.9673913 ]
Cross-validation mean acc: 0.9547685834502104
Cross-validation std acc: 0.01574642232318585
Cross-validation f1: [0.967584   0.93548387 0.9131041  0.92438965 0.95652174]
Cross-validation mean f1: 0.9394166713903971
Cross-validation std f1: 0.020097819019838786
RF --------------------------------------------------
Cross-validation acc: [0.96774194 0.96774194 0.94623656 0.96774194 0.94565217]
Cross-validation mean acc: 0.9590229079008882
Cross-validation std acc: 0.010680183219333988
Cross-validation f1: [0.967584   0.96723154 0.92151709 0.9459926  0.95689575]
Cross-validation mean f1: 0.9518441955079183
Cross-validation std f1: 0.017111425498110247
AB --------------------------------------------------
Cross-validation acc: [0.93548