#### NOTE:
The following subjects did not complete the speech exposure phase and were removed:
- 57
- 93
- 16
- 87
- 8
- 21
- 88
- 84
- 23

The following subject did not complete the bug exposure task and was removed:
- 4

### An example implementation of the AffectEval pipeline using the APD dataset

In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
module_path = os.path.abspath(os.path.join("../affecteval"))
sys.path.insert(0, module_path)
module_path = os.path.abspath(os.path.join(".."))
sys.path.insert(0, module_path)

import numpy as np
import pandas as pd
import apd

from affecteval import signals
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score


# Root of the local project checkout. The original absolute path is kept as the
# default so existing setups keep working; set AFFECTEVAL_ROOT_DIR to run the
# notebook on another machine without editing this cell.
ROOT_DIR = os.environ.get(
    "AFFECTEVAL_ROOT_DIR", "/Users/emilyzhou/Desktop/Research/CAREForMe/"
)
DATA_DIR = os.path.join(ROOT_DIR, "data")
APD_PATH = os.path.join(DATA_DIR, "APD")
# Folder handed to SignalAcquisition as its source_folder in the cells below.
SOURCE_FOLDER = os.path.join(APD_PATH, "formatted")
METRICS = os.path.join(DATA_DIR, "metrics", "APD")

# Group-name constants.
ALL = "all"
HA = "high_anxiety_group"
LA = "low_anxiety_group"

# Participant IDs (as strings) per group; going by the variable names these are
# the high-anxiety and low-anxiety groups.
# NOTE(review): the note at the top of this notebook lists subjects 57, 93, 16,
# 87, 8, 21, 88, 84, 23 and 4 as removed, yet all of them still appear in the
# two lists below - confirm whether they should be filtered out here.
ha_participant_indices = [
    '4', '6', '7', '8', '10', '12', '15', '16', '18', '22', '26', '27', '29', '31', '32', '33', '35', '42', '45', '47', '48', '49', '54', '55', '66', '69'
]

la_participant_indices = [
    '14', '21', '23', '25', '34', '39', '43', '46', '51', '57', '71', '72', '77', '78', '79', '80', '82', '83', '84', '85', '87', '88', '89', '91', '92', '93'
]

# Concatenate instead of calling list.extend(): extend() mutates the HA list in
# place and returns None, which previously left SUBJECTS set to None and turned
# ha_participant_indices into the combined HA + LA list.
SUBJECTS = ha_participant_indices + la_participant_indices

In [None]:
# One-off data preparation: convert the raw APD data under APD_PATH into the
# format the pipeline expects (the original comment calls the pipeline
# "CAREforMe" - presumably an earlier name for AffectEval; confirm).
# Only needs to be run once locally.
# Status: COMPLETE, do not re-run.
# NOTE(review): the call below is not guarded, so "Restart & Run All" executes
# it again despite the status line above - skip this cell when re-running.
apd.reformat_and_save_data(APD_PATH)

In [2]:
# Load the SUDS annotations for interactive inspection. generate_labels()
# below loads its own copy on every call, and no other visible cell reads this
# module-level `labels`.
labels = apd.get_suds_labels(APD_PATH)
# print(labels)

def generate_labels(data):
    """
    Generate binary labels for APD based on the SUDS questionnaire and the input data format.

    One label is produced per row of ``data``: the row's subject selects the
    annotation row and the row's phase selects the annotation column.

    Parameters
    --------------------
    :param data: Features to generate labels for. Must include a "subject" column
        (values convertible to int) and a "Phase" column whose values are column
        names of the table returned by ``apd.get_suds_labels``.
    :type data: pd.DataFrame

    Returns
    --------------------
    Generated labels (1-D ``np.ndarray`` with one entry per row of ``data``, in
    row order) and the unmodified input data.

    Raises
    --------------------
    ValueError if a subject does not have exactly one annotation row.
    KeyError if a phase is not a column of the annotations.
    """
    annotations = apd.get_suds_labels(APD_PATH)
    labels = []
    for raw_subject, phase in zip(data["subject"], data["Phase"]):
        subject = int(raw_subject)
        label_row = annotations.loc[annotations["subject"] == subject]
        # Exactly one annotation row is required. With zero or several rows
        # the selection below is not a single value, and the flattened label
        # vector would no longer line up with the rows of `data`.
        if len(label_row) != 1:
            raise ValueError(
                f"Expected exactly one SUDS annotation row for subject {subject}, "
                f"found {len(label_row)}."
            )
        labels.append(label_row[phase].iloc[0])
    labels = np.array(labels).ravel()
    return labels, data

### Binary stress classification

In [None]:
# Build pipeline with default preprocessing and feature extraction methods

from affecteval.signal_acquisition.signal_acquisition import SignalAcquisition
from affecteval.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from affecteval.feature_extractor.feature_extractor import FeatureExtractor
from affecteval.label_generator.label_generator import LabelGenerator
from affecteval.feature_selector.feature_selector import FeatureSelector
from affecteval.classification.estimator import Estimator
from affecteval.pipeline.pipeline import Pipeline

from sklearn.svm import SVC


# Signals to load and the candidate features offered to the feature selector.
signal_types = [signals.Signals.ECG, signals.Signals.EDA]
feature_names = [
    signals.Features.HR,
    signals.Features.RMSSD,
    signals.Features.SDNN,
    signals.Features.MEAN_SCL,
    signals.Features.SCR_RATE,
]

# Labels come from the SUDS-based generate_labels() defined earlier in this notebook.
label_gen = generate_labels

# Pipeline layers, built in the order they are chained below.
signal_acq = SignalAcquisition(
    source_folder=SOURCE_FOLDER,
    signal_types=signal_types,
)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=100)
feature_extractor = FeatureExtractor()
label_generator = LabelGenerator(label_generation_method=label_gen)

# A single SVC instance is shared by the feature selector and all estimators.
model = SVC()
feature_selector = FeatureSelector(model, feature_names, num_features=3)
# The first Estimator argument looks like a mode flag (0 / 1 / 2 matching the
# node names) - TODO confirm against the Estimator class.
estimator_train = Estimator(0, model, name="SVC training")
estimator_test = Estimator(1, model, name="SVC testing")
estimator_train_val_test = Estimator(2, model, name="SVC train-val-test")

# Only the combined train-val-test estimator is wired into the pipeline;
# estimator_train and estimator_test are created but not used in this run.
pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [
        signal_acq,
        signal_preprocessor,
        feature_extractor,
        label_generator,
        feature_selector,
        estimator_train_val_test,
    ]
)

# Run the whole pipeline five times and aggregate the per-run scores.
accs, aucs = [], []

for i in range(5):
    # We leave it up to the user to handle the final output of the pipeline.
    out = pipeline.run()

    # out[1] and out[2] are treated as true and predicted labels.
    y_true, y_pred = out[1], out[2]

    acc = accuracy_score(y_true, y_pred)
    # NOTE(review): y_pred is also fed to accuracy_score, so it is presumably
    # hard class predictions rather than scores - confirm this is the intended
    # input for roc_auc_score.
    auc = roc_auc_score(y_true, y_pred)

    accs.append(acc)
    aucs.append(auc)

print(f"\nMean accuracy: {np.mean(accs)}")
print(f"STD accuracy: {np.std(accs)}")
print(f"Mean AUC score: {np.mean(aucs)}")
print(f"STD AUC score: {np.std(aucs)}")

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...


KeyboardInterrupt: 

### Subject identification

In [None]:
# Build pipeline with default preprocessing and feature extraction methods

from affecteval.signal_acquisition.signal_acquisition import SignalAcquisition
from affecteval.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from affecteval.feature_extractor.feature_extractor import FeatureExtractor
from affecteval.label_generator.label_generator import LabelGenerator
from affecteval.feature_selector.feature_selector import FeatureSelector
from affecteval.classification.estimator import Estimator
from affecteval.pipeline.pipeline import Pipeline

from sklearn.svm import SVC


# Signals to load and the candidate features offered to the feature selector.
signal_types = [signals.Signals.ECG, signals.Signals.EDA]
feature_names = [
    signals.Features.HR,
    signals.Features.RMSSD,
    signals.Features.SDNN,
    signals.Features.MEAN_SCL,
    signals.Features.SCR_RATE,
]

# Label generation is selected by name here instead of by a callable.
label_gen = "subject"

# Pipeline layers, built in the order they are chained below.
signal_acq = SignalAcquisition(
    source_folder=SOURCE_FOLDER,
    signal_types=signal_types,
)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=100)
feature_extractor = FeatureExtractor()
label_generator = LabelGenerator(label_generation_method=label_gen)

# A single SVC instance is shared by the feature selector and all estimators.
model = SVC()
feature_selector = FeatureSelector(model, feature_names, num_features=3)
# The first Estimator argument looks like a mode flag (0 / 1 / 2 matching the
# node names) - TODO confirm against the Estimator class.
estimator_train = Estimator(0, model, name="SVC training")
estimator_test = Estimator(1, model, name="SVC testing")
estimator_train_val_test = Estimator(2, model, name="SVC train-val-test")

# Only the combined train-val-test estimator is wired into the pipeline;
# estimator_train and estimator_test are created but not used in this run.
pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [
        signal_acq,
        signal_preprocessor,
        feature_extractor,
        label_generator,
        feature_selector,
        estimator_train_val_test,
    ]
)

# Single end-to-end run; handling of the final output is left to the caller.
out = pipeline.run()

# out[1] and out[2] are treated as true and predicted labels.
y_true, y_pred = out[1], out[2]

acc = accuracy_score(y_true, y_pred)

print(f"\nAccuracy: {acc}")

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 11.389 s
Running node Feature Extractor...


100%|██████████| 42/42 [00:14<00:00,  2.92it/s]


- Elapsed time: 14.386 s
Running node Label Generator...
- Elapsed time: 0.0 s
Running node Feature Selector...
- Elapsed time: 1.567 s
Running node SVC train-val-test...
Cross-validation scores: [0.06666667 0.08333333 0.06666667 0.06666667 0.05      ]
- Elapsed time: 0.055 s

Accuracy: 0.039473684210526314




### Phase identification

In [None]:
# Build pipeline with default preprocessing and feature extraction methods

from affecteval.signal_acquisition.signal_acquisition import SignalAcquisition
from affecteval.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from affecteval.feature_extractor.feature_extractor import FeatureExtractor
from affecteval.label_generator.label_generator import LabelGenerator
from affecteval.feature_selector.feature_selector import FeatureSelector
from affecteval.classification.estimator import Estimator
from affecteval.pipeline.pipeline import Pipeline

from sklearn.svm import SVC


# Signals to load and the candidate features offered to the feature selector.
signal_types = [signals.Signals.ECG, signals.Signals.EDA]
feature_names = [
    signals.Features.HR,
    signals.Features.RMSSD,
    signals.Features.SDNN,
    signals.Features.MEAN_SCL,
    signals.Features.SCR_RATE,
]

# Label generation is selected by name here instead of by a callable.
label_gen = "phase"

# Pipeline layers, built in the order they are chained below.
signal_acq = SignalAcquisition(
    source_folder=SOURCE_FOLDER,
    signal_types=signal_types,
)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=100)
feature_extractor = FeatureExtractor()
label_generator = LabelGenerator(label_generation_method=label_gen)

# A single SVC instance is shared by the feature selector and all estimators.
model = SVC()
feature_selector = FeatureSelector(model, feature_names, num_features=3)
# The first Estimator argument looks like a mode flag (0 / 1 / 2 matching the
# node names) - TODO confirm against the Estimator class.
estimator_train = Estimator(0, model, name="SVC training")
estimator_test = Estimator(1, model, name="SVC testing")
estimator_train_val_test = Estimator(2, model, name="SVC train-val-test")

# Only the combined train-val-test estimator is wired into the pipeline;
# estimator_train and estimator_test are created but not used in this run.
pipeline = Pipeline()
pipeline.generate_nodes_from_layers(
    [
        signal_acq,
        signal_preprocessor,
        feature_extractor,
        label_generator,
        feature_selector,
        estimator_train_val_test,
    ]
)

# Single end-to-end run; handling of the final output is left to the caller.
out = pipeline.run()

# out[1] and out[2] are treated as true and predicted labels.
y_true, y_pred = out[1], out[2]

acc = accuracy_score(y_true, y_pred)

print(f"\nAccuracy: {acc}")

Running node Signal Acquisition...
- Elapsed time: 0.0 s
Running node Signal Preprocessor...
- Elapsed time: 11.019 s
Running node Feature Extractor...


100%|██████████| 42/42 [00:14<00:00,  2.97it/s]


- Elapsed time: 14.153 s
Running node Label Generator...
- Elapsed time: 0.001 s
Running node Feature Selector...
- Elapsed time: 0.246 s
Running node SVC train-val-test...
Cross-validation scores: [0.16666667 0.11666667 0.11666667 0.08333333 0.18333333]
- Elapsed time: 0.03 s

Accuracy: 0.10526315789473684
