### An example implementation of the CAREforMe pipeline using the WESAD dataset

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Put the parent directory first on the import path, immediately followed by
# its care_for_me subdirectory, so both are searched before anything else.
module_path = os.path.abspath("..")
sys.path[:0] = [module_path, os.path.abspath("../care_for_me")]

import numpy as np
import pandas as pd
import wesad

from care_for_me import signals


# Root of the local CAREforMe project folder. Set the CAREFORME_ROOT environment
# variable to point the notebook at a different location without editing this
# cell; when the variable is unset, the original development path is used.
ROOT_DIR = os.environ.get(
    "CAREFORME_ROOT", "C:\\Users\\zhoux\\Desktop\\Projects\\CAREforMe"
)
DATA_DIR = os.path.join(ROOT_DIR, "data")
WESAD_PATH = os.path.join(DATA_DIR, "WESAD")
METRICS = os.path.join(DATA_DIR, "metrics", "WESAD")

# Subject numbers 2-11 and 13-17 (12 is not included), also kept as strings.
subject_indices = [*range(2, 12), *range(13, 18)]
SUBJECTS = [str(i) for i in subject_indices]

In [None]:
# Format data to be compatible with CAREforMe pipeline.
# Only needs to be run once locally.
# Status: COMPLETE. The call is kept behind a flag so that re-running the whole
# notebook does not repeat this step; set the flag to True to reformat the
# data again.
REFORMAT_WESAD_DATA = False
if REFORMAT_WESAD_DATA:
    wesad.reformat_and_save_data()

In [3]:
# Print the labels that wesad.generate_labels returns for threshold="dynamic".
generated_labels = wesad.generate_labels(threshold="dynamic")
print(generated_labels)

    subject  Base_STAI  TSST_STAI  Medi_1_STAI  Fun_STAI  Medi_2_STAI
0         2          0          1            0         0            0
1         3          0          1            0         0            0
2         4          0          1            1         0            1
3         5          0          1            0         1            0
4         6          0          1            0         1            0
5         7          0          1            0         0            1
6         8          0          1            1         0            0
7         9          0          1            0         1            0
8        10          0          1            0         0            0
9        11          0          1            0         0            0
10       13          1          1            0         0            1
11       14          0          1            0         0            0
12       15          1          1            0         0            0
13       16         

In [61]:
# Build pipeline with default preprocessing and feature extraction methods
# Asynchronous directed acyclic graph

# from care_for_me import feature_extractor
# from care_for_me import signal_acquisition
# from care_for_me import signal_preprocessor
# from care_for_me import pipeline
import random

from care_for_me.feature_extractor.feature_extractor import FeatureExtractor
from care_for_me.signal_acquisition.signal_acquisition import SignalAcquisition
from care_for_me.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from care_for_me.feature_selector.feature_selector import FeatureSelector
from care_for_me.classification.estimator import Estimator
from care_for_me.pipeline.pipeline import Node, Pipeline

from sklearn.svm import SVC

# TODO: WESAD data includes phases labeled Medi_1 and Medi_2. Currently, the filename parsing
# treats these two phases as the same; need to fix.

# Reuse the WESAD location configured at the top of the notebook instead of
# repeating the absolute path here; adjust ROOT_DIR there for your local setup.
wesad_path = WESAD_PATH
source_folder = os.path.join(wesad_path, "formatted")

# Signal types handed to SignalAcquisition.
signal_types = [
    signals.Signals.ECG,
    signals.Signals.EDA,
]

# Feature names handed to FeatureSelector.
feature_names = [
    signals.Features.HR,
    signals.Features.RMSSD,
    signals.Features.SDNN,
    signals.Features.MEAN_SCL,
    signals.Features.SCR_RATE,
]

# Pipeline stages, constructed in the order they are later chained.
signal_acq = SignalAcquisition(source_folder=source_folder, signal_types=signal_types)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=100)
feature_extractor = FeatureExtractor()

# Random 0/1 labels, one per entry in wesad.SUBJECTS. They are drawn from a
# dedicated, seeded generator so that every run of this cell produces the same
# labels and the global `random` state is left untouched.
LABEL_SEED = 0
label_rng = random.Random(LABEL_SEED)
labels = [label_rng.choice([0, 1]) for _ in range(len(wesad.SUBJECTS))]

# The same SVC instance is passed to the feature selector and to every estimator.
model = SVC()
feature_selector = FeatureSelector(model, feature_names, labels, num_features=3)

# NOTE(review): the second positional argument (0 / 1 / 2) appears to select the
# estimator mode named in `name` — confirm against the Estimator class.
estimator_train = Estimator(labels, 0, model, name="SVC training")
estimator_test = Estimator(labels, 1, model, name="SVC testing")
estimator_train_val_test = Estimator(labels, 2, model, name="SVC train-val-test")

# Assemble the pipeline from an ordered list of layers and run it.
pipeline = Pipeline()

# Active configuration: acquisition -> preprocessing -> feature extraction.
# The two commented-out lists are alternative configurations that additionally
# include the feature selector and the estimator(s).
pipeline.generate_nodes_from_layers(
    [signal_acq, signal_preprocessor, feature_extractor]
    # [signal_acq, signal_preprocessor, feature_extractor, feature_selector, estimator_train, estimator_test]
    # [signal_acq, signal_preprocessor, feature_extractor, feature_selector, estimator_train_val_test]
)

# We leave it up to the user to handle the final output of the pipeline.
# NOTE(review): the unpacking below treats the output as
# (fitted model, true labels, predictions), which presumably matches a
# pipeline that ends in an Estimator; the active layer list stops at feature
# extraction — confirm what run() returns in that case.
out = pipeline.run()
fitted_model = out[0]
y_true = out[1]
y_pred = out[2]

Running node Signal Acquisition...
Elapsed time for Signal Acquisition: 0.0
Running node Signal Preprocessor...
Elapsed time for Signal Preprocessor: 0.7106266021728516
Running node Feature Extractor...
10
['Fun', 'Fun', 'Base', 'Base', 'TSST', 'TSST', 'Medi', 'Medi', 'Medi', 'Medi']
5
[[99.55597857993477], [94.39108353141918], [87.48371724269373], [100.05424360496107], [91.9960558719887]]
5
[[10.870900173777482], [22.85939028359967], [73.39326555326059], [6.871842709362768], [11.964232409262916]]
5
[[20.58626040948328], [47.85509737559403], [85.8835382530153], [15.137884778117304], [14.793821103999562]]
5
[[861.5115325691801], [846.6819291521989], [866.146652375675], [848.2536197245486], [853.3276361926398]]
5
[[3.0], [6.0], [3.0], [2.0], [4.0]]


ValueError: All arrays must be of the same length