### An example implementation of the CAREforMe pipeline using the WESAD dataset

In [72]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Put the package directory and then the repository root at the front of
# sys.path so the local `care_for_me` and `wesad` modules are importable.
# The root is inserted last and therefore ends up first on the path.
for module_path in map(os.path.abspath, ("../care_for_me", "..")):
    sys.path.insert(0, module_path)

import numpy as np
import pandas as pd
import wesad

from care_for_me import signals

# Subject IDs used in this notebook: 2-11 and 13-17 (IDs 1 and 12 are not part of the range).
subject_indices = list(range(2, 12)) + list(range(13, 18))
SUBJECTS = [str(i) for i in subject_indices]

# NOTE: ROOT_DIR is the only path that depends on your own file structure.
# Set the CAREFORME_ROOT_DIR environment variable to point at your checkout,
# or edit the fallback below; every other path is derived from ROOT_DIR.
ROOT_DIR = os.environ.get(
    "CAREFORME_ROOT_DIR",
    "/Users/emilyzhou/Desktop/Research/CAREForMe/",
)
DATA_DIR = os.path.join(ROOT_DIR, "data")
WESAD_PATH = os.path.join(DATA_DIR, "WESAD")
# Folder globbed for per-subject CSV files and handed to SignalAcquisition.
SOURCE_FOLDER = os.path.join(WESAD_PATH, "formatted")
# Folder passed to wesad.generate_labels to build the labels.
ANNOTATIONS = os.path.join(WESAD_PATH, "annotations")
METRICS = os.path.join(DATA_DIR, "metrics", "WESAD")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Format data to be compatible with CAREforMe pipeline
# Only needs to be run once locally 
# Status: COMPLETE, do not re-run
import glob
import os


# wesad.reformat_and_save_data(WESAD_PATH)

# Rename Medi_1 and Medi_2 files to Medi1 and Medi2.
# Only the base name is rewritten: splitting the full path on "_" would
# also cut through any directory that contains an underscore and would
# drop everything after the fourth "_"-separated piece.
files_to_rename = glob.glob(os.path.join(SOURCE_FOLDER, "*/*Medi_*.csv"))
for file in files_to_rename:
    directory, file_name = os.path.split(file)
    # The glob guarantees "Medi_" occurs in the base name; collapse the first one.
    new_name = os.path.join(directory, file_name.replace("Medi_", "Medi", 1))
    os.rename(file, new_name)

In [98]:
# print(wesad.generate_labels(ANNOTATIONS, threshold="dynamic"))

def generate_labels(data):
    """Build one label per row of ``data`` from the WESAD annotations.

    The annotation table is produced by ``wesad.generate_labels`` with
    ``threshold="dynamic"``. For each row, the entries of that table whose
    ``"subject"`` value equals the row's subject (cast to ``int``) are
    selected, and the column named by the row's ``"Phase"`` value is taken
    as the label.

    Parameters
    ----------
    data
        Table with ``"subject"`` and ``"Phase"`` columns, indexable with
        ``.iloc`` (presumably a pandas DataFrame — confirm against the
        LabelGenerator caller).

    Returns
    -------
    tuple
        ``(labels, data)``: the flattened NumPy array of collected labels
        and the input ``data`` object, returned unmodified.
    """
    annotation_table = wesad.generate_labels(ANNOTATIONS, threshold="dynamic")

    def _label_for(row):
        # Select the annotation entries for this row's subject, then pick
        # the column that corresponds to this row's phase.
        subject_id = int(data["subject"].iloc[row])
        phase_name = data["Phase"].iloc[row]
        matches = annotation_table.loc[(annotation_table["subject"] == subject_id)]
        return matches[phase_name]

    per_row = [_label_for(row) for row in range(data.shape[0])]
    return np.array(per_row).ravel(), data

In [99]:
# Build pipeline with default preprocessing and feature extraction methods
# Asynchronous directed acyclic graph

# from care_for_me import feature_extractor
# from care_for_me import signal_acquisition
# from care_for_me import signal_preprocessor
# from care_for_me import pipeline
import random

from care_for_me.signal_acquisition.signal_acquisition import SignalAcquisition
from care_for_me.signal_preprocessor.signal_preprocessor import SignalPreprocessor
from care_for_me.feature_extractor.feature_extractor import FeatureExtractor
from care_for_me.label_generator.label_generator import LabelGenerator
from care_for_me.feature_selector.feature_selector import FeatureSelector
from care_for_me.classification.estimator import Estimator
from care_for_me.pipeline.pipeline import Pipeline

from sklearn.svm import SVC

# TODO: WESAD data includes phases labeled Medi_1 and Medi_2. Currently, the filename parsing 
# treats these two phases as the same; need to fix.

# Signal types requested from the signal acquisition step.
signal_types = [signals.Signals.ECG, signals.Signals.EDA]

# Candidate features handed to the feature selector.
feature_names = [
    signals.Features.HR,
    signals.Features.RMSSD,
    signals.Features.SDNN,
    signals.Features.MEAN_SCL,
    signals.Features.SCR_RATE,
]

# Path to the WESAD annotation folder (kept for reference; the label
# generator below uses the `generate_labels` callable instead).
labels = os.path.join(WESAD_PATH, "annotations")

# Label generation strategy handed to LabelGenerator. The disabled string
# options are earlier alternatives; the active choice is the custom callable.
# label_gen = "phase"
# label_gen = "subject"
label_gen = generate_labels

# Pipeline stages, in execution order.
signal_acq = SignalAcquisition(signal_types=signal_types, source_folder=SOURCE_FOLDER)
signal_preprocessor = SignalPreprocessor(skip=True, resample_rate=100)
feature_extractor = FeatureExtractor()
label_generator = LabelGenerator(label_generation_method=label_gen)

# One SVC instance is shared by the feature selector and every estimator node.
model = SVC()
feature_selector = FeatureSelector(model, feature_names, num_features=3)

# NOTE(review): the first argument looks like a mode selector (0 = training,
# 1 = testing, 2 = train-val-test, judging by the node names) — confirm
# against the Estimator class.
estimator_train = Estimator(0, model, name="SVC training")
estimator_test = Estimator(1, model, name="SVC testing")
# Uses the same (mode, model, name=...) call form as the two estimators above;
# previously the annotations path was passed in the mode position.
estimator_train_val_test = Estimator(2, model, name="SVC train-val-test")

pipeline = Pipeline()

# Alternative layer lists are kept below for quick experimentation; only the
# uncommented list is used to build the pipeline.
pipeline.generate_nodes_from_layers(
    # [signal_acq, signal_preprocessor, feature_extractor]
    # [signal_acq, signal_preprocessor, feature_extractor, label_generator, feature_selector]
    [signal_acq, signal_preprocessor, feature_extractor, label_generator, feature_selector, estimator_train, estimator_test]
    # [signal_acq, signal_preprocessor, feature_extractor, feature_selector, estimator_train_val_test]
)

# We leave it up to the user to handle the final output of the pipeline.
out = pipeline.run()

Running node Signal Acquisition...
Elapsed time for Signal Acquisition: 9.5367431640625e-07
Running node Signal Preprocessor...
Elapsed time for Signal Preprocessor: 0.7328920364379883
Running node Feature Extractor...


100%|██████████| 15/15 [00:01<00:00,  8.30it/s]


Elapsed time for Feature Extractor: 1.8093688488006592
Running node Label Generator...
Elapsed time for Label Generator: 0.044515132904052734
Running node Feature Selector...
Elapsed time for Feature Selector: 0.12047791481018066
Running node SVC training...
['heart rate', 'RMSSD', 'SDNN', 'mean SCL', 'SCR rate']
Elapsed time for SVC training: 0.0009889602661132812
Running node SVC testing...
Elapsed time for SVC testing: 0.0006010532379150391


In [100]:
from sklearn.metrics import accuracy_score, f1_score

# Pull ground truth and predictions out of the pipeline result by position.
# NOTE(review): index 0 presumably holds the fitted model — confirm against
# what Pipeline.run() returns.
y_true, y_pred = out[1], out[2]

# Only accuracy is reported; the F1-score is not computed here.
acc = accuracy_score(y_true, y_pred)
print(f"Accuracy: {acc}")

Accuracy: 0.84
