In [None]:
# TRAINING
from src.config import load_config
from src.data_loading import load_dataset
from src.preprocessing import preprocess_dataset
from src.windows import create_windows
from src.features import extract_features_from_window
from src.models_ml import build_ml_model
from src.evaluate import evaluate_ml
from src.explain import get_shap_explainer
import pandas as pd
import numpy as np

# Read the experiment configuration and pull out the sections used below.
cfg = load_config("configs/wesad_stress.yaml")
dataset_cfg = cfg["dataset"]
preproc_cfg = cfg["preprocessing"]
window_cfg = cfg["windowing"]

# Load the raw recordings, then run the configured preprocessing on them.
raw = load_dataset(dataset_cfg["name"], dataset_cfg["raw_path"])
processed = preprocess_dataset(raw, preproc_cfg, dataset_cfg["interim_path"])

# Cut the preprocessed signals into windows; size and stride are given in seconds.
windows = create_windows(
    processed,
    preproc_cfg["sampling_rate"],
    window_cfg["window_size_sec"],
    window_cfg["stride_sec"],
)
X, y = windows["X"], windows["y"]

# Channel names are read from the first entry of `processed`; every key except
# "label" counts as a signal channel.
# NOTE(review): assumes all entries expose the same channels - confirm.
first_key = list(processed.keys())[0]
channels = [name for name in processed[first_key].keys() if name != "label"]

# Extract features
# One feature record per window, in the same order as X (and therefore as y).
sampling_rate = cfg["preprocessing"]["sampling_rate"]
feat_list = [extract_features_from_window(window, channels, sampling_rate) for window in X]
features = pd.DataFrame(feat_list)

# Keep only windows whose features are all present, and drop the *same* rows
# from the labels. Calling dropna() and then taking y[:len(df)] would pair every
# row after the first dropped window with the label of a different window.
valid_rows = features.notna().all(axis=1).to_numpy()
df = features.loc[valid_rows].copy()
# Boolean indexing also surfaces a length mismatch between X and y as an error
# instead of silently truncating the labels.
df["label"] = np.asarray(y)[valid_rows]

# Plain arrays for scikit-learn: feature matrix and label vector.
Xf = df.drop("label", axis=1).values
yf = df["label"].values

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Hold out 20% of the windows for testing, stratified by label, with a fixed
# seed so the split is reproducible.
# NOTE(review): if stride_sec < window_size_sec the windows overlap, so a random
# split may put near-duplicate windows on both sides - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    Xf,
    yf,
    test_size=0.2,
    random_state=42,
    stratify=yf,
)

# Learn the scaling statistics on the training split only, then apply the same
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build the model described by the config, train it and report test metrics.
model = build_ml_model(cfg)
model.fit(X_train, y_train)
evaluate_ml(model, X_test, y_test)

In [None]:
# SHAP
import shap

# Column names of the feature table (everything except the label), used to
# label the plot.
feature_names = df.drop(columns="label").columns.tolist()

# Explain the trained model on a 200-row subsample of the scaled training data.
# NOTE(review): TreeExplainer only supports tree-based models; the estimator
# returned by build_ml_model is not visible here - confirm it is one.
X_sample = shap.sample(X_train, 200)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_sample)

shap.summary_plot(
    shap_values,
    X_sample,
    feature_names=feature_names,
)