In [28]:
import os
import pickle
import neurokit2 as nk
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


data_path = "/Users/lzy/Downloads/WESAD"
subjects = ["S2", "S3", "S5", "S8", "S11", "S17"]
label_map = {1: "Calm", 2: "Sad", 3: "Happy"}

sampling_rate = 700
segment_length = sampling_rate * 60 * 10

features = []
labels = []

for subj in subjects:
    subj_path = os.path.join(data_path, subj, subj + ".pkl")
    with open(subj_path, 'rb') as f:
        data = pickle.load(f, encoding='latin1')

    ecg_raw = data['signal']['chest']['ECG']
    label_raw = data['label']

    for label_value, emotion in label_map.items():
        indices = (label_raw == label_value).nonzero()[0]
        if len(indices) == 0:
            continue

        start_idx = indices[0]
        end_idx = min(start_idx + segment_length, len(ecg_raw))
        ecg_segment = ecg_raw[start_idx:end_idx].squeeze()

        try:
            
            signals, info = nk.ecg_process(ecg_segment, sampling_rate=sampling_rate)
            rpeaks = info["ECG_R_Peaks"]

           
            hrv_features = nk.hrv(signals, rpeaks=rpeaks, sampling_rate=sampling_rate, show=False)

            RMSSD = hrv_features["HRV_RMSSD"].values[0]
            SDNN = hrv_features["HRV_SDNN"].values[0]
            LF_HF = hrv_features.get("HRV_LFHF", pd.Series([0.0])).values[0]
            BPM = signals["ECG_Rate"].dropna().mean()

            features.append([RMSSD, SDNN, LF_HF, BPM])
            labels.append(emotion)

        except Exception as e:
            print(f"[!] Error in handling the {subj} label {label_value} : {e}")


df = pd.DataFrame(features, columns=["RMSSD", "SDNN", "LF_HF", "BPM"])
df["Emotion"] = labels

scaler = StandardScaler()
X = scaler.fit_transform(df[["RMSSD", "SDNN", "LF_HF", "BPM"]])

le = LabelEncoder()
y = le.fit_transform(df["Emotion"])


X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)


clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


print("\nAccuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.8333333333333334


In [29]:
from joblib import dump

dump(clf, "random_forest_model.pkl")
dump(scaler, "scaler.pkl")
dump(le, "label_encoder.pkl")

['label_encoder.pkl']

In [30]:
import os
import pickle
import json
import neurokit2 as nk
import pandas as pd
from joblib import load

clf = load('random_forest_model.pkl') 
scaler = load('scaler.pkl')            
le = load('label_encoder.pkl')         

data_path = "/Users/lzy/Downloads/WESAD"
subjects = ["S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S11", "S13", "S14", "S15", "S16", "S17"]
sampling_rate = 700
segment_length = sampling_rate * 60 * 10 

results = {}

for subj in subjects:
    subj_path = os.path.join(data_path, subj, f"{subj}.pkl")
    try:
        with open(subj_path, 'rb') as f:
            data = pickle.load(f, encoding='latin1')
        
        ecg_raw = data['signal']['chest']['ECG'].squeeze()
        
        for seg_idx in range(0, len(ecg_raw), segment_length):
            ecg_segment = ecg_raw[seg_idx:seg_idx + segment_length]

            try:
                signals, info = nk.ecg_process(ecg_segment, sampling_rate=sampling_rate)
                rpeaks = info["ECG_R_Peaks"]
                
                hrv_time = nk.hrv_time(rpeaks, sampling_rate=sampling_rate, show=False)
                hrv_freq = nk.hrv_frequency(rpeaks, sampling_rate=sampling_rate, show=False)

                RMSSD = hrv_features.get("HRV_RMSSD", [0.0])[0]
                SDNN = hrv_features.get("HRV_SDNN", [0.0])[0]
                LF_HF = hrv_features.get("HRV_LFHF", [0.0])[0]
                BPM = round(signals["ECG_Rate"].dropna().mean())

                features_df = pd.DataFrame([[RMSSD, SDNN, LF_HF, BPM]], 
                                          columns=["RMSSD", "SDNN", "LF_HF", "BPM"])
                features = scaler.transform(features_df)
                pred = clf.predict(features)[0]
                emotion = le.inverse_transform([pred])[0]

                results[str(int(BPM))] = emotion

            except Exception as e:
                print(f"Error processing {subj} segment {seg_idx//segment_length}: {str(e)}")
                continue
                
    except Exception as e:
        print(f"Error loading {subj}: {str(e)}")
        continue

with open("predictions.json", "w") as f:
    json.dump(results, f, indent=4)

print("The result has been saved as predictions.json")

The result has been saved as predictions.json
