This notebook is used for model training

In [5]:
import pickle
import neurokit2 as nk
import pandas as pd
import os

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


In [107]:
import os
import pickle
import pandas as pd
import neurokit2 as nk

data_path = './'
segment_duration_sec = 10
sampling_rate = 700  # ECG信号采样率700Hz
segment_length = int(segment_duration_sec * sampling_rate)  # 小段采样点数量

In [108]:
features_list = []

subject_list = ['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S13', 'S14', 'S15', 'S16', 'S17']

for subject in subject_list:
    try:
        with open(os.path.join(data_path, subject, f'{subject}.pkl'), 'rb') as file:
            data = pickle.load(file, encoding='latin1')

        ecg_signal = data['signal']['chest']['ECG'].squeeze()
        labels = data['label']

        for emotion_label in [1, 2, 3]:  # 只处理静息、压力、娱乐三种情绪
            idx = (labels == emotion_label)
            ecg_segment = ecg_signal[idx]

            # 分小段处理
            total_segments = len(ecg_segment) // segment_length

            for i in range(total_segments):
                segment = ecg_segment[i * segment_length : (i + 1) * segment_length]

                # 如果小段太短，跳过
                if len(segment) < segment_length * 0.8:
                    continue

                # 提取小段的HRV特征
                signals, info = nk.ecg_process(segment, sampling_rate=sampling_rate)
                rpeaks_df = pd.DataFrame({"ECG_R_Peaks": info["ECG_R_Peaks"]})
                hrv_features = nk.hrv_time(rpeaks_df, sampling_rate=sampling_rate)

                row = {
                    'HRV_SDNN': hrv_features['HRV_SDNN'].values[0],
                    'Subject': subject,
                    'Emotion_Label': emotion_label,
                    'Segment_Start_Time_sec': i * segment_duration_sec 
                }
                features_list.append(row)

    except Exception as e:
        print(f"跳过{subject}, 出错原因: {e}")
        continue

dataset = pd.DataFrame(features_list)

print(f"总共提取了 {len(dataset)} 条小段数据。")
print(dataset.head())


  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.angle(scipy.signal.hilbert(signal)), pi2)
  prophase = np.mod(np.an

总共提取了 3301 条小段数据。
     HRV_SDNN Subject  Emotion_Label  Segment_Start_Time_sec
0   74.714878      S2              1                       0
1   55.837223      S2              1                      10
2  100.542745      S2              1                      20
3  100.516523      S2              1                      30
4   94.079862      S2              1                      40


In [39]:
dataset['HRV_SDNN_lag1'] = dataset['HRV_SDNN'].shift(1)
dataset['HRV_SDNN_lag2'] = dataset['HRV_SDNN'].shift(2)
cols = ['HRV_SDNN', 'HRV_SDNN_lag1', 'HRV_SDNN_lag2']

In [62]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# 1. 特征和标签
X = dataset[cols]  # 🚀 用3个特征
y = dataset['Emotion_Label'] - 1

# 均值填充
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# 2. 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# 3. 切分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# 4. 训练模型
model = RandomForestClassifier()
model.fit(X_train, y_train)

# 5. 预测并评估
y_pred = model.predict(X_test)
print("模型预测准确率:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['Baseline', 'Stress', 'Amusement']))

模型预测准确率: 0.5351681957186545
              precision    recall  f1-score   support

    Baseline       0.59      0.81      0.68       182
      Stress       0.40      0.31      0.35        85
   Amusement       0.18      0.03      0.06        60

    accuracy                           0.54       327
   macro avg       0.39      0.38      0.36       327
weighted avg       0.46      0.54      0.48       327



In [63]:
import xgboost as xgb

model = xgb.XGBClassifier(
    objective='multi:softmax',  # 多分类
    num_class=3,
    eval_metric='mlogloss',
    max_depth=5,
    learning_rate=0.1,
    n_estimators=100,
    use_label_encoder=False
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("XGBoost模型预测准确率:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['Baseline', 'Stress', 'Amusement']))


XGBoost模型预测准确率: 0.5259938837920489
              precision    recall  f1-score   support

    Baseline       0.57      0.81      0.67       182
      Stress       0.38      0.25      0.30        85
   Amusement       0.29      0.07      0.11        60

    accuracy                           0.53       327
   macro avg       0.41      0.37      0.36       327
weighted avg       0.47      0.53      0.47       327



TS model

In [114]:
import numpy as np

window_size = 5

X = X.fillna(0)

X_sequences = []
y_sequences = []

for i in range(len(X) - window_size):
    if (dataset.loc[i, 'Subject'] == dataset.loc[i + window_size - 1, 'Subject']) and \
       (dataset.loc[i, 'Emotion_Label'] == dataset.loc[i + window_size - 1, 'Emotion_Label']):
        
        X_seq = X[i:i+window_size]
        y_seq = y[i + window_size - 1]
        X_sequences.append(X_seq)
        y_sequences.append(y_seq)

X_sequences = np.array(X_sequences)
y_sequences = np.array(y_sequences)


In [115]:
X_train, X_test, y_train, y_test = train_test_split(
    X_sequences, y_sequences, test_size=0.2, random_state=42, stratify=y_sequences
)
print(f"训练集大小：{X_train.shape}, 测试集大小：{X_test.shape}")

训练集大小：(817, 5, 3), 测试集大小：(205, 5, 3)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential()
model.add(LSTM(64, input_shape=(window_size, 3)))
model.add(Dense(3, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/50


2025-04-28 00:25:26.282135: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-28 00:25:26.430827: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-28 00:25:26.552092: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2025-04-28 00:25:27.240236: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-28 00:25:27.305890: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


In [130]:
from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights = dict(enumerate(class_weights))
print(class_weights)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    class_weight=class_weights,
    verbose=1
)

{0: 0.6175359032501889, 1: 1.1115646258503402, 2: 2.078880407124682}
Epoch 1/50


2025-04-28 00:25:42.078914: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-28 00:25:42.231712: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


 1/21 [>.............................] - ETA: 14s - loss: 1.0306 - accuracy: 0.5625

2025-04-28 00:25:42.361052: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


In [131]:
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

cm = confusion_matrix(y_test, y_pred)

print("LSTM模型预测准确率:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['Baseline', 'Stress', 'Amusement']))
cm

LSTM模型预测准确率: 0.6585365853658537
              precision    recall  f1-score   support

    Baseline       0.67      0.89      0.76       111
      Stress       0.76      0.52      0.62        61
   Amusement       0.27      0.12      0.17        33

    accuracy                           0.66       205
   macro avg       0.57      0.51      0.52       205
weighted avg       0.63      0.66      0.63       205



2025-04-28 00:25:44.184758: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-28 00:25:44.231153: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


array([[99,  6,  6],
       [24, 32,  5],
       [25,  4,  4]])

In [83]:
dataset.HRV_SDNN.describe()

count    1087.000000
mean       62.256580
std        30.766753
min         2.851807
25%        40.816451
50%        58.267642
75%        77.565408
max       215.585800
Name: HRV_SDNN, dtype: float64

In [84]:
model.save('lstm_emotion_model.h5')