In [27]:
import pickle
import numpy as np
import neurokit2 as nk
import os
import pandas as pd

In [28]:
def categorize_emotion(row):
    if row['Arousal'] > 5 and row['Valence'] > 5:
        return 'HighArousal_HighValence'
    elif row['Arousal'] > 5 and row['Valence'] <= 5:
        return 'HighArousal_LowValence'
    elif row['Arousal'] <= 5 and row['Valence'] > 5:
        return 'LowArousal_HighValence'
    else:
        return 'LowArousal_LowValence'

In [49]:
import os
import pickle
import pandas as pd
import numpy as np
import neurokit2 as nk

data_dir = "data_preprocessed_python/"
all_results = []

sampling_rate = 128  # DEAP 数据的采样率

# 定义情感分类函数
def categorize_emotion(row):
    if row['Arousal'] > 5 and row['Valence'] > 5:
        return 'HighArousal_HighValence'
    elif row['Arousal'] > 5 and row['Valence'] <= 5:
        return 'HighArousal_LowValence'
    elif row['Arousal'] <= 5 and row['Valence'] > 5:
        return 'LowArousal_HighValence'
    else:
        return 'LowArousal_LowValence'

# 初始化一个日志列表，用于记录无法处理的文件和试验
error_log = []

# 提取每次试验的HRV特征
for file_name in os.listdir(data_dir):
    if file_name.endswith(".dat"):  
        file_path = os.path.join(data_dir, file_name)
        print(f"Processing file: {file_path}")
        
        # 加载每个被试的数据
        try:
            with open(file_path, 'rb') as file:
                data = pickle.load(file, encoding='latin1')
        except Exception as e:
            print(f"Failed to load file {file_name}: {e}")
            error_log.append({"File": file_name, "Trial": "ALL", "Error": str(e)})
            continue
        
        # 提取信号和标签
        signals = data['data']
        labels = data['labels']
        
        # 提取ECG信号 (第37通道)
        ecg_signals = signals[:, 37]
        
        # 提取每次试验的HRV特征
        hrv_features = []
        for trial_idx, trial_ecg in enumerate(ecg_signals):
            try:
                # 检查信号是否有效
                if trial_ecg is None or len(trial_ecg) == 0 or np.all(trial_ecg == 0):
                    print(f"Invalid ECG signal in file {file_name}, trial {trial_idx}")
                    error_log.append({"File": file_name, "Trial": trial_idx, "Error": "Invalid ECG signal"})
                    continue

                # 提取R波峰
                rpeaks = nk.ecg_findpeaks(trial_ecg, sampling_rate=sampling_rate)

                # 检查R波峰是否有效
                if 'ECG_R_Peaks' not in rpeaks or len(rpeaks['ECG_R_Peaks']) < 2:
                    print(f"No valid R-peaks detected in file {file_name}, trial {trial_idx}")
                    error_log.append({"File": file_name, "Trial": trial_idx, "Error": "No valid R-peaks detected"})
                    continue

                # 计算HRV指标
                hrv = nk.hrv_time(rpeaks, sampling_rate=sampling_rate)
                hrv_features.append(hrv)

            except Exception as e:
                print(f"Error processing trial {trial_idx} in file {file_name}: {e}")
                error_log.append({"File": file_name, "Trial": trial_idx, "Error": str(e)})
                continue

        # 转为DataFrame格式
        if hrv_features:
            hrv_df = pd.concat(hrv_features, ignore_index=True)
        else:
            print(f"No valid HRV features extracted for file {file_name}")
            error_log.append({"File": file_name, "Trial": "ALL", "Error": "No valid HRV features"})
            continue
                           
        # 将情感标签转为DataFrame
        labels_df = pd.DataFrame(labels, columns=['Arousal', 'Valence', 'Dominance', 'Liking'])
        
        # 合并HRV特征和情感标签
        result_df = pd.concat([hrv_df, labels_df[["Arousal", "Valence"]]], axis=1)
        
        # 添加情感分类列
        result_df['Emotion_Category'] = result_df.apply(categorize_emotion, axis=1)
        
        # 添加被试编号列（如 s01, s02...）
        result_df['Subject'] = file_name.split('.')[0]  # 提取文件名作为被试编号
        
        # 将每个被试的结果添加到总列表中
        all_results.append(result_df)

# 合并所有被试的DataFrame
if all_results:
    final_result_df = pd.concat(all_results, ignore_index=True)
else:
    final_result_df = pd.DataFrame()

# 删除包含 NaN 的列和行
final_result_df = final_result_df.dropna(axis=1, how='all')  # 删除所有值为 NaN 的列
final_result_df = final_result_df.dropna(axis=0, how='any')  # 删除包含 NaN 的行

# 输出无法处理的文件日志
error_log_df = pd.DataFrame(error_log)
print("Error Log:")
print(error_log_df)

# 查看最终结果
print(final_result_df.head())

# 如果需要保存结果
final_result_df.to_csv("deap_hrv_features.csv", index=False)
error_log_df.to_csv("deap_hrv_error_log.csv", index=False)



Processing file: data_preprocessed_python/s01.dat
Processing file: data_preprocessed_python/s02.dat
Processing file: data_preprocessed_python/s03.dat
Processing file: data_preprocessed_python/s04.dat
Processing file: data_preprocessed_python/s05.dat
Processing file: data_preprocessed_python/s06.dat
Processing file: data_preprocessed_python/s07.dat
Processing file: data_preprocessed_python/s08.dat
Processing file: data_preprocessed_python/s09.dat
Processing file: data_preprocessed_python/s10.dat
Processing file: data_preprocessed_python/s11.dat
Processing file: data_preprocessed_python/s12.dat
Processing file: data_preprocessed_python/s13.dat
Processing file: data_preprocessed_python/s14.dat
Processing file: data_preprocessed_python/s15.dat
Processing file: data_preprocessed_python/s16.dat
Processing file: data_preprocessed_python/s17.dat
Processing file: data_preprocessed_python/s18.dat
Processing file: data_preprocessed_python/s19.dat
Processing file: data_preprocessed_python/s20.dat


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


No valid R-peaks detected in file s25.dat, trial 33
Processing file: data_preprocessed_python/s26.dat
No valid R-peaks detected in file s26.dat, trial 0
No valid R-peaks detected in file s26.dat, trial 1
No valid R-peaks detected in file s26.dat, trial 2
No valid R-peaks detected in file s26.dat, trial 3
No valid R-peaks detected in file s26.dat, trial 4
No valid R-peaks detected in file s26.dat, trial 6
No valid R-peaks detected in file s26.dat, trial 8
No valid R-peaks detected in file s26.dat, trial 9
No valid R-peaks detected in file s26.dat, trial 10
No valid R-peaks detected in file s26.dat, trial 11
No valid R-peaks detected in file s26.dat, trial 12
No valid R-peaks detected in file s26.dat, trial 13
No valid R-peaks detected in file s26.dat, trial 14
No valid R-peaks detected in file s26.dat, trial 15
No valid R-peaks detected in file s26.dat, trial 16
No valid R-peaks detected in file s26.dat, trial 17
No valid R-peaks detected in file s26.dat, trial 18
No valid R-peaks detec

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


No valid R-peaks detected in file s27.dat, trial 39
Processing file: data_preprocessed_python/s28.dat
No valid R-peaks detected in file s28.dat, trial 13


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


No valid R-peaks detected in file s28.dat, trial 29
No valid R-peaks detected in file s28.dat, trial 34
Processing file: data_preprocessed_python/s29.dat
No valid R-peaks detected in file s29.dat, trial 1
No valid R-peaks detected in file s29.dat, trial 12


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


No valid R-peaks detected in file s29.dat, trial 30
No valid R-peaks detected in file s29.dat, trial 31
Processing file: data_preprocessed_python/s30.dat
No valid R-peaks detected in file s30.dat, trial 0
No valid R-peaks detected in file s30.dat, trial 6


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


No valid R-peaks detected in file s30.dat, trial 12
No valid R-peaks detected in file s30.dat, trial 13
No valid R-peaks detected in file s30.dat, trial 15
No valid R-peaks detected in file s30.dat, trial 19
No valid R-peaks detected in file s30.dat, trial 20
No valid R-peaks detected in file s30.dat, trial 22
No valid R-peaks detected in file s30.dat, trial 23
No valid R-peaks detected in file s30.dat, trial 24


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


No valid R-peaks detected in file s30.dat, trial 29
No valid R-peaks detected in file s30.dat, trial 36


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


No valid R-peaks detected in file s30.dat, trial 38
Processing file: data_preprocessed_python/s31.dat
No valid R-peaks detected in file s31.dat, trial 7


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


Processing file: data_preprocessed_python/s32.dat
No valid R-peaks detected in file s32.dat, trial 0
No valid R-peaks detected in file s32.dat, trial 1
No valid R-peaks detected in file s32.dat, trial 2
No valid R-peaks detected in file s32.dat, trial 3
No valid R-peaks detected in file s32.dat, trial 4
No valid R-peaks detected in file s32.dat, trial 6
No valid R-peaks detected in file s32.dat, trial 8
No valid R-peaks detected in file s32.dat, trial 9
No valid R-peaks detected in file s32.dat, trial 10
No valid R-peaks detected in file s32.dat, trial 12
No valid R-peaks detected in file s32.dat, trial 16
No valid R-peaks detected in file s32.dat, trial 17
No valid R-peaks detected in file s32.dat, trial 18
No valid R-peaks detected in file s32.dat, trial 19
No valid R-peaks detected in file s32.dat, trial 21
No valid R-peaks detected in file s32.dat, trial 22
No valid R-peaks detected in file s32.dat, trial 23
No valid R-peaks detected in file s32.dat, trial 24
No valid R-peaks detec

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))


In [55]:
final_result_df

Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_RMSSD,HRV_SDSD,HRV_CVNN,HRV_CVSD,HRV_MedianNN,HRV_MadNN,HRV_MCVNN,HRV_IQRNN,...,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN,Arousal,Valence,Emotion_Category,Subject
0,1423.018293,889.233701,1219.948829,1235.473991,0.624893,0.857297,1210.93750,671.803125,0.554779,867.187500,...,95.121951,95.121951,406.2500,4859.3750,20.500000,648.4375,7.71,7.60,HighArousal_HighValence,s01
1,1122.443182,624.945843,835.326799,842.529950,0.556773,0.744204,1015.62500,648.637500,0.638658,941.406250,...,92.727273,94.545455,359.3750,2773.4375,18.333333,218.7500,8.10,7.31,HighArousal_HighValence,s01
2,1543.359375,1143.761261,1498.892612,1518.434682,0.741086,0.971188,1140.62500,648.637500,0.568668,1046.875000,...,92.500000,95.000000,375.0000,6921.8750,13.333333,726.5625,8.58,7.54,HighArousal_HighValence,s01
3,1205.000000,759.754606,1204.462404,1216.345187,0.630502,0.999554,1046.87500,573.349219,0.547677,744.140625,...,94.000000,98.000000,343.7500,4539.0625,25.000000,210.9375,4.94,6.01,LowArousal_HighValence,s01
4,1449.590774,1173.989711,1585.736144,1605.059899,0.809877,1.093920,1066.40625,637.054688,0.597385,835.937500,...,95.238095,95.238095,367.1875,5984.3750,21.000000,546.8750,6.96,3.92,HighArousal_LowValence,s01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1237,4501.201923,3295.306953,5338.234716,5567.088774,0.732095,1.185958,3617.18750,2745.126562,0.758912,5132.812500,...,92.307692,92.307692,406.2500,10101.5625,13.000000,0.0000,1.00,9.00,LowArousal_HighValence,s31
1238,3109.786184,3090.312556,4416.437107,4544.058343,0.993738,1.420174,1539.06250,1540.514062,1.000943,5183.593750,...,94.736842,94.736842,414.0625,9906.2500,19.000000,0.0000,1.00,9.00,LowArousal_HighValence,s31
1240,11302.734375,4695.788570,6926.471915,8474.949443,0.415456,0.612814,12550.78125,3301.101562,0.263020,5244.140625,...,75.000000,75.000000,4875.0000,15234.3750,4.000000,0.0000,8.13,4.83,HighArousal_LowValence,s32
1241,13314.453125,9196.051184,16626.415833,20282.941735,0.690682,1.248749,14750.00000,7557.785156,0.512392,8744.140625,...,75.000000,75.000000,1023.4375,22734.3750,4.000000,0.0000,4.99,2.99,LowArousal_LowValence,s32


In [34]:
final_result_df

Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_RMSSD,HRV_SDSD,HRV_CVNN,HRV_CVSD,HRV_MedianNN,HRV_MadNN,HRV_MCVNN,HRV_IQRNN,...,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN,Arousal,Valence,Emotion_Category,Subject
0,1423.018293,889.233701,1219.948829,1235.473991,0.624893,0.857297,1210.93750,671.803125,0.554779,867.187500,...,95.121951,95.121951,406.2500,4859.3750,20.500000,648.4375,7.71,7.60,HighArousal_HighValence,s01
1,1122.443182,624.945843,835.326799,842.529950,0.556773,0.744204,1015.62500,648.637500,0.638658,941.406250,...,92.727273,94.545455,359.3750,2773.4375,18.333333,218.7500,8.10,7.31,HighArousal_HighValence,s01
2,1543.359375,1143.761261,1498.892612,1518.434682,0.741086,0.971188,1140.62500,648.637500,0.568668,1046.875000,...,92.500000,95.000000,375.0000,6921.8750,13.333333,726.5625,8.58,7.54,HighArousal_HighValence,s01
3,1205.000000,759.754606,1204.462404,1216.345187,0.630502,0.999554,1046.87500,573.349219,0.547677,744.140625,...,94.000000,98.000000,343.7500,4539.0625,25.000000,210.9375,4.94,6.01,LowArousal_HighValence,s01
4,1449.590774,1173.989711,1585.736144,1605.059899,0.809877,1.093920,1066.40625,637.054688,0.597385,835.937500,...,95.238095,95.238095,367.1875,5984.3750,21.000000,546.8750,6.96,3.92,HighArousal_LowValence,s01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,,,,,,,,,,,...,,,,,,,3.91,6.96,LowArousal_HighValence,s32
1276,,,,,,,,,,,...,,,,,,,2.81,6.13,LowArousal_HighValence,s32
1277,,,,,,,,,,,...,,,,,,,3.05,7.01,LowArousal_HighValence,s32
1278,,,,,,,,,,,...,,,,,,,3.99,7.17,LowArousal_HighValence,s32


In [44]:
with open('data_preprocessed_python/s24.dat', 'rb') as file:
    data = pickle.load(file, encoding='latin1')

# 数据结构
ecg_signal = data['data'][:, 37]
rpeaks = nk.ecg_findpeaks(trial_ecg, sampling_rate=sampling_rate)
hrv = nk.hrv_time(rpeaks, sampling_rate=sampling_rate)
print(hrv)

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  out["MeanNN"] = np.nanmean(rri)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


IndexError: index 0 is out of bounds for axis 0 with size 0

In [48]:
for trial_idx, trial_ecg in enumerate(ecg_signal):
    try:
        # 检查信号是否有效
        if trial_ecg is None or len(trial_ecg) == 0 or np.all(trial_ecg == 0):
            print(f"Trial {trial_idx}: Invalid ECG signal, skipping.")
            continue

        # 滤波提升信号质量

        # 检测R波峰
        rpeaks = nk.ecg_findpeaks(trial_ecg, sampling_rate=sampling_rate)
        if 'ECG_R_Peaks' not in rpeaks or len(rpeaks['ECG_R_Peaks']) < 2:
            print(f"Trial {trial_idx}: No valid R-peaks detected, skipping.")
            continue

        # 计算HRV指标
        hrv = nk.hrv_time(rpeaks, sampling_rate=sampling_rate)
        print(f"Trial {trial_idx}: HRV features calculated successfully.")

    except Exception as e:
        print(f"Trial {trial_idx}: Error occurred - {e}")
        continue


Trial 0: HRV features calculated successfully.
Trial 1: No valid R-peaks detected, skipping.
Trial 2: No valid R-peaks detected, skipping.
Trial 3: HRV features calculated successfully.
Trial 4: No valid R-peaks detected, skipping.
Trial 5: No valid R-peaks detected, skipping.
Trial 6: No valid R-peaks detected, skipping.
Trial 7: HRV features calculated successfully.
Trial 8: No valid R-peaks detected, skipping.
Trial 9: HRV features calculated successfully.
Trial 10: HRV features calculated successfully.
Trial 11: HRV features calculated successfully.
Trial 12: HRV features calculated successfully.
Trial 13: HRV features calculated successfully.
Trial 14: HRV features calculated successfully.
Trial 15: No valid R-peaks detected, skipping.
Trial 16: No valid R-peaks detected, skipping.
Trial 17: No valid R-peaks detected, skipping.
Trial 18: No valid R-peaks detected, skipping.
Trial 19: No valid R-peaks detected, skipping.
Trial 20: No valid R-peaks detected, skipping.
Trial 21: No v

In [42]:
import pickle
import numpy as np

data_dir = "data_preprocessed_python/"

# 检查文件内容是否一致
for file_name in os.listdir(data_dir):
    if file_name.endswith(".dat"):
        file_path = os.path.join(data_dir, file_name)
        try:
            with open(file_path, 'rb') as file:
                data = pickle.load(file, encoding='latin1')
            
            # 检查文件结构
            print(f"Processing {file_name}")
            print(f"Data keys: {list(data.keys())}")
            print(f"Data shape: {data['data'].shape}, Labels shape: {data['labels'].shape}")
        
        except Exception as e:
            print(f"Error reading file {file_name}: {e}")

Processing s01.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s02.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s03.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s04.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s05.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s06.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s07.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s08.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s09.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing s10.dat
Data keys: ['labels', 'data']
Data shape: (40, 40, 8064), Labels shape: (40, 4)
Processing