## PPG 特征提取与选择 Notebook
该 Notebook 演示如何使用 `feat.py` 模块（即下方通过 `from feat import ...` 导入的模块）：
1. 加载 PPG/ABP 数据
2. 信号分段
3. 提取 75 维特征
4. 准备 SBP/DBP 标签
5. 特征离散化 (ChiMerge)
6. BEFS 特征选择

In [1]:
from feat import load_ppg_abp_from_folder, segment_signal, extract_ppg_features_75, discretize_features, befs_feature_selection
import pandas as pd

In [2]:
# 1. Load data
# Reads the PPG/ABP recordings found under `folder_path`; each returned record
# is later indexed with the keys 'ppg' and 'abp' by the segmentation cell.
folder_path = 'data'  # replace with the actual data folder path
records = load_ppg_abp_from_folder(folder_path)
print(f'Loaded {len(records)} records')

Loaded 1 records


In [3]:
# 2. Segment the signals
# Window and step are expressed in samples. The previous comments claimed a
# 2-second window and a 1-second step, which did not match the values used;
# the values are unchanged, only their description is corrected.
fs = 125                # sampling rate in Hz
window_size = fs * 8    # 8-second window (1000 samples)
step_size = fs * 3      # 3-second step (375 samples)
segments = []
for rec in records:
    # Collect the windows of every record into one flat list.
    segments.extend(segment_signal(rec['ppg'], rec['abp'], window_size, step_size))
print(f'Total segments: {len(segments)}')

Total segments: 169


In [4]:
# 3. Extract features
# One feature record per segment, computed from the PPG part of the window at
# a 125 Hz sampling rate; the records are stacked into a DataFrame (one row
# per segment) and the first rows are displayed as the cell output.
feature_list = [
    extract_ppg_features_75(seg['ppg_segment'], fs=125)
    for seg in segments
]
df_features = pd.DataFrame(feature_list)
df_features.head()

Unnamed: 0,Mean,Median,Std,Var,IQR,RMS,Skew,Kurtosis,Min,Max,...,RR_mean,RR_SDNN,RR_RMSSD,pNN50,SD1,SD2,SD1_SD2,RR_LF,RR_HF,RR_LF_HF
0,-0.036177,-0.102374,0.54086,0.29253,0.722086,0.541799,0.45054,0.156142,-1.391402,1.651701,...,0.6952,0.102828,0.176665,0.333333,0.132457,0.060018,2.206964,0.0,0.0,0.0
1,0.005987,-0.074843,0.581892,0.338598,0.855299,0.581631,0.359481,-0.334393,-1.391402,1.651701,...,0.666182,0.140449,0.223342,0.5,0.166469,0.108351,1.536389,0.0,0.0,0.0
2,0.00887,-0.074066,0.598588,0.358308,0.924687,0.598355,0.363102,-0.507467,-1.391402,1.651701,...,0.650909,0.116167,0.160679,0.3,0.118593,0.113688,1.043144,0.0,0.0,0.0
3,-0.010188,-0.074504,0.543612,0.295514,0.956647,0.543436,0.306123,-1.200751,-0.874907,1.01949,...,0.7072,0.0086,0.009978,0.0,0.007364,0.009679,0.760754,0.0,0.0,0.0
4,-0.01115,-0.067648,0.554153,0.307086,0.979305,0.553988,0.310152,-1.204956,-0.852994,1.01949,...,0.7128,0.005903,0.007055,0.0,0.005249,0.006491,0.808707,0.0,0.0,0.0


In [5]:
# 4. Prepare SBP/DBP labels
# One label pair per segment: SBP is the maximum and DBP the minimum of the
# ABP samples in that segment, so both lists align index-by-index with
# `segments` (and with the rows of the feature table built from it).
# NOTE(review): the extremes are taken over the whole window, not per beat,
# so a single outlier sample sets the label — confirm this is intended.
sbp_values = [seg['abp_segment'].max() for seg in segments]
dbp_values = [seg['abp_segment'].min() for seg in segments]
print(f'Prepared {len(sbp_values)} SBP labels and {len(dbp_values)} DBP labels')

Prepared 169 SBP labels and 169 DBP labels


In [6]:
# 5. Feature discretization (ChiMerge)
# Only the SBP labels are passed as the supervision target here; the DBP
# labels are not used in this step.
# NOTE(review): k and alpha are presumably the interval limit and the
# chi-square significance level — confirm against discretize_features.
bins = discretize_features(df_features, sbp_values, k=10, alpha=0.05)
# Display the result: a dict keyed by feature name holding an array of
# cut points for that feature.
bins

{'Mean': array([-0.03617723, -0.01857951, -0.01274794, -0.00585671,  0.00029196,
         0.00639938,  0.01237809,  0.01848959]),
 'Median': array([-0.12264285, -0.10237419, -0.08628649, -0.07406557, -0.06227451,
        -0.04963727, -0.03811344, -0.02530123, -0.01350237]),
 'Std': array([0.51445525, 0.53275413, 0.5501332 , 0.56665935, 0.58453991,
        0.60114094, 0.61853363, 0.63593006, 0.64182054]),
 'Var': array([0.26727985, 0.28782843, 0.30879   , 0.32913878, 0.34872123,
        0.36770745, 0.38888487, 0.40440704, 0.4119336 ]),
 'IQR': array([0.72208553, 0.84489182, 0.87873104, 0.93039846, 0.9728099 ,
        1.01340812, 1.05649963, 1.0960356 ]),
 'RMS': array([0.51420262, 0.53252614, 0.54989196, 0.56668757, 0.58429066,
        0.60123614, 0.61822444, 0.63565851, 0.64153476]),
 'Skew': array([0.1228989 , 0.19558167, 0.23059771, 0.26616906, 0.30612322,
        0.34266099, 0.37406103, 0.38514397]),
 'Kurtosis': array([-1.12253034, -0.9987378 , -0.9150238 , -0.33439316]),
 'Min': a

In [7]:
# 6. BEFS feature selection
# Runs the selection against both label sets and returns three collections:
# the final selection plus the SBP-specific and DBP-specific selections.
# NOTE(review): the printed values look like positional column indices into
# df_features rather than feature names, and the exact meaning of k=20 is
# not visible here — confirm against befs_feature_selection.
final_features, selected_sbp, selected_dbp = befs_feature_selection(df_features, sbp_values, dbp_values, k=20)
print('Final selected feature indices:', final_features)
print('SBP optimized features:', selected_sbp)
print('DBP optimized features:', selected_dbp)

Final selected feature indices: [0, 4, 6, 8, 9, 10, 14, 16, 17, 18, 19, 21, 23, 29, 36, 45, 46, 47, 48, 50, 56, 57, 61, 63, 64, 65, 66, 69, 70, 71]
SBP optimized features: [0, 4, 6, 8, 9, 10, 14, 19, 21, 23, 36, 45, 46, 48, 56, 61, 63, 65, 69, 70, 71]
DBP optimized features: [6, 8, 14, 16, 17, 18, 19, 21, 29, 45, 47, 48, 50, 57, 61, 63, 64, 65, 66, 69, 70]
