# 2 PTT计算准备

## 步骤 2.1：数据加载与检查

In [2]:
import pandas as pd
import os

output_dir = '/root/PI_Lab/output/csv_output'
experiment_id = '1'  # 替换为具体实验ID

# 加载Biopac数据（血压）
biopac_file = f'{experiment_id}_biopac_aligned.csv'
biopac_df = pd.read_csv(os.path.join(output_dir, biopac_file))
print(f"Biopac数据列: {biopac_df.columns.tolist()}")
print(f"样本数: {len(biopac_df)}")

# 加载HUB数据
sensor_positions = {'sensor2': 'nose', 'sensor3': 'finger', 'sensor4': 'wrist', 'sensor5': 'ear'}
hub_data = {}
for sensor, position in sensor_positions.items():
    hub_file = f'{experiment_id}_hub_{sensor}_aligned.csv'
    hub_df = pd.read_csv(os.path.join(output_dir, hub_file))
    hub_data[sensor] = hub_df
    print(f"{position} ({sensor}) 数据列: {hub_df.columns.tolist()}")
    print(f"样本数: {len(hub_df)}")

Biopac数据列: ['timestamp', 'diastolic_bp', 'rsp', 'systemic_vascular_resistance', 'hr', 'cardiac_output', 'bp', 'systolic_bp', 'mean_bp', 'cardiac_index']
样本数: 66669
nose (sensor2) 数据列: ['timestamp', 'red', 'ir', 'green', 'ax', 'ay', 'az', 'rx', 'ry', 'rz', 'mx', 'my', 'mz', 'temp', 'time']
样本数: 66669
finger (sensor3) 数据列: ['timestamp', 'red', 'ir', 'green', 'ax', 'ay', 'az', 'rx', 'ry', 'rz', 'mx', 'my', 'mz', 'temp', 'time']
样本数: 66669
wrist (sensor4) 数据列: ['timestamp', 'red', 'ir', 'green', 'ax', 'ay', 'az', 'rx', 'ry', 'rz', 'mx', 'my', 'mz', 'temp', 'time']
样本数: 66669
ear (sensor5) 数据列: ['timestamp', 'red', 'ir', 'green', 'ax', 'ay', 'az', 'rx', 'ry', 'rz', 'mx', 'my', 'mz', 'temp', 'time']
样本数: 66669


## 步骤 2.2：PPG信号预处理

In [3]:
from scipy.signal import butter, filtfilt
import neurokit2 as nk

def butter_bandpass(lowcut, highcut, fs, order=4):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return b, a

def bandpass_filter(data, lowcut=0.5, highcut=8, fs=100, order=4):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = filtfilt(b, a, data)
    return y

# 假设采样率100Hz
fs = 100
for sensor, df in hub_data.items():
    for channel in ['red', 'ir', 'green']:
        if channel in df.columns:
            # 滤波
            filtered_signal = bandpass_filter(df[channel].values, fs=fs)
            df[f'{channel}_filtered'] = filtered_signal
            # 检测波峰
            peaks, _ = nk.signal_findpeaks(filtered_signal, height_min=filtered_signal.mean())
            df[f'{channel}_peaks'] = 0
            df.loc[peaks, f'{channel}_peaks'] = 1
            print(f"{sensor} {channel} 波峰数: {len(peaks)}")

ValueError: too many values to unpack (expected 2)

## 3. PTT计算

任务: 计算不同传感器间同一颜色通道的PTT。

In [4]:
import numpy as np

def calculate_ptt(ref_peaks, target_peaks, timestamps):
    ptt_values = []
    for ref_peak_idx in ref_peaks:
        ref_time = timestamps[ref_peak_idx]
        # 找到最近的target峰
        diff = np.abs(target_peaks - ref_peak_idx)
        if len(diff) > 0:
            target_peak_idx = target_peaks[np.argmin(diff)]
            target_time = timestamps[target_peak_idx]
            ptt = target_time - ref_time
            ptt_values.append(ptt)
    return np.mean(ptt_values) if ptt_values else np.nan

reference_sensor = 'sensor2'
ref_df = hub_data[reference_sensor]
timestamps = ref_df['timestamp'].values

for sensor, df in hub_data.items():
    if sensor != reference_sensor:
        for channel in ['red', 'ir', 'green']:
            if f'{channel}_peaks' in df.columns and f'{channel}_peaks' in ref_df.columns:
                ref_peaks = np.where(ref_df[f'{channel}_peaks'] == 1)[0]
                target_peaks = np.where(df[f'{channel}_peaks'] == 1)[0]
                ptt = calculate_ptt(ref_peaks, target_peaks, timestamps)
                print(f"PTT ({reference_sensor} -> {sensor}, {channel}): {ptt:.4f} 秒")

## 4. 血压预测模型构建

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# 准备特征和标签
X = []  # PTT特征
y_systolic = biopac_df['systolic_bp'].values
y_diastolic = biopac_df['diastolic_bp'].values

for sensor, df in hub_data.items():
    if sensor != reference_sensor:
        for channel in ['red', 'ir', 'green']:
            if f'{channel}_peaks' in df.columns and f'{channel}_peaks' in ref_df.columns:
                ref_peaks = np.where(ref_df[f'{channel}_peaks'] == 1)[0]
                target_peaks = np.where(df[f'{channel}_peaks'] == 1)[0]
                ptt = calculate_ptt(ref_peaks, target_peaks, timestamps)
                X.append(ptt)

X = np.array(X).reshape(-1, 1)

# 拆分训练集和测试集
X_train, X_test, y_s_train, y_s_test = train_test_split(X, y_systolic, test_size=0.2, random_state=42)
X_train, X_test, y_d_train, y_d_test = train_test_split(X, y_diastolic, test_size=0.2, random_state=42)

# 训练模型
model_systolic = LinearRegression()
model_systolic.fit(X_train, y_s_train)
y_s_pred = model_systolic.predict(X_test)
print(f"收缩压 MSE: {mean_squared_error(y_s_test, y_s_pred):.2f}")

model_diastolic = LinearRegression()
model_diastolic.fit(X_train, y_d_train)
y_d_pred = model_diastolic.predict(X_test)
print(f"舒张压 MSE: {mean_squared_error(y_d_test, y_d_pred):.2f}")

ValueError: Found input variables with inconsistent numbers of samples: [0, 66669]