In [55]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import LeaveOneOut, KFold, train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
import glob
from tqdm import tqdm
from torch.utils.data import DataLoader,TensorDataset, Subset
from torchviz import make_dot
from torchsummary import summary
import random


# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('device = ', device)


device =  cuda


In [56]:
def create_features(ppg_data, segment_length = 625):
    """Split a signal into consecutive, non-overlapping fixed-length segments.

    Parameters
    ----------
    ppg_data : array-like
        The samples to segment. Anything ``np.asarray`` understands is
        accepted: a pandas Series/DataFrame, a NumPy array or a plain list.
    segment_length : int, default 625
        Number of samples (rows) per segment. Must be positive.

    Returns
    -------
    numpy.ndarray
        Array with one row per complete segment. For 1-D input the shape is
        ``(num_segments, segment_length)``; for 2-D input each segment's rows
        are flattened into a single row. Trailing samples that do not fill a
        whole segment are dropped. When the input is shorter than one segment
        the result has zero rows but keeps its second dimension.

    Raises
    ------
    ValueError
        If ``segment_length`` is not positive.
    """
    if segment_length <= 0:
        raise ValueError(f"segment_length must be positive, got {segment_length}")

    samples = np.asarray(ppg_data)
    num_segments = len(samples) // segment_length

    # Width of one flattened segment; spelled out (instead of -1) so that the
    # reshape also works when there are zero complete segments.
    row_width = segment_length * int(np.prod(samples.shape[1:]))

    usable = samples[:num_segments * segment_length]
    # np.array(...) copies, so the result never aliases the caller's data.
    return np.array(usable.reshape(num_segments, row_width))

# Load data
# Collect the per-recording CSV files. glob returns paths in arbitrary order,
# so both lists are sorted to keep the i-th PPG file paired with the i-th BP
# file (this assumes the two directories use matching file names — verify).
ppg_files = sorted(glob.glob('ft_data/PPG_data/*.csv'))
bp_files = sorted(glob.glob('ft_data/BP_data/*.csv'))
if len(ppg_files) != len(bp_files):
    raise ValueError(
        f'Found {len(ppg_files)} PPG files but {len(bp_files)} BP files; '
        'they must pair up one-to-one'
    )

# Pick a random window of n consecutive recordings.
# NOTE(review): no random seed is set, so a different recording is selected on
# every run and the reported errors are not reproducible.
n = 1
if len(ppg_files) < n:
    raise ValueError(f'Need at least {n} PPG file(s), found {len(ppg_files)}')
k = random.randint(0, len(ppg_files)-n)
ppg_files = ppg_files[k:k+n]
bp_files = bp_files[k:k+n]

X_list = []
y_list = []
y_scalers = []  # not used by the cells visible here; kept for compatibility

i = 0  # not used by the cells visible here; kept for compatibility
for ppg_file, bp_file in tqdm(zip(ppg_files, bp_files), total = len(ppg_files)):
    ppg_data = pd.read_csv(ppg_file)
    bp_data = pd.read_csv(bp_file)

    # Cut the PPG signal into fixed-length segments: (num_segments, 625)
    ppg_segments = create_features(ppg_data['PPG'])

    # Rows of the BP file are paired positionally with PPG segments. Keep only
    # as many pairs as BOTH sides provide so X and y always have equal length.
    num_samples = min(len(ppg_segments), len(bp_data))
    y_sbp = bp_data['SBP'][:num_samples]
    y_dbp = bp_data['DBP'][:num_samples]
    # NOTE(review): NaN labels are not filtered out here.

    # Reshape to (num_samples, channels=1, 625) and stack targets as (SBP, DBP)
    X = ppg_segments[:num_samples].reshape(-1, 1, 625)
    y = np.column_stack((y_sbp, y_dbp))

    # Accumulate the samples of every selected recording
    X_list.extend(X)
    y_list.extend(y)


X = np.array(X_list)
y = np.array(y_list)

print(X.shape)
print(y.shape)

100%|██████████| 1/1 [00:00<00:00, 31.02it/s]

(68, 1, 625)
(68, 2)





In [57]:
class GRUBPModel(nn.Module):
    """CNN + GRU regressor mapping a single-channel segment to ``output_size`` values.

    Three 1-D convolutions (50 channels each, kernel 7, padding 3, so the
    sequence length is preserved) are applied in sequence. The outputs of the
    first and third convolution are concatenated along the channel axis
    (50 + 50 = 100 channels), batch-normalised, fed through a GRU,
    batch-normalised again, flattened and passed through two linear layers.

    Parameters
    ----------
    input_size : int
        Feature size seen by the GRU and its preceding batch norm. It must be
        100 to match the concatenated convolution channels.
    hidden_size : int
        GRU hidden size.
    num_layers : int
        Number of stacked GRU layers.
    output_size : int
        Number of regression outputs.
    seq_len : int, default 625
        Length of the input segments. It fixes the input width of ``fc1``, so
        inputs passed to ``forward`` must have exactly this many time steps.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, seq_len=625):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len

        # Convolutional feature extractor (padding keeps the sequence length)
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=50, kernel_size=7, padding=3)
        self.conv2 = nn.Conv1d(in_channels=50, out_channels=50, kernel_size=7, padding=3)
        self.conv3 = nn.Conv1d(in_channels=50, out_channels=50, kernel_size=7, padding=3)

        self.bn1 = nn.BatchNorm1d(50)
        self.bn2 = nn.BatchNorm1d(50)
        self.bn3 = nn.BatchNorm1d(50)
        # Batch normalisation applied just before the GRU
        self.gru_bn = nn.BatchNorm1d(input_size)
        # Batch normalisation applied to the GRU outputs
        self.gru_out_bn = nn.BatchNorm1d(hidden_size)

        # Recurrent layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)

        # Fully connected head on the flattened GRU outputs of every time step
        self.fc1 = nn.Linear(seq_len * hidden_size, 64)
        self.fc1_bn = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, output_size)

    def forward(self, x):
        """Run the network on ``x`` of shape ``(batch, 1, seq_len)``.

        Returns a tensor of shape ``(batch, output_size)``.
        """
        # Convolution stack
        conv1_out = torch.relu(self.bn1(self.conv1(x)))
        conv2_out = torch.relu(self.bn2(self.conv2(conv1_out)))
        conv3_out = torch.relu(self.bn3(self.conv3(conv2_out)))

        # Skip connection: concatenate first and last conv features on the
        # channel axis -> (batch, 100, seq_len)
        x = torch.cat((conv1_out, conv3_out), dim=1)

        # BatchNorm1d expects channels on dim 1, which is already the case
        # here; only afterwards switch to the GRU's (batch, seq_len, features).
        x = self.gru_bn(x)
        x = x.transpose(1, 2)

        # nn.GRU starts from an all-zero hidden state when none is given.
        out, _ = self.gru(x)

        # Normalise GRU outputs per hidden feature, then restore the layout
        out = self.gru_out_bn(out.transpose(1, 2)).transpose(1, 2)

        # Flatten to (batch, seq_len * hidden_size)
        out = out.contiguous().view(out.size(0), -1)

        # Fully connected head
        out = self.fc1(out)
        out = torch.relu(self.fc1_bn(out))
        out = self.fc2(out)

        return out

# Model hyper-parameters
input_size = 100   # GRU feature size: the two concatenated 50-channel conv outputs
hidden_size = 25
num_layers = 1
output_size = 2    # one output each for SBP and DBP

model = GRUBPModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
).to(device)

# Training schedule
num_epochs = 5
batch_size = 32


# Loss function: mean absolute error (the optimizer is created where training happens)
criterion = nn.L1Loss()

In [58]:
def train_loo(model, train_loader, val_loader, scaler_y, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; it is updated in place.
    train_loader, val_loader : iterable of (inputs, targets)
        Batches used for the optimisation steps and for validation.
    scaler_y
        Target scaler, forwarded unchanged to ``evaluate``.
    criterion : callable
        Loss function called as ``criterion(outputs, targets)``.
    optimizer : torch.optim.Optimizer
        Optimiser holding the parameters to update.
    epochs : int
        Number of passes over ``train_loader``.

    Returns
    -------
    (train_loss_list, val_loss_list, SBP_errors, DBP_errors)
        Per-epoch mean training loss, per-epoch validation loss, and the two
        error values reported by ``evaluate`` after the LAST epoch. When
        ``epochs`` is 0 both lists are empty and both errors are NaN.

    Raises
    ------
    ZeroDivisionError
        If ``train_loader`` yields no batches (``len(train_loader) == 0``).
    """
    train_loss_list = []
    val_loss_list = []
    # Defined up front so that epochs == 0 returns cleanly instead of raising
    # UnboundLocalError at the return statement.
    SBP_errors = DBP_errors = float('nan')

    for epoch in range(epochs):
        # evaluate() switches the model to eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()

        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Mean of the per-batch losses for this epoch
        train_avg_loss = running_loss / len(train_loader)
        train_loss_list.append(train_avg_loss)
        torch.cuda.empty_cache()

        val_avg_loss, SBP_errors, DBP_errors = evaluate(model, val_loader, scaler_y, criterion)
        val_loss_list.append(val_avg_loss)

    return train_loss_list, val_loss_list, SBP_errors, DBP_errors


def evaluate(model, val_loader, scaler_y, criterion):
    """Score ``model`` on ``val_loader`` and report errors in original target units.

    The model is put into eval mode and run without gradient tracking over
    every batch. Predictions and targets are mapped back through
    ``scaler_y.inverse_transform`` before the per-column errors are computed.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate; it is left in eval mode.
    val_loader : iterable of (inputs, targets)
        Validation batches.
    scaler_y
        Object exposing ``inverse_transform`` for the targets.
    criterion : callable
        Loss function called as ``criterion(outputs, targets)``.

    Returns
    -------
    (val_loss, SBP_errors, DBP_errors)
        Mean of the per-batch losses (on scaled targets), then the mean
        absolute error of column 0 (SBP) and of column 1 (DBP) after inverse
        scaling.
    """
    model.eval()

    batch_outputs = []
    batch_targets = []
    loss_total = 0.0

    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss_total += criterion(outputs, targets).item()
            batch_outputs.append(outputs)
            batch_targets.append(targets)

    stacked_pred = torch.cat(batch_outputs, dim=0)
    stacked_true = torch.cat(batch_targets, dim=0)
    # Average the loss over the number of validation batches
    val_loss = loss_total / len(val_loader)

    # Undo the target scaling so errors are expressed in the original units
    restored_pred = scaler_y.inverse_transform(stacked_pred.cpu().numpy())
    restored_true = scaler_y.inverse_transform(stacked_true.cpu().numpy())

    # Columns are promoted to float64 before scoring
    sbp_pred = np.asarray(restored_pred[:, 0], dtype=np.float64)
    sbp_true = np.asarray(restored_true[:, 0], dtype=np.float64)
    dbp_pred = np.asarray(restored_pred[:, 1], dtype=np.float64)
    dbp_true = np.asarray(restored_true[:, 1], dtype=np.float64)

    SBP_errors = mean_absolute_error(sbp_true, sbp_pred)
    DBP_errors = mean_absolute_error(dbp_true, dbp_pred)

    return val_loss, SBP_errors, DBP_errors


In [59]:
# Leave-one-out cross-validation
# Fine-tune the pretrained parameters on every fold and score the held-out sample

loo = LeaveOneOut()

all_train_losses = []   # per fold: list of per-epoch training losses
all_val_losses = []     # per fold: list of per-epoch validation losses
all_SBP_errors = []
all_DBP_errors = []

# Standardisation
# NOTE(review): both scalers are fitted on ALL samples, including the one each
# fold holds out, so the validation sample leaks into the scaling statistics.
# Left unchanged because cells that run after this one read the globally
# scaled X / y and this scaler_y.
# NOTE(review): X and y are overwritten in place. Re-running this cell without
# re-running the data-loading cell re-fits the scalers on already standardised
# data, after which inverse_transform no longer returns the original units.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X = scaler_X.fit_transform(X.reshape(-1, 625)).reshape(-1, 625, 1)
X = X.transpose(0, 2, 1)
y = scaler_y.fit_transform(y)

# Read the checkpoint from disk once instead of once per fold. map_location
# lets a checkpoint saved on a GPU load on a CPU-only machine as well.
# load_state_dict copies the values into each new model, so sharing this dict
# across folds is safe.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
pretrained_state = torch.load('pretraind_model_params1.pth', map_location=device)
#pretrained_state = torch.load('checkpoint/model_epoch_20.pth', map_location=device)


for train_idx, val_idx in tqdm(loo.split(X), total = len(X), desc='LOO Cross Validation'):
    # Split the data for this fold
    X_train_np, X_val_np = X[train_idx], X[val_idx]
    y_train_np, y_val_np = y[train_idx], y[val_idx]

    X_train = torch.tensor(X_train_np, dtype=torch.float32).to(device)
    X_val = torch.tensor(X_val_np, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train_np, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val_np, dtype=torch.float32).to(device)

    # Wrap the tensors in DataLoaders
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)

    # A trailing training batch with a single sample makes BatchNorm1d on the
    # 2-D fc1 output raise in training mode, so drop that lone sample when the
    # split would produce one.
    lone_last_batch = len(train_dataset) > batch_size and len(train_dataset) % batch_size == 1
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=lone_last_batch)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Fresh model for every fold, initialised from the pretrained weights
    model = GRUBPModel(input_size, hidden_size, num_layers, output_size).to(device)
    model.load_state_dict(pretrained_state)

    # Freeze everything except the layers being fine-tuned:
    # conv3, fc2 and every batch-norm layer (any parameter name containing 'bn')
    for name, param in model.named_parameters():
        param.requires_grad = 'conv3' in name or 'fc2' in name or 'bn' in name

    # Optimise only the unfrozen parameters
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)

    # Train, validating on the held-out sample after every epoch
    train_loss_list, val_loss_list, SBP_errors, DBP_errors = train_loo(model, train_loader, val_loader, scaler_y, criterion, optimizer, num_epochs)

    all_train_losses.append(train_loss_list)
    all_val_losses.append(val_loss_list)
    all_SBP_errors.append(SBP_errors)
    all_DBP_errors.append(DBP_errors)

# Report the cross-validation result (mean over folds)

mean_SBP_error = np.mean(all_SBP_errors)
mean_DBP_error = np.mean(all_DBP_errors)

print(f'Mean SBP Error: {mean_SBP_error:.6f}')
print(f'Mean DBP Error: {mean_DBP_error:.6f}')

LOO Cross Validation: 100%|██████████| 68/68 [00:12<00:00,  5.44it/s]

Mean SBP Error: 4.618418
Mean DBP Error: 1.654856





In [61]:
# Leave-one-out evaluation
# Baseline: the pretrained parameters WITHOUT any fine-tuning.
# Relies on X, y (already standardised) and scaler_y from the fine-tuning cell.

loo = LeaveOneOut()

all_train_losses = []   # stays empty: nothing is trained in this cell
all_val_losses = []     # one validation loss per held-out sample
all_SBP_errors = []
all_DBP_errors = []

# No training happens here, so every fold would use an identical model: build
# it and read the checkpoint once instead of once per fold. map_location lets
# a checkpoint saved on a GPU load on a CPU-only machine as well.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
model = GRUBPModel(input_size, hidden_size, num_layers, output_size).to(device)
model.load_state_dict(torch.load('pretraind_model_params1.pth', map_location=device))
# model.load_state_dict(torch.load('checkpoint/model_epoch_20.pth', map_location=device))

# Freeze every parameter; the model is only evaluated
for param in model.parameters():
    param.requires_grad = False


for train_idx, val_idx in tqdm(loo.split(X), total = len(X), desc='LOO Cross Validation'):
    # Only the held-out sample is needed; the training split is unused because
    # no optimisation step is taken (so no train loader or optimizer either).
    X_val = torch.tensor(X[val_idx], dtype=torch.float32).to(device)
    y_val = torch.tensor(y[val_idx], dtype=torch.float32).to(device)

    val_dataset = TensorDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Score the untouched pretrained model on the held-out sample
    val_avg_loss, SBP_errors,DBP_errors = evaluate(model, val_loader, scaler_y, criterion)

    all_val_losses.append(val_avg_loss)
    all_SBP_errors.append(SBP_errors)
    all_DBP_errors.append(DBP_errors)

# Report the result (mean over held-out samples)

mean_SBP_error = np.mean(all_SBP_errors)
mean_DBP_error = np.mean(all_DBP_errors)

print(f'Mean SBP Error: {mean_SBP_error:.6f}')
print(f'Mean DBP Error: {mean_DBP_error:.6f}')

LOO Cross Validation: 100%|██████████| 68/68 [00:02<00:00, 30.62it/s]

Mean SBP Error: 5.116440
Mean DBP Error: 1.918725



