In [190]:
import mindtorch.torch as torch
import pandas as pd
import mindtorch.torch.nn as nn
import mindtorch.torch.optim as optim
from mindtorch.torch.utils.data import DataLoader, TensorDataset
import mindtorch.torch.nn.functional as F
import pandas as pd
import numpy as np
#import torch
#import torch.nn as nn
#import torch.optim as optim
#from torch.utils.data import DataLoader, TensorDataset
#import torch.nn.functional as F


In [191]:
# 读取已有的 CSV 文件
input_file = 'user_health_data03.csv'
df = pd.read_csv(input_file)

# 按每个用户划分训练集和验证集
def split_data_by_user(df, train_ratio=0.8):
    user_ids = df['user_id'].unique()
    train_data = []
    val_data = []
    
    for user_id in user_ids:
        user_data = df[df['user_id'] == user_id].sort_values(by='date')
        train_size = int(len(user_data) * train_ratio)
        train_data.append(user_data.iloc[:train_size])
        val_data.append(user_data.iloc[train_size:])
    
    train_df = pd.concat(train_data)
    val_df = pd.concat(val_data)
    
    return train_df, val_df

# 将数据按用户划分为训练集和验证集
train_df, val_df = split_data_by_user(df)

# 保存数据到 CSV 文件
train_df.to_csv('train_data_with_noise.csv', index=False)
val_df.to_csv('val_data_with_noise.csv', index=False)

# 查看部分生成的数据
print("训练集前5条数据:")
print(train_df.head())

print("\n验证集前5条数据:")
print(val_df.head())




训练集前5条数据:
     user_id        date  steps  exercise_time  avg_heart_rate  \
999        1  2022-02-04  11470             22              68   
998        1  2022-02-05   9290             27              69   
997        1  2022-02-06   7218             59              85   
996        1  2022-02-07   9572             65              81   
995        1  2022-02-08   7615             36              66   

     max_heart_rate  sleep_duration  fatigue_level  relaxation_training  \
999             108        5.449322       6.107494                    0   
998             104        5.528365       5.405949                    0   
997             111        6.309658       2.857095                    0   
996             124        8.606211       3.488209                    0   
995              93        7.023586       6.106323                    0   

     height  weight  age  
999     175      70   30  
998     175      70   30  
997     175      70   30  
996     175      70   30  
995    

In [192]:
# 生成滑动窗口数据
def create_sliding_window_data(df, window_size=14):
    X, y = [], []
    
    # 对每个用户单独处理
    for user_id in df['user_id'].unique():
        user_data = df[df['user_id'] == user_id].sort_values(by='date')
        features = user_data[['steps', 'exercise_time', 'avg_heart_rate', 'max_heart_rate', 
                              'sleep_duration', 'fatigue_level', 
                              'height', 'weight', 'age']]
        target = user_data['relaxation_training']  # 目标是 relaxation_training
        
        # 使用滑动窗口生成数据
        for i in range(len(user_data) - window_size):
            X.append(features.iloc[i:i+window_size].values)  # 14天的输入数据
            y.append(target.iloc[i+window_size])  # 第15天的目标值

    X = np.array(X)
    y = np.array(y)
    
    return X, y


In [193]:
# 加载训练数据
df_train = pd.read_csv('train_data_with_noise.csv')  # 假设你的数据在这个文件中
X_train, y_train = create_sliding_window_data(df_train, window_size=3)

# 加载验证数据
df_val = pd.read_csv('val_data_with_noise.csv')  # 假设验证数据在这个文件中
X_val, y_val = create_sliding_window_data(df_val, window_size=2)

# 数据标准化
scaler = StandardScaler()
num_features = X_train.shape[2]  # 特征数量

# 标准化训练数据
X_train_reshaped = X_train.reshape(-1, num_features)  # 调整为二维
X_train_scaled = scaler.fit_transform(X_train_reshaped)  # 标准化
X_train_scaled = X_train_scaled.reshape(X_train.shape)  # 调整回三维

# 标准化验证数据
X_val_reshaped = X_val.reshape(-1, num_features)  # 调整为二维
X_val_scaled = scaler.transform(X_val_reshaped)  # 使用训练集的 scaler 进行标准化
X_val_scaled = X_val_scaled.reshape(X_val.shape)  # 调整回三维

# 打印数据维度
print("训练集 X 维度:", X_train_scaled.shape)  # (num_samples, window_size, num_features)
print("训练集 y 维度:", y_train.shape)  # (num_samples,)
print("验证集 X 维度:", X_val_scaled.shape)
print("验证集 y 维度:", y_val.shape)

训练集 X 维度: (2391, 3, 9)
训练集 y 维度: (2391,)
验证集 X 维度: (594, 2, 9)
验证集 y 维度: (594,)


In [194]:
# 转换为 PyTorch 张量
train_X_tensor = torch.tensor(train_X, dtype=torch.float32)
train_y_tensor = torch.tensor(train_y, dtype=torch.float32)
val_X_tensor = torch.tensor(val_X, dtype=torch.float32)
val_y_tensor = torch.tensor(val_y, dtype=torch.float32)

# 创建训练集和验证集的 TensorDataset
train_dataset = TensorDataset(train_X_tensor, train_y_tensor)
val_dataset = TensorDataset(val_X_tensor, val_y_tensor)

# 创建 DataLoader
batch_size = 64  # 批大小
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [201]:
class AttentionLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, num_heads, dropout_rate=0.5):
        super(AttentionLSTMModel, self).__init__()
        
        # 双层 LSTM
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size1, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=hidden_size1, hidden_size=hidden_size2, num_layers=1, batch_first=True)
        
        # 多头注意力层
        self.attention = nn.MultiheadAttention(embed_dim=hidden_size2, num_heads=num_heads, batch_first=True)

        self.dropout = nn.Dropout(dropout_rate)
        
        # 全连接层部分
        self.fc1 = nn.Linear(hidden_size2, 8)  # 第一个全连接层，将 32 维压缩到 16 维
        self.relu = nn.ReLU()                   # 激活函数
        self.fc2 = nn.Linear(8, output_size)   # 第二个全连接层，将 16 维压缩到 1 维

    def forward(self, x):
        # LSTM 层
        lstm_out, _ = self.lstm1(x)
        lstm_out, _ = self.lstm2(lstm_out)

        lstm_out = self.dropout(lstm_out)
        
        # 多头注意力层
        attn_output, _ = self.attention(lstm_out, lstm_out, lstm_out)
        
        # 取最后一个时间步的注意力输出
        attn_output = attn_output[:, -1, :]
        
        # 全连接层和激活函数
        out = self.fc1(attn_output)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# 定义超参数
input_size = 9  # 输入特征数
hidden_size1 = 32 # 第一层 LSTM 隐藏层大小
hidden_size2 = 16  # 第二层 LSTM 隐藏层大小
output_size = 1  # 输出为标量，用于回归
num_heads = 2  # 多头注意力头数
learning_rate = 0.02  # 学习率
num_epochs = 100  # 训练的轮数
batch_size = 64  # 批大小



In [202]:
# 实例化模型
model = AttentionLSTMModel(input_size=input_size, hidden_size1=hidden_size1, hidden_size2=hidden_size2, output_size=output_size, num_heads=num_heads)
print(model)

# 定义损失函数和优化器
criterion = nn.MSELoss()  # 使用均方误差损失
#optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)  # L2 正则化

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # 每10个epoch将学习率乘以0.1


# 训练模型
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        
        # 反向传播和优化
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    scheduler.step()
    
    #print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
    current_lr = scheduler.get_last_lr()[0]
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Learning Rate: {current_lr:.6f}")



# 验证模型
model.eval()
val_loss = 0.0
with torch.no_grad():
    for inputs, targets in val_loader:
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        val_loss += loss.item()

print(f"Validation Loss: {val_loss/len(val_loader):.4f}")



AttentionLSTMModel(
  (lstm1): LSTM(9, 32, batch_first=True)
  (lstm2): LSTM(32, 16, batch_first=True)
  (attention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
  )
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=16, out_features=8, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=8, out_features=1, bias=True)
)
Epoch [1/100], Loss: 0.1982, Learning Rate: 0.020000
Epoch [2/100], Loss: 0.1878, Learning Rate: 0.020000
Epoch [3/100], Loss: 0.1803, Learning Rate: 0.020000
Epoch [4/100], Loss: 0.1760, Learning Rate: 0.020000
Epoch [5/100], Loss: 0.1731, Learning Rate: 0.020000
Epoch [6/100], Loss: 0.1743, Learning Rate: 0.020000
Epoch [7/100], Loss: 0.1635, Learning Rate: 0.020000
Epoch [8/100], Loss: 0.1582, Learning Rate: 0.020000
Epoch [9/100], Loss: 0.1488, Learning Rate: 0.020000
Epoch [10/100], Loss: 0.1435, Learning Rate: 0.002000
Epoch [11/100], Loss: 0.1357, Learning Rate: 0.002000
Ep

In [154]:
# 保存整个模型
torch.save(model, 'lstm_model_full.pt')
