In [50]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from regex import F
from sklearn.model_selection import train_test_split
import time


In [28]:
# Experiment configuration shared by the cells of this notebook.
# SEEDS and FOLDS are defined here but are not referenced by the cells visible in this notebook.
SEEDS  = [42, 2025, 7]
FOLDS  = 5
# Maximum number of training epochs (upper bound of the training loop).
EPOCHS = 100
# Mini-batch size handed to both DataLoaders.
BATCH_SIZE = 256
# Hidden widths passed to ResMLPRegressor as `units`; consumed two at a time, one pair per residual block.
ARCH   = [128, 128, 128, 128]

In [29]:
# Load the training table; the relative path is resolved against the kernel's working directory.
train_data=pd.read_csv('train.csv')

In [30]:
# Name of the regression target column.
TARGET = 'Calories'
# Model inputs: every column of train_data except 'id' and the target.
FEATURES = [col for col in train_data.columns if col not in ('id', TARGET)]

In [37]:
# Seed for the train/validation split so the partition is reproducible.
RANDOM_STATE = 42

# Separate the feature matrix from the target column.
X = train_data[FEATURES]
y = train_data[TARGET]

# Hold out 20% of the rows as the validation set.
X_tr, X_va, y_tr, y_va = train_test_split(
    X,
    y,
    random_state=RANDOM_STATE,
    test_size=0.2,
)

# Print both shapes to confirm the split.
print(f"训练集形状: {X_tr.shape}")
print(f"验证集形状: {X_va.shape}")

训练集形状: (600000, 7)
验证集形状: (150000, 7)


In [38]:
# Convert the split to float32 NumPy arrays so torch.from_numpy can wrap them.
# np.asarray accepts both the pandas objects produced by the split and the ndarrays
# left behind by an earlier run of this cell, so the cell can be re-executed safely
# (accessing `.values` fails once these names already hold ndarrays).
X_tr = np.asarray(X_tr, dtype=np.float32)
X_va = np.asarray(X_va, dtype=np.float32)
y_tr = np.asarray(y_tr, dtype=np.float32)
y_va = np.asarray(y_va, dtype=np.float32)

In [49]:
# Wrap each (features, targets) array pair as a TensorDataset.
train_ds, valid_ds = (
    TensorDataset(torch.from_numpy(features), torch.from_numpy(targets))
    for features, targets in ((X_tr, y_tr), (X_va, y_va))
)

# Shuffle only the training batches; validation batches keep their order.
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_ds, batch_size=BATCH_SIZE, shuffle=False)

In [42]:
class ResidualBlock(nn.Module):
    """
    Fully connected residual block.

    Main path: Linear -> BatchNorm1d -> SiLU -> Linear -> BatchNorm1d. The main-path
    output is added to a shortcut of the input and passed through a final SiLU.

    Args:
        in_f: Width of the incoming features.
        u1: Output width of the first linear layer.
        u2: Output width of the second linear layer (and of the block).
    """
    def __init__(self, in_f, u1, u2):
        super().__init__()
        # Main path, first stage: bias-free linear layer, batch norm, in-place SiLU.
        self.fc1 = nn.Linear(in_f, u1, bias=False)
        self.bn1 = nn.BatchNorm1d(u1)
        self.act1 = nn.SiLU(inplace=True)

        # Main path, second stage: bias-free linear layer and batch norm.
        self.fc2 = nn.Linear(u1, u2, bias=False)
        self.bn2 = nn.BatchNorm1d(u2)

        # Shortcut: identity when the widths already match, otherwise a
        # Linear + BatchNorm1d projection from in_f to u2.
        widths_differ = in_f != u2
        self.proj = (
            nn.Sequential(
                nn.Linear(in_f, u2, bias=False),
                nn.BatchNorm1d(u2),
            )
            if widths_differ
            else None
        )

        # Activation applied after the residual sum.
        self.act_out = nn.SiLU(inplace=True)

    def forward(self, x):
        """Return SiLU(main_path(x) + shortcut(x))."""
        main = self.bn2(self.fc2(self.act1(self.bn1(self.fc1(x)))))

        # Use the raw input as the shortcut unless a projection was built.
        if self.proj is None:
            shortcut = x
        else:
            shortcut = self.proj(x)

        return self.act_out(main + shortcut)


In [43]:


class RMSLELoss(nn.Module):
    """
    Root mean squared logarithmic error.

    Computes ``sqrt(mean((log1p(p + eps) - log1p(t + eps)) ** 2))`` where ``p`` is the
    prediction clamped to be non-negative and ``t`` is the target.

    Args:
        eps: Constant added to both inputs before ``log1p``.
    """
    def __init__(self, eps: float = 1e-6):
        super().__init__()
        self.eps = eps

    def forward(self, preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        """Return the scalar RMSLE between ``preds`` and ``targets``."""
        # Negative predictions are clamped to zero so log1p stays defined.
        preds = preds.clamp(min=0.0)
        # Move both sides into log space.
        log_preds = torch.log1p(preds + self.eps)
        log_targets = torch.log1p(targets + self.eps)
        # Call nn.functional explicitly: the notebook-level name `F` is imported from
        # `regex` and has no `mse_loss`, so `F.mse_loss` fails at call time.
        return torch.sqrt(nn.functional.mse_loss(log_preds, log_targets))

In [44]:
class EarlyStopping:
    """
    Early-stopping helper that tracks the best validation loss.

    Call ``step`` once per epoch. It snapshots the model whenever the loss improves
    and reports when the loss has failed to improve for ``patience`` consecutive calls.

    Args:
        patience: Number of consecutive non-improving epochs tolerated before stopping.
        delta: Minimum amount by which the loss must drop to count as an improvement.
    """
    def __init__(self, patience=10, delta=0.0):
        self.patience = patience
        self.delta = delta
        self.best_loss = float('inf')  # lowest validation loss seen so far
        self.counter = 0               # consecutive epochs without improvement
        self.best_state = None         # CPU snapshot of the best weights, or None

    def step(self, val_loss, model):
        """
        Record one epoch's validation loss.

        Args:
            val_loss: Validation loss of the current epoch.
            model: Model whose weights are snapshotted on improvement.

        Returns:
            bool: True once ``patience`` consecutive epochs have passed without improvement.
        """
        if val_loss + self.delta < self.best_loss:
            self.best_loss = val_loss
            # clone() is required: for a model already on the CPU, .cpu() returns the
            # same tensor, so without a copy the snapshot would alias the live weights
            # and silently follow every later optimizer update.
            self.best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience

    def restore(self, model):
        """
        Load the best recorded weights back into ``model``.

        Does nothing when no snapshot has been recorded yet (``step`` was never called,
        or no loss ever compared as an improvement, e.g. all losses were NaN).

        Args:
            model: Model to restore.
        """
        if self.best_state is None:
            return
        model.load_state_dict(self.best_state)

In [46]:
class ResMLPRegressor(nn.Module):
    """
    Residual MLP regressor.

    Pipeline: BatchNorm1d on the raw inputs -> a stack of ResidualBlock modules ->
    a linear head producing one value per row.

    Args:
        input_dim: Number of input features.
        units: Sequence of layer widths, consumed two at a time. Each pair gives the
            (u1, u2) widths of one residual block; a trailing unpaired width is used
            for both.
    """
    def __init__(self, input_dim, units):
        super().__init__()
        # Normalise the raw input features.
        self.input_bn = nn.BatchNorm1d(input_dim)

        # Walk `units` in steps of two, chaining each block's output width into the next.
        stack = []
        width = input_dim
        for start in range(0, len(units), 2):
            pair = units[start:start + 2]
            # pair[-1] equals pair[0] when only one width is left.
            hidden, out_width = pair[0], pair[-1]
            stack.append(ResidualBlock(width, hidden, out_width))
            width = out_width

        self.blocks = nn.Sequential(*stack)
        # Regression head: final width -> a single output.
        self.head = nn.Linear(width, 1)

    def forward(self, x):
        """
        Run the network on a batch.

        Args:
            x: Input feature tensor.

        Returns:
            Predictions with the trailing size-1 dimension squeezed away.
        """
        normalised = self.input_bn(x)
        hidden = self.blocks(normalised)
        prediction = self.head(hidden)
        return prediction.squeeze(-1)

In [48]:
# Build the residual MLP: the input width is the number of feature columns in X_tr,
# the hidden widths come from ARCH.
model = ResMLPRegressor(units=ARCH, input_dim=X_tr.shape[1])

# Training objective: root mean squared logarithmic error.
criterion = RMSLELoss()

# Adam optimizer with a learning rate of 1e-3.
optimizer = Adam(params=model.parameters(), lr=1e-3)

# Learning-rate schedule: halve the rate when the monitored loss stops decreasing
# (patience of 3 epochs), never going below 1e-6.
# NOTE: this scheduler only adjusts the optimizer instance created in this cell.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)

# Early stopping: stop after 10 consecutive epochs without a validation-loss improvement.
stopper = EarlyStopping(patience=10)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# Seed torch so weight initialisation and DataLoader shuffling repeat across runs.
torch.manual_seed(RANDOM_STATE)

# Build every stateful training object in this cell. The scheduler must wrap the
# optimizer that actually performs the updates; a scheduler created against an earlier
# optimizer instance would lower that instance's learning rate and leave this one
# untouched. A fresh EarlyStopping also prevents best-loss/counter state from a previous
# run of this cell leaking into the new run.
model = ResMLPRegressor(input_dim=X_tr.shape[1], units=ARCH).to(device)
criterion = RMSLELoss().to(device)
optimizer = Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-6
)
stopper = EarlyStopping(patience=10)


for epoch in range(1, EPOCHS+1):
    start_time = time.time()

    # ---- training pass ----
    model.train()
    train_losses = []

    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        pred = model(xb)
        loss = criterion(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

    # Mean of the per-batch losses (not a sample-weighted RMSLE over the whole epoch).
    avg_train_loss = sum(train_losses) / len(train_losses)

    # ---- validation pass ----
    model.eval()
    val_losses = []
    with torch.no_grad():
        for xb, yb in valid_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            vp = model(xb)
            val_losses.append(criterion(vp, yb).item())

    avg_val_loss = float(sum(val_losses) / len(val_losses))
    # Let the plateau scheduler react to this epoch's validation loss.
    scheduler.step(avg_val_loss)

    elapsed = time.time() - start_time
    current_lr = optimizer.param_groups[0]['lr']
    print(
        f"Epoch {epoch:3d}/{EPOCHS} | "
        f"Train Loss: {avg_train_loss:.4f} | "
        f"Val Loss:   {avg_val_loss:.4f} | "
        f"LR: {current_lr:.1e} | "
        f"Time: {elapsed:.1f}s"
    )

    if stopper.step(avg_val_loss, model):
        print(f"⏹ Early stopping at epoch {epoch}")
        break

# Finish with the best validation weights whether training stopped early or ran for
# all EPOCHS. The guard covers the case where no snapshot was ever recorded.
if stopper.best_state is not None:
    stopper.restore(model)