In [1]:
from torch.utils.data import Dataset, DataLoader
import torch

class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [4]:
df=pd.read_csv('train.csv')
df=df.drop(columns=['id','Unnamed: 0'])

In [5]:
X_train,X_val,y_train,y_val=train_test_split(df.drop(columns=['Calories']),df['Calories'],test_size=0.1,random_state=42)

In [7]:
import torch.nn as nn

class ResNetForTabular(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, output_dim=1, depth=3):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.hidden_blocks = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ) for _ in range(depth)
        ])
        self.relu = nn.ReLU()
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.input_layer(x)
        for block in self.hidden_blocks:
            residual = x
            x = block(x)
            x = self.relu(x + residual)  # 残差连接
        return self.output_layer(x)

In [8]:
model = ResNetForTabular(input_dim=X_train.shape[1], output_dim=1)

In [9]:
class RMSLELoss(nn.Module):
    def __init__(self, eps: float = 1e-6):
        """
        eps: 用于在 log1p 前做小量平滑，避免 y_pred<0 时 log 出错
        """
        super().__init__()
        self.eps = eps

    def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        # 如果模型预测会有负数，可以先 clamp 到 >=0
        y_pred_clamped = torch.clamp(y_pred, min=0.0)  

        # log1p = log(1 + x)
        log_pred = torch.log1p(y_pred_clamped + self.eps)
        log_true = torch.log1p(y_true        + self.eps)

        # 计算 MSE 然后开根号
        mse = torch.mean((log_pred - log_true) ** 2)
        return torch.sqrt(mse)

In [13]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# 假设你已经 import 了 pandas as pd

# 1. 把 DataFrame 转成 NumPy，再转成 Tensor
if isinstance(X_train, pd.DataFrame):
    X_np = X_train.to_numpy()
else:
    X_np = X_train  # 如果已经是 ndarray

if isinstance(y_train, pd.Series) or isinstance(y_train, pd.DataFrame):
    y_np = y_train.to_numpy()
else:
    y_np = y_train

# 转成 Tensor
X_tensor = torch.from_numpy(X_np).float()
y_tensor = torch.from_numpy(y_np).float().view(-1, 1)

# 2. DataLoader
train_ds = TensorDataset(X_tensor, y_tensor)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

# 后面的训练循环不变：
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = RMSLELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * xb.size(0)

    print(f"Epoch {epoch+1}/{num_epochs} — RMSLE: {running_loss/len(train_ds):.4f}")

Epoch 1/10 — RMSLE: 0.1002
Epoch 2/10 — RMSLE: 0.0697
Epoch 3/10 — RMSLE: 0.0665
Epoch 4/10 — RMSLE: 0.0648
Epoch 5/10 — RMSLE: 0.0643


KeyboardInterrupt: 