In [1]:
!pip install numpy d2l --no-deps # installing d2l # installing d2l

Looking in indexes: https://mirrors.cloud.aliyuncs.com/pypi/simple
Collecting d2l
  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/8b/39/418ef003ed7ec0f2a071e24ec3f58c7b1f179ef44bec5224dcca276876e3/d2l-1.0.3-py3-none-any.whl (111 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.7/111.7 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: d2l
Successfully installed d2l-1.0.3
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [42]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from d2l import torch as d2l
import torch.nn as nn
import torch.optim as optim
import random
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts,ExponentialLR,ReduceLROnPlateau

# ======================
# 1. 自定义 Dataset
# ======================
class ImageDataset(Dataset):
    def __init__(self, csv_file, img_dir="", transform=None,class_to_idx=None, is_train=False):
        """
        csv_file: csv 文件路径
        img_dir: 图片所在目录（可为空，如果 csv 里 image 列已经有完整路径）
        transform: torchvision.transforms
        is_test: 是否是测试集（没有 label）
        """
        self.df = pd.read_csv(csv_file)
        self.df.columns = self.df.columns.str.strip()  # 去掉列名空格

        self.img_dir = img_dir
        self.transform = transform
        self.is_train = is_train
        self.class_to_idx = class_to_idx

        # 如果是训练集，构建标签映射
        if is_train and "label" in self.df.columns:
            self.classes = sorted(class_to_idx.keys())
            self.df['label_idx'] = self.df['label'].map(self.class_to_idx)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['image']) if self.img_dir else row['image']
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)

        if not self.is_train or "label_idx" not in self.df.columns:
            return img, -1   # 测试集没有标签，用 -1 占位
        else:
            return img, row['label_idx']


def split_train_val(dataset: Dataset, val_ratio=0.25):
    """
    按比例拆分训练集和验证集
    """
    n_total = len(dataset)
    indices = list(range(n_total))
    random.shuffle(indices)  # <-- 关键！打乱索引
    n_val = int(n_total * val_ratio)
    n_train = n_total - n_val

    train_idx = indices[:n_train]
    val_idx = indices[n_train:]

    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    return train_subset, val_subset

# ======================
# 2. Transform
# ======================
def get_transforms(is_train=True, img_size=224):
    """获取图片预处理变换"""
    if is_train:
        # 训练集数据增强
        # 方案二：更温和的数据增强
        # return transforms.Compose([
        #     # 直接将图片缩放到目标尺寸
        #     transforms.Resize((img_size, img_size)), 
        #     # 只进行水平翻转
        #     transforms.RandomHorizontalFlip(p=0.5),
        #     # (可选) 可以加上轻微的旋转
        #     # transforms.RandomRotation(10), 
        #     transforms.ToTensor(),
        #     transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                          std=[0.229, 0.224, 0.225])
        # ])
        # 训练集数据增强
        return transforms.Compose([
            transforms.Resize((256, 256)),  # 先resize到稍大尺寸
            transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),  # 随机裁剪
            transforms.RandomHorizontalFlip(p=0.5),  # 随机水平翻转
            transforms.RandomVerticalFlip(p=0.3),    # 随机垂直翻转
            transforms.RandomRotation(15),  # 随机旋转
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # 颜色抖动
            transforms.ToTensor(),  # 转换为张量，自动归一化到[0,1]
            transforms.Normalize(mean=[0.485, 0.456, 0.406],   # ImageNet标准化
                               std=[0.229, 0.224, 0.225])
        ])
    else:
        # 验证/测试集
        return transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

# ======================
# 3. 创建 Dataset & DataLoader
# ======================
batch_size = 32

# transform
train_tf = get_transforms(is_train=True)
eval_tf  = get_transforms(is_train=False)

# 全局生成 class_to_idx
df = pd.read_csv("train.csv")
df['label'] = df['label'].astype(str).str.strip()
classes = sorted(df['label'].unique())
class_to_idx = {cls: idx for idx, cls in enumerate(classes)}



# 读取完整数据集（先传入 train_tf，后面拆分验证集时再换 transform）
full_dataset = ImageDataset("train.csv", transform=train_tf,class_to_idx=class_to_idx, is_train=True)

# 拆分训练集和验证集
train_dataset, val_dataset = split_train_val(full_dataset, val_ratio=0.1)

# 注意：val_dataset 是 Subset，要替换成 eval_tf
val_dataset.dataset.transform = eval_tf

# DataLoader
train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_iter   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# ======================
# 4. 定义网络（用 ResNet18）
# ======================
import torchvision
net = torchvision.models.resnet18(pretrained=True)
num_classes = len(full_dataset.classes)
net.fc = nn.Sequential(
    # nn.Dropout(0.2),
    nn.Linear(net.fc.in_features, num_classes)
)

def train_ch6(net, train_iter, test_iter, num_epochs, lr,weight_decay, device):
    print('training on', device)
    net.to(device)
    best_test_acc = 0.0
    best_epoch = 0
    early_stopping_round = 11
    # optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = ExponentialLR(optimizer, gamma=0.80,verbose=True)
    # scheduler = ReduceLROnPlateau(optimizer, mode='min',factor=0.4,patience=1,threshold=0.1, verbose=True,min_lr=1e-6)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20,eta_min=1e-6)
    
    loss = nn.CrossEntropyLoss()
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(num_epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            # if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
            #     animator.add(epoch + (i + 1) / num_batches,
            #                  (train_l, train_acc, None))
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_epoch = epoch
            torch.save(net.state_dict(), 'best_model.pth')
            # print(f'==> New best model saved with Val Acc: {best_test_acc:.4f}')
        if epoch - best_epoch >= early_stopping_round:
            break
        # animator.add(epoch + 1, (None, None, test_acc))
        # scheduler.step(train_l)
        scheduler.step()
        print(f'epoch {epoch+1} loss {train_l:.6f}, train acc {train_acc:.3f}, ' f'test acc {test_acc:.3f} | LR: {scheduler.get_last_lr()[0]:.6f} | best_test_acc:{best_test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')

# ======================
# 5. 训练
# ======================
lr, num_epochs,weight_decay = 0.0003, 30, 1e-4

train_ch6(net, train_iter, val_iter, num_epochs, lr,weight_decay, d2l.try_gpu())


training on cuda:0




epoch 1 loss 1.802397, train acc 0.591, test acc 0.780 | LR: 0.000225 | best_test_acc:0.780
epoch 2 loss 0.422843, train acc 0.892, test acc 0.894 | LR: 0.000169 | best_test_acc:0.894
epoch 3 loss 0.154660, train acc 0.962, test acc 0.938 | LR: 0.000127 | best_test_acc:0.938
epoch 4 loss 0.070463, train acc 0.982, test acc 0.947 | LR: 0.000095 | best_test_acc:0.947
epoch 5 loss 0.042079, train acc 0.989, test acc 0.957 | LR: 0.000071 | best_test_acc:0.957
epoch 6 loss 0.026781, train acc 0.991, test acc 0.957 | LR: 0.000053 | best_test_acc:0.957
epoch 7 loss 0.023295, train acc 0.991, test acc 0.960 | LR: 0.000040 | best_test_acc:0.960
epoch 8 loss 0.019853, train acc 0.992, test acc 0.961 | LR: 0.000030 | best_test_acc:0.961
epoch 9 loss 0.018772, train acc 0.992, test acc 0.965 | LR: 0.000023 | best_test_acc:0.965
epoch 10 loss 0.017576, train acc 0.992, test acc 0.966 | LR: 0.000017 | best_test_acc:0.966
epoch 11 loss 0.016503, train acc 0.993, test acc 0.960 | LR: 0.000013 | best_t

KeyboardInterrupt: 

In [41]:
# =======================================================
# 6. (方案B) 直接使用内存中的 net 对象进行预测
# =======================================================

# 假设你刚刚运行完训练循环，`net` 对象还在内存中
device=d2l.try_gpu()
import torchvision
net = torchvision.models.resnet18(pretrained=True)
num_classes = len(full_dataset.classes)
net.fc = nn.Sequential(
    # nn.Dropout(0.2),
    nn.Linear(net.fc.in_features, num_classes)
)
net.load_state_dict(torch.load('best_model.pth',map_location=device))
net.to(device)
# --- 1. 准备工作 ---
# 关键！将模型切换到评估模式
net.eval() 

print("直接使用当前模型，准备对测试集进行预测...")
# 测试集（用相同的 eval_tf）
test_dataset = ImageDataset("test.csv", transform=eval_tf,class_to_idx=class_to_idx, is_train=False)
test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# --- 2. 执行预测 ---
all_preds = []
with torch.no_grad():
    for X, _ in test_iter:
        X = X.to( device) # 确保测试数据也移动到GPU
        outputs = net(X)
        predicted_indices = torch.argmax(outputs, dim=1)
        all_preds.extend(predicted_indices.cpu().numpy())

print("预测完成！")

# --- 3. 转换预测结果 ---
idx_to_class = {idx: cls for cls, idx in class_to_idx.items()}
pred_labels = [idx_to_class[i] for i in all_preds]

# --- 4. 生成提交文件 ---
test_df = pd.read_csv("test.csv")
submission_df = pd.DataFrame({
    'image': test_df['image'],
    'label': pred_labels
})
submission_df.to_csv('last_submission.csv', index=False)

print("last_submission.csv 文件已成功生成！")

直接使用当前模型，准备对测试集进行预测...
预测完成！
last_submission.csv 文件已成功生成！
