In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import platform
from torch.utils.data import DataLoader, Subset, random_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import time
import os

In [2]:
# ==============================
# 1. Environment setup and hyperparameters
# ==============================
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")

使用设备: cuda:0


In [3]:
# Experiment hyperparameters
config = dict(
    batch_size=128,          # number of samples per training iteration
    learning_rate=0.001,     # optimizer learning rate (size of each update)
    num_epochs=15,           # number of full passes over the training set
    num_workers=4,           # worker processes used for data loading
    optimizer='Adam',        # name of the optimizer to use
    pretrained=True,         # whether to load pretrained model weights
    data_augmentation=True,  # whether to augment the training images
)

In [4]:
# Create the output directories; existing directories are left as they are.
for _out_dir in ('results', 'checkpoints'):
    os.makedirs(_out_dir, exist_ok=True)

In [7]:
# ==============================
# 2. Data preprocessing and loading
# ==============================
# Training pipeline: augmentation is applied only when enabled in `config`.
if config['data_augmentation']:
    transform_train = transforms.Compose([
        # Pad by 4 pixels, then take a random 32x32 crop
        transforms.RandomCrop(32, padding=4),
        # Horizontal flip with the default probability
        transforms.RandomHorizontalFlip(),
        # Convert the image to a PyTorch tensor (0-255 -> 0-1)
        transforms.ToTensor(),
        # Standardize each channel: (pixel - mean) / std
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),   # CIFAR-10 RGB channel means
            std=(0.2023, 0.1994, 0.2010),    # CIFAR-10 RGB channel stds
        ),
    ])
else:
    # Augmentation disabled: tensor conversion and normalization only
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ])

# Test pipeline (never augmented)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010),
    ),
])

In [8]:
# Download (when not present locally) and load the CIFAR-10 dataset.
# Training split, using the training transform.
train_dataset = torchvision.datasets.CIFAR10(
    './data',
    train=True,
    transform=transform_train,
    download=True,
)
# Test split, using the test transform.
test_dataset = torchvision.datasets.CIFAR10(
    './data',
    train=False,
    transform=transform_test,
    download=True,
)

In [18]:
# Split the training data into train / validation subsets (90% / 10%).
# A seeded generator makes the split identical on every run, so validation
# results stay comparable after a kernel restart. The seed is read from
# `config['seed']` when that key exists and defaults to 42 otherwise.
split_generator = torch.Generator().manual_seed(config.get('seed', 42))
train_subset, val_subset = random_split(
    train_dataset, [0.9, 0.1], generator=split_generator
)

In [21]:
# The validation subset must use the test transform (no augmentation).
# `train_subset` and `val_subset` both wrap the same `train_dataset` object, so
# assigning to `val_subset.dataset.transform` would also replace the transform
# used for training and silently turn augmentation off. Instead, load the
# training split a second time with the test transform and re-wrap the same
# validation indices around that copy; `train_dataset` is left untouched.
val_dataset = torchvision.datasets.CIFAR10(
    root=train_dataset.root,
    train=True,
    download=False,            # files were already fetched for train_dataset
    transform=transform_test,
)
val_subset = Subset(val_dataset, val_subset.indices)

In [22]:
# Data loaders
# Batch size and worker count are the same for all three loaders.
_loader_kwargs = dict(
    batch_size=config['batch_size'],
    num_workers=config['num_workers'],
)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_subset, shuffle=True, **_loader_kwargs)
# Validation and test data are read in a fixed order.
val_loader = DataLoader(val_subset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)


In [23]:
# CIFAR-10 class names
classes = tuple(
    'plane car bird cat deer dog frog horse ship truck'.split()
)

In [27]:
# ==============================
# 3. Model definition - modified ResNet-18
# ==============================
class CustomResNet18(nn.Module):
    """torchvision ResNet-18 with its input stem and classifier head replaced.

    The wrapped network is stored in ``self.model``. Pretrained weights are
    loaded when the notebook-level ``config['pretrained']`` is true. The
    replaced layers (``conv1``, ``bn1``, ``fc``) are newly constructed and
    therefore do not carry pretrained weights.

    Args:
        num_classes: Number of outputs of the final fully connected layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Load ResNet-18, optionally with pretrained weights.
        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; confirm against the installed version.
        self.model = torchvision.models.resnet18(pretrained=config['pretrained'])

        # Replace the first convolution (3x3 kernel, stride 1, padding 1) and
        # its batch norm to suit CIFAR-10's 32x32 inputs.
        self.model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.model.bn1 = nn.BatchNorm2d(64)

        # Replace the final fully connected layer so it emits `num_classes` outputs.
        num_features = self.model.fc.in_features
        self.model.fc = nn.Linear(num_features, num_classes)

    # `forward` must be a method of the class. It was previously nested inside
    # `__init__`, where it was only an unused local function, so the module had
    # no forward pass of its own.
    def forward(self, x):
        """Pass ``x`` through the wrapped ResNet and return its output."""
        return self.model(x)

In [28]:
# Build the model, then move it to the selected device
model = CustomResNet18(num_classes=10)
model = model.to(device)

# Print the model structure
print("模型结构:", model, sep="\n")



模型结构:
CustomResNet18(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True