# Training


In [None]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import os

# --- 1. Environment and device configuration ---
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"当前使用的设备: {device}")

# Make sure the checkpoint directory exists. `exist_ok=True` avoids the
# check-then-create race of a separate `os.path.exists` test.
os.makedirs("./model", exist_ok=True)

# --- 2. Prepare the datasets ---
# The only preprocessing step is converting each image to a tensor.
transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)

# CIFAR-10 training split; downloaded into ./data if requested files are missing.
train_data = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    transform=transform,
    download=True,
)
# CIFAR-10 test split, same storage location and preprocessing.
test_data = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=transform,
    download=True,
)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_dataloader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

# --- 3. 搭建神经网络 ---
class Cifar10Model(nn.Module):
    """Small convolutional classifier producing 10 logits per input image.

    Three 5x5 convolutions (stride 1, padding 2, so spatial size is kept),
    each followed by a 2x2 max-pool that halves the spatial size. The
    flattened features feed a two-layer fully connected head. The first
    linear layer expects 64 * 4 * 4 features, which corresponds to a
    3-channel 32x32 input.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order and stored under `self.model`
        # so that parameter names are `model.<index>.*`.
        layers = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=64 * 4 * 4, out_features=64),
            nn.ReLU(),  # non-linearity between the two fully connected layers
            nn.Linear(in_features=64, out_features=10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the raw class logits."""
        logits = self.model(x)
        return logits

# --- 4. Model, loss function and optimizer ---
model = Cifar10Model()
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

# --- 5. Training settings ---
epochs = 10
total_train_step = 0  # running step counter, starts at zero
writer = SummaryWriter(log_dir="./logs_train")  # TensorBoard event files go here

# --- 6. Core training loop ---
# Each epoch runs one full pass over the training data, evaluates on the test
# data, logs the metrics to TensorBoard and saves a weights-only checkpoint.
if __name__ == "__main__":
    # try/finally guarantees the TensorBoard writer is flushed and closed even
    # when training raises or is interrupted part-way through.
    try:
        for epoch in range(1, epochs + 1):
            print(f"-------第 {epoch} 轮训练开始-------")

            # --- Training phase ---
            model.train()
            for images, labels in train_dataloader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = loss_fn(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_train_step += 1
                # Report the current batch loss every 100 optimizer steps.
                if total_train_step % 100 == 0:
                    loss_value = loss.item()
                    print(f"训练步数: {total_train_step}, Loss: {loss_value:.4f}")
                    writer.add_scalar("train_loss", loss_value, total_train_step)

            # --- Evaluation phase ---
            model.eval()
            total_test_loss = 0.0
            total_correct = 0
            total_samples = 0

            with torch.no_grad():
                for images, labels in test_dataloader:
                    images, labels = images.to(device), labels.to(device)

                    outputs = model(images)
                    loss = loss_fn(outputs, labels)
                    batch_size = labels.size(0)

                    # The loss is a per-batch mean; weight it by the batch size
                    # so a smaller final batch is not over-represented in the
                    # epoch average.
                    total_test_loss += loss.item() * batch_size
                    # Count predictions whose arg-max class matches the label.
                    total_correct += (outputs.argmax(1) == labels).sum().item()
                    total_samples += batch_size

            # Per-sample averages over everything seen during evaluation.
            avg_test_loss = total_test_loss / total_samples
            avg_accuracy = total_correct / total_samples

            print(f"第 {epoch} 轮测试结果:")
            print(f"  - 平均 Loss: {avg_test_loss:.4f}")
            print(f"  - 准确率: {avg_accuracy:.2%}")

            # Log the epoch metrics to TensorBoard.
            writer.add_scalar("test_loss", avg_test_loss, epoch)
            writer.add_scalar("test_accuracy", avg_accuracy, epoch)

            # Save only the parameters (state_dict), not the whole module.
            torch.save(model.state_dict(), f"./model/model_{epoch}.pth")
            print(f"已保存第 {epoch} 轮模型权重")
    finally:
        writer.close()

    print("训练结束！")

当前使用的设备: cuda
-------第 1 轮训练开始-------
训练步数: 100, Loss: 2.3039
训练步数: 200, Loss: 2.3002
训练步数: 300, Loss: 2.2853
训练步数: 400, Loss: 2.2783
训练步数: 500, Loss: 2.2747
训练步数: 600, Loss: 2.1622
训练步数: 700, Loss: 2.1014
第 1 轮测试结果:
  - 平均 Loss: 2.0372
  - 准确率: 26.32%
已保存第 1 轮模型权重
-------第 2 轮训练开始-------
训练步数: 800, Loss: 1.9697
训练步数: 900, Loss: 2.0723
训练步数: 1000, Loss: 2.0238
训练步数: 1100, Loss: 2.0192
训练步数: 1200, Loss: 1.9493
训练步数: 1300, Loss: 1.8786
训练步数: 1400, Loss: 1.8750
训练步数: 1500, Loss: 1.7889
第 2 轮测试结果:
  - 平均 Loss: 1.8313
  - 准确率: 34.12%
已保存第 2 轮模型权重
-------第 3 轮训练开始-------
训练步数: 1600, Loss: 1.7448
训练步数: 1700, Loss: 1.8920
训练步数: 1800, Loss: 1.6709
训练步数: 1900, Loss: 1.7264
训练步数: 2000, Loss: 1.6821
训练步数: 2100, Loss: 1.6649
训练步数: 2200, Loss: 1.5522
训练步数: 2300, Loss: 1.5745
第 3 轮测试结果:
  - 平均 Loss: 1.6285
  - 准确率: 41.45%
已保存第 3 轮模型权重
-------第 4 轮训练开始-------
训练步数: 2400, Loss: 1.5727
训练步数: 2500, Loss: 1.5312
训练步数: 2600, Loss: 1.7046
训练步数: 2700, Loss: 1.4900
训练步数: 2800, Loss: 1.7524
训练步数: 2900, Loss: 1