In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [5]:
# Seed PyTorch's global RNG for reproducibility
torch.manual_seed(0)

# Hyperparameters
batch_size = 64
epochs = 15
learning_rate = 0.001
dropout_rate = 0.1    # dropout probability read by the CNN definition
weight_decay = 1e-2   # weight-decay coefficient passed to Adam in the regularised optimizer cell

# Preprocessing: ToTensor scales pixels to [0, 1]; Normalize with mean 0.5 and
# std 0.5 per channel then maps them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load the CIFAR-10 dataset (downloaded into ../data if not already present)
train_dataset = datasets.CIFAR10(root='../data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
# Evaluation must see every sample: with drop_last=True the trailing partial
# batch (10000 % 64 = 16 test images) would be silently excluded from the
# reported test loss and accuracy.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Convolutional neural network definition
class CNN(nn.Module):
    """Three-conv-layer CNN for 3x32x32 inputs producing 10 class logits.

    Layout: conv(3->32) -> conv(32->64) -> 2x2 max-pool -> conv(64->128)
    -> 2x2 max-pool -> fc(8192->256) -> fc(256->128) -> fc(128->10), with ReLU
    after every conv and after the first two fully connected layers, and
    dropout after each of those two fully connected activations.

    Args:
        dropout_p: Dropout probability. When left as ``None`` the value of the
            module-level ``dropout_rate`` is used, which keeps the original
            zero-argument ``CNN()`` call working unchanged.
    """

    def __init__(self, dropout_p=None):
        super().__init__()

        if dropout_p is None:
            # Backward-compatible fallback to the notebook-level hyperparameter.
            dropout_p = dropout_rate

        # Convolution and pooling layers (3x3 kernels with padding=1 keep H and W)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)   # 3 input channels -> 32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)         # halves H and W

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 8 * 8, 256)  # 8192 -> 256
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)           # 10 output classes

        # Dropout (one module, applied at two points in forward)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return raw logits of shape [N, 10] for an input of shape [N, 3, 32, 32]."""
        x = F.relu(self.conv1(x))                      # [N, 32, 32, 32]
        x = self.pool(F.relu(self.conv2(x)))           # [N, 64, 16, 16]
        x = F.relu(self.conv3(x))                      # [N, 128, 16, 16]
        x = self.pool(x)                               # [N, 128, 8, 8]

        x = torch.flatten(x, start_dim=1)              # flatten to [N, 128*8*8]

        x = F.relu(self.fc1(x))                        # [N, 256]
        x = self.dropout(x)
        x = F.relu(self.fc2(x))                        # [N, 128]
        x = self.dropout(x)
        x = self.fc3(x)                                # [N, 10]

        return x                                       # logits (no softmax applied)

# Instantiate the network; the optimizer cells below bind to this instance's parameters.
model = CNN()

In [7]:
# Baseline setup: Adam over all model parameters with no weight decay.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)
# Cross-entropy loss applied to the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

In [8]:
# Regularization method 2: Adam with a weight-decay term.
# Running this cell rebinds `criterion` and `optimizer`, replacing the
# baseline objects created in the previous cell.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
criterion = nn.CrossEntropyLoss()

In [9]:
# Training function
def train(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader`` and report its metrics.

    Args:
        model: Module called as ``model(data)``; its output is passed to
            ``criterion`` and arg-maxed along dim 1 for predictions.
        train_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable ``criterion(output, target)``. It is assumed
            to return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``), since the value is re-weighted by the
            batch size below.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.

    Returns:
        ``(avg_loss, accuracy)``: the per-sample average loss over the pass
        and the accuracy in percent. Both are measured on the outputs
        produced before each parameter update.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)              # logits, one row per sample

        loss = criterion(output, target)  # target holds one class index per sample
        loss.backward()
        optimizer.step()

        batch_count = target.size(0)
        # Weight each batch mean by its size so a smaller final batch does not
        # count as much as a full one in the epoch average.
        loss_sum += loss.item() * batch_count
        total += batch_count
        correct += (output.argmax(dim=1) == target).sum().item()

    accuracy = 100 * correct / total
    avg_loss = loss_sum / total
    return avg_loss, accuracy


In [10]:
# 测试函数
def test(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)

            loss = criterion(output, target)
            total_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100 * correct / total
    avg_loss = total_loss / len(test_loader)
    return avg_loss, accuracy

In [11]:
# Train for `epochs` passes, evaluating on the test loader after each one
for epoch in range(epochs):
    # Tuple elements are evaluated left to right: the training pass runs
    # first, then the evaluation pass on the updated weights.
    (train_loss, train_acc), (test_loss, test_acc) = (
        train(model, train_loader, criterion, optimizer),
        test(model, test_loader, criterion),
    )

    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%")

# Persist the learned parameters (state dict only) in the working directory
final_weights = model.state_dict()
torch.save(final_weights, "cnn_cifar10.pth")

Epoch [1/15]
Train Loss: 1.6979, Train Accuracy: 36.89%
Test Loss: 1.4644, Test Accuracy: 47.35%
Epoch [2/15]
Train Loss: 1.4026, Train Accuracy: 48.64%
Test Loss: 1.3058, Test Accuracy: 52.08%
Epoch [3/15]
Train Loss: 1.2859, Train Accuracy: 53.52%
Test Loss: 1.2356, Test Accuracy: 56.03%
Epoch [4/15]
Train Loss: 1.1999, Train Accuracy: 56.76%
Test Loss: 1.1252, Test Accuracy: 59.61%
Epoch [5/15]
Train Loss: 1.1302, Train Accuracy: 59.90%
Test Loss: 1.0539, Test Accuracy: 62.93%
Epoch [6/15]
Train Loss: 1.0808, Train Accuracy: 61.61%
Test Loss: 1.0537, Test Accuracy: 63.21%
Epoch [7/15]
Train Loss: 1.0422, Train Accuracy: 63.17%
Test Loss: 0.9723, Test Accuracy: 65.98%
Epoch [8/15]
Train Loss: 1.0180, Train Accuracy: 64.07%
Test Loss: 0.9481, Test Accuracy: 66.92%
Epoch [9/15]
Train Loss: 0.9957, Train Accuracy: 65.04%
Test Loss: 0.9860, Test Accuracy: 65.54%
Epoch [10/15]
Train Loss: 0.9859, Train Accuracy: 65.33%
Test Loss: 0.9409, Test Accuracy: 67.64%
Epoch [11/15]
Train Loss: 0.9

# 结论


**L1 正则化：**

```shell
Epoch [1/15]
Train Loss: 1.1401, Train Accuracy: 59.75%
Test Loss: 0.9162, Test Accuracy: 68.10%
Epoch [2/15]
Train Loss: 0.9579, Train Accuracy: 67.05%
Test Loss: 0.8320, Test Accuracy: 71.80%
Epoch [3/15]
Train Loss: 0.8448, Train Accuracy: 70.81%
Test Loss: 0.7795, Test Accuracy: 72.88%
Epoch [4/15]
Train Loss: 0.7525, Train Accuracy: 74.35%
Test Loss: 0.7394, Test Accuracy: 74.79%
Epoch [5/15]
Train Loss: 0.6790, Train Accuracy: 76.70%
Test Loss: 0.7349, Test Accuracy: 74.85%
Epoch [6/15]
Train Loss: 0.6164, Train Accuracy: 78.75%
Test Loss: 0.7212, Test Accuracy: 75.93%
Epoch [7/15]
Train Loss: 0.5715, Train Accuracy: 80.34%
Test Loss: 0.7185, Test Accuracy: 76.09%
Epoch [8/15]
Train Loss: 0.5194, Train Accuracy: 82.14%
Test Loss: 0.7247, Test Accuracy: 76.14%
Epoch [9/15]
Train Loss: 0.4809, Train Accuracy: 83.49%
Test Loss: 0.7380, Test Accuracy: 76.58%
Epoch [10/15]
Train Loss: 0.4419, Train Accuracy: 84.66%
Test Loss: 0.7650, Test Accuracy: 76.05%
Epoch [11/15]
Train Loss: 0.4161, Train Accuracy: 85.65%
Test Loss: 0.7621, Test Accuracy: 76.62%
Epoch [12/15]
Train Loss: 0.3908, Train Accuracy: 86.50%
Test Loss: 0.8029, Test Accuracy: 76.50%
Epoch [13/15]
Train Loss: 0.3765, Train Accuracy: 86.90%
Test Loss: 0.7987, Test Accuracy: 76.41%
Epoch [14/15]
Train Loss: 0.3577, Train Accuracy: 87.61%
Test Loss: 0.8377, Test Accuracy: 76.45%
Epoch [15/15]
Train Loss: 0.3425, Train Accuracy: 88.09%
Test Loss: 0.8302, Test Accuracy: 76.68%
```

[附加] **L2 正则化：**

```shell
Epoch [1/15]
Train Loss: 0.7353, Train Accuracy: 75.38%
Test Loss: 0.9027, Test Accuracy: 69.42%
Epoch [2/15]
Train Loss: 1.0549, Train Accuracy: 63.99%
Test Loss: 0.9474, Test Accuracy: 68.99%
Epoch [3/15]
Train Loss: 1.0906, Train Accuracy: 62.80%
Test Loss: 1.0132, Test Accuracy: 64.91%
Epoch [4/15]
Train Loss: 1.0924, Train Accuracy: 62.43%
Test Loss: 0.9672, Test Accuracy: 66.09%
Epoch [5/15]
Train Loss: 1.0924, Train Accuracy: 62.19%
Test Loss: 0.9529, Test Accuracy: 67.68%
Epoch [6/15]
Train Loss: 1.0855, Train Accuracy: 62.58%
Test Loss: 0.9493, Test Accuracy: 67.80%
Epoch [7/15]
Train Loss: 1.0815, Train Accuracy: 62.66%
Test Loss: 0.9549, Test Accuracy: 67.62%
Epoch [8/15]
Train Loss: 1.0825, Train Accuracy: 62.74%
Test Loss: 0.9457, Test Accuracy: 67.85%
Epoch [9/15]
Train Loss: 1.0732, Train Accuracy: 63.06%
Test Loss: 0.9741, Test Accuracy: 66.44%
Epoch [10/15]
Train Loss: 1.0727, Train Accuracy: 62.98%
Test Loss: 0.9604, Test Accuracy: 67.47%
Epoch [11/15]
Train Loss: 1.0636, Train Accuracy: 63.45%
Test Loss: 0.9399, Test Accuracy: 68.77%
Epoch [12/15]
Train Loss: 1.0717, Train Accuracy: 63.09%
Test Loss: 0.9559, Test Accuracy: 67.07%
Epoch [13/15]
Train Loss: 1.0680, Train Accuracy: 63.36%
Test Loss: 0.9550, Test Accuracy: 67.03%
Epoch [14/15]
Train Loss: 1.0660, Train Accuracy: 63.22%
Test Loss: 0.9544, Test Accuracy: 67.09%
Epoch [15/15]
Train Loss: 1.0629, Train Accuracy: 63.42%
Test Loss: 0.9663, Test Accuracy: 66.58%
```
