# ResNet74构建  
## 层数  
74指的是74层可学习层数(主要指卷积层)   

**resnet50层数**    
初始层: 1个卷积层 (conv1)   
├── Stage 2: 3个残差块 × 每个块3个卷积层 = 9个卷积层    
├── Stage 3: 4个残差块 × 3个卷积层 = 12个卷积层  
├── Stage 4: 6个残差块 × 3个卷积层 = 18个卷积层    
├── Stage 5: 3个残差块 × 3个卷积层 = 9个卷积层   
└── 分类头: 1个全连接层  
总计50层  

标准ResNet层数 = 初始层 + 各Stage残差块的总卷积层 + 分类层   

## ResNet74设计  
**resnet74层数**
初始层: 1个卷积层 (conv1)     
├── Stage 2: 3个残差块 × 每个块3个卷积层 = 9个卷积层       
├── Stage 3: 6个残差块 × 3个卷积层 = 18个卷积层    
├── Stage 4: 12个残差块 × 3个卷积层 = 36个卷积层     
├── Stage 5: 3个残差块 × 3个卷积层 = 9个卷积层   
└── 分类头: 1个全连接层
总计：1 + 9 + 24 + 36 + 9 + 1 = 74层 (含1个FC层)   



## 关于resnet的bottleneck等残差块的用法 
### 1.bottleneck  
bottleneck是resnet的残差块，其工作流程为     
```  
输入 [256通道]   
    ↓  
1×1 Conv (降维) → [64通道]   
    ↓      
3×3 Conv (处理) → [64通道]  
    ↓  
1×1 Conv (升维) → [256通道]  
    ↓  
+ 输入 → 输出 [256通道]  
```  
使用残差块可以改变输入的维度，从而与网络的输出匹配，实现相加  

**参数**   
inplanes=3, 输入3通道  
planes=64, 中间层通道数    
expansion = 4, 扩展因子，压缩和放大比率，bottleneck内部定义好的    
中间通道数压缩到64通道，极大减少计算量，最后再由64-> 4 * 64 升维，匹配维度   

**解释**  
一个残差块，本质上就是3个卷积层+残差传播  




In [3]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
from torchvision.models.resnet import Bottleneck,BasicBlock
import torchvision.transforms as transforms

from src import ResNet74, train_model, test_model

In [7]:
# 加载数据 
# Hyperparameters
num_epochs = 10
batch_size = 128
learning_rate = 0.001

# Data Preprocessing
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# ========== 导入库 ==========
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms
from src import ResNet74, train_model, test_model

# ========== 超参数 ==========
num_epochs = 10
batch_size = 128
learning_rate = 0.001

# ========== 数据加载 ==========
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# ========== 模型定义 ==========
# 定义ResNet74模型
resnet74 = ResNet74()

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet74.parameters(), lr=learning_rate)

# 设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 将模型移动到GPU
resnet74.to(device)



In [None]:
# 训练
print("开始训练...")
train_model(resnet74, trainloader, criterion, optimizer, num_epochs, device)


In [None]:
# 验证：直接复用 test_model
test_acc = test_model(resnet74, testloader, device)
print(f"[验证] test_acc: {test_acc}")


尝试先进行下采样  

在初始化层进行下采样，(32\*32->8\*8)  
能够极大的提高速度，但是采样效果如何需要检查

In [None]:
# ========== 导入库 ==========
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms
from src import ResNet74_ForwardDownsample, train_model, test_model

# ========== 超参数 ==========
num_epochs = 10
batch_size = 128
learning_rate = 0.001

# ========== 数据加载 ==========
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# ========== 模型定义 ==========
# 提前下采样
resnet74_forward_downsample = ResNet74_ForwardDownsample()

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet74_forward_downsample.parameters(), lr=learning_rate)

# 设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 将模型移动到GPU
resnet74_forward_downsample.to(device)

# ========== 训练 ==========
print("开始训练...")
train_model(resnet74_forward_downsample, trainloader, criterion, optimizer, num_epochs, device)


**分析** 
提前采样对于模型的效果几乎没有影响，但是速度提高了几倍，因此后续选择提前下采样模型

## 与resnet50和resnet101对比  
**resnet50**训练结果为：  
```
Epoch [1/10], Step [100/391], Loss: 0.7444
Epoch [1/10], Step [200/391], Loss: 0.7427
Epoch [1/10], Step [300/391], Loss: 0.7702
Epoch [1/10] Finished. Loss: 0.9023, Acc: 69.60%, Time: 13.13s
Epoch [2/10], Step [100/391], Loss: 0.4222
Epoch [2/10], Step [200/391], Loss: 0.4974
Epoch [2/10], Step [300/391], Loss: 0.5619
Epoch [2/10] Finished. Loss: 0.5742, Acc: 80.86%, Time: 12.18s
Epoch [3/10], Step [100/391], Loss: 0.4592
Epoch [3/10], Step [200/391], Loss: 0.5474
Epoch [3/10], Step [300/391], Loss: 0.8081
Epoch [3/10] Finished. Loss: 0.4961, Acc: 83.45%, Time: 12.09s
Epoch [4/10], Step [100/391], Loss: 0.3375
Epoch [4/10], Step [200/391], Loss: 0.5660
Epoch [4/10], Step [300/391], Loss: 0.4323
Epoch [4/10] Finished. Loss: 0.4513, Acc: 84.94%, Time: 13.55s
Epoch [5/10], Step [100/391], Loss: 0.4669
Epoch [5/10], Step [200/391], Loss: 0.5576
Epoch [5/10], Step [300/391], Loss: 0.3808
Epoch [5/10] Finished. Loss: 0.4130, Acc: 86.02%, Time: 12.94s
Epoch [6/10], Step [100/391], Loss: 0.3590
Epoch [6/10], Step [200/391], Loss: 0.3644
Epoch [6/10], Step [300/391], Loss: 0.3143
Epoch [6/10] Finished. Loss: 0.3839, Acc: 87.01%, Time: 12.78s
Epoch [7/10], Step [100/391], Loss: 0.4016
Epoch [7/10], Step [200/391], Loss: 0.5128
Epoch [7/10], Step [300/391], Loss: 0.3991
Epoch [7/10] Finished. Loss: 0.3611, Acc: 87.91%, Time: 12.61s
Epoch [8/10], Step [100/391], Loss: 0.3980
Epoch [8/10], Step [200/391], Loss: 0.3987
Epoch [8/10], Step [300/391], Loss: 0.3759
Epoch [8/10] Finished. Loss: 0.3526, Acc: 88.08%, Time: 12.54s
Epoch [9/10], Step [100/391], Loss: 0.3414
Epoch [9/10], Step [200/391], Loss: 0.3202
Epoch [9/10], Step [300/391], Loss: 0.3707
Epoch [9/10] Finished. Loss: 0.3283, Acc: 88.86%, Time: 13.47s
Epoch [10/10], Step [100/391], Loss: 0.2357
Epoch [10/10], Step [200/391], Loss: 0.3436
Epoch [10/10], Step [300/391], Loss: 0.3381
Epoch [10/10] Finished. Loss: 0.3213, Acc: 89.10%, Time: 12.42s
```
其**混淆矩阵**为:  

![](/home/frank/files/programs/AI引论第三次实验/docs/图/image.png)

**resnet74**(提前下采样版)训练结果为  
```
开始训练...
Epoch [1/10], Step [100/391], Loss: 1.8861
Epoch [1/10], Step [200/391], Loss: 1.7756
Epoch [1/10], Step [300/391], Loss: 1.4092
Epoch [1/10] Finished. Loss: 1.8694, Acc: 32.17%, Time: 28.33s
Epoch [2/10], Step [100/391], Loss: 1.4348
Epoch [2/10], Step [200/391], Loss: 1.5027
Epoch [2/10], Step [300/391], Loss: 1.5143
Epoch [2/10] Finished. Loss: 1.4333, Acc: 47.85%, Time: 27.79s
Epoch [3/10], Step [100/391], Loss: 1.1920
Epoch [3/10], Step [200/391], Loss: 1.3776
Epoch [3/10], Step [300/391], Loss: 1.0717
Epoch [3/10] Finished. Loss: 1.2141, Acc: 56.63%, Time: 27.81s
Epoch [4/10], Step [100/391], Loss: 1.1107
Epoch [4/10], Step [200/391], Loss: 1.1451
Epoch [4/10], Step [300/391], Loss: 1.6226
Epoch [4/10] Finished. Loss: 1.1555, Acc: 58.94%, Time: 27.77s
Epoch [5/10], Step [100/391], Loss: 0.9038
Epoch [5/10], Step [200/391], Loss: 0.9857
Epoch [5/10], Step [300/391], Loss: 0.8440
Epoch [5/10] Finished. Loss: 0.9981, Acc: 64.54%, Time: 27.72s
Epoch [6/10], Step [100/391], Loss: 0.9491
Epoch [6/10], Step [200/391], Loss: 0.7109
Epoch [6/10], Step [300/391], Loss: 0.9405
Epoch [6/10] Finished. Loss: 0.8585, Acc: 70.00%, Time: 27.77s
Epoch [7/10], Step [100/391], Loss: 0.8924
Epoch [7/10], Step [200/391], Loss: 0.6162
Epoch [7/10], Step [300/391], Loss: 0.6777
Epoch [7/10] Finished. Loss: 0.7376, Acc: 74.24%, Time: 27.78s
Epoch [8/10], Step [100/391], Loss: 0.6907
Epoch [8/10], Step [200/391], Loss: 0.7328
Epoch [8/10], Step [300/391], Loss: 0.4758
Epoch [8/10] Finished. Loss: 0.6548, Acc: 77.32%, Time: 27.73s
Epoch [9/10], Step [100/391], Loss: 0.7051
Epoch [9/10], Step [200/391], Loss: 0.6360
Epoch [9/10], Step [300/391], Loss: 0.6406
Epoch [9/10] Finished. Loss: 0.6433, Acc: 77.88%, Time: 27.77s
Epoch [10/10], Step [100/391], Loss: 0.5214
Epoch [10/10], Step [200/391], Loss: 0.5709
Epoch [10/10], Step [300/391], Loss: 0.6254
Epoch [10/10] Finished. Loss: 0.5527, Acc: 81.01%, Time: 27.75s
```
其**混淆矩阵**为：  
![](/home/frank/files/programs/AI引论第三次实验/docs/图/image2.png)

**resnet101**训练结果为：  
```
Epoch [1/10], Step [100/391], Loss: 1.0764
Epoch [1/10], Step [200/391], Loss: 0.8227
Epoch [1/10], Step [300/391], Loss: 0.6122
Epoch [1/10] Finished. Loss: 0.9219, Acc: 69.06%, Time: 46.20s
Epoch [2/10], Step [100/391], Loss: 0.6602
Epoch [2/10], Step [200/391], Loss: 0.5660
Epoch [2/10], Step [300/391], Loss: 0.5085
Epoch [2/10] Finished. Loss: 0.6540, Acc: 78.40%, Time: 45.42s
Epoch [3/10], Step [100/391], Loss: 0.5468
Epoch [3/10], Step [200/391], Loss: 0.5041
Epoch [3/10], Step [300/391], Loss: 0.6935
Epoch [3/10] Finished. Loss: 0.6301, Acc: 78.74%, Time: 45.17s
Epoch [4/10], Step [100/391], Loss: 0.4524
Epoch [4/10], Step [200/391], Loss: 0.4250
Epoch [4/10], Step [300/391], Loss: 1.0053
Epoch [4/10] Finished. Loss: 0.5687, Acc: 80.82%, Time: 45.43s
Epoch [5/10], Step [100/391], Loss: 0.5762
Epoch [5/10], Step [200/391], Loss: 0.6150
Epoch [5/10], Step [300/391], Loss: 0.6673
Epoch [5/10] Finished. Loss: 0.5793, Acc: 80.46%, Time: 45.33s
Epoch [6/10], Step [100/391], Loss: 0.3270
Epoch [6/10], Step [200/391], Loss: 0.7554
Epoch [6/10], Step [300/391], Loss: 0.5401
Epoch [6/10] Finished. Loss: 0.4772, Acc: 83.72%, Time: 45.63s
Epoch [7/10], Step [100/391], Loss: 0.4165
Epoch [7/10], Step [200/391], Loss: 0.4409
Epoch [7/10], Step [300/391], Loss: 0.4321
Epoch [7/10] Finished. Loss: 0.4149, Acc: 85.88%, Time: 45.57s
Epoch [8/10], Step [100/391], Loss: 0.4250
Epoch [8/10], Step [200/391], Loss: 0.3236
Epoch [8/10], Step [300/391], Loss: 0.4051
Epoch [8/10] Finished. Loss: 0.3883, Acc: 86.64%, Time: 45.76s
Epoch [9/10], Step [100/391], Loss: 0.4278
Epoch [9/10], Step [200/391], Loss: 0.4560
Epoch [9/10], Step [300/391], Loss: 0.3332
Epoch [9/10] Finished. Loss: 0.3874, Acc: 86.89%, Time: 45.49s
Epoch [10/10], Step [100/391], Loss: 0.2510
Epoch [10/10], Step [200/391], Loss: 0.3466
Epoch [10/10], Step [300/391], Loss: 0.6309
Epoch [10/10] Finished. Loss: 0.3696, Acc: 87.40%, Time: 45.01s
```
其**混淆矩阵**为：  
![](/home/frank/files/programs/AI引论第三次实验/docs/图/image3.png)  

训练时间为resnet50 = resnet74/2 = resnet101/4    
而准确度方面略有提升，但是不多，可能是因为数据模型较简单  

## 不同epcho训练  
epcho_less = 5  
epcho_more = 15  


In [None]:
# ========== 导入库 ==========
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms
from src import ResNet74_ForwardDownsample, train_model, test_model, plot_confusion_matrix

# ========== 超参数 ==========
epcho_less = 5
epcho_more = 15
batch_size = 128
learning_rate = 0.001

# ========== 数据加载 ==========
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# ========== 模型定义 ==========
# 提前下采样模型
resnet74_forward_downsample_less_epcho = ResNet74_ForwardDownsample()
resnet74_forward_downsample_more_epcho = ResNet74_ForwardDownsample()

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer_for_less = optim.Adam(resnet74_forward_downsample_less_epcho.parameters(), lr=learning_rate)
optimizer_for_more = optim.Adam(resnet74_forward_downsample_more_epcho.parameters(), lr=learning_rate)

# 设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 将模型移动到GPU
resnet74_forward_downsample_less_epcho.to(device)
resnet74_forward_downsample_more_epcho.to(device)


In [None]:
# 训练
print("开始训练少epcho模型...")
train_model(resnet74_forward_downsample_less_epcho, trainloader, criterion, optimizer_for_less, epcho_less, device)

In [None]:
print("开始训练多epcho模型...")
train_model(resnet74_forward_downsample_more_epcho, trainloader, criterion, optimizer_for_more, epcho_more, device)


In [None]:
# 测试
test_acc_less_epcho = test_model(resnet74_forward_downsample_less_epcho, testloader, device)
test_acc_more_epcho = test_model(resnet74_forward_downsample_more_epcho, testloader, device)

print(f"epcho_less: {test_acc_less_epcho}, epcho_more: {test_acc_more_epcho}")

# 混淆矩阵
plot_confusion_matrix(resnet74_forward_downsample_less_epcho, testloader, device)
plot_confusion_matrix(resnet74_forward_downsample_more_epcho, testloader, device)


**训练结果**  
```
少epcho模型...
Epoch [1/5], Step [100/391], Loss: 1.8486
Epoch [1/5], Step [200/391], Loss: 1.6182
Epoch [1/5], Step [300/391], Loss: 1.5407
Epoch [1/5] Finished. Loss: 1.7783, Acc: 35.41%, Time: 28.03s
Epoch [2/5], Step [100/391], Loss: 1.4870
Epoch [2/5], Step [200/391], Loss: 1.3618
Epoch [2/5], Step [300/391], Loss: 1.2692
Epoch [2/5] Finished. Loss: 1.3505, Acc: 51.24%, Time: 27.85s
Epoch [3/5], Step [100/391], Loss: 1.2715
Epoch [3/5], Step [200/391], Loss: 1.1449
Epoch [3/5], Step [300/391], Loss: 1.1403
Epoch [3/5] Finished. Loss: 1.2610, Acc: 55.08%, Time: 27.84s
Epoch [4/5], Step [100/391], Loss: 1.0273
Epoch [4/5], Step [200/391], Loss: 1.0395
Epoch [4/5], Step [300/391], Loss: 1.0795
Epoch [4/5] Finished. Loss: 1.0639, Acc: 62.20%, Time: 27.83s
Epoch [5/5], Step [100/391], Loss: 0.8022
Epoch [5/5], Step [200/391], Loss: 0.8870
Epoch [5/5], Step [300/391], Loss: 1.0477
Epoch [5/5] Finished. Loss: 0.8941, Acc: 68.48%, Time: 27.86s
```
混淆矩阵和准确率见代码块


```
开始训练多epcho模型...
Epoch [1/15], Step [100/391], Loss: 1.8893
Epoch [1/15], Step [200/391], Loss: 1.9474
Epoch [1/15], Step [300/391], Loss: 1.5950
Epoch [1/15] Finished. Loss: 1.7850, Acc: 35.43%, Time: 28.23s
Epoch [2/15], Step [100/391], Loss: 1.2189
Epoch [2/15], Step [200/391], Loss: 1.3935
Epoch [2/15], Step [300/391], Loss: 1.6186
Epoch [2/15] Finished. Loss: 1.3937, Acc: 49.60%, Time: 27.93s
Epoch [3/15], Step [100/391], Loss: 1.3161
Epoch [3/15], Step [200/391], Loss: 1.1906
Epoch [3/15], Step [300/391], Loss: 1.3275
Epoch [3/15] Finished. Loss: 1.2331, Acc: 55.88%, Time: 27.89s
Epoch [4/15], Step [100/391], Loss: 1.0584
Epoch [4/15], Step [200/391], Loss: 1.0186
Epoch [4/15], Step [300/391], Loss: 0.8985
Epoch [4/15] Finished. Loss: 1.0345, Acc: 63.41%, Time: 28.01s
Epoch [5/15], Step [100/391], Loss: 0.8054
Epoch [5/15], Step [200/391], Loss: 0.9698
Epoch [5/15], Step [300/391], Loss: 0.7892
Epoch [5/15] Finished. Loss: 0.8872, Acc: 68.75%, Time: 27.96s
Epoch [6/15], Step [100/391], Loss: 0.7485
Epoch [6/15], Step [200/391], Loss: 0.8768
Epoch [6/15], Step [300/391], Loss: 0.6838
Epoch [6/15] Finished. Loss: 0.7760, Acc: 72.82%, Time: 27.91s
Epoch [7/15], Step [100/391], Loss: 0.6107
Epoch [7/15], Step [200/391], Loss: 0.7260
Epoch [7/15], Step [300/391], Loss: 0.6078
Epoch [7/15] Finished. Loss: 0.7016, Acc: 75.50%, Time: 27.91s
Epoch [8/15], Step [100/391], Loss: 0.6772
Epoch [8/15], Step [200/391], Loss: 0.6345
Epoch [8/15], Step [300/391], Loss: 0.6877
Epoch [8/15] Finished. Loss: 0.6305, Acc: 78.07%, Time: 28.01s
Epoch [9/15], Step [100/391], Loss: 0.7210
Epoch [9/15], Step [200/391], Loss: 0.6852
Epoch [9/15], Step [300/391], Loss: 0.5579
Epoch [9/15] Finished. Loss: 0.5839, Acc: 79.90%, Time: 27.89s
Epoch [10/15], Step [100/391], Loss: 0.5321
Epoch [10/15], Step [200/391], Loss: 0.4527
Epoch [10/15], Step [300/391], Loss: 0.5371
Epoch [10/15] Finished. Loss: 0.5415, Acc: 81.23%, Time: 27.86s
Epoch [11/15], Step [100/391], Loss: 0.5609
Epoch [11/15], Step [200/391], Loss: 0.5817
Epoch [11/15], Step [300/391], Loss: 0.5312
Epoch [11/15] Finished. Loss: 0.5040, Acc: 82.57%, Time: 27.89s
Epoch [12/15], Step [100/391], Loss: 0.5155
Epoch [12/15], Step [200/391], Loss: 0.4372
Epoch [12/15], Step [300/391], Loss: 0.5019
Epoch [12/15] Finished. Loss: 0.4767, Acc: 83.40%, Time: 27.84s
Epoch [13/15], Step [100/391], Loss: 0.4135
Epoch [13/15], Step [200/391], Loss: 0.4565
Epoch [13/15], Step [300/391], Loss: 0.4121
Epoch [13/15] Finished. Loss: 0.4538, Acc: 84.32%, Time: 27.83s
Epoch [14/15], Step [100/391], Loss: 0.3785
Epoch [14/15], Step [200/391], Loss: 0.5740
Epoch [14/15], Step [300/391], Loss: 0.4304
Epoch [14/15] Finished. Loss: 0.4272, Acc: 85.45%, Time: 27.77s
Epoch [15/15], Step [100/391], Loss: 0.3762
Epoch [15/15], Step [200/391], Loss: 0.3297
Epoch [15/15], Step [300/391], Loss: 0.5063
Epoch [15/15] Finished. Loss: 0.4003, Acc: 86.16%, Time: 27.85s
```

## 训练增强
基于自主设计ResNet74模型，设计实现至少包括亮度调整、随机噪声、色调调整、随机裁剪、随机翻转、Mixup的数据增强策略；并探讨分析不同数据增强策略对模型性能的影响；  

**实现方式**   
使用transform.Compose实现  
Compose中输入的是增强方式，统一对数据进行调用  

In [3]:
from torchvision import transforms
from torchvision.transforms import functional as F
import random
random.seed(42) # 设置随机种子

# 1.亮度+色调调整
# 使用torchvision实现，本质上是对通道进行放缩
brightness_aug = transforms.ColorJitter(brightness=0.2)

# 2.随机噪声
# 使用高斯噪声，本质上是给每个通道的每个像素加上一个(0,sigma^2)高斯分布的噪声
class AddGaussianNoise:
    def __init__(self, sigma=0.03, p=0.5):
        self.sigma = sigma
        self.p = p
    def __call__(self, x):
        if torch.rand(1).item() > self.p:
            return x
        return torch.clamp(x + torch.randn_like(x) * self.sigma, 0.0, 1.0)

# 随机裁剪
random_crop = transforms.RandomCrop(32, padding=4)

# 随机水平翻转         
random_horizontal_flip = transforms.RandomHorizontalFlip(p=0.5)             

# 标准化
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # CIFAR-10数据的mean和std

compser = transforms.Compose([
    brightness_aug,
    random_crop,
    random_horizontal_flip,
    transforms.ToTensor(),   # 转换为tensor后，才能增加噪声
    AddGaussianNoise(sigma=0.03, p=0.5),
    transforms.Normalize(*stats), # 标准化 
])

In [4]:
# 增强训练集
# 加载数据 
# Hyperparameters
num_epochs = 10
batch_size = 128
learning_rate = 0.001

# Data Preprocessing
# CIFAR-10 stats for normalization
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# ========== 导入库 ==========
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms
from src import ResNet74_ForwardDownsample, train_model, test_model

# ========== 超参数 ==========
num_epochs = 10
batch_size = 128
learning_rate = 0.001

# ========== 数据加载 ==========
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# ========== 模型定义 ==========
# 提前下采样
resnet74_forward_downsample = ResNet74_ForwardDownsample()

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet74_forward_downsample.parameters(), lr=learning_rate)

# 设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 将模型移动到GPU
resnet74_forward_downsample.to(device)

# ========== 训练 ==========
print("开始训练...")
train_model(resnet74_forward_downsample, trainloader, criterion, optimizer, num_epochs, device)


开始训练...
Epoch [1/10], Step [100/391], Loss: 1.9633
Epoch [1/10], Step [200/391], Loss: 1.5802
Epoch [1/10], Step [300/391], Loss: 1.6270
Epoch [1/10] Finished. Loss: 1.8107, Acc: 34.92%, Time: 32.75s
Epoch [2/10], Step [100/391], Loss: 1.3612
Epoch [2/10], Step [200/391], Loss: 1.7011
Epoch [2/10], Step [300/391], Loss: 1.3324
Epoch [2/10] Finished. Loss: 1.3707, Acc: 50.09%, Time: 32.11s
Epoch [3/10], Step [100/391], Loss: 1.2249
Epoch [3/10], Step [200/391], Loss: 1.0129
Epoch [3/10], Step [300/391], Loss: 1.1321
Epoch [3/10] Finished. Loss: 1.1463, Acc: 59.01%, Time: 32.23s
Epoch [4/10], Step [100/391], Loss: 1.0608
Epoch [4/10], Step [200/391], Loss: 0.8602
Epoch [4/10], Step [300/391], Loss: 0.9724
Epoch [4/10] Finished. Loss: 1.0054, Acc: 64.54%, Time: 32.23s
Epoch [5/10], Step [100/391], Loss: 0.8051
Epoch [5/10], Step [200/391], Loss: 1.0412
Epoch [5/10], Step [300/391], Loss: 0.7768
Epoch [5/10] Finished. Loss: 0.8828, Acc: 68.89%, Time: 32.20s
Epoch [6/10], Step [100/391], Lo

**训练结果**   
```  
Epoch [1/10], Step [100/391], Loss: 1.9633
Epoch [1/10], Step [200/391], Loss: 1.5802
Epoch [1/10], Step [300/391], Loss: 1.6270
Epoch [1/10] Finished. Loss: 1.8107, Acc: 34.92%, Time: 32.75s
Epoch [2/10], Step [100/391], Loss: 1.3612
Epoch [2/10], Step [200/391], Loss: 1.7011
Epoch [2/10], Step [300/391], Loss: 1.3324
Epoch [2/10] Finished. Loss: 1.3707, Acc: 50.09%, Time: 32.11s
Epoch [3/10], Step [100/391], Loss: 1.2249
Epoch [3/10], Step [200/391], Loss: 1.0129
Epoch [3/10], Step [300/391], Loss: 1.1321
Epoch [3/10] Finished. Loss: 1.1463, Acc: 59.01%, Time: 32.23s
Epoch [4/10], Step [100/391], Loss: 1.0608
Epoch [4/10], Step [200/391], Loss: 0.8602
Epoch [4/10], Step [300/391], Loss: 0.9724
Epoch [4/10] Finished. Loss: 1.0054, Acc: 64.54%, Time: 32.23s
Epoch [5/10], Step [100/391], Loss: 0.8051
Epoch [5/10], Step [200/391], Loss: 1.0412
Epoch [5/10], Step [300/391], Loss: 0.7768
Epoch [5/10] Finished. Loss: 0.8828, Acc: 68.89%, Time: 32.20s
Epoch [6/10], Step [100/391], Loss: 0.6868
Epoch [6/10], Step [200/391], Loss: 0.9745
Epoch [6/10], Step [300/391], Loss: 0.7198
Epoch [6/10] Finished. Loss: 0.7762, Acc: 72.97%, Time: 32.22s
Epoch [7/10], Step [100/391], Loss: 0.6809
Epoch [7/10], Step [200/391], Loss: 0.6886
Epoch [7/10], Step [300/391], Loss: 0.7381
Epoch [7/10] Finished. Loss: 0.6759, Acc: 76.46%, Time: 32.36s
Epoch [8/10], Step [100/391], Loss: 0.6103
Epoch [8/10], Step [200/391], Loss: 0.5391
Epoch [8/10], Step [300/391], Loss: 0.6168
Epoch [8/10] Finished. Loss: 0.6306, Acc: 78.25%, Time: 32.45s
Epoch [9/10], Step [100/391], Loss: 0.4286
Epoch [9/10], Step [200/391], Loss: 0.4977
Epoch [9/10], Step [300/391], Loss: 0.4577
Epoch [9/10] Finished. Loss: 0.5728, Acc: 80.29%, Time: 32.23s
Epoch [10/10], Step [100/391], Loss: 0.4904
Epoch [10/10], Step [200/391], Loss: 0.5301
Epoch [10/10], Step [300/391], Loss: 0.4686
Epoch [10/10] Finished. Loss: 0.5291, Acc: 81.62%, Time: 31.99s
```
效果提升不明显  


## Dropout  
dropout类似随机森林随机选择不同的特征和不同的样本进行分析，神经网络每次随机选择(1-dropout_rate)的神经元进行训练，这样能提高模型的稳健性，避免过拟合。  

In [9]:
# ========== 导入库 ==========
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms
from src import ResNet74_ForwardDownsample, train_model, test_model

# ========== 超参数 ==========
num_epochs = 10
batch_size = 128
learning_rate = 0.001
dropout_rate = 0.1  # Dropout比率，用于训练时防止过拟合

# ========== 数据加载 ==========
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# ========== 模型定义 ==========
# 创建带Dropout的模型（dropout_rate=0.5，可在训练时防止过拟合）
resnet74_forward_downsample_dropout = ResNet74_ForwardDownsample(dropout_rate=dropout_rate)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet74_forward_downsample_dropout.parameters(), lr=learning_rate)

# 设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 将模型移动到GPU
resnet74_forward_downsample_dropout.to(device)

# ========== 训练 ==========
# 训练（Dropout在训练模式下自动启用，在评估模式下自动关闭）
print("开始训练带Dropout的模型...")
train_model(resnet74_forward_downsample_dropout, trainloader, criterion, optimizer, num_epochs, device)


Files already downloaded and verified
Files already downloaded and verified
开始训练带Dropout的模型...
Epoch [1/10], Step [100/391], Loss: 1.8588
Epoch [1/10], Step [200/391], Loss: 1.7270
Epoch [1/10], Step [300/391], Loss: 1.6182
Epoch [1/10] Finished. Loss: 1.8127, Acc: 34.88%, Time: 32.34s
Epoch [2/10], Step [100/391], Loss: 1.5624
Epoch [2/10], Step [200/391], Loss: 1.4140
Epoch [2/10], Step [300/391], Loss: 1.3970
Epoch [2/10] Finished. Loss: 1.3831, Acc: 50.09%, Time: 32.32s
Epoch [3/10], Step [100/391], Loss: 1.3465
Epoch [3/10], Step [200/391], Loss: 1.5738
Epoch [3/10], Step [300/391], Loss: 1.1816
Epoch [3/10] Finished. Loss: 1.2325, Acc: 56.84%, Time: 32.26s
Epoch [4/10], Step [100/391], Loss: 1.1451
Epoch [4/10], Step [200/391], Loss: 1.3160
Epoch [4/10], Step [300/391], Loss: 1.1260
Epoch [4/10] Finished. Loss: 1.0210, Acc: 64.39%, Time: 32.25s
Epoch [5/10], Step [100/391], Loss: 0.9873
Epoch [5/10], Step [200/391], Loss: 0.9453
Epoch [5/10], Step [300/391], Loss: 0.7612
Epoch [5

**训练结果** 
尝试dropout_rate = 0.1 
```
Files already downloaded and verified
Files already downloaded and verified
开始训练带Dropout的模型...
Epoch [1/10], Step [100/391], Loss: 1.8588
Epoch [1/10], Step [200/391], Loss: 1.7270
Epoch [1/10], Step [300/391], Loss: 1.6182
Epoch [1/10] Finished. Loss: 1.8127, Acc: 34.88%, Time: 32.34s
Epoch [2/10], Step [100/391], Loss: 1.5624
Epoch [2/10], Step [200/391], Loss: 1.4140
Epoch [2/10], Step [300/391], Loss: 1.3970
Epoch [2/10] Finished. Loss: 1.3831, Acc: 50.09%, Time: 32.32s
Epoch [3/10], Step [100/391], Loss: 1.3465
Epoch [3/10], Step [200/391], Loss: 1.5738
Epoch [3/10], Step [300/391], Loss: 1.1816
Epoch [3/10] Finished. Loss: 1.2325, Acc: 56.84%, Time: 32.26s
Epoch [4/10], Step [100/391], Loss: 1.1451
Epoch [4/10], Step [200/391], Loss: 1.3160
Epoch [4/10], Step [300/391], Loss: 1.1260
Epoch [4/10] Finished. Loss: 1.0210, Acc: 64.39%, Time: 32.25s
Epoch [5/10], Step [100/391], Loss: 0.9873
Epoch [5/10], Step [200/391], Loss: 0.9453
Epoch [5/10], Step [300/391], Loss: 0.7612
Epoch [5/10] Finished. Loss: 0.9112, Acc: 68.32%, Time: 32.25s
Epoch [6/10], Step [100/391], Loss: 0.7443
Epoch [6/10], Step [200/391], Loss: 0.6399
Epoch [6/10], Step [300/391], Loss: 0.8097
Epoch [6/10] Finished. Loss: 0.7998, Acc: 72.35%, Time: 32.40s
Epoch [7/10], Step [100/391], Loss: 0.6835
Epoch [7/10], Step [200/391], Loss: 1.2223
Epoch [7/10], Step [300/391], Loss: 0.7935
Epoch [7/10] Finished. Loss: 0.9083, Acc: 68.67%, Time: 32.27s
Epoch [8/10], Step [100/391], Loss: 0.6770
Epoch [8/10], Step [200/391], Loss: 0.7064
Epoch [8/10], Step [300/391], Loss: 1.0260
Epoch [8/10] Finished. Loss: 0.8671, Acc: 70.15%, Time: 32.17s
Epoch [9/10], Step [100/391], Loss: 0.6703
Epoch [9/10], Step [200/391], Loss: 0.5689
Epoch [9/10], Step [300/391], Loss: 0.6375
Epoch [9/10] Finished. Loss: 0.6775, Acc: 76.51%, Time: 32.19s
Epoch [10/10], Step [100/391], Loss: 0.5023
Epoch [10/10], Step [200/391], Loss: 0.6268
Epoch [10/10], Step [300/391], Loss: 0.6697
Epoch [10/10] Finished. Loss: 0.5798, Acc: 79.91%, Time: 32.18s
```

尝试dropout_rate = 0.5   
```
Files already downloaded and verified
Files already downloaded and verified
开始训练带Dropout的模型...
Epoch [1/10], Step [100/391], Loss: 2.5184
Epoch [1/10], Step [200/391], Loss: 1.9133
Epoch [1/10], Step [300/391], Loss: 1.9602
Epoch [1/10] Finished. Loss: 1.9317, Acc: 31.98%, Time: 32.65s
Epoch [2/10], Step [100/391], Loss: 1.6535
Epoch [2/10], Step [200/391], Loss: 1.6508
Epoch [2/10], Step [300/391], Loss: 1.4679
Epoch [2/10] Finished. Loss: 1.5205, Acc: 46.19%, Time: 32.28s
Epoch [3/10], Step [100/391], Loss: 1.4125
Epoch [3/10], Step [200/391], Loss: 1.3044
Epoch [3/10], Step [300/391], Loss: 1.4053
Epoch [3/10] Finished. Loss: 1.4422, Acc: 49.20%, Time: 32.38s
Epoch [4/10], Step [100/391], Loss: 1.5226
Epoch [4/10], Step [200/391], Loss: 1.2685
Epoch [4/10], Step [300/391], Loss: 1.3416
Epoch [4/10] Finished. Loss: 1.2904, Acc: 55.24%, Time: 32.26s
Epoch [5/10], Step [100/391], Loss: 1.0342
Epoch [5/10], Step [200/391], Loss: 1.1651
Epoch [5/10], Step [300/391], Loss: 0.9670
Epoch [5/10] Finished. Loss: 1.1500, Acc: 60.68%, Time: 32.34s
Epoch [6/10], Step [100/391], Loss: 1.3182
Epoch [6/10], Step [200/391], Loss: 1.0687
Epoch [6/10], Step [300/391], Loss: 1.0229
Epoch [6/10] Finished. Loss: 1.0907, Acc: 63.30%, Time: 32.23s
Epoch [7/10], Step [100/391], Loss: 1.5879
Epoch [7/10], Step [200/391], Loss: 1.1042
Epoch [7/10], Step [300/391], Loss: 1.5659
Epoch [7/10] Finished. Loss: 1.2071, Acc: 58.54%, Time: 32.26s
Epoch [8/10], Step [100/391], Loss: 1.1420
Epoch [8/10], Step [200/391], Loss: 1.3771
Epoch [8/10], Step [300/391], Loss: 1.3157
Epoch [8/10] Finished. Loss: 1.0996, Acc: 62.67%, Time: 32.23s
Epoch [9/10], Step [100/391], Loss: 0.8296
Epoch [9/10], Step [200/391], Loss: 0.7793
Epoch [9/10], Step [300/391], Loss: 0.7730
Epoch [9/10] Finished. Loss: 0.9814, Acc: 66.32%, Time: 32.18s
Epoch [10/10], Step [100/391], Loss: 1.0850
Epoch [10/10], Step [200/391], Loss: 0.8481
Epoch [10/10], Step [300/391], Loss: 0.8189
Epoch [10/10] Finished. Loss: 0.8438, Acc: 70.88%, Time: 32.12s
```

发现使用dropout效果不好，可能需要尝试更低的dropout_rate

## 不同学习器、学习率、学习策略
**三种优化器配置**  
- SGD + StepLR（阶梯式衰减）  
    - 学习率：0.01（SGD 通常需要更大）  
    - 动量：0.9  
    - 调度策略：每 3 个 epoch 学习率乘以 0.1    

- Adam + CosineAnnealingLR（余弦退火）  
    - 学习率：0.001  
    - 调度策略：余弦退火，学习率按余弦函数衰减

- RMSprop + ExponentialLR（指数衰减）    
    - 学习率：0.001  
    - 调度策略：每个 epoch 学习率乘以 0.95  

In [None]:
# ========== 导入库 ==========
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms
from src import ResNet74_ForwardDownsample, train_model, test_model

# ========== 超参数 ==========
num_epochs = 10
batch_size = 128
learning_rate = 0.01  # 初始学习率（SGD通常需要更大的学习率）
momentum = 0.9  # SGD的动量参数
weight_decay = 1e-4  # 权重衰减（L2正则化）

# ========== 数据加载 ==========
# CIFAR-10 stats for normalization
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*stats),
])

# Load CIFAR-10 Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# ========== 模型定义 ==========
# 创建模型
model_sgd = ResNet74_ForwardDownsample(dropout_rate=0.0)
model_adam = ResNet74_ForwardDownsample(dropout_rate=0.0)
model_rmsprop = ResNet74_ForwardDownsample(dropout_rate=0.0)

# 设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ========== 不同优化器配置 ==========
# 1. SGD优化器 + StepLR学习率调度（阶梯式衰减）
criterion = nn.CrossEntropyLoss()
optimizer_sgd = optim.SGD(model_sgd.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
scheduler_sgd = optim.lr_scheduler.StepLR(optimizer_sgd, step_size=3, gamma=0.1)  # 每3个epoch学习率乘以0.1

# 2. Adam优化器 + CosineAnnealingLR学习率调度（余弦退火）
optimizer_adam = optim.Adam(model_adam.parameters(), lr=0.001, weight_decay=weight_decay)
scheduler_adam = optim.lr_scheduler.CosineAnnealingLR(optimizer_adam, T_max=num_epochs)  # 余弦退火调度

# 3. RMSprop优化器 + ExponentialLR学习率调度（指数衰减）
optimizer_rmsprop = optim.RMSprop(model_rmsprop.parameters(), lr=0.001, weight_decay=weight_decay)
scheduler_rmsprop = optim.lr_scheduler.ExponentialLR(optimizer_rmsprop, gamma=0.95)  # 每个epoch学习率乘以0.95

# 将模型移动到GPU
model_sgd.to(device)
model_adam.to(device)
model_rmsprop.to(device)

# ========== 训练函数（带学习率调度） ==========
def train_with_scheduler(model, trainloader, criterion, optimizer, scheduler, num_epochs, device, model_name):
    """训练模型，并在每个epoch后更新学习率"""
    model.train()
    total_step = len(trainloader)
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        
        for i, (images, labels) in enumerate(trainloader):
            images = images.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            if (i+1) % 100 == 0:
                current_lr = optimizer.param_groups[0]['lr']
                print(f'[{model_name}] Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}, LR: {current_lr:.6f}')
        
        # 更新学习率
        scheduler.step()
        epoch_acc = 100 * correct / total
        epoch_loss = running_loss / len(trainloader)
        current_lr = optimizer.param_groups[0]['lr']
        print(f'[{model_name}] Epoch [{epoch+1}/{num_epochs}] Finished. Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%, LR: {current_lr:.6f}\n')

# ========== 训练不同优化器的模型 ==========
print("=" * 60)
print("训练 SGD + StepLR 模型")
print("=" * 60)
train_with_scheduler(model_sgd, trainloader, criterion, optimizer_sgd, scheduler_sgd, num_epochs, device, "SGD+StepLR")

print("=" * 60)
print("训练 Adam + CosineAnnealingLR 模型")
print("=" * 60)
train_with_scheduler(model_adam, trainloader, criterion, optimizer_adam, scheduler_adam, num_epochs, device, "Adam+Cosine")

print("=" * 60)
print("训练 RMSprop + ExponentialLR 模型")
print("=" * 60)
train_with_scheduler(model_rmsprop, trainloader, criterion, optimizer_rmsprop, scheduler_rmsprop, num_epochs, device, "RMSprop+Exp")

# ========== 测试和比较 ==========
print("=" * 60)
print("测试结果对比")
print("=" * 60)
test_acc_sgd = test_model(model_sgd, testloader, device)
test_acc_adam = test_model(model_adam, testloader, device)
test_acc_rmsprop = test_model(model_rmsprop, testloader, device)

print(f"SGD + StepLR 测试准确率: {test_acc_sgd:.2f}%")
print(f"Adam + CosineAnnealingLR 测试准确率: {test_acc_adam:.2f}%")
print(f"RMSprop + ExponentialLR 测试准确率: {test_acc_rmsprop:.2f}%")


Files already downloaded and verified
Files already downloaded and verified
训练 SGD + StepLR 模型
[SGD+StepLR] Epoch [1/10], Step [100/391], Loss: 2.2414, LR: 0.010000
[SGD+StepLR] Epoch [1/10], Step [200/391], Loss: 2.2793, LR: 0.010000
[SGD+StepLR] Epoch [1/10], Step [300/391], Loss: 2.1041, LR: 0.010000
[SGD+StepLR] Epoch [1/10] Finished. Loss: 2.1984, Acc: 24.92%, LR: 0.010000

[SGD+StepLR] Epoch [2/10], Step [100/391], Loss: 1.5160, LR: 0.010000
[SGD+StepLR] Epoch [2/10], Step [200/391], Loss: 1.6396, LR: 0.010000
[SGD+StepLR] Epoch [2/10], Step [300/391], Loss: 1.8833, LR: 0.010000
[SGD+StepLR] Epoch [2/10] Finished. Loss: 1.6414, Acc: 39.70%, LR: 0.010000

[SGD+StepLR] Epoch [3/10], Step [100/391], Loss: 1.5962, LR: 0.010000
[SGD+StepLR] Epoch [3/10], Step [200/391], Loss: 1.4716, LR: 0.010000
[SGD+StepLR] Epoch [3/10], Step [300/391], Loss: 1.4392, LR: 0.010000
[SGD+StepLR] Epoch [3/10] Finished. Loss: 1.4828, Acc: 45.94%, LR: 0.001000

[SGD+StepLR] Epoch [4/10], Step [100/391], L