# 1. 优化器

① 损失函数调用backward方法，就可以调用损失函数的反向传播方法，就可以求出我们需要调节的梯度，我们就可以利用我们的优化器就可以根据梯度对参数进行调整，达到整体误差降低的目的。

② 梯度要清零，如果梯度不清零会导致梯度累加。

# 2. 神经网络优化一轮

In [1]:
import torch
import torchvision
from torch import nn 
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

# ==========================================
# 1. 数据准备
# ==========================================
dataset = torchvision.datasets.CIFAR10('D:\\深度学习\\100_土堆数据集\\dataset', train=False, transform=torchvision.transforms.ToTensor())       
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

# ==========================================
# 2. 模型定义
# ==========================================
class Tudui(nn.Module):
    def __init__(self):
        super().__init__()        
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )
        
    def forward(self, x):
        x = self.model1(x)
        return x

# ==========================================
# 3. 准备训练工具 (Loss & Optimizer)
# ==========================================
loss = nn.CrossEntropyLoss() # 交叉熵损失函数
tudui = Tudui()

# 语法: torch.optim.SGD(params, lr)
# 作用: 创建一个随机梯度下降(SGD)优化器。
# 参数: 
#   - tudui.parameters(): 告诉优化器，哪些参数是可以修改的（就是网络里的权重 W 和偏置 b）。
#   - lr=0.01 (Learning Rate): 学习率。决定了每一步修政的幅度。
#     如果太大，容易走过头（震荡）；如果太小，学得太慢。
optim = torch.optim.SGD(tudui.parameters(), lr=0.01)

# ==========================================
# 4. 训练循环 (Training Loop)
# ==========================================
# 注意：这一层循环只遍历了一遍数据集（这叫 1个 Epoch）
for data in dataloader:
    imgs, targets = data
    outputs = tudui(imgs)
    
    # --- Step 1: 计算误差 ---
    result_loss = loss(outputs, targets) 
    
    # --- Step 2: 梯度清零 (重要！) ---
    # 语法: optim.zero_grad()
    # 作用: 把模型中所有参数的梯度 (.grad) 属性重置为 0。
    # 为什么？因为 PyTorch 默认会"累加"梯度。
    # 如果不清零，第2个batch的梯度会加在第1个batch上，导致更新方向错误。
    optim.zero_grad()
    
    # --- Step 3: 反向传播 ---
    # 作用: 计算当前 Loss 对每个参数的梯度，并存入 .grad 属性中。
    # 此时，优化器知道了该往哪个方向走才能降低 Loss。
    result_loss.backward() 
    
    # --- Step 4: 更新参数 ---
    # 语法: optim.step()
    # 作用: 执行参数更新公式： param = param - lr * grad
    # 只有执行了这一步，模型的参数才真正发生了改变！
    optim.step() 
    
    # 打印当前的 Loss 值
    # 就像你说的，因为这只是第1轮(Epoch 1)，且只看了一遍数据，
    # Loss 可能会震荡或者下降不明显，这是正常的。
    print(result_loss)

tensor(2.3176, grad_fn=<NllLossBackward0>)
tensor(2.2906, grad_fn=<NllLossBackward0>)
tensor(2.3152, grad_fn=<NllLossBackward0>)
tensor(2.3027, grad_fn=<NllLossBackward0>)
tensor(2.3069, grad_fn=<NllLossBackward0>)
tensor(2.3095, grad_fn=<NllLossBackward0>)
tensor(2.3006, grad_fn=<NllLossBackward0>)
tensor(2.3214, grad_fn=<NllLossBackward0>)
tensor(2.3058, grad_fn=<NllLossBackward0>)
tensor(2.3061, grad_fn=<NllLossBackward0>)
tensor(2.3121, grad_fn=<NllLossBackward0>)
tensor(2.2968, grad_fn=<NllLossBackward0>)
tensor(2.3046, grad_fn=<NllLossBackward0>)
tensor(2.3035, grad_fn=<NllLossBackward0>)
tensor(2.3030, grad_fn=<NllLossBackward0>)
tensor(2.2945, grad_fn=<NllLossBackward0>)
tensor(2.3091, grad_fn=<NllLossBackward0>)
tensor(2.2915, grad_fn=<NllLossBackward0>)
tensor(2.2906, grad_fn=<NllLossBackward0>)
tensor(2.2971, grad_fn=<NllLossBackward0>)
tensor(2.3147, grad_fn=<NllLossBackward0>)
tensor(2.3048, grad_fn=<NllLossBackward0>)
tensor(2.2983, grad_fn=<NllLossBackward0>)
tensor(2.29

# 3. 神经网络优化多轮

In [2]:
import torch
import torchvision
from torch import nn 
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10('D:\\深度学习\\100_土堆数据集\\dataset',train=False,transform=torchvision.transforms.ToTensor())       
dataloader = DataLoader(dataset, batch_size=64,drop_last=True)

class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()        
        self.model1 = Sequential(
            Conv2d(3,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,64,5,padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024,64),
            Linear(64,10)
        )
        
    def forward(self, x):
        x = self.model1(x)
        return x
    
loss = nn.CrossEntropyLoss() # 交叉熵    
tudui = Tudui()
optim = torch.optim.SGD(tudui.parameters(),lr=0.01)   # 随机梯度下降优化器
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets) # 计算实际输出与目标输出的差距
        optim.zero_grad()  # 梯度清零
        result_loss.backward() # 反向传播，计算损失函数的梯度
        optim.step()   # 根据梯度，对网络的参数进行调优
        running_loss = running_loss + result_loss
    print(running_loss) # 对这一轮所有误差的总和

tensor(358.2396, grad_fn=<AddBackward0>)
tensor(353.9762, grad_fn=<AddBackward0>)
tensor(336.3852, grad_fn=<AddBackward0>)
tensor(317.4470, grad_fn=<AddBackward0>)
tensor(309.1635, grad_fn=<AddBackward0>)
tensor(300.7821, grad_fn=<AddBackward0>)
tensor(291.5074, grad_fn=<AddBackward0>)
tensor(284.2061, grad_fn=<AddBackward0>)
tensor(277.2598, grad_fn=<AddBackward0>)
tensor(271.1585, grad_fn=<AddBackward0>)
tensor(265.6717, grad_fn=<AddBackward0>)
tensor(260.4376, grad_fn=<AddBackward0>)
tensor(255.3174, grad_fn=<AddBackward0>)
tensor(250.4014, grad_fn=<AddBackward0>)
tensor(245.8668, grad_fn=<AddBackward0>)
tensor(241.8106, grad_fn=<AddBackward0>)
tensor(238.1533, grad_fn=<AddBackward0>)
tensor(234.7964, grad_fn=<AddBackward0>)
tensor(231.6664, grad_fn=<AddBackward0>)
tensor(228.6825, grad_fn=<AddBackward0>)


# 4. 神经网络学习率优化

In [5]:
import torch
import torchvision
from torch import nn 
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

# ==========================================
# 1. 数据准备
# ==========================================
dataset = torchvision.datasets.CIFAR10('D:\\深度学习\\100_土堆数据集\\dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)       
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

# ==========================================
# 2. 模型定义
# ==========================================
class Tudui(nn.Module):
    def __init__(self):
        super().__init__()        
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )
        
    def forward(self, x):
        x = self.model1(x)
        return x

# ==========================================
# 3. 初始化训练组件
# ==========================================
loss = nn.CrossEntropyLoss()
tudui = Tudui()

# 定义优化器：初始学习率 lr = 0.01
optim = torch.optim.SGD(tudui.parameters(), lr=0.01)   

# 定义学习率调度器 (Scheduler) 
# 作用：随着训练进行，动态调整学习率。
# 逻辑：每过 step_size (5) 个单位，将学习率乘以 gamma (0.1)。
#       即：0.01 -> 0.001 -> 0.0001 ...
scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=5, gamma=0.1) 

# ==========================================
# 4. 训练循环
# ==========================================
for epoch in range(20):
    running_loss = 0.0
    
    # --- 内层循环：遍历每一个 Batch ---
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets)
        
        optim.zero_grad()  # 清空梯度
        result_loss.backward() # 反向传播
        optim.step()   # 更新参数
        running_loss = running_loss + result_loss
    scheduler.step() 
    print(running_loss)

tensor(358.3723, grad_fn=<AddBackward0>)
tensor(355.0641, grad_fn=<AddBackward0>)
tensor(343.2172, grad_fn=<AddBackward0>)
tensor(320.0874, grad_fn=<AddBackward0>)
tensor(310.6642, grad_fn=<AddBackward0>)
tensor(302.8482, grad_fn=<AddBackward0>)
tensor(301.8209, grad_fn=<AddBackward0>)
tensor(300.8760, grad_fn=<AddBackward0>)
tensor(299.9315, grad_fn=<AddBackward0>)
tensor(298.9742, grad_fn=<AddBackward0>)
tensor(298.1701, grad_fn=<AddBackward0>)
tensor(298.0667, grad_fn=<AddBackward0>)
tensor(297.9653, grad_fn=<AddBackward0>)
tensor(297.8642, grad_fn=<AddBackward0>)
tensor(297.7632, grad_fn=<AddBackward0>)
tensor(297.6711, grad_fn=<AddBackward0>)
tensor(297.6606, grad_fn=<AddBackward0>)
tensor(297.6502, grad_fn=<AddBackward0>)
tensor(297.6399, grad_fn=<AddBackward0>)
tensor(297.6295, grad_fn=<AddBackward0>)
