# Optimizer

In [1]:
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
import torch
from torchvision import datasets, transforms

dataset = datasets.CIFAR10(root='../data', train=False, download=True, transform=transforms.ToTensor())

dataloader = torch.utils.data.DataLoader(dataset, batch_size=64)

class Cifar(nn.Module):
    def __init__(self):
        super(Cifar, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.maxpool1 = MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.maxpool2 = MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        self.maxpool3 = MaxPool2d(kernel_size=2, stride=2)
        self.flatten = Flatten()
        self.linear1 = Linear(in_features=1024, out_features=64)
        self.linear2 = Linear(in_features=64, out_features=10)

        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2, stride=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2, stride=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2, stride=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        ) # Sequential model

    def forward(self, x):
        x = self.model1(x) # Using Sequential model
        return x

model = Cifar()
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=0.01) # 随机梯度下降优化器

for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = model(imgs)
        result = loss_fn(outputs, targets) # 计算损失
        optim.zero_grad() # 清除之前的梯度
        result.backward() # 反向传播计算梯度
        optim.step() # 更新参数
        running_loss += result
    print(running_loss)



100.0%


tensor(360.0392, grad_fn=<AddBackward0>)
tensor(354.4189, grad_fn=<AddBackward0>)
tensor(337.6451, grad_fn=<AddBackward0>)
tensor(319.2448, grad_fn=<AddBackward0>)
tensor(309.6237, grad_fn=<AddBackward0>)
tensor(300.1746, grad_fn=<AddBackward0>)
tensor(291.7544, grad_fn=<AddBackward0>)
tensor(283.5793, grad_fn=<AddBackward0>)
tensor(275.7719, grad_fn=<AddBackward0>)
tensor(268.7389, grad_fn=<AddBackward0>)
tensor(262.3963, grad_fn=<AddBackward0>)
tensor(256.6054, grad_fn=<AddBackward0>)
tensor(251.3780, grad_fn=<AddBackward0>)
tensor(246.7645, grad_fn=<AddBackward0>)
tensor(242.6764, grad_fn=<AddBackward0>)
tensor(238.9920, grad_fn=<AddBackward0>)
tensor(235.5791, grad_fn=<AddBackward0>)
tensor(232.3628, grad_fn=<AddBackward0>)
tensor(229.3004, grad_fn=<AddBackward0>)
tensor(226.3523, grad_fn=<AddBackward0>)
