## 3.7 softmax回归的简洁实现

In [1]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l

# Show the installed PyTorch version so the recorded outputs can be tied to it.
print(torch.__version__)

1.12.1+cpu


### 3.7.1 获取和读取数据

In [2]:
# Mini-batch size; handed to the data loader here and to the training helper later.
batch_size = 256
# d2l.load_data_fashion_mnist is defined outside this notebook. Judging by its
# name and the unpacking below it returns a (train, test) pair of Fashion-MNIST
# batch iterators -- TODO confirm against d2lzh_pytorch.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

### 3.7.2 定义和初始化模型

In [3]:
num_inputs = 784   # 1 * 28 * 28 values per flattened image (see forward's shape note)
num_outputs = 10   # number of logits produced per sample

class LinearNet(nn.Module):
    """Single fully connected layer applied to a flattened input batch.

    Args:
        num_inputs: number of features per sample after flattening.
        num_outputs: number of output logits per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):  # x shape: (batch, 1, 28, 28)
        """Flatten everything after the batch axis and return the linear output.

        Returns:
            Tensor of shape (batch, num_outputs).
        """
        # reshape() instead of view(): view() raises on non-contiguous input
        # (e.g. a transposed batch), while reshape() copies only when it has to
        # and still returns a no-copy view for contiguous tensors.
        flat = x.reshape(x.shape[0], -1)
        return self.linear(flat)

net = LinearNet(num_inputs, num_outputs)

In [4]:
class FlattenLayer(nn.Module):
    """Collapse every axis after the first into a single feature axis.

    An input of shape (batch, d1, d2, ...) becomes (batch, d1 * d2 * ...).
    The layer has no parameters.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):  # x shape: (batch, *, *, ...)
        # reshape() rather than view(): view() raises a RuntimeError when x is
        # non-contiguous (e.g. after transpose/permute). For contiguous input
        # reshape() still returns a view, so no extra copy is made.
        return x.reshape(x.shape[0], -1)

In [5]:
from collections import OrderedDict

# Rebuild `net` as an nn.Sequential whose stages are named via an OrderedDict,
# so the linear layer is reachable as `net.linear`. The unnamed equivalent is
# nn.Sequential(FlattenLayer(), nn.Linear(num_inputs, num_outputs)).
net = nn.Sequential(OrderedDict(
    flatten=FlattenLayer(),
    linear=nn.Linear(num_inputs, num_outputs),
))

In [6]:
# Draw the linear weights from N(0, 0.01^2) and zero the bias, both in place.
# The second call is the cell's last expression, so the bias parameter is echoed.
init.normal_(net.linear.weight, mean=0.0, std=0.01)
init.zeros_(net.linear.bias)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [7]:
# Cross-entropy criterion. Per the PyTorch docs it takes raw, unnormalised
# logits and applies log-softmax internally, which is why the models in this
# notebook end in a plain linear layer with no explicit softmax.
loss = nn.CrossEntropyLoss()

In [8]:
# Mini-batch SGD over every parameter of `net`, with a learning rate of 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [9]:
# Number of passes over the training iterator.
num_epochs = 5
# d2l.train_ch3 is defined outside this notebook. The two positional None
# arguments are presumably the hand-written-SGD `params` and `lr` slots, left
# unused because an optimizer object is supplied -- TODO confirm against the
# helper's signature in d2lzh_pytorch.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0031, train acc 0.747, test acc 0.783
epoch 2, loss 0.0022, train acc 0.813, test acc 0.812
epoch 3, loss 0.0021, train acc 0.825, test acc 0.818
epoch 4, loss 0.0020, train acc 0.832, test acc 0.823
epoch 5, loss 0.0019, train acc 0.837, test acc 0.822


## 问题
1. 更换三个不同大小的学习率，观察学习率对于模型训练的影响。
2. 更换三个不同大小的 batch_size，观察 batch_size 对于模型训练的影响。
3. 将模型替换为下方的卷积神经网络，观察模型训练效果：

In [10]:
class ConvNet(nn.Module):
    """Two 3x3 convolutions (1 -> 5 -> 10 channels) followed by a linear classifier.

    Both convolutions use stride 1 with 'same' padding, so the spatial size of
    the input is preserved and the flattened conv output holds
    ``num_inputs * 10`` values per sample.

    Args:
        num_inputs: number of spatial positions per feature map (height * width),
            e.g. 784 for 28x28 images.
        num_outputs: number of output logits per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        # Channel count of the last conv layer; the linear layer's input size
        # is derived from it so the two cannot drift apart.
        conv_out_channels = 10
        self.conv2d_blocks = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=5, out_channels=conv_out_channels, kernel_size=3, stride=1, padding='same')
        )
        self.linear = nn.Linear(num_inputs * conv_out_channels, num_outputs)

    def forward(self, x):  # x shape: (batch, 1, 28, 28)
        """Run the conv stack, flatten per sample, and return the linear output.

        Returns:
            Tensor of shape (batch, num_outputs).
        """
        features = self.conv2d_blocks(x)
        # reshape() instead of view(): view() raises if the conv output is not
        # contiguous; reshape() copies only in that case.
        return self.linear(features.reshape(features.shape[0], -1))