<a href="https://colab.research.google.com/github/SuperNZH/Deep-Learning-Practice/blob/main/Dive%20in%20DL/2_2_fashion_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 导入需要的包

In [1]:
# 更方便的实现SOFTMAX，使用pytorch本身的方法
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys

from google.colab import drive
drive.mount('/content/drive')
sys.path.append('/content/drive/MyDrive/Colab_Notebooks/Dive_in_ML')
import d2lzh_pytorch as d2l

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 读取数据

In [2]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

  cpuset_checked))


# 定义和初始化模型

In [3]:
# 先定义net
# softmax是一个全连接层

num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
  def __init__(self, num_inputs, num_outputs):
    super(LinearNet, self).__init__()
    self.linear = nn.Linear(num_inputs, num_outputs)
  def forward(self, x):
    # 之所以这个地方要用view的原因，是因为传进来的x不是标准的二维格式
    # view的后面那个-1是把三位数据降为二维
    # 所以如果想直接linear(x)的话，在传x进去之前就要先把它的维度格式处理好
    y = self.linear(x.view(x.shape[0], -1))
    return y

In [4]:
# 实例化model
net = LinearNet(num_inputs, num_outputs)

In [5]:
# 初始化权重

init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

# 更加简略的创建model

In [6]:
'''

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x): # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)

'''

'\n\nclass FlattenLayer(nn.Module):\n    def __init__(self):\n        super(FlattenLayer, self).__init__()\n    def forward(self, x): # x shape: (batch, *, *, ...)\n        return x.view(x.shape[0], -1)\n\n'

In [7]:
'''

from collections import OrderedDict

net = nn.Sequential(
    # FlattenLayer(),
    # nn.Linear(num_inputs, num_outputs)
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)

'''

"\n\nfrom collections import OrderedDict\n\nnet = nn.Sequential(\n    # FlattenLayer(),\n    # nn.Linear(num_inputs, num_outputs)\n    OrderedDict([\n        ('flatten', FlattenLayer()),\n        ('linear', nn.Linear(num_inputs, num_outputs))\n    ])\n)\n\n"

# SOFTMAX --> Cross-Entropy Function

In [8]:
# 分开定义softmax和损失函数可能会造成数值不稳定，所以用pytorch内置的函数

loss = nn.CrossEntropyLoss()

# Optimization 优化

In [9]:
# 用学习率0.01来优化，gradient decent
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

# Model Training

In [10]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

In [11]:
# training其实就是epochs循环的细节实现
# 通过多次迭代模型参数，在每次迭代中，根据当前读取的小批量数据样本（特征X和标签y），通过调用backward反向函数计算小批量随机梯度，并调用优化算法sgd迭代模型参数。
num_epochs = 10

def train(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
  for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    # 损失函数为什么要用sum这个地方的评论解释不错 https://zhuanlan.zhihu.com/p/427853673l 因为loss算出来是一个10x1的损失向量，sum()之后变成一个总损失，标量，bw只能传标量
    for X, y in train_iter:
      y_hat = net(X)
      l = loss
      l = loss(y_hat, y).sum()

      # backward之前手动清空梯度的原因是 https://www.zhihu.com/question/303070254 简单来说是处于pytorch的设计原因，节省内存
      if optimizer is not None:
        optimizer.zero_grad()
      elif params is not None and params[0].grad is not None:
        for param in params:
          param.grad.data.zero_()

      l.backward() # 算梯度

      #step()的作用就是会更新所有的参数
      if optimizer is None:
        d2l.sgd(params, lr, batch_size)
      else:
        optimizer.step()

      train_l_sum += l.item()
      train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
      n += y.shape[0]

    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [12]:
train(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

  cpuset_checked))


epoch 1, loss 0.0053, train acc 0.643, test acc 0.679
epoch 2, loss 0.0036, train acc 0.716, test acc 0.725
epoch 3, loss 0.0031, train acc 0.750, test acc 0.746
epoch 4, loss 0.0029, train acc 0.768, test acc 0.758
epoch 5, loss 0.0028, train acc 0.780, test acc 0.770
epoch 6, loss 0.0026, train acc 0.788, test acc 0.779
epoch 7, loss 0.0026, train acc 0.795, test acc 0.783
epoch 8, loss 0.0025, train acc 0.800, test acc 0.788
epoch 9, loss 0.0024, train acc 0.804, test acc 0.790
epoch 10, loss 0.0024, train acc 0.807, test acc 0.795
