In [58]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import random
import torch
import torchvision
import torchvision.transforms as transforms
from collections import OrderedDict

In [59]:
# 加载数据
def fashion_mnist_dataload(batch_size):
    mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMnist', train=True, download=True,transform=transforms.ToTensor())
    mnist_test =  torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMnist',train=False, download=True,transform=transforms.ToTensor())
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
    test_iter =  torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,shuffle=True, num_workers=0)
    return train_iter, test_iter

In [60]:
batch_size = 256
train_iter, test_iter = fashion_mnist_dataload(batch_size)

In [79]:
def evaluate_accuracy(data_set,net):
    n = 0
    acc = 0.0
    for X,y in data_set:
        acc += (net(X).argmax(dim=1)==y).sum()
        n += y.shape[0]
    return acc/n

In [62]:
# 定义和初始化模型

num_inputs = 784
num_outputs = 10

# dense层
class LinearNet(torch.nn.Module):
    def __init__(self,num_inputs,num_outputs):
        super(LinearNet, self).__init__()
        self.linear = torch.nn.Linear(num_inputs,num_outputs)
    def forward(self, x):  # x.shape = (batch, 1, 28, 28)
        y = self.linear(x.view(x.shape[0], -1))
        return y

In [20]:
# 定义模型 后面用到
# Flatter 层
class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer,self).__init__()
    def forward(self, x):
        return x. view(x.shape[0], -1)

In [23]:
net = torch.nn.Sequential(
                    OrderedDict([
                        ('flatten',FlattenLayer()),
                        ('linear', LinearNet(num_inputs,num_outputs))
                    ])
                )

In [63]:
# 实例化模型
net = LinearNet(num_inputs, num_outputs)

In [64]:
torch.nn.init.normal_(net.linear.weight, mean=0, std=0.01)
torch.nn.init.constant(net.linear.bias, val=0)

  


Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [77]:
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum ,n= 0.0, 0.0, 0
        for X,y in train_iter:
            # 正向传播
            y_hat = net(X)
            # 计算损失值
            l = loss(y_hat,y)
            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            # 反向传播，计算梯度值
            l.backward()
            # 更新梯度值
            if optimizer is not None:
                optimizer.step()
            else:
                for param in params:
                    param.data -= lr * param.grad / batch_size
            
            train_loss_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1)==y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter,net)
        print('num_epochs %d, loss %.4f, train acc %.3f, test acc %.3f'% (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))
            
            

In [80]:
epochs = 6
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=0.02)
train_ch3(net, train_iter, test_iter, loss, epochs, batch_size,  None, None, optimizer)

num_epochs 1, loss 0.0022, train acc 0.819, test acc 0.809
num_epochs 2, loss 0.0022, train acc 0.822, test acc 0.811
num_epochs 3, loss 0.0021, train acc 0.825, test acc 0.814
num_epochs 4, loss 0.0021, train acc 0.827, test acc 0.816
num_epochs 5, loss 0.0021, train acc 0.828, test acc 0.816
num_epochs 6, loss 0.0020, train acc 0.830, test acc 0.818
