In [13]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch.utils.data import DataLoader,Dataset
import torchvision
import torchvision.transforms as transforms
import numpy as np
import sys

from d2lzh_pytorch import evaluate_accuracy

sys.path.append("..")
import d2lzh_pytorch as d2l

In [2]:
batch_size = 256
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST',train=True,download=True,transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST',train=False,download=True,transform=transforms.ToTensor())

In [4]:
train_iter = DataLoader(mnist_train,batch_size=batch_size,shuffle = True)
test_iter = DataLoader(mnist_test,batch_size=batch_size,shuffle=True)

In [5]:
num_inputs = 784
num_outputs = 10
class LinearNet(nn.Module):
    def __init__(self,num_inputs,num_outputs):
        super(LinearNet,self).__init__()
        self.linear = nn.Linear(num_inputs,num_outputs)

    def forward(self,x):
        y = self.linear(x.view(x.shape[0],-1))
        return y

net = LinearNet(num_inputs,num_outputs)
net

LinearNet(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)

In [9]:
init.normal_(net.linear.weight,mean=0,std=0.01)
init.constant_(net.linear.bias,val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [10]:
loss = nn.CrossEntropyLoss()
optim = torch.optim.SGD(net.parameters(),lr=0.01)


In [14]:
epochs = 5
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # “softmax回归的简洁实现”一节将用到


            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_ch3(net, train_iter, test_iter, loss, epochs, batch_size, optimizer=optim,lr=0.01)

epoch 1, loss 0.0036, train acc 0.716, test acc 0.725
epoch 2, loss 0.0031, train acc 0.750, test acc 0.745
epoch 3, loss 0.0029, train acc 0.767, test acc 0.761
epoch 4, loss 0.0028, train acc 0.779, test acc 0.768
epoch 5, loss 0.0026, train acc 0.788, test acc 0.778
