## softmax全连接层使用Pytorch实现

In [46]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.init as init
import numpy as np
import sys
import time

### 读取数据

In [47]:
def load_dataset(dataSet, batch_size=64):
    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4
        print('linux')
    dataset_iter = torch.utils.data.DataLoader(dataSet, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    return dataset_iter

### 定义模型

In [48]:
class softmaxModel(torch.nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(softmaxModel, self).__init__()
        self.linear = torch.nn.Linear(num_inputs, num_outputs)
    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y

### 定义损失函数

In [49]:
loss = torch.nn.CrossEntropyLoss()
print(loss)

CrossEntropyLoss()


### 定义优化函数

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.1
    momentum: 0
    nesterov: False
    weight_decay: 0
)


### 评价准确率

In [162]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

### 训练模型

In [173]:
def training_loop(net, loss, optimizer, n_epochs, train_dataset, test_dataset, batch_size=64):
    train_iter = load_dataset(train_dataset, batch_size)
    test_iter = load_dataset(test_dataset, batch_size)
    for epoch in range(n_epochs+1):
        train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
        for x, lable in train_iter:
            optimizer.zero_grad()
            y = net(x)                 # 前向传播
            #lable = lable.to(torch.float)
            #lable = lable.view(-1, 1)
            
            l = loss(y, lable).sum()   # 计算损失
            #optimizer.zero_grad()      # 梯度归0
            l.backward()               # 后向传播
            optimizer.step()           # 更新参数
            # print(net.parameters())
            
            train_loss_sum += l.item()
            train_acc_sum += (y.argmax(dim=1) == lable).sum().item()
            n += lable.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))
        
def train_ch3(net, train_dataset, test_dataset, loss, num_epochs, batch_size, optimizer=None):
    print("come in")
    train_iter = load_dataset(train_dataset, batch_size)
    test_iter = load_dataset(test_dataset, batch_size)
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            optimizer.step()  # “softmax回归的简洁实现”一节将用到

                
            
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [175]:
if __name__ == '__main__':
    savePath = './Fashion-MNIST/'
    mnist_train = torchvision.datasets.FashionMNIST(root=savePath, train=True, download=True, transform=transforms.ToTensor())
    mnist_test = torchvision.datasets.FashionMNIST(root=savePath, train=False, download=True, transform=transforms.ToTensor())
    
    # 初始化模型参数
    num_inputs = 784
    num_outputs = 10
    net = softmaxModel(num_inputs, num_outputs)
    init.normal_(net.linear.weight, mean=0, std=0.1)
    init.constant_(net.linear.bias, val=1)
    for param in net.parameters():
        print(param)
    n_epochs=10
    optimizer = torch.optim.SGD(net.parameters(), lr = 0.1)
    print(optimizer)
    train_ch3(net, mnist_train, mnist_test, loss, optimizer=optimizer, batch_size=64, num_epochs=n_epochs )

Parameter containing:
tensor([[-0.0328, -0.0147, -0.0448,  ..., -0.0235, -0.1237,  0.1265],
        [ 0.1271, -0.0347, -0.0332,  ..., -0.2823,  0.0621, -0.1517],
        [ 0.0393,  0.0320, -0.0233,  ..., -0.0496,  0.0446, -0.1199],
        ...,
        [-0.1625, -0.0586,  0.0127,  ...,  0.1730,  0.0589,  0.0612],
        [-0.0766,  0.0029, -0.0142,  ...,  0.0416, -0.1974,  0.0316],
        [-0.0617,  0.0774, -0.0335,  ..., -0.0814,  0.1372,  0.0524]],
       requires_grad=True)
Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.1
    momentum: 0
    nesterov: False
    weight_decay: 0
)
come in
linux
linux
epoch 1, loss 0.0100, train acc 0.783, test acc 0.807
epoch 2, loss 0.0078, train acc 0.830, test acc 0.821
epoch 3, loss 0.0073, train acc 0.839, test acc 0.831
epoch 4, loss 0.0071, train acc 0.843, test acc 0.817
epoch 5, loss 0.0069, train acc 0.848, test acc 0.831
epoch 6, loss 0.0068, tra