In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

### Data
通过`torchvision.datasets`下载`MNIST`数据。  
训练集：`train=True`  
测试集：`train=False`  
常用的还有`torchvision.datasets.ImageFolder()`，按文件夹取图片。  

`torchvision.transforms`可以对图片做处理。

In [2]:
# Both splits share the storage root, the PIL-image -> tensor conversion and
# the download-if-missing behaviour; only the `train` flag differs.
mnist_options = dict(root='../dataset', transform=transforms.ToTensor(), download=True)
train_dataset = dsets.MNIST(train=True, **mnist_options)
test_dataset = dsets.MNIST(train=False, **mnist_options)

In [3]:
# Display the training split's summary (sample count, root, split, transform).
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../dataset
    Split: Train
    StandardTransform
Transform: ToTensor()

In [4]:
# Display the test split's summary (sample count, root, split, transform).
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: ../dataset
    Split: Test
    StandardTransform
Transform: ToTensor()

In [5]:
# Shape of the first entry in the dataset's raw `data` tensor (one image).
train_dataset.data[0].shape

torch.Size([28, 28])

In [6]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 50

# Training batches are reshuffled; the test loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

### Model
构造逻辑回归模型（多分类，即 Softmax 回归）：用单个线性层把输入特征映射为每个类别的得分。

In [7]:
class LogisticRegression(nn.Module):
    """Logistic-regression classifier expressed as a single affine layer.

    The module returns the raw output of the linear layer; no softmax or
    other activation is applied inside the model.

    Args:
        dim_in: number of input features per sample.
        dim_out: number of output scores per sample.
    """

    def __init__(self, dim_in, dim_out):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(in_features=dim_in, out_features=dim_out)

    def forward(self, x):
        """Apply the affine layer to ``x`` and return the resulting scores."""
        scores = self.linear(x)
        return scores

模型没有大变化，只是损失函数被定义为 `nn.CrossEntropyLoss()`，即交叉熵。该损失内部已经包含 `log_softmax`，因此模型直接输出未归一化的得分（logits），无需再接 softmax 层。

In [8]:
# Hyper-parameters and training objects shared by the cells below.
seed = 0          # RNG seed so a Restart & Run All reproduces the same run
dim_in = 28*28    # one input feature per pixel of a flattened 28x28 image
dim_out = 10      # one output score per digit class
lrate = 0.001     # SGD step size
epochs = 10       # number of full passes over the training loader

# Seed PyTorch's global generator before any weights are created. This fixes
# the initial parameters and, as long as the DataLoader draws its shuffle
# order from the global generator (no explicit `generator=` is passed), the
# batch order as well.
torch.manual_seed(seed)

# The model is cast to float64, so inputs must be cast with `.double()` too.
model = LogisticRegression(dim_in, dim_out).double()
# Cross-entropy on raw scores; the loss applies log-softmax internally.
criterion = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=lrate)

注意：每次反向传播之前都需要用 `optim.zero_grad()` 将参数的梯度归零，否则梯度会在多次 `backward()` 之间累加。  
`optim.step()` 则在 `loss.backward()` 计算出每个参数的 `grad` 之后，更新每个参数的数值。

每一步迭代（每次参数更新）都用 `train_loader` 中的一个 `batch` 作为训练数据。

In [9]:
# Train with mini-batch SGD and record one loss value per epoch.
result = []  # mean training loss of each epoch, in epoch order
for _ in range(epochs):
    loss_sum = 0.0  # sum of per-sample losses seen during this epoch
    n_seen = 0      # number of samples seen during this epoch
    for inputs, targets in train_loader:
        # Flatten every image to one row of `dim_in` features and cast to
        # float64, because the model was converted with `.double()`.
        inputs = inputs.reshape(-1, dim_in).double()
        # Gradients accumulate across backward() calls, so clear them first.
        optim.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optim.step()
        # `criterion` uses the default mean reduction, so weight the batch
        # loss by the batch size to get a correct per-sample epoch average
        # even when the last batch is smaller.
        n_batch = targets.size(0)
        loss_sum += loss.item() * n_batch
        n_seen += n_batch
    # Record the epoch average rather than the last batch's loss, which is a
    # noisy sample; skip the entry if the loader yielded nothing.
    if n_seen:
        result.append(loss_sum / n_seen)

fig = figure(title='Training loss', x_axis_label='epoch',
             y_axis_label='mean cross-entropy loss')
fig.line(range(len(result)), result)
show(fig)

### Result
检验预测结果

`re = torch.max(Tensor, dim)` 返回一个包含两个元素的元组 `(values, indices)`：`re[0]` 是沿 `dim` 维取得的最大值组成的 `Tensor`，`re[1]` 是这些最大值对应的 `index`。

In [10]:
# Measure classification accuracy on the test loader.
correct = 0  # number of samples whose predicted class equals the label
total = 0    # number of samples evaluated
# Inference needs no gradients: disabling autograd avoids building a graph
# for every batch and replaces the legacy `.data` access.
with torch.no_grad():
    for inputs, targets in test_loader:
        # Same preprocessing as in training: flatten and cast to float64.
        inputs = inputs.reshape(-1, dim_in).double()
        outputs = model(inputs)
        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score is the predicted class.
        _, preds = torch.max(outputs, 1)
        total += targets.size(0)
        # .item() keeps the counter a plain int, so the accuracy below does
        # not depend on `correct` having become a tensor.
        correct += (preds == targets).sum().item()
# Guard against an empty loader instead of failing on the division.
accuracy = 100.0 * correct / total if total else float('nan')
print('Accuracy of the model on the 10000 test images: %.2f %%' % (accuracy))

Accuracy of the model on the 10000 test images: 87.28 %


### Save Model

In [11]:
# Persist only the learned parameters (the state dict), not the module object.
learned_params = model.state_dict()
torch.save(learned_params, 'logistic_regression_model.pkl')