In [6]:
import os
import torch
from torch import nn, optim
import torch.nn.functional as F
#from torch.autograd import Variable
from torchvision import datasets, transforms

In [7]:
# Preprocessing pipeline applied to every image: convert it to a tensor, then
# normalize the single channel with mean 0.1307 and std 0.3081
# (presumably the MNIST pixel statistics -- confirm against the dataset).
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
]
transform = transforms.Compose(preprocessing_steps)

In [8]:
# Build the MNIST training and test splits rooted at the relative 'data'
# directory. download=True asks torchvision to fetch the files when needed;
# both splits share the same preprocessing pipeline.
trainset = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=transform,
)
testset = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=transform,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


96.7%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

102.8%


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


112.7%

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [9]:
class LeNet(nn.Module):
    """LeNet-style classifier: two conv + max-pool stages followed by three linear layers.

    ``fc1`` expects 16*4*4 flattened features per sample, which is what two 5x5
    convolutions (each followed by 2x2 max pooling) produce from a 1x28x28
    input. The network returns 10 raw scores per sample; no softmax is applied.
    """

    def __init__(self):
        """Create the convolutional and fully connected layers."""
        # Initialise nn.Module itself before registering any sub-layer.
        super(LeNet, self).__init__()
        # C1: 1 input channel (grayscale) -> 6 feature maps, 5x5 kernel.
        self.c1 = nn.Conv2d(1, 6, (5, 5))
        # C3: 6 feature maps -> 16 feature maps, 5x5 kernel.
        self.c3 = nn.Conv2d(6, 16, 5)
        # S4 -> C5: the 16*4*4 pooled activations are fully connected to 120 units.
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        # C5 -> F6: 120 units fully connected to 84 units.
        self.fc2 = nn.Linear(120, 84)
        # F6 -> OUTPUT: 84 units fully connected to 10 scores, one per digit 0-9.
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute the 10 class scores for a batch of images.

        Args:
            x: input batch; assumed to be shaped (batch, 1, 28, 28) so that the
                flattened size matches ``fc1`` -- TODO confirm for other inputs.

        Returns:
            Tensor with one row of 10 raw scores per flattened sample.
        """
        # Stage 1: convolution -> ReLU -> 2x2 max pooling.
        c1_maps = F.relu(self.c1(x))
        s2 = F.max_pool2d(c1_maps, 2)
        # Stage 2: convolution -> ReLU -> 2x2 max pooling.
        c3_maps = F.relu(self.c3(s2))
        s4 = F.max_pool2d(c3_maps, 2)
        # Collapse every per-sample dimension into one feature vector for the linear layers.
        flat = s4.view(-1, self.num_flat_features(s4))
        # Two hidden fully connected layers with ReLU, then the linear output layer.
        c5 = F.relu(self.fc1(flat))
        f6 = F.relu(self.fc2(c5))
        return self.fc3(f6)

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        The leading (batch) dimension is skipped; the result is the product of
        all remaining dimension sizes, or 1 when there are none.
        """
        per_sample_shape = torch.Size(x.size()[1:])
        return per_sample_shape.numel()

In [11]:
# Seed torch's global RNG before any weights are created so that parameter
# initialisation (and later draws from the same global generator) repeat
# exactly on every "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Fresh, untrained network.
lenet = LeNet()

# Cross-entropy loss on the raw class scores, optimised by SGD with momentum
# over all of the network's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(lenet.parameters(), lr=0.001, momentum=0.9)

In [12]:
# Batch both splits into mini-batches of 4 samples, loaded by 2 worker
# processes. Only the training split is shuffled; the test split keeps its order.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

In [15]:
def train(model, criterion, optimizer, epochs=1, loader=None, log_every=1000):
    """Train ``model`` batch by batch and print the mean loss at regular intervals.

    Args:
        model: callable mapping a batch of inputs to outputs.
        criterion: callable ``criterion(outputs, labels)`` returning a loss that
            supports ``.backward()`` and ``.item()``.
        optimizer: object exposing ``zero_grad()`` and ``step()``.
        epochs: number of full passes over ``loader``.
        loader: iterable yielding ``(inputs, labels)`` batches. When omitted,
            the notebook-level ``trainloader`` is used, which keeps existing
            ``train(model, criterion, optimizer, epochs=...)`` calls working.
        log_every: number of batches averaged into each printed loss value.

    Returns:
        None. Progress is reported on stdout only.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError('log_every must be a positive integer, got %r' % (log_every,))
    if loader is None:
        # Backward-compatible fallback to the loader defined at notebook level.
        loader = trainloader

    for epoch in range(epochs):
        running_loss = 0.0
        # Count batches from 1 so the counter doubles as the printed batch number.
        for batch_no, (inputs, labels) in enumerate(loader, 1):
            # Gradients accumulate by default, so clear them before each step.
            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if batch_no % log_every == 0:
                # Report the mean loss of the window just completed, then start a new one.
                print('[Epoch:%d, Batch:%5d] Loss: %.3f' % (epoch+1, batch_no, running_loss / log_every))
                running_loss = 0.0

    print('Finished Training')

In [16]:
# Train the network for two epochs; the running loss is printed as training proceeds.
train(lenet, criterion, optimizer, epochs=2)

[Epoch:1, Batch: 1000] Loss: 1.579
[Epoch:1, Batch: 2000] Loss: 0.329
[Epoch:1, Batch: 3000] Loss: 0.200
[Epoch:1, Batch: 4000] Loss: 0.170
[Epoch:1, Batch: 5000] Loss: 0.142
[Epoch:1, Batch: 6000] Loss: 0.106
[Epoch:1, Batch: 7000] Loss: 0.120
[Epoch:1, Batch: 8000] Loss: 0.103
[Epoch:1, Batch: 9000] Loss: 0.102
[Epoch:1, Batch:10000] Loss: 0.107
[Epoch:1, Batch:11000] Loss: 0.102
[Epoch:1, Batch:12000] Loss: 0.099
[Epoch:1, Batch:13000] Loss: 0.093
[Epoch:1, Batch:14000] Loss: 0.076
[Epoch:1, Batch:15000] Loss: 0.078
[Epoch:2, Batch: 1000] Loss: 0.070
[Epoch:2, Batch: 2000] Loss: 0.062
[Epoch:2, Batch: 3000] Loss: 0.056
[Epoch:2, Batch: 4000] Loss: 0.072
[Epoch:2, Batch: 5000] Loss: 0.064
[Epoch:2, Batch: 6000] Loss: 0.063
[Epoch:2, Batch: 7000] Loss: 0.064
[Epoch:2, Batch: 8000] Loss: 0.053
[Epoch:2, Batch: 9000] Loss: 0.048
[Epoch:2, Batch:10000] Loss: 0.053
[Epoch:2, Batch:11000] Loss: 0.052
[Epoch:2, Batch:12000] Loss: 0.063
[Epoch:2, Batch:13000] Loss: 0.060
[Epoch:2, Batch:1400

In [17]:
# Persist the whole trained module object (not just its state_dict) to
# 'model.pkl' in the current working directory.
torch.save(lenet, 'model.pkl')

In [None]:
# Read 'model.pkl' back and rebind `lenet` to the loaded object.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. Newer PyTorch releases may also require weights_only=False
# to load a whole-module pickle -- confirm against the installed version.
lenet = torch.load('model.pkl')