In [1]:
import torch
import torchvision.datasets as dataset
import torchvision.transforms as transforms
import torch.utils.data as data_utils

In [2]:
# data
train_data = dataset.MNIST(root="mnist",
                           train=True,
                           transform=transforms.ToTensor(),
                           download=True)

test_data = dataset.MNIST(root="mnist",
                           train=False,
                           transform=transforms.ToTensor(),
                           download=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist\MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting mnist\MNIST\raw\train-images-idx3-ubyte.gz to mnist\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist\MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting mnist\MNIST\raw\train-labels-idx1-ubyte.gz to mnist\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist\MNIST\raw\t10k-images-idx3-ubyte.gz



HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting mnist\MNIST\raw\t10k-images-idx3-ubyte.gz to mnist\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz to mnist\MNIST\raw
Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!


In [3]:
# net
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv =torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=5, padding=2), # 灰度图，输入channel为1，定义输出channel为32，卷积核5*5
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2)
        )
        self.fc = torch.nn.Linear(14 * 14 * 32, 10) # 输入图片28*28，pooling为2，所以输出为14*14，channel为32, 10表示0-9
    def forward(self, x):
        out = self.conv(x) 
        out = out.view(out.size()[0], -1) # 通过view对shape修改，卷积后的tensor为4阶，nchw的顺序，out.size()[0]是batchsize
        out = self.fc(out)
        return out
cnn = CNN()
cnn = cnn.cuda()




In [4]:
# batchsize
train_loader = data_utils.DataLoader(dataset=train_data,
                                     batch_size=64,
                                     shuffle=True)

test_loader = data_utils.DataLoader(dataset=test_data,
                                     batch_size=64,
                                     shuffle=True)

In [5]:
# loss
loss_func = torch.nn.CrossEntropyLoss()

In [6]:
# optimizer
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.01)

In [7]:
# training
for epoch in range(10):
    for i, (images, labels) in enumerate(train_loader):
        images = images.cuda()
        labels = labels.cuda()

        outputs = cnn(images)
        loss = loss_func(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print("epoch is {}, ite is "
          "{}/{}, loss is {}".format(epoch+1, i,
                                     len(train_data) // 64, # batchsize为64
                                     loss.item())) # loss为tensor，loss.item()为具体的值
    # eval/test
    loss_test = 0
    accuracy = 0
    for i, (images, labels) in enumerate(test_loader):
        images = images.cuda()
        labels = labels.cuda()
        outputs = cnn(images)
        #[batchsize]
        #outputs = batchsize * cls_num
        loss_test += loss_func(outputs, labels)
        _, pred = outputs.max(1) # 相当于one-hot
        accuracy += (pred == labels).sum().item()

    accuracy = accuracy / len(test_data)
    loss_test = loss_test / (len(test_data) // 64)

    print("epoch is {}, accuracy is {}, "
          "loss test is {}".format(epoch + 1,
                                   accuracy,
                                   loss_test.item()))

epoch is 1, ite is 937/937, loss is 0.06873176246881485
epoch is 1, accuracy is 0.9766, loss test is 0.07042369246482849
epoch is 2, ite is 937/937, loss is 0.05819771811366081
epoch is 2, accuracy is 0.9855, loss test is 0.04847172647714615
epoch is 3, ite is 937/937, loss is 0.001544624101370573
epoch is 3, accuracy is 0.9845, loss test is 0.04848683997988701
epoch is 4, ite is 937/937, loss is 0.06189391389489174
epoch is 4, accuracy is 0.979, loss test is 0.06930312514305115
epoch is 5, ite is 937/937, loss is 0.025315267965197563
epoch is 5, accuracy is 0.9843, loss test is 0.05037533864378929
epoch is 6, ite is 937/937, loss is 0.006601848639547825
epoch is 6, accuracy is 0.9832, loss test is 0.05782075971364975
epoch is 7, ite is 937/937, loss is 0.007877813652157784
epoch is 7, accuracy is 0.9851, loss test is 0.05763240531086922
epoch is 8, ite is 937/937, loss is 0.00038773377309553325
epoch is 8, accuracy is 0.9815, loss test is 0.07472898066043854
epoch is 9, ite is 937/937

In [8]:
# save model
torch.save(cnn, "mnist_model.pkl")

In [9]:
# inference
cnn = torch.load("mnist_model.pkl")
cnn = cnn.cuda()

loss_test = 0
accuracy = 0

import cv2

for i, (images, labels) in enumerate(test_loader):
    images = images.cuda()
    labels = labels.cuda()
    outputs = cnn(images)
    _, pred = outputs.max(1)
    accuracy += (pred == labels).sum().item()

    images = images.cpu().numpy()
    labels = labels.cpu().numpy()
    pred = pred.cpu().numpy()
    #batchsize * 1 * 28 * 28

    for idx in range(images.shape[0]):
        im_data = images[idx]
        im_label = labels[idx]
        im_pred = pred[idx]
        im_data = im_data.transpose(1, 2, 0)
accuracy = accuracy / len(test_data)
print(accuracy)

0.9822
