# 实验2 运用CNN进行MNIST手写数字识别

本次实验是经典的MNIST手写数字识别，要求使用CNN来实现。因为我修过这方面的专业选修课，所以准备使用我以前用过的AlexNet来进行分类。AlexNet使用pytorch来搭建。

In [3]:
# 导包
import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

In [4]:
# 用pytorch官方的API读取数据
training_data = datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms.ToTensor()
)

test_data = datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transforms.ToTensor()
)

batch_size = 100

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [5]:
# 查看使用硬件
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using "+ str(device))

Using cuda


AlexNet通过5层卷积核提取出图片特征，最后用一个简单的前馈神经网络的分类器用于最后的分类

In [6]:
# 定义卷积神经网络
class AlexNet(nn.Module):
    def __init__(self,):
        super(AlexNet, self).__init__()
        self.feature_extraction = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(in_features=256 * 3 * 3, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(in_features=1024, out_features=512),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=512, out_features=10),
        )

    def forward(self, inputs):
        inputs = self.feature_extraction(inputs)
        inputs = inputs.view(-1, 256 * 3 * 3)
        inputs = self.classifier(inputs)
        return F.log_softmax(inputs, dim=1)


model = AlexNet()
print(model)

if torch.cuda.is_available():
    model.cuda()

AlexNet(
  (feature_extraction): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=2304, out_features=1024, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p

In [7]:
# 定义损失函数和优化算法
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [8]:
# 定义训练函数和测试函数
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [9]:
# 定义epoch数量并开始训练
epochs = 30
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model)
print("Done!")

Epoch 1
-------------------------------
loss: 2.299282  [    0/60000]
loss: 2.301512  [10000/60000]
loss: 2.299158  [20000/60000]
loss: 2.302033  [30000/60000]
loss: 2.286094  [40000/60000]
loss: 2.138496  [50000/60000]
Test Error: 
 Accuracy: 81.1%, Avg loss: 0.005829 

Epoch 2
-------------------------------
loss: 0.587896  [    0/60000]
loss: 0.351457  [10000/60000]
loss: 0.339950  [20000/60000]
loss: 0.146083  [30000/60000]
loss: 0.105740  [40000/60000]
loss: 0.033158  [50000/60000]
Test Error: 
 Accuracy: 95.8%, Avg loss: 0.001320 

Epoch 3
-------------------------------
loss: 0.059357  [    0/60000]
loss: 0.095500  [10000/60000]
loss: 0.092435  [20000/60000]
loss: 0.024516  [30000/60000]
loss: 0.043396  [40000/60000]
loss: 0.024010  [50000/60000]
Test Error: 
 Accuracy: 98.4%, Avg loss: 0.000489 

Epoch 4
-------------------------------
loss: 0.025964  [    0/60000]
loss: 0.068566  [10000/60000]
loss: 0.074376  [20000/60000]
loss: 0.018576  [30000/60000]
loss: 0.022572  [40000/6

可以看到在10个epoch之后在测试集上的准确率就已经达到了99%，之后一直在99%浮动，最后稳定在99.3%，达到了这次作业的要求，如果要达到老师说的99.7%的准确率，可能要使用其他网络结构了。