# 任务1：PyTorch搭建LeNet模型进行MNIST分类

**任务要求：利用PyTorch框架搭建一个LeNet模型，并针对MNIST数据集进行训练和测试。**  
  
同样的，__你需要在TODO标注部分填写你的代码。__

In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
use_gpu = True if torch.cuda.is_available() else False
print('Use GPU:', use_gpu)

Use GPU: False


## 1. 数据集加载

利用torchvision的datasets加载MNSIT数据集

In [3]:
# 图像处理方式
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize([0.5],[0.5])]
)

In [4]:
# 加载训练数据集
batch_size = 64
train_dataset = datasets.MNIST(root='./data/', train=True, transform=transform, download=True)
print(train_dataset)

# 创建训练dataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
train_num = len(train_dataset)
print('train image num:', train_num)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Processing...
Done!
Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.5], std=[0.5])
           )
train image num: 60000


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
# 加载测试数据集
test_dataset = datasets.MNIST(root='./data/', train=False, transform=transform, download=True)
print(test_dataset)

# 创建测试dataLoader
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
test_num = len(test_dataset)
print('test image num:', test_num)

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.5], std=[0.5])
           )
test image num: 10000


## 2. LeNet模型构建

利用PyTorch构建LeNet-5模型  

友情链接(将会使用到的pytorch语句)：[nn.Conv2d()](https://blog.csdn.net/qq_38863413/article/details/104108808) / [nn.MaxPool2d()](https://blog.csdn.net/qq_36387683/article/details/107638184) / [nn.Linear()](https://blog.csdn.net/qq_42079689/article/details/102873766) / [F.relu()](https://pytorch.org/docs/stable/generated/torch.nn.functional.relu.html?highlight=f%20relu#torch.nn.functional.relu)  

[卷积前后图像大小变化公式(定义卷积操作时需要计算)](https://blog.csdn.net/qq_32256033/article/details/103345337)

![jupyter](./data/lenet_model.png)

In [6]:
#######################################################################
# TODO:                                                               #
# 使用pytorch搭建LeNet模型                                            #
#######################################################################

class LeNet(nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        
        ''' 
        请在此处定义LeNet模型将会用到的模块
        
        提示：
        包括卷积层，采样层，全连接层，激活函数等
        你将会用到nn.Conv2d()/nn.MaxPool2d()/nn.Linear()/F.relu()等pytorch接口函数，
        
        例如，你可以定义某个卷积操作为
        self.conv = nn.Conv2d(......)
        '''
        
        '''
        LeNet网络结构：
            input -> 卷积层1 -> 激活 -> 池化 ->卷积层2 -> 激活 -> 池化 -> fatten -> fc1 -> fc2 > fc3
        '''
        # 通道数由1变为6 所以chan_in = 1 chan_out = 6
        # 网络结构里的input是32*32 但是数据集的图片 是28*28 所以周围补上2个0 使得28*28 变为 32*32 所以padding=2
        # 尺寸从32变为28 计算可得 卷积核大小为5
        self.conv1 = nn.Conv2d(in_channels=1,out_channels=6,kernel_size=5,padding=2)
        # 激活函数选择relu
        self.activate = nn.ReLU()
        # 由上面的网络结构图可知 池化层的核大小为（2，2）
        self.pooling = nn.MaxPool2d(kernel_size=2)
        # 通道数由6变为16 所以chan_in = 6 chan_out = 16
        # 尺寸从14变为28 计算可得 卷积核大小为10
        self.conv2 = nn.Conv2d(in_channels=6,out_channels=16,kernel_size=5)
        # 将feature map平铺成向量
        self.fatten = nn.Flatten()
        # 全连接层 从图中可得 输入神经元16*5*5个 输出神经元120个
        self.fc1 = nn.Linear(16*5*5,120)
        # 全连接层 从图中可得 输入神经元120个 输出神经元84个
        self.fc2 = nn.Linear(120,84)
        # 全连接层 从图中可得 输入神经元84个 输出神经元10个 之所以最后是10  是因为数字0-9是10个类别
        self.fc3 = nn.Linear(84,10)
        
        
    def forward(self, x):
        
        ''' 请在此处编写LeNet模型处理图像的过程 '''
        
        # 提示
        # 与上图模型结构图不同的是，输入的图像形状为28×28
        # 即x: (64, 1, 28, 28)

        # C1卷积阶段
        # 你需要调用定义的卷积操作，以得到shape为(64, 6, 28, 28)的特征
        x = self.conv1(x)

        # 激活函数，你需要用到relu函数
        x =self.activate(x)

        # S2池化阶段
        # 你需要调用定义的池化操作，以得到shape为(64, 6, 14, 14)的特征
        x = self.pooling(x)

        # C3卷积阶段
        # 你需要调用定义的卷积操作，以得到shape为(64, 16, 10, 10)的特征
        x = self.conv2(x)

        # 激活函数，你需要用到relu函数
        x = self.activate(x)

        # S4池化阶段
        # 你需要调用定义的池化操作，以得到shape为(64, 16, 5, 5)的特征
        x = self.pooling(x)

        # 此阶段你需要将特征平铺，以得到shape为(64, 400)的特征
        x = self.fatten(x)

        # C5全连接阶段
        # 你需要调用定义的全连接操作，以得到shape为(64, 120)的特征
        x = self.fc1(x)

        # 激活函数，你需要用到relu函数
        x =self.activate(x)

        # F6全连接阶段
        # 你需要调用定义的全连接操作，以得到shape为(64, 84)的特征
        x =self.fc2(x)

        # 激活函数，你需要用到relu函数
        x = self.activate(x)

        # OUTPUT
        # 你需要调用定义的全连接操作，以得到shape为(64, 10)的特征
        x = self.fc3(x)

        return x

#######################################################################
#                         END OF YOUR CODE                            #
#######################################################################

In [7]:
# 实例化
cnn = LeNet()
if use_gpu:
    cnn = cnn.cuda()
print(cnn)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (activate): ReLU()
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


## 3. 优化器和损失函数

定义优化器(SGD/Adam)和交叉熵损失函数

In [8]:
# 优化器
learning_rate = 0.001
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate, betas=(0.9, 0.99))
print(optimizer)

# 损失函数
criterion = nn.CrossEntropyLoss()
print(criterion)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.99)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)
CrossEntropyLoss()


## 4. 模型训练和测试

模型训练函数

In [9]:
# 训练CNN模型
def train(epoch):
    cnn.train()
    train_loss = 0
    correct = 0
    for batch_idx, (data, label) in enumerate(train_loader):
        if use_gpu:  # 使用GPU
            data, label = Variable(data).cuda(), Variable(label).cuda()
        
        # 模型预测结果
        predict = cnn(data)
        
        # 计算损失函数
        loss = criterion(predict, label)     
        pred = predict.data.argmax(1)
        correct += (pred==label).sum().item()
        
        # 梯度清零
        optimizer.zero_grad()
        
        # 反向传播
        loss.backward()
        
        # 更新网络参数
        optimizer.step()
        
        # 输出训练阶段loss信息
        train_loss += loss.item()
        train_loss_list.append(loss.item())
        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), train_num,
                100 * batch_idx * len(data) / train_num, loss.item()))

    train_loss /= train_num
    accuracy = correct / train_num
    train_acc_list.append(accuracy)
    # 输出训练阶段loss信息
    print('Train Epoch: {}\tAverage loss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)'.format(
        epoch, train_loss, correct, train_num, 100.0 * accuracy))

模型测试函数

In [10]:
# 测试CNN模型
def test():
    cnn.eval()
    test_loss = 0
    correct = 0
    for data, label in test_loader:
        if use_gpu:  # 使用GPU
            data, label = Variable(data).cuda(), Variable(label).cuda()
        
        # 模型预测结果
        predict = cnn(data)
        # 计算batch损失和
        loss = criterion(predict, label)
        # 预测label
        pred = predict.data.argmax(1)
        # 预测正确数
        correct += (pred==label).sum().item()
        
        test_loss += loss.item()
        test_loss_list.append(loss.item())

    test_loss /= test_num
    accuracy = correct / test_num
    test_acc_list.append(accuracy)
    # 输出测试阶段loss信息
    print('Test Epoch: {}\tAverage loss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)\n'.format(
        epoch, test_loss, correct, test_num, 100.0 * accuracy))

In [None]:
import time

train_loss_list = []
test_loss_list = []
train_acc_list = []
test_acc_list = []
epoch_num = 10

start_time = time.time()
for epoch in range(1, epoch_num+1):
    # 每轮训练完测试
    train(epoch)
    test()
    
end_time = time.time()
print("时间开销："+str(end_time-start_time))

Train Epoch: 1	Average loss: 0.0042	Accuracy: 54890/60000 (91.48%)
Test Epoch: 1	Average loss: 0.0011	Accuracy: 9774/10000 (97.74%)

Train Epoch: 2	Average loss: 0.0010	Accuracy: 58784/60000 (97.97%)
Test Epoch: 2	Average loss: 0.0007	Accuracy: 9862/10000 (98.62%)

Train Epoch: 3	Average loss: 0.0007	Accuracy: 59137/60000 (98.56%)
Test Epoch: 3	Average loss: 0.0007	Accuracy: 9855/10000 (98.55%)

Train Epoch: 4	Average loss: 0.0005	Accuracy: 59328/60000 (98.88%)
Test Epoch: 4	Average loss: 0.0004	Accuracy: 9916/10000 (99.16%)

Train Epoch: 5	Average loss: 0.0004	Accuracy: 59466/60000 (99.11%)
Test Epoch: 5	Average loss: 0.0005	Accuracy: 9908/10000 (99.08%)

Train Epoch: 6	Average loss: 0.0004	Accuracy: 59575/60000 (99.29%)
Test Epoch: 6	Average loss: 0.0005	Accuracy: 9907/10000 (99.07%)

Train Epoch: 7	Average loss: 0.0003	Accuracy: 59622/60000 (99.37%)
Test Epoch: 7	Average loss: 0.0005	Accuracy: 9911/10000 (99.11%)

Train Epoch: 8	Average loss: 0.0003	Accuracy: 59678/60000 (99.46%)
Te

## 5. 损失函数和正确率曲线

训练损失函数图 & 测试损失函数图 & 训练/测试正确率图

In [None]:
plt.figure(figsize=(12,6))
plt.plot(train_loss_list)
plt.title('train loss', fontsize=18)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.grid()
plt.show()

plt.figure(figsize=(12,6))
plt.plot(test_loss_list)
plt.title('test loss', fontsize=18)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.grid()
plt.show()

plt.figure(figsize=(12,8))
plt.plot(train_acc_list, 'o-')
plt.plot(test_acc_list, 'o-')
plt.title('Accuracy', fontsize=18)
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Accuracy', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(['Train', 'Test'], fontsize=16)
plt.grid()
plt.show()

# 任务2：torchvision预训练模型测试真实图像分类

**任务要求：利用torchvision中的预训练CNN模型来对真实的图像进行分类，预测每张图片的top5类别。**  
**数据: real_image, class_index.json**

In [None]:
import torch
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
import os
import json
import time
import matplotlib.pyplot as plt
%matplotlib inline

## 1. 类别索引

构建类别索引词典

In [None]:
f = open('./data/class_index.json')
class_index = json.load(f)
print('class num:', len(class_index))
class_dict = {int(k): v[1] for k, v in class_index.items()}
# print(class_dict)

### 2. 预训练模型

加载预训练CNN模型。[参考链接](https://pytorch.org/vision/stable/models.html)

In [None]:
#######################################################################
# TODO:                                                               #
# 加载预训练CNN模型                                                   #
#######################################################################

# 请在下方编写加载torchvision预训练模型代码
# 你可以对比多个模型的结果，例如alexnet/googlenet/resnet
# 注意pretrained参数设为True

model = models.googlenet(pretrained=True)


# 将模型设为测试模型，你将会用到.eval()方法


#######################################################################
#                         END OF YOUR CODE                            #
#######################################################################

### 3. 图像预处理

图像缩放、裁剪、转Tensor、归一化

In [None]:
# 图像预处理转换代码
image_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

### 4. 测试数据集加载

构建测试数据集，迭代返回预处理后的Tensor格式图像和原始图像

In [None]:
class TestDataset():
    def __init__(self, root, transforms=None):
        imgs = os.listdir(root)
        self.imgs = [os.path.join(root, img) for img in imgs]
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.imgs[index]
        img_pil = Image.open(img_path)
        label = None
        img_np = np.asarray(img_pil)
        data = self.transforms(img_pil)
        return data, img_np
    
    def __len__(self):
        return len(self.imgs)

In [None]:
test_dir = './data/real_image/'
test_dataset = TestDataset(test_dir, image_transforms)
print('test image num:', test_dataset.__len__())

### 5. 模型预测图像类别

在测试模式下，对于每张图片显示原始图像，并输出模型预测的top5类别及top1类别

In [None]:
# 预测图像类别
for i, (data, img_np) in enumerate(test_dataset):
    # 显示原始图像
    plt.imshow(img_np)
    plt.title('image-%d' % (i+1), fontsize=14)
    plt.show()
    
    # 预测图像类别
    predict = model(data.unsqueeze(0))
    predict = predict[0].cpu().data
    probs, topk_index = torch.topk(predict, k=5, dim=-1)
    pred_topk = [class_dict[k.item()] for k in topk_index]
    print('top-5:')
    for pred, prob in zip(pred_topk, probs):
        print('{}:  {:.4}%'.format(pred, prob))
    print('\ntop-1:', pred_topk[0])