In [1]:
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import datasets

# 卷积神经网络

## 相关概念

- Feature Extraction - 特征提取器: 利用卷积层提取特征
- Classifier - 分类器: 利用全连接层进行分类
- Convolutional Neural Network - 卷积神经网络
- Convolutional Layer - 卷积层
- Pooling Layer - 池化层
- Fully Connected Layer - 全连接层
- Activation Function - 激活函数
- Loss Function - 损失函数
- Optimizer - 优化器
- Batch Normalization - 批量归一化
- Dropout - 随机失活 (Dropout)
- Weight Initialization - 权重初始化
- Regularization - 正则化
- Activation - 激活函数



## Convolutional Layer

In [2]:
# Show how a Conv2d layer changes the shape of a batch of feature maps.
in_channel, out_channel, kernel_size = 5, 10, 3
height, width = 100, 100
batch_size = 1

# Conv2d consumes tensors laid out as (N, C_in, H, W), so height comes
# before width in the shape passed to randn.
input = torch.randn(batch_size, in_channel, height, width)
conv_layer = torch.nn.Conv2d(in_channel, out_channel, kernel_size)
output = conv_layer(input)

# No padding and stride 1: each spatial dimension shrinks by kernel_size - 1.
print(input.shape)
print(output.shape)
# Weight layout is (out_channels, in_channels, kernel_h, kernel_w).
print(conv_layer.weight.shape)

torch.Size([1, 5, 100, 100])
torch.Size([1, 10, 98, 98])
torch.Size([10, 5, 3, 3])


### Padding = 1 & Stride = 2

参数说明: 
- Padding: 填充大小，默认为0，表示不填充。
- Stride: 步长，默认为1。

In [3]:
# Hand-checkable example: a 5x5 single-channel image convolved with a 3x3
# kernel using padding=1 and stride=2, which yields a 3x3 output.
input = [
    3, 4, 6, 5, 7,
    2, 4, 6, 8, 2,
    1, 6, 7, 8, 4,
    9, 7, 4, 6, 2,
    3, 7, 5, 4, 1
]
# Reshape the flat list to (batch, channel, height, width).
input = torch.tensor(input, dtype=torch.float32).view(1, 1, 5, 5)

# bias=False so every output value is exactly the kernel/window dot product.
conv_layer = torch.nn.Conv2d(1, 1, 3, padding=1, stride=2, bias=False)
kernel = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.float32).view(1, 1, 3, 3)
# Overwrite the randomly initialised weights in place. copy_ under no_grad
# keeps `weight` a registered Parameter without going through `.data`.
with torch.no_grad():
    conv_layer.weight.copy_(kernel)

output = conv_layer(input)
print(output)

tensor([[[[ 91., 224., 127.],
          [192., 282., 122.],
          [ 96., 110.,  31.]]]], grad_fn=<ConvolutionBackward0>)


## 下采样: Max Pooling - 最大池化层

通道数不变; 使用 2×2 的池化窗口 (stride 默认等于 kernel_size) 时, 宽高各缩小为原来的一半

In [4]:
# 4x4 single-channel feature map, written row by row and reshaped to
# (batch, channel, height, width).
input = torch.tensor(
    [
        [3, 4, 6, 5],
        [2, 4, 6, 8],
        [1, 6, 7, 8],
        [9, 7, 4, 6],
    ],
    dtype=torch.float32,
).reshape(1, 1, 4, 4)

# 2x2 max pooling: each output value is the maximum of one 2x2 window.
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)

output = maxpooling_layer(input)
print(output)

tensor([[[[4., 8.],
          [9., 8.]]]])


## Prepare Dataset (ignore this part)

In [5]:
# Mini-batch size shared by the training and test loaders.
batch_size = 64

# Per-image preprocessing: convert the PIL image to a tensor, then normalize
# it with the mean (0.1307) and standard deviation (0.3081) given below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split, reshuffled by the loader.
train_dataset = datasets.MNIST(
    root='./dataset/mnist',
    train=True,
    transform=transform,
    download=True,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split, served in a fixed order.
test_dataset = datasets.MNIST(
    root='./dataset/mnist',
    train=False,
    transform=transform,
    download=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## 网络实现

In [6]:
class Net(torch.nn.Module):
    """Small CNN classifier: two conv -> ReLU -> max-pool stages and one linear layer.

    The linear layer expects 320 features per sample. For an input of shape
    (n, 1, 28, 28) the intermediate shapes are:
        conv1 -> (n, 10, 24, 24), pool -> (n, 10, 12, 12),
        conv2 -> (n, 20, 8, 8),   pool -> (n, 20, 4, 4),
        flatten -> (n, 320).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, 5)    # 1 -> 10 channels, 5x5 kernel
        self.conv2 = torch.nn.Conv2d(10, 20, 5)   # 10 -> 20 channels, 5x5 kernel
        self.pooling = torch.nn.MaxPool2d(2)      # shared by both stages (no parameters)
        self.fc = torch.nn.Linear(320, 10)        # 20 * 4 * 4 features -> 10 class scores

    def forward(self, x):
        """Compute unnormalized class scores for a batch of images.

        Args:
            x: Tensor of shape (n, 1, H, W). H and W must leave exactly 320
                features after the two conv/pool stages (e.g. 28 x 28).

        Returns:
            Tensor of shape (n, 10); no softmax is applied here.
        """
        x = self.pooling(F.relu(self.conv1(x)))
        x = self.pooling(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension: (n, 20, 4, 4) -> (n, 320).
        # torch.flatten does not depend on the tensor being contiguous.
        x = torch.flatten(x, start_dim=1)
        return self.fc(x)
    
# Instantiate the network.
model = Net()

# Cross-entropy loss, applied to the model outputs during training.
criterion = torch.nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the model.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.5,
)

## Move model to GPU (falls back to CPU when CUDA is unavailable)

In [None]:
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's parameters and buffers to the selected device.
model.to(device)

In [7]:
def train(epoch: int, log_interval: int = 100) -> None:
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer``,
    ``train_loader`` and ``device``.

    Args:
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
        log_interval: Number of batches averaged into each printed loss
            value. Must be positive. Batches left over after the last full
            interval are trained on but not reported.
    """
    # Make sure the model is in training mode, whatever mode it was left in.
    model.train()

    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader, start=1):
        inputs, target = inputs.to(device), target.to(device)
        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % log_interval == 0:
            print(f'[{epoch + 1}, {batch_idx:5d}] loss: {running_loss / log_interval:.3f}')
            running_loss = 0.0

def test() -> None:
    """Evaluate the global ``model`` on ``test_loader`` and print its accuracy.

    Uses the notebook-level ``model``, ``test_loader`` and ``device``. The
    model is switched to evaluation mode for the duration of the pass and
    returned to whatever mode it was in before the call.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    correct = 0
    total = 0

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                # Predicted class = index of the largest score in each row.
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode so later training is unaffected.
        model.train(was_training)

    print(f'Accuracy on test set: {100 * correct / total: .4f}%')

if __name__ == '__main__':
    NUM_EPOCHS = 10      # number of full passes over the training set
    EVAL_INTERVAL = 10   # call test() after every EVAL_INTERVAL-th epoch

    for epoch in range(NUM_EPOCHS):
        train(epoch)
        # Evaluate on the interval and always after the final epoch, so that
        # lowering NUM_EPOCHS can never skip evaluation altogether.
        is_last_epoch = epoch == NUM_EPOCHS - 1
        if (epoch + 1) % EVAL_INTERVAL == 0 or is_last_epoch:
            test()

[1,   100] loss: 0.998
[1,   200] loss: 0.314
[1,   300] loss: 0.230
[1,   400] loss: 0.195
[1,   500] loss: 0.170
[1,   600] loss: 0.145
[1,   700] loss: 0.150
[1,   800] loss: 0.131
[1,   900] loss: 0.115
[2,   100] loss: 0.107
[2,   200] loss: 0.107
[2,   300] loss: 0.093
[2,   400] loss: 0.076
[2,   500] loss: 0.093
[2,   600] loss: 0.087
[2,   700] loss: 0.095
[2,   800] loss: 0.086
[2,   900] loss: 0.082
[3,   100] loss: 0.085
[3,   200] loss: 0.080
[3,   300] loss: 0.075
[3,   400] loss: 0.069
[3,   500] loss: 0.070
[3,   600] loss: 0.064
[3,   700] loss: 0.068
[3,   800] loss: 0.058
[3,   900] loss: 0.070
[4,   100] loss: 0.061
[4,   200] loss: 0.062
[4,   300] loss: 0.066
[4,   400] loss: 0.061
[4,   500] loss: 0.057
[4,   600] loss: 0.062
[4,   700] loss: 0.059
[4,   800] loss: 0.064
[4,   900] loss: 0.057
[5,   100] loss: 0.063
[5,   200] loss: 0.051
[5,   300] loss: 0.059
[5,   400] loss: 0.050
[5,   500] loss: 0.052
[5,   600] loss: 0.051
[5,   700] loss: 0.046
[5,   800] 

## 作业

- 设计一个更加复杂的CNN: 
  - ConvLayer * 3
  - ReLU Layer * 3
  - MaxPoolingLayer * 3
  - Linear Layer * 3
- 尝试一个不同配置的CNN: 
  - 比较不同配置的CNN的性能