# 卷积神经网络

In [21]:
import torch
import torch.nn as nn

In [22]:
def conv_example(in_channel, kernel):
    """Naive single-channel 2-D convolution (stride 1, no padding).

    Slides ``kernel`` over ``in_channel`` and, at every position, sums the
    element-wise product of the kernel and the window it covers. No kernel
    flipping is performed.

    Args:
        in_channel: 2-D tensor of shape (H, W), e.g. (28, 28).
        kernel: 2-D tensor of shape (kH, kW), e.g. (5, 5).

    Returns:
        Tensor of shape (H - kH + 1, W - kW + 1), e.g. (24, 24).

    Raises:
        ValueError: if either argument is not 2-D, or if the kernel is larger
            than the input along any dimension.
    """
    if in_channel.dim() != 2 or kernel.dim() != 2:
        raise ValueError('conv_example expects 2-D in_channel and kernel tensors')
    in_h, in_w = in_channel.shape
    k_h, k_w = kernel.shape
    # Without padding, each dimension shrinks by (kernel size - 1).
    out_h = in_h - k_h + 1
    out_w = in_w - k_w + 1
    if out_h <= 0 or out_w <= 0:
        raise ValueError('kernel must not be larger than the input')
    output = torch.zeros(out_h, out_w)
    for h in range(out_h):
        for w in range(out_w):
            # Window of the input currently covered by the kernel.
            window = in_channel[h: h + k_h, w: w + k_w]
            output[h, w] = (window * kernel).sum()
    return output

In [23]:
# One input channel, one output channel, 5x5 kernel. The bias is disabled so the
# layer output is just the sliding-window sum that conv_example reproduces.
m = nn.Conv2d(1, 1, (5, 5), bias=False)

In [24]:
# Random input laid out as (batch, channels, height, width) = (1, 1, 28, 28).
x = torch.randn(1, 1, 28, 28)

In [25]:
# Apply the convolution; with a 5x5 kernel and no padding the 28x28 input
# shrinks to 24x24 (28 - 5 + 1).
re = m(x)
re.shape

torch.Size([1, 1, 24, 24])

In [26]:
# Weight layout is (out_channels, in_channels, kernel_height, kernel_width).
m.weight.shape

torch.Size([1, 1, 5, 5])

In [27]:
# squeeze() drops the size-1 batch/channel dimensions so that both the input
# and the learned kernel are passed to the hand-written loop as 2-D tensors.
re1 = conv_example(x.squeeze(), m.weight.squeeze())

In [28]:
# The hand-written result is a bare 2-D map (no batch or channel dimensions).
re1.shape

torch.Size([24, 24])

In [29]:
# Check that nn.Conv2d and the hand-written loop agree element-wise to within
# 1e-3; re1 (24, 24) broadcasts against re (1, 1, 24, 24).
torch.all((re - re1).abs() < 0.001)

tensor(True)

In [30]:
# Multi-channel case: 3 input channels, 4 output channels, 5x5 kernel.
# The weight therefore holds one 3x5x5 filter per output channel.
m1 = nn.Conv2d(3, 4, (5, 5))
m1.weight.shape

torch.Size([4, 3, 5, 5])

In [31]:
# A batch of 10 three-channel 28x28 inputs; the layer maps it to 4 channels of
# 24x24 per sample.
x1 = torch.randn(10, 3, 28, 28)
m1(x1).shape

torch.Size([10, 4, 24, 24])

In [32]:
# 2x2 max pooling with stride 2: height and width are halved, while the batch
# and channel dimensions are left unchanged.
p = nn.MaxPool2d(2, 2)
p(x1).shape

torch.Size([10, 3, 14, 14])

### 卷积神经网络的实现

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Seed PyTorch's global random number generator before any data splitting,
# parameter initialisation or shuffling takes place below.
torch.manual_seed(12046)

<torch._C.Generator at 0x123e80d10>

In [34]:
# MNIST training images (downloaded to ./data on first use), converted to tensors.
dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# Hold out 10,000 of the 60,000 training images for validation.
train_set, val_set = random_split(dataset, [50000, 10000])
# Official MNIST test split.
test_set = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Every split is served in shuffled mini-batches of 500 samples.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=500, shuffle=True)
    for split in (train_set, val_set, test_set)
)


In [35]:
class CNN(nn.Module):
  """Small convolutional classifier for single-channel 28x28 images.

  Two (convolution -> ReLU -> 2x2 max-pool) stages are followed by two
  fully connected layers that produce 10 class logits.
  """

  def __init__(self):
    super().__init__()
    # Stage 1: 1 -> 20 channels, 5x5 kernel, then spatial down-sampling by 2.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
    # Stage 2: 20 -> 40 channels, 5x5 kernel, then spatial down-sampling by 2.
    self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5)
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    # Classifier head operating on the flattened 40 x 4 x 4 feature map.
    self.fc1 = nn.Linear(in_features=40 * 4 * 4, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=10)

  def forward(self, x):
    """Map a batch of images (B, 1, 28, 28) to class logits (B, 10)."""
    batch_size = x.size(0)
    # (B, 1, 28, 28) -> conv (B, 20, 24, 24) -> pool (B, 20, 12, 12)
    features = self.pool1(F.relu(self.conv1(x)))
    # (B, 20, 12, 12) -> conv (B, 40, 8, 8) -> pool (B, 40, 4, 4)
    features = self.pool2(F.relu(self.conv2(features)))
    # Flatten every sample to a vector of 40 * 4 * 4 = 640 values.
    flattened = features.view(batch_size, -1)
    hidden = F.relu(self.fc1(flattened))   # (B, 120)
    return self.fc2(hidden)                # (B, 10)


model = CNN()

In [36]:
# Number of mini-batches sampled from each loader when estimating loss/accuracy.
eval_iters = 10

def estimate_loss(model):
  """Estimate loss and accuracy of ``model`` on the train/val/test loaders.

  The model is put into evaluation mode while the metrics are computed and is
  afterwards returned to whatever mode (train or eval) it was in on entry,
  even if the evaluation raises.

  Args:
    model: the ``nn.Module`` to evaluate.

  Returns:
    dict with keys 'train', 'val' and 'test'; each value is the dict
    returned by ``_loss`` for the corresponding loader.
  """
  # Remember the caller's mode so it can be restored afterwards.
  was_training = model.training
  # Switch the model to evaluation mode.
  model.eval()
  try:
    return {
      'train': _loss(model, train_loader),
      'val': _loss(model, val_loader),
      'test': _loss(model, test_loader),
    }
  finally:
    # Restore the original mode, including when _loss raised.
    model.train(was_training)

@torch.no_grad()
def _loss(model, dataloader, num_batches=None):
  """Estimate mean cross-entropy loss and accuracy over a few mini-batches.

  Args:
    model: callable mapping a batch of inputs to logits of shape
      (batch, num_classes).
    dataloader: iterable yielding ``(inputs, labels)`` pairs.
    num_batches: maximum number of batches to evaluate. Defaults to the
      module-level ``eval_iters`` when omitted.

  Returns:
    dict with the float entries 'loss' and 'acc', each the unweighted mean of
    the per-batch values.

  Raises:
    ValueError: if no batch could be evaluated (empty loader or a
      non-positive ``num_batches``).
  """
  if num_batches is None:
    num_batches = eval_iters
  losses = []
  accs = []
  # zip() stops at the shorter iterable, so a loader holding fewer than
  # num_batches batches is evaluated in full instead of raising StopIteration.
  # range() comes first so that no surplus batch is pulled from the loader.
  for _, (inputs, labels) in zip(range(num_batches), dataloader):
    logits = model(inputs)
    losses.append(F.cross_entropy(logits, labels))
    # argmax over logits equals argmax over softmax probabilities.
    preds = torch.argmax(logits, dim=-1)
    accs.append((preds == labels).sum() / inputs.shape[0])
  if not losses:
    raise ValueError('no batches available to estimate the loss')
  return {
    'loss': torch.stack(losses).mean().item(),
    'acc': torch.stack(accs).mean().item()
  }

In [41]:
def train_model(model, optimizer, epochs=10, penalty=False):
  """Train ``model`` on ``train_loader`` and print per-epoch loss estimates.

  Args:
    model: network mapping a batch of inputs to class logits.
    optimizer: optimizer already bound to the model's parameters.
    epochs: number of full passes over ``train_loader``.
    penalty: when True, an L1 term (weight 0.001) and an L2 term
      (weight 0.002) over all model parameters are added to the loss that is
      back-propagated.

  Returns:
    list of floats: the cross-entropy loss of every training step, recorded
    before any penalty is added.
  """
  history = []
  for epoch in range(epochs):
    for inputs, labels in train_loader:
      batch_loss = F.cross_entropy(model(inputs), labels)
      # Log the plain data loss; the regularisation terms are not recorded.
      history.append(batch_loss.item())
      if penalty:
        flat_params = torch.cat([param.view(-1) for param in model.parameters()])
        l1_term = 0.001 * flat_params.abs().sum()
        l2_term = 0.002 * flat_params.square().sum()
        batch_loss = batch_loss + (l1_term + l2_term)
      optimizer.zero_grad()
      batch_loss.backward()
      optimizer.step()
    # End of epoch: report the estimated loss on each data split.
    stats = estimate_loss(model)
    train_loss, val_loss, test_loss = (
      f'{stats[split]["loss"]:.3f}' for split in ('train', 'val', 'test')
    )
    print(f'epoch {epoch} train {train_loss} val {val_loss} test {test_loss}')
  return history

In [42]:
# Train the plain CNN with Adam (lr=0.01) for the default number of epochs;
# the returned per-step loss history is discarded here.
_ = train_model(model, optim.Adam(model.parameters(), lr=0.01))

epoch 0 train 0.069 val 0.072 test 0.060
epoch 1 train 0.043 val 0.062 test 0.054
epoch 2 train 0.037 val 0.041 test 0.040
epoch 3 train 0.037 val 0.049 test 0.045
epoch 4 train 0.022 val 0.041 test 0.041
epoch 5 train 0.020 val 0.040 test 0.045
epoch 6 train 0.023 val 0.045 test 0.037
epoch 7 train 0.014 val 0.040 test 0.038
epoch 8 train 0.011 val 0.038 test 0.043
epoch 9 train 0.009 val 0.032 test 0.037


In [43]:
# Loss and accuracy of the trained CNN, estimated on a sample of batches from
# each of the train/val/test loaders.
estimate_loss(model)

{'train': {'loss': 0.013266381807625294, 'acc': 0.9953999519348145},
 'val': {'loss': 0.03646098077297211, 'acc': 0.9896000027656555},
 'test': {'loss': 0.036671943962574005, 'acc': 0.9911999702453613}}

In [46]:
class CNN2(nn.Module):
  """Convolutional classifier with layer normalisation and dropout.

  Same layout as a two-stage (conv -> ReLU -> max-pool) network with two
  fully connected layers, but each convolution output is layer-normalised
  before the ReLU and dropout (p=0.2) is applied to the hidden layer.
  Expects single-channel 28x28 inputs and produces 10 class logits.
  """

  def __init__(self):
    super().__init__()
    # Stage 1: 1 -> 20 channels; normalise over the whole (20, 24, 24) map.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
    self.ln1 = nn.LayerNorm(normalized_shape=[20, 24, 24])
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
    # Stage 2: 20 -> 40 channels; normalise over the whole (40, 8, 8) map.
    self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5)
    self.ln2 = nn.LayerNorm(normalized_shape=[40, 8, 8])
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    # Classifier head with dropout between the two linear layers.
    self.fc1 = nn.Linear(in_features=40 * 4 * 4, out_features=120)
    self.dp = nn.Dropout(p=0.2)
    self.fc2 = nn.Linear(in_features=120, out_features=10)

  def forward(self, x):
    """Map a batch of images (B, 1, 28, 28) to class logits (B, 10)."""
    batch_size = x.size(0)
    # (B, 1, 28, 28) -> conv + norm (B, 20, 24, 24) -> pool (B, 20, 12, 12)
    features = self.pool1(F.relu(self.ln1(self.conv1(x))))
    # (B, 20, 12, 12) -> conv + norm (B, 40, 8, 8) -> pool (B, 40, 4, 4)
    features = self.pool2(F.relu(self.ln2(self.conv2(features))))
    # Flatten every sample to a vector of 40 * 4 * 4 = 640 values.
    flattened = features.view(batch_size, -1)
    hidden = self.dp(F.relu(self.fc1(flattened)))   # (B, 120)
    return self.fc2(hidden)                         # (B, 10)


model2 = CNN2()

In [47]:
# Train the layer-norm + dropout variant with the same optimizer settings
# (Adam, lr=0.01) used for the plain CNN, so the two runs can be compared.
_ = train_model(model2, optim.Adam(model2.parameters(), lr=0.01))

epoch 0 train 0.098 val 0.108 test 0.106
epoch 1 train 0.044 val 0.054 test 0.044
epoch 2 train 0.038 val 0.045 test 0.048
epoch 3 train 0.030 val 0.048 test 0.040
epoch 4 train 0.036 val 0.055 test 0.048
epoch 5 train 0.015 val 0.038 test 0.029
epoch 6 train 0.023 val 0.043 test 0.034
epoch 7 train 0.011 val 0.033 test 0.030
epoch 8 train 0.011 val 0.032 test 0.031
epoch 9 train 0.011 val 0.036 test 0.038


In [48]:
# Loss and accuracy of the trained CNN2, estimated on a sample of batches from
# each of the train/val/test loaders.
estimate_loss(model2)

{'train': {'loss': 0.01224433071911335, 'acc': 0.9962000846862793},
 'val': {'loss': 0.044408101588487625, 'acc': 0.9894000291824341},
 'test': {'loss': 0.03508155792951584, 'acc': 0.991399884223938}}