<a href="https://colab.research.google.com/github/BarryLiu-97/Pytorch-Tutorial/blob/master/08_Basic_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch

In [None]:
# Shared settings for the convolution demo that follows.
in_channels = 5       # channels of the input tensor
out_channels = 10     # channels produced by the convolution
width = height = 100  # spatial size of the (square) input
kernel_size = 3       # side length of the square kernel
batch_size = 1        # samples per mini-batch

Convolution:滤波，提取特征

In [None]:
# PyTorch layers operate on mini-batches, so the input needs a leading batch
# dimension. Conv2d expects the layout (batch, channels, height, width).
# NOTE: `input` shadows the Python built-in; the name is kept to match the other cells.
input = torch.randn(batch_size, in_channels, height, width)  # sampled from a standard normal distribution
# Kernel sizes are usually odd; a single int gives a square kernel.
# in_channels, out_channels and kernel_size are the three required arguments.
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size)
output = conv_layer(input)

print(input.shape)
print(output.shape)
# Weight layout is (out_channels, in_channels, kH, kW): the kernel's channel
# count must equal the input's channel count.
print(conv_layer.weight.shape)

torch.Size([1, 5, 100, 100])
torch.Size([1, 10, 98, 98])
torch.Size([10, 5, 3, 3])


Padding:希望输出的矩阵大小与原图像一致，故会在原图像外周添加一圈零再进行卷积操作。  
具体padding几圈零由卷积核的长和宽决定，3×3是一圈，5×5是两圈。  
即 padding 的圈数 = kernel_size // 2（整除，取商的整数部分），前提是 stride=1 且卷积核边长为奇数  
padding方式很多，常见是补零

In [None]:
# 5x5 single-channel image, listed row by row.
# NOTE: `input` shadows the Python built-in; the name is kept to match the other cells.
input = [3, 4, 6, 5, 7,
         2, 4, 6, 8, 2,
         1, 6, 7, 8, 4,
         9, 7, 4, 6, 2,
         3, 7, 5, 4, 1]
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor(list) constructor.
input = torch.tensor(input, dtype=torch.float32).view(1, 1, 5, 5)  # (batch, channels, height, width)

# padding=1 adds one ring of zeros so a 3x3 kernel keeps the 5x5 size.
# stride defaults to 1; bias=False means no bias is added after the convolution.
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
# Initialise the kernel by hand so the result can be checked manually.
kernel = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.float32).view(1, 1, 3, 3)  # (out_channels, in_channels, kH, kW)
# Copy into the existing parameter without recording the copy in autograd,
# instead of rebinding `.data`.
with torch.no_grad():
    conv_layer.weight.copy_(kernel)

output = conv_layer(input)
print(output)

tensor([[[[ 91., 168., 224., 215., 127.],
          [114., 211., 295., 262., 149.],
          [192., 259., 282., 214., 122.],
          [194., 251., 253., 169.,  86.],
          [ 96., 112., 110.,  68.,  31.]]]],
       grad_fn=<MkldnnConvolutionBackward>)


In [None]:
# 5x5 single-channel image, listed row by row.
# NOTE: `input` shadows the Python built-in; the name is kept to match the other cells.
input = [3, 4, 6, 5, 7,
         2, 4, 6, 8, 2,
         1, 6, 7, 8, 4,
         9, 7, 4, 6, 2,
         3, 7, 5, 4, 1]
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor(list) constructor.
input = torch.tensor(input, dtype=torch.float32).view(1, 1, 5, 5)  # (batch, channels, height, width)

# stride=2 moves the kernel two pixels at a time; with padding=1 the 5x5 input
# yields a 3x3 output. bias=False means no bias is added after the convolution.
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2, bias=False)
# Initialise the kernel by hand so the result can be checked manually.
kernel = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.float32).view(1, 1, 3, 3)  # (out_channels, in_channels, kH, kW)
# Copy into the existing parameter without recording the copy in autograd,
# instead of rebinding `.data`.
with torch.no_grad():
    conv_layer.weight.copy_(kernel)

output = conv_layer(input)
print(output)

tensor([[[[ 91., 224., 127.],
          [192., 282., 122.],
          [ 96., 110.,  31.]]]], grad_fn=<MkldnnConvolutionBackward>)


Pooling:降采样，降采样后通道数不变

In [None]:
# 4x4 single-channel image, listed row by row.
# NOTE: `input` shadows the Python built-in; the name is kept to match the other cells.
input = [3, 4, 6, 5,
         2, 4, 6, 8,
         1, 6, 7, 8,
         9, 7, 4, 6]
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor(list) constructor.
input = torch.tensor(input, dtype=torch.float32).view(1, 1, 4, 4)  # (batch, channels, height, width)

# When stride is omitted it defaults to kernel_size, so the 2x2 windows do not overlap.
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)
output = maxpooling_layer(input)
print(output)

tensor([[[[4., 8.],
          [9., 8.]]]])


# 简单的卷积神经网络处理MNIST

In [None]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import time

In [None]:
batch_size = 64

# Preprocessing applied to every image as it is loaded.
transform = transforms.Compose([
  # Convert the image to a tensor in channels x height x width layout.
  transforms.ToTensor(),
  # Standardise with the precomputed mean and standard deviation of MNIST.
  transforms.Normalize((0.1307, ), (0.3081, )),
])

# Arguments shared by the training and test splits.
mnist_kwargs = dict(root='../dataset/mnist', download=True, transform=transform)

# Training split, reshuffled every epoch.
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split, iterated in a fixed order.
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../dataset/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../dataset/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ../dataset/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../dataset/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../dataset/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ../dataset/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../dataset/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../dataset/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ../dataset/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../dataset/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../dataset/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../dataset/mnist/MNIST/raw
Processing...
Done!




In [None]:
class Net(torch.nn.Module):
  """Small CNN: two conv -> max-pool -> ReLU stages followed by a linear classifier.

  The linear layer expects 320 features per sample, which is 20 channels x 4 x 4.
  That is the size reached from a 1 x 28 x 28 input:
  28 -> conv5 -> 24 -> pool -> 12 -> conv5 -> 8 -> pool -> 4.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
    self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
    self.pooling = torch.nn.MaxPool2d(2)  # one stateless pooling layer reused by both stages
    self.fc = torch.nn.Linear(320, 10)    # 320 = 20 * 4 * 4 features in, 10 class scores out

  def forward(self, x):
    """Return the 10 raw class scores for a batch ``x`` of shape (batch, 1, H, W)."""
    n_samples = x.size(0)
    # Max-pooling and ReLU commute because ReLU is monotonic, so pooling first
    # gives the same values while applying ReLU to fewer elements.
    stage1 = F.relu(self.pooling(self.conv1(x)))
    stage2 = F.relu(self.pooling(self.conv2(stage1)))
    flat = stage2.view(n_samples, -1)  # flatten everything except the batch dimension
    return self.fc(flat)

model = Net()

In [None]:
# SGD with a momentum of 0.5 to smooth the parameter updates.
optimizer = optim.SGD(params=model.parameters(), momentum=0.5, lr=0.01)
# Cross-entropy loss, applied to the model's raw class scores.
criterion = torch.nn.CrossEntropyLoss()

如何在GPU中进行训练

In [None]:
# Use the first CUDA device when CUDA is available; otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
# Move the model's parameters and buffers to the chosen device. As the last
# expression of the cell, the returned module is also displayed.
model.to(device)

Net(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=320, out_features=10, bias=True)
)

用于计算的张量也要转移到cuda中

In [None]:
def train(epoch, log_interval=300):
  """Run one pass over ``train_loader`` and periodically print the mean loss.

  Uses the module-level ``model``, ``criterion``, ``optimizer``, ``device``
  and ``train_loader``.

  Args:
    epoch: Zero-based epoch index; it is printed one-based.
    log_interval: Number of mini-batches between log lines. Each line reports
      the mean loss over the batches seen since the previous line.
  """
  running_loss = 0.0
  for batch_idx, (inputs, target) in enumerate(train_loader):
    # Inputs and targets must be on the same device as the model.
    inputs, target = inputs.to(device), target.to(device)
    optimizer.zero_grad()

    # forward + backward + update
    outputs = model(inputs)
    loss = criterion(outputs, target)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if (batch_idx + 1) % log_interval == 0:
      # Divide by the number of batches actually accumulated, so the printed
      # value is a true per-batch mean.
      print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
      running_loss = 0.0

In [None]:
def test():
  correct = 0
  total = 0
  with torch.no_grad():
    for data in test_loader:
      inputs, target = data
      inputs, target = inputs.to(device), target.to(device)
      outputs = model(inputs)
      _, predicted = torch.max(outputs.data, dim=1)
      total += target.size(0)
      correct += (predicted == target).sum().item()
  print('Accuracy on test set: %d %% [%d/%d]' % (100*correct / total, correct, total))

In [None]:
num_epochs = 10

# Time the whole train/evaluate run.
start = time.time()
for epoch in range(num_epochs):
  train(epoch)  # one pass over the training set
  test()        # accuracy on the test set after this epoch
end = time.time()
print(f'{end - start}s')

[1,   300] loss: 0.106
[1,   600] loss: 0.028
[1,   900] loss: 0.021
Accuracy on test set: 95 % [9589/10000]
[2,   300] loss: 0.016
[2,   600] loss: 0.016
[2,   900] loss: 0.014
Accuracy on test set: 97 % [9794/10000]
[3,   300] loss: 0.011
[3,   600] loss: 0.012
[3,   900] loss: 0.011
Accuracy on test set: 97 % [9790/10000]
[4,   300] loss: 0.009
[4,   600] loss: 0.010
[4,   900] loss: 0.010
Accuracy on test set: 98 % [9838/10000]
[5,   300] loss: 0.008
[5,   600] loss: 0.009
[5,   900] loss: 0.008
Accuracy on test set: 98 % [9863/10000]
[6,   300] loss: 0.007
[6,   600] loss: 0.007
[6,   900] loss: 0.008
Accuracy on test set: 98 % [9860/10000]
[7,   300] loss: 0.007
[7,   600] loss: 0.007
[7,   900] loss: 0.007
Accuracy on test set: 98 % [9872/10000]
[8,   300] loss: 0.006
[8,   600] loss: 0.007
[8,   900] loss: 0.006
Accuracy on test set: 98 % [9887/10000]
[9,   300] loss: 0.005
[9,   600] loss: 0.005
[9,   900] loss: 0.006
Accuracy on test set: 98 % [9869/10000]
[10,   300] loss: 0