# 基础的卷积神经网络

## 基本卷积的构成

In [1]:
import torch
# Number of channels in the convolution's input and output feature maps
in_channels = 5
out_channels = 10
# Spatial size of the image
width = 100
height = 100
# Side length of the square convolution kernel
kernel_size = 3
# PyTorch layers work on mini-batches, so the data carries a batch dimension
batch_size = 1

In [2]:
# Random input image tensor. Conv2d expects the layout (N, C, H, W), i.e.
# batch, channels, height, width, so height is passed before width.
# NOTE: the name `input` shadows the builtin; it is kept because the cells
# below refer to it.
input = torch.randn(batch_size, in_channels, height, width)
input

tensor([[[[-8.4643e-01, -5.8046e-01,  1.6872e+00,  ..., -8.2302e-01,
           -5.5077e-01,  6.6146e-01],
          [-1.0910e+00,  1.4095e+00,  6.3339e-01,  ...,  1.7499e+00,
           -6.5943e-01, -1.6500e+00],
          [ 4.7274e-01, -3.8538e-01,  4.3977e-01,  ..., -4.5004e-01,
           -1.0148e+00,  2.1304e-01],
          ...,
          [-1.8466e+00,  9.1769e-01,  9.4826e-01,  ...,  9.1058e-01,
            1.3262e+00,  2.9851e+00],
          [-4.8437e-01,  6.7868e-01, -1.1628e+00,  ..., -9.3004e-01,
           -6.7007e-02,  3.0856e-01],
          [ 2.4550e-01,  1.3549e+00,  7.7241e-01,  ..., -1.2235e+00,
           -1.1682e+00, -6.2993e-01]],

         [[ 3.4457e-01,  7.0932e-01,  5.0585e-01,  ..., -1.3123e+00,
            6.3619e-01, -3.1562e+00],
          [ 6.4439e-01, -1.3469e-01,  1.2464e+00,  ...,  8.0558e-01,
           -7.1414e-01,  2.7259e-04],
          [ 1.1583e+00, -1.4840e-01,  1.0533e+00,  ...,  7.1159e-01,
            1.5288e+00, -4.1921e-02],
          ...,
     

In [3]:
# Build a 2-D convolution layer that maps in_channels feature maps to
# out_channels feature maps using square kernels of side kernel_size
conv_layer = torch.nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
)

In [4]:
# Feed the input tensor through the convolution layer
output = conv_layer(input)
output

tensor([[[[ 5.3372e-01, -2.5594e-01,  1.8515e-01,  ...,  7.7881e-01,
           -2.4440e-02, -1.8127e-02],
          [ 4.3289e-01, -2.7195e-01,  6.4697e-01,  ..., -3.1034e-01,
            6.6424e-02, -8.8279e-01],
          [ 4.9335e-01, -3.5963e-01,  2.7641e-01,  ..., -1.6746e-01,
            2.5143e-01,  5.2255e-01],
          ...,
          [ 2.7859e-01, -9.2822e-01,  2.1141e-01,  ..., -4.6467e-01,
            4.6481e-01, -4.1446e-01],
          [-6.0661e-01,  4.8515e-01, -2.9640e-01,  ..., -1.0958e-01,
            7.5240e-01,  1.1881e+00],
          [-3.4446e-01,  1.8836e-03,  3.0658e-01,  ..., -8.2286e-01,
           -4.8712e-01,  3.0689e-01]],

         [[ 2.5905e-01,  1.8637e-01, -1.0172e+00,  ...,  8.1164e-01,
            2.6819e-01,  1.1422e+00],
          [-2.6802e-01, -1.6541e-01, -3.1709e-01,  ..., -3.8122e-01,
            5.2809e-01, -9.6966e-01],
          [ 5.9662e-01,  8.8899e-01,  3.9614e-01,  ..., -6.5002e-01,
            5.9954e-01,  8.6902e-01],
          ...,
     

In [5]:
print(input.shape) # input: 5 channels, 100 * 100
print(output.shape) # output: 10 channels, 98 * 98
print(conv_layer.weight.shape) # weight shape: 10 is the number of output channels (10 filters); 5 is the number of input channels, so each filter holds five 3*3 kernels (5 is the filter's own channel count)

# The convolution layer does not fix the spatial size of its input: a larger image gives a larger output, a smaller image a smaller one
# What must match is the channel count of the input and the input-channel count of the layer

torch.Size([1, 5, 100, 100])
torch.Size([1, 10, 98, 98])
torch.Size([10, 5, 3, 3])


## 填充 padding

In [6]:
import torch
# A 5 x 5 single-channel image written row by row as a flat list of 25 values
input = [
    3, 4, 6, 5, 7,
    2, 4, 6, 8, 2,
    1, 6, 7, 8, 4,
    9, 7, 4, 6, 2,
    3, 7, 5, 4, 1,
]

input

[3, 4, 6, 5, 7, 2, 4, 6, 8, 2, 1, 6, 7, 8, 4, 9, 7, 4, 6, 2, 3, 7, 5, 4, 1]

In [7]:
# view arguments: batch size, channel count, then the two spatial dimensions (5 x 5)
input = torch.Tensor(input).view(1,1,5,5)
input

tensor([[[[3., 4., 6., 5., 7.],
          [2., 4., 6., 8., 2.],
          [1., 6., 7., 8., 4.],
          [9., 7., 4., 6., 2.],
          [3., 7., 5., 4., 1.]]]])

In [18]:
# padding=1 surrounds the *input* with a one-pixel border of zeros before the
# kernel is applied; bias=False leaves the kernel weights as the only parameters
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
conv_layer

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

In [19]:
# Conv2d weights have the shape (out_channels, in_channels, kernel_h, kernel_w),
# so the first two view arguments are the output and input channel counts
kernel = torch.Tensor([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]]).view(1, 1, 3, 3)
# Overwrite the randomly initialised weights with the hand-written kernel
conv_layer.weight.data = kernel.data
conv_layer.weight.data

tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]])

In [21]:
# With padding=1 the output is a 5*5 matrix; with padding=0 it would be 3*3
output = conv_layer(input)
print(output)

tensor([[[[ 91., 168., 224., 215., 127.],
          [114., 211., 295., 262., 149.],
          [192., 259., 282., 214., 122.],
          [194., 251., 253., 169.,  86.],
          [ 96., 112., 110.,  68.,  31.]]]], grad_fn=<ConvolutionBackward0>)


## 添加stride

In [22]:
import torch
# A 5 x 5 single-channel image; the view arguments are batch size, channel
# count and the two spatial dimensions
input = torch.Tensor([
    [3, 4, 6, 5, 7],
    [2, 4, 6, 8, 2],
    [1, 6, 7, 8, 4],
    [9, 7, 4, 6, 2],
    [3, 7, 5, 4, 1],
]).view(1, 1, 5, 5)

# stride=2 moves the kernel two positions per step; no padding is used here
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, stride=2, bias=False)

# Conv2d weights have the shape (out_channels, in_channels, kernel_h, kernel_w)
kernel = torch.Tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]).view(1, 1, 3, 3)
# Overwrite the randomly initialised weights with the hand-written kernel
conv_layer.weight.data = kernel.data

# A 5 x 5 input with a 3 x 3 kernel and stride 2 yields a 2 x 2 output
output = conv_layer(input)
print(output)

tensor([[[[211., 262.],
          [251., 169.]]]], grad_fn=<ConvolutionBackward0>)


## 下采样

下采样中用得比较多的是 MaxPooling（最大池化层），它没有权重。
比如 2 * 2 的 MaxPooling，stride 默认等于 kernel_size（这里即 2），相当于把图像分成 2 * 2 的小块，每块只保留最大值。
池化时，batch 和 channel 的大小不会改变，只有宽和高会缩小。

In [25]:
import torch

# A 4 x 4 single-channel image; the view arguments are batch size, channel
# count and the two spatial dimensions
input = torch.Tensor([
    [3, 4, 6, 5],
    [2, 4, 6, 8],
    [1, 6, 7, 8],
    [9, 7, 4, 6],
]).view(1, 1, 4, 4)

# 2 x 2 max pooling; stride is left at its default, which equals kernel_size
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)

# Each non-overlapping 2 x 2 window is reduced to its largest value
output = maxpooling_layer(input)
print(output)

tensor([[[[4., 8.],
          [9., 8.]]]])


## 卷积神经网络基本应用

### 准备数据集

In [29]:
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
# ToTensor converts each image into a float tensor with values in [0, 1].
# Normalize then subtracts the mean 0.1307 and divides by the standard
# deviation 0.3081, so the inputs end up with roughly zero mean and unit variance.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
# The transform is handed to the dataset, which applies it to every sample
train_dataset = datasets.MNIST(
    root='./dataset/mnist',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(
    root='./dataset/mnist',
    train=False,
    download=True,
    transform=transform,
)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

### 定义模型

In [30]:
class Net(torch.nn.Module):
    """Small convolutional classifier: two conv + max-pool stages and a linear head.

    The 320 input features of the linear layer correspond to an input of
    shape (n, 1, 28, 28): conv1 (5x5) gives 24x24, pooling 12x12,
    conv2 (5x5) 8x8, pooling 4x4, and 20 channels * 4 * 4 = 320.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 10 feature maps
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        # 10 feature maps -> 20 feature maps
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # 2x2 max pooling, shared by both stages (it has no parameters)
        self.pooling = torch.nn.MaxPool2d(kernel_size=2)
        # Linear head: 320 flattened features -> 10 class scores
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return class scores of shape (n, 10) for a batch ``x`` of shape (n, 1, 28, 28)."""
        batch_size = x.size(0)
        x = self.pooling(F.relu(self.conv1(x)))  # (n, 10, 12, 12)
        x = self.pooling(F.relu(self.conv2(x)))  # (n, 20, 4, 4)
        # Flatten everything except the batch dimension: (n, 20, 4, 4) -> (n, 320)
        x = x.view(batch_size, -1)
        return self.fc(x)
        
    
model = Net()

# Move the model to the GPU for computation
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model.to(device)

### 构造损失函数和优化器

In [31]:
# Cross-entropy loss computed on the class scores produced by the model
criterion = torch.nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all model parameters
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.5,
)

### 构造训练器

In [32]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    After every 300 mini-batches the mean loss of those 300 batches is
    printed, labelled with the 1-based epoch and batch numbers.
    """
    loss_sum = 0.0
    for step, (inputs, target) in enumerate(train_loader, start=1):
        # To train on the GPU, move inputs and target to the device here
        # inputs,target = inputs.to(device),target.to(device)
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass, backward pass, parameter update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        if step % 300:
            continue
        print('[%d,%5d] loss:%.3f' % (epoch+1, step, loss_sum/300))
        # Start a fresh running total for the next 300 batches
        loss_sum = 0.0

### 测试

In [33]:
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy in percent.

    Only forward passes are needed, so gradient tracking is disabled. The
    model is switched to evaluation mode for the duration of the run and its
    previous training/evaluation mode is restored afterwards.
    """
    # Number of correctly classified samples and number of samples seen
    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        # No gradients are computed inside this block
        with torch.no_grad():
            for images, labels in test_loader:
                # To evaluate on the GPU, move images and labels to the device here
                # images,labels = images.to(device),labels.to(device)
                outputs = model(images)
                # argmax over dim=1 gives, for each row, the index of the
                # largest score; that index is the predicted class
                predicted = outputs.argmax(dim=1)
                # size(0) is the length of the first dimension, i.e. the
                # number of samples in this batch
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    # Correct predictions divided by the total, as a whole-number percentage
    print('Accuracy on test set: %d %%' % (100 * correct/total))

In [34]:
for epoch in range(10):
    # One training pass over the training set
    train(epoch)
    # Then one evaluation on the test set
    test()

[1,  300] loss:0.640
[1,  600] loss:0.203
[1,  900] loss:0.142
Accuracy on test set: 96 %
[2,  300] loss:0.110
[2,  600] loss:0.096
[2,  900] loss:0.094
Accuracy on test set: 97 %
[3,  300] loss:0.081
[3,  600] loss:0.070
[3,  900] loss:0.076
Accuracy on test set: 97 %
[4,  300] loss:0.066
[4,  600] loss:0.062
[4,  900] loss:0.060
Accuracy on test set: 98 %
[5,  300] loss:0.062
[5,  600] loss:0.051
[5,  900] loss:0.054
Accuracy on test set: 98 %
[6,  300] loss:0.048
[6,  600] loss:0.051
[6,  900] loss:0.048
Accuracy on test set: 98 %
[7,  300] loss:0.043
[7,  600] loss:0.045
[7,  900] loss:0.047
Accuracy on test set: 98 %
[8,  300] loss:0.041
[8,  600] loss:0.041
[8,  900] loss:0.043
Accuracy on test set: 98 %
[9,  300] loss:0.040
[9,  600] loss:0.038
[9,  900] loss:0.036
Accuracy on test set: 98 %
[10,  300] loss:0.034
[10,  600] loss:0.038
[10,  900] loss:0.035
Accuracy on test set: 98 %


### 总结
之前需要22轮才能达到98%，而从上面的输出看，现在第4轮测试准确率就达到了98%，模型确实改进了许多