# 使用 PyTorch 搭建神经网络（LeNet、GoogLeNet）

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms

## LeNet
![](https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fupload-images.jianshu.io%2Fupload_images%2F18284906-035714552f62fba4.png&refer=http%3A%2F%2Fupload-images.jianshu.io&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1645880094&t=7143360b19ac36dc2a1894a8088d06c2)

In [2]:
#coding=utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class Net(nn.Module):
    """LeNet-style CNN: two conv + ReLU + max-pool stages, then three linear layers.

    ``fc1`` takes 16*5*5 flattened features, which is what the two conv/pool
    stages produce for a 1-channel 32x32 input (32 -> 28 -> 14 -> 10 -> 5).
    The network emits 10 raw scores (no softmax) per sample.
    """

    def __init__(self):
        # Run nn.Module's own initialisation first so submodules get registered.
        super().__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...)
        self.conv1 = nn.Conv2d(1, 6, 5)    # 1 input channel (grayscale) -> 6 feature maps, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6 feature maps -> 16 feature maps, 5x5 kernel
        # Fully connected layers (y = Wx + b): 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 output scores for a batch ``x`` of shape (N, 1, 32, 32).

        Only the forward pass has to be written; autograd derives the
        backward pass from it.
        """
        # Stage 1 and 2: convolution -> ReLU -> 2x2 max pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse everything except the batch dimension. torch.flatten copies
        # when needed, whereas x.view(...) raises on non-contiguous feature
        # maps (e.g. channels_last).
        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # The last layer has no activation; it returns raw scores.
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of features per sample, i.e. the product of all
        dimensions of ``x`` except the first (batch) one.

        Example: a (N, 4, 2, 2) tensor has 16 features per sample.
        """
        # [1:] skips the batch dimension; torch.Size.numel() multiplies the rest.
        return x.size()[1:].numel()

    def My_num(self, x):
        """Alternative to ``num_flat_features``: count the elements of the
        first sample by slicing it out of the batch.

        Returns 0 for an empty batch.
        """
        return x[0:1].numel()

net = Net()
# Print the module tree to see which layers (and therefore parameters) will be trained.
print(net)

# (batch, channels, height, width): stand-in for three 32x32 grayscale images.
# Named lenet_input rather than `input` so the builtin input() is not shadowed.
lenet_input = torch.rand(3, 1, 32, 32)
# Call the module itself rather than .forward() so registered hooks still run.
out = net(lenet_input)
print(out)

print("接下来是模型的保存与加载================")
torch.save(net.state_dict(), "./LeNet.pt")
# Rebuild the architecture, then load the saved parameters into it.
the_model = Net()
the_model.load_state_dict(torch.load("./LeNet.pt"))
out_reloaded = the_model(lenet_input)
print(out_reloaded)
# The reloaded model must reproduce the original model's output exactly.
assert torch.equal(out, out_reloaded)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
tensor([[ 0.0520,  0.0264,  0.0614,  0.0353, -0.1081, -0.0122,  0.0719,  0.0431,
         -0.1319, -0.0326],
        [ 0.0509,  0.0243,  0.0626,  0.0318, -0.1082, -0.0167,  0.0671,  0.0463,
         -0.1385, -0.0304],
        [ 0.0538,  0.0247,  0.0602,  0.0338, -0.1084, -0.0157,  0.0689,  0.0374,
         -0.1342, -0.0307]], grad_fn=<AddmmBackward>)
tensor([[ 0.0520,  0.0264,  0.0614,  0.0353, -0.1081, -0.0122,  0.0719,  0.0431,
         -0.1319, -0.0326],
        [ 0.0509,  0.0243,  0.0626,  0.0318, -0.1082, -0.0167,  0.0671,  0.0463,
         -0.1385, -0.0304],
        [ 0.0538,  0.0247,  0.0602,  0.0338, -0.1084, -0.0157,  0.0689,  0.0374,
         -0.1342, -0.0307]], grad_fn=<A

## GoogLeNet

![](https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fgithub.com%2Fliangshuang1995%2Fdeep-learning-papers%2Fraw%2Fmaster%2Fimages%2FGoogleNet%2Fgooglenet.png&refer=http%3A%2F%2Fgithub.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1646153738&t=356c216d4b11f6b43d751671bb253c85)

### GoogLeNet 是一个比较复杂的模型，适合拿它来练手。先定义 Inception 模块，以便复用。
### Inception 模块封装成类，其参数个数由论文中的表格确定

In [3]:
# 通过上一个模型的搭建可以知道，卷积层后面一般跟一个 ReLU 激活
# 这里将“卷积 + ReLU”封装成一个类
class BasicConv2d(nn.Module):
    """A 2-D convolution immediately followed by an in-place ReLU.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels of the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        # inplace=True overwrites the convolution output instead of allocating a new tensor.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution, then the ReLU, and return the result."""
        return self.relu(self.conv(x))



### 以下是inception模块需要的参数
![](GoogLeNet_Inception_parameter.png)

In [4]:
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along the channel axis.

    The parameters mirror the columns of the table in the paper (#1x1,
    #3x3 reduce, #3x3, #5x5 reduce, #5x5, pool proj) plus the input channel
    count. For inception(3a) they are 64, 96, 128, 16, 32, 32.

    Args:
        in_channels: channels of the incoming feature map.
        ch1x1: output channels of the 1x1 branch.
        ch3x3red: channels after the 1x1 reduction in front of the 3x3 conv.
        ch3x3: output channels of the 3x3 branch.
        ch5x5red: channels after the 1x1 reduction in front of the 5x5 conv.
        ch5x5: output channels of the 5x5 branch.
        pool_proj: output channels of the 1x1 conv after the max pool.

    Every branch keeps the spatial size, so the output has
    ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels at the input resolution.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()
        # Reminder of the Conv2d signature:
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)

        # Branch 1: a single 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 channel reduction, then a 3x3 convolution.
        # padding=1 pads zeros on all four sides so output size equals input size.
        reduce_3x3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv_3x3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 channel reduction, then a 5x5 convolution.
        # padding=2 keeps the output size equal to the input size.
        reduce_5x5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv_5x5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: size-preserving 3x3 max pool, then a 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate their outputs."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        # Tensors are laid out as (batch, channels, height, width), so dim=1
        # stacks the branch outputs along the channel axis.
        return torch.cat([branch(x) for branch in branches], dim=1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate feature map.

    The auxiliary heads differ only in their input channel count; the class
    count is exposed as well for flexibility.

    Args:
        in_channels: channels of the feature map fed to this head.
        num_classes: number of output scores.

    ``fc1`` is fixed at 2048 = 128 * 4 * 4 inputs, which matches the
    14x14 feature maps noted in ``forward``.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        # 1x1 convolution down to 128 channels -> [batch, 128, 4, 4]
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return auxiliary scores of shape (N, num_classes).

        Dropout is active only while the module is in training mode.
        """
        # Input is aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14.
        # After pooling: N x C x 4 x 4; after the 1x1 conv: N x 128 x 4 x 4.
        pooled = self.averagePool(x)
        # Flatten to N x 2048.
        features = torch.flatten(self.conv(pooled), 1)
        features = F.dropout(features, 0.5, training=self.training)
        # N x 2048 -> N x 1024
        hidden = F.relu(self.fc1(features), inplace=True)
        hidden = F.dropout(hidden, 0.5, training=self.training)
        # N x 1024 -> N x num_classes
        return self.fc2(hidden)


In [5]:
class GoogLeNet(nn.Module):
    """GoogLeNet classifier with two optional auxiliary heads.

    Args:
        num_classes: size of the main and auxiliary score vectors.
        aux_logits: when true, build the two auxiliary classifiers. They are
            evaluated only while the module is in training mode.
        init_weights: when true, run ``_initialize_weights`` at the end of
            construction.

    The shape comments in ``forward`` assume an N x 3 x 224 x 224 input.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        super().__init__()
        self.aux_logits = aux_logits

        # Stem: plain convolutions and pooling ahead of the Inception stacks.
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # The parallel (Inception) structure starts here.
        # Stage 3
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 4
        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 5
        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        # Auxiliary heads read the outputs of inception4a (512 ch) and inception4d (528 ch).
        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Classify a batch of images.

        Returns:
            ``(main, aux2, aux1)`` while training with ``aux_logits`` enabled,
            otherwise just ``main``. Each is of shape (N, num_classes).
        """
        # The auxiliary heads are skipped entirely outside training mode.
        use_aux = self.training and self.aux_logits

        # Stem: N x 3 x 224 x 224 -> N x 64 x 112 x 112 -> N x 64 x 56 x 56
        x = self.maxpool1(self.conv1(x))
        # N x 64 x 56 x 56 -> N x 192 x 56 x 56
        x = self.conv3(self.conv2(x))
        # N x 192 x 28 x 28
        x = self.maxpool2(x)

        # Stage 3: N x 256 x 28 x 28 -> N x 480 x 28 x 28
        x = self.inception3b(self.inception3a(x))
        # N x 480 x 14 x 14
        x = self.maxpool3(x)

        # Stage 4
        x = self.inception4a(x)          # N x 512 x 14 x 14
        if use_aux:
            # Side branch: keep x untouched and store the head's output separately
            # (i.e. do NOT write x = self.aux1(x)).
            aux1 = self.aux1(x)

        x = self.inception4b(x)          # N x 512 x 14 x 14
        x = self.inception4c(x)          # N x 512 x 14 x 14
        x = self.inception4d(x)          # N x 528 x 14 x 14
        if use_aux:
            aux2 = self.aux2(x)

        x = self.inception4e(x)          # N x 832 x 14 x 14
        x = self.maxpool4(x)             # N x 832 x 7 x 7

        # Stage 5: N x 832 x 7 x 7 -> N x 1024 x 7 x 7
        x = self.inception5b(self.inception5a(x))

        # Head: N x 1024 x 1 x 1 -> N x 1024 -> N x num_classes
        x = torch.flatten(self.avgpool(x), 1)
        x = self.fc(self.dropout(x))

        if not use_aux:
            return x
        return x, aux2, aux1

    def _initialize_weights(self):
        """Re-initialise every Conv2d (Kaiming normal) and Linear (normal,
        std 0.01) layer, setting biases to zero."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)


In [6]:
# Smoke-test GoogLeNet with a random batch of 16 RGB images.
input_image = torch.randn(16, 3, 227, 227)
net = GoogLeNet()
out = net(input_image)
# A freshly built module is in training mode and aux_logits defaults to True,
# so forward returns the tuple (main, aux2, aux1) rather than a single tensor.
assert isinstance(out, tuple) and len(out) == 3
# Every head must emit one score per class (default num_classes=1000) per image.
assert all(logits.shape == (16, 1000) for logits in out)
print(net)

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (relu): ReLU(inplace=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (relu): ReLU(inplace=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU(inplace=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), paddin

In [7]:
# Padding test: what does padding=1 actually pad with?
test_padding_model = nn.Conv2d(1, 1, kernel_size=1, padding=1)
input_test = torch.randn(1, 1, 4, 4)  # a single 4x4 one-channel image
print(input_test)
output_test = test_padding_model(input_test)
print("\n===========================================================\n")
print(output_test)
print("\n===========================================================\n")
print(test_padding_model.bias)
# With a 1x1 kernel each output pixel is weight * input + bias, so the padded
# border equals the bias exactly when the padding value is zero. Check it
# instead of relying on reading the printed numbers.
padded_ring = torch.cat([
    output_test[0, 0, 0, :],
    output_test[0, 0, -1, :],
    output_test[0, 0, :, 0],
    output_test[0, 0, :, -1],
])
assert torch.allclose(padded_ring, test_padding_model.bias.expand_as(padded_ring))
print("结论显然：pytorch中的padding=1 表示上下左右全补0")
# For other padding schemes, use Conv2d's padding_mode argument
# ('reflect', 'replicate', 'circular') or pad explicitly (e.g. with F.pad)
# at the appropriate place in forward.


tensor([[[[ 0.1775, -1.0830,  1.0195,  0.2721],
          [ 0.3878, -0.0881, -1.1467, -0.4244],
          [ 0.1487,  0.4978, -0.8305, -0.8895],
          [-0.1282,  0.5966, -0.7826, -1.0735]]]])


tensor([[[[-0.5662, -0.5662, -0.5662, -0.5662, -0.5662, -0.5662],
          [-0.5662, -0.5104, -0.9063, -0.2460, -0.4807, -0.5662],
          [-0.5662, -0.4444, -0.5939, -0.9263, -0.6995, -0.5662],
          [-0.5662, -0.5195, -0.4098, -0.8270, -0.8455, -0.5662],
          [-0.5662, -0.6065, -0.3788, -0.8120, -0.9033, -0.5662],
          [-0.5662, -0.5662, -0.5662, -0.5662, -0.5662, -0.5662]]]],
       grad_fn=<ThnnConv2DBackward>)


Parameter containing:
tensor([-0.5662], requires_grad=True)
结论显然：pytorch中的padding=1 表示上下左右全补0
