# GhostNet

GhostNet 据称是华为提出的一种轻量级网络,效果优于 ShuffleNet 和 MobileNet。

In [76]:
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm
import os
import numpy as np
import math
print(torch.__version__)

1.10.1


## 模型结构

![pic](GhostModule.png) 

In [59]:
def _make_divisible(v, divisor, min_value=None):
    """Snap a channel count onto a multiple of ``divisor``.

    ``v`` is rounded to the nearest multiple of ``divisor`` (exact halves
    round up), then clamped from below by ``min_value``. If the rounded
    result ends up smaller than 90% of ``v``, one extra ``divisor`` step is
    added so that rounding never shrinks the width by more than 10%.

    :param v: requested channel count, e.g. 32 (may be a float)
    :param divisor: alignment step the result should be a multiple of
    :param min_value: lower bound for the result; defaults to ``divisor``
    :return: the adjusted channel count
    """
    lower_bound = divisor if min_value is None else min_value
    # Adding half a step before the floor-division rounds to the nearest multiple.
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    aligned = max(lower_bound, nearest_multiple)
    if aligned < 0.9 * v:
        return aligned + divisor
    return aligned


class GhostModule(nn.Module):
    """Ghost module: a regular convolution plus a cheap depthwise "ghost" branch.

    Half of the output channels (the "intrinsic" features, yellow in the
    figure) come from an ordinary convolution. The other half (the "ghost"
    features, red in the figure) are produced from the intrinsic features by
    a depthwise convolution. Both halves are concatenated along the channel
    axis, trimmed to ``out_channel``, batch-normalised and optionally passed
    through ReLU6.

    :param in_channel: number of input channels
    :param out_channel: number of output channels
    :param kernel_size: kernel size of the intrinsic convolution
    :param ratio: total channels / intrinsic channels; only 2 is supported
    :param dw_size: kernel size of the depthwise ghost convolution
    :param stride: stride of the intrinsic convolution
    :param relu: whether to apply ReLU6 after the final batch norm
    """

    def __init__(self, in_channel, out_channel, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True):
        super().__init__()
        assert ratio == 2, "GhostModule only supports ratio == 2"
        self.out_channel = out_channel
        # With ratio == 2 the output is split in half (rounded up for odd widths).
        mid_channel = math.ceil(out_channel / ratio)  # intrinsic (yellow) channels
        rest_channels = mid_channel * (ratio - 1)  # ghost (red) channels
        # Intrinsic features: ordinary convolution + BN + ReLU6.
        self.intrinsic_conv = nn.Sequential(
            nn.Conv2d(in_channel, mid_channel, kernel_size, stride, kernel_size // 2, bias=False),
            nn.BatchNorm2d(mid_channel),
            nn.ReLU6(inplace=True)
        )
        # Ghost features: depthwise convolution over the intrinsic features.
        self.ghost_conv = nn.Sequential(
            nn.Conv2d(mid_channel, rest_channels, dw_size, 1, dw_size // 2, groups=mid_channel, bias=False),
            nn.BatchNorm2d(rest_channels)
        )
        self.bn = nn.BatchNorm2d(out_channel)
        if relu:
            self.relu = nn.ReLU6(inplace=True)
        else:
            self.relu = None

    def forward(self, x):
        """Return the concatenated intrinsic and ghost features for ``x``."""
        x1 = self.intrinsic_conv(x)
        x2 = self.ghost_conv(x1)
        # Concatenate the intrinsic and the ghost features (not x1 twice).
        out = torch.cat((x1, x2), dim=1)
        # For an odd out_channel the concat is one channel too wide; trim it
        # so it matches the width self.bn was built for.
        out = out[:, :self.out_channel]
        out = self.bn(out)
        if self.relu is not None:
            out = self.relu(out)
        return out

class SqueezeExcite(nn.Module):
    """Squeeze-and-excitation gate that rescales each input channel.

    The input is globally average-pooled, passed through a 1x1 reduce
    convolution, ReLU6 and a 1x1 expand convolution, and the result is turned
    into a per-channel gate via ``relu6(t + 3) / 6``. The gate multiplies the
    original input.

    :param in_channel: number of input (and output) channels
    :param ratio: fraction of ``in_channel`` used for the reduced width
    :param divisor: alignment passed to ``_make_divisible`` for the reduced width
    """

    def __init__(self, in_channel, ratio=0.25, divisor=4):
        super().__init__()
        squeezed_ch = _make_divisible(in_channel * ratio, divisor)
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(in_channel, squeezed_ch, 1, bias=True)
        self.relu = nn.ReLU6(inplace=True)
        self.conv_expend = nn.Conv2d(squeezed_ch, in_channel, 1, bias=True)

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate."""
        pooled = self.avg(x)
        squeezed = self.relu(self.conv_reduce(pooled))
        gate = self.conv_expend(squeezed)
        # Piecewise-linear gate in [0, 1].
        gate = F.relu6(gate + 3.) / 6.
        return gate * x

![pic](GhostBottlenet.png)

In [60]:
class GhostBottleneck(nn.Module):
    """Residual bottleneck built from two Ghost modules.

    Main path: Ghost module (expand) -> depthwise conv + BN when stride is 2
    -> optional squeeze-and-excitation -> Ghost module (project, no final
    ReLU). The shortcut is the identity when the input and output widths
    match and stride is 1; otherwise it is a depthwise conv + BN followed by
    a 1x1 conv + BN so that both paths can be added.

    :param in_channel: number of input channels
    :param mid_channel: expanded width; the caller is responsible for passing
        a valid value (a multiple of 4), it is not checked here
    :param out_channel: number of output channels
    :param dw_kersize: kernel size of the depthwise convolutions
    :param stride: 1 or 2
    :param se_ratio: squeeze-and-excitation ratio; SE is used only when > 0
    """

    def __init__(self, in_channel, mid_channel, out_channel, dw_kersize=3,stride=1,se_ratio=0.0):
        super(GhostBottleneck, self).__init__()
        assert stride in (1,2)
        self.stride = stride
        self.has_se = se_ratio > 0.
        dw_padding = (dw_kersize - 1) // 2

        # Point-wise expansion (usually widens the features).
        self.ghost1 = GhostModule(in_channel, mid_channel, relu=True)

        # Depthwise convolution, only needed when down-sampling.
        if self.stride > 1:
            self.conv_dw = nn.Conv2d(
                mid_channel, mid_channel, dw_kersize,
                stride=stride, padding=dw_padding,
                groups=mid_channel, bias=False,
            )
            self.bn_dw = nn.BatchNorm2d(mid_channel)

        # Squeeze-and-excitation.
        self.se = SqueezeExcite(mid_channel, ratio=se_ratio) if self.has_se else None

        # Point-wise projection (usually narrows the features).
        self.ghost2 = GhostModule(mid_channel, out_channel, relu=False)

        # Shortcut: stays empty (identity) when shapes already match,
        # otherwise it adapts resolution and width to the main path.
        shortcut_layers = []
        if not (in_channel == out_channel and self.stride == 1):
            shortcut_layers = [
                nn.Conv2d(in_channel, in_channel, dw_kersize, stride=stride,
                          padding=dw_padding, groups=in_channel, bias=False),
                nn.BatchNorm2d(in_channel),
                nn.Conv2d(in_channel, out_channel, 1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_channel),
            ]
        self.shortcut = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Return the sum of the shortcut and the main bottleneck path."""
        # 1st ghost module
        main = self.ghost1(x)
        # Depthwise convolution
        if self.stride > 1:
            main = self.conv_dw(main)
            main = self.bn_dw(main)
        # Squeeze-and-excitation
        if self.se is not None:
            main = self.se(main)
        # 2nd ghost module
        main = self.ghost2(main)
        return self.shortcut(x) + main

## 网络结构

![pic](GhostNetStru.png)

In [64]:
class GhostNet(nn.Module):
    """GhostNet classifier assembled from stages of ``GhostBottleneck`` blocks.

    Layout: stem conv -> bottleneck stages -> 1x1 conv to the last expansion
    width -> global average pool -> 1x1 conv to 1280 channels -> dropout ->
    linear classifier.

    :param num_classes: size of the final linear layer's output
    :param cfgs: optional stage configuration replacing the class default; a
        list of stages, each a list of rows
        ``[kernel, expansion, out_channels, se_ratio, stride]``
    :param width: multiplier applied to every configured channel count
    :param dropout: dropout probability applied before the classifier
    :raises ValueError: if the configuration contains no bottleneck rows
    """

    # Default configuration. Each row is
    # [depthwise kernel size, expansion width, output channels, SE ratio, stride].
    cfgs = [
        # k, t, c, SE, s 
        # stage1
        [[3,  16,  16, 0, 1],
         [3,  48,  24, 0, 2]],
        # stage2
        [[3,  72,  24, 0, 1],
         [5,  72,  40, 0.25, 2]],
        # stage3
        [[5, 120,  40, 0.25, 1],
        [3, 240,  80, 0, 2]],
        # stage4
        [[3, 200,  80, 0, 1],
         [3, 184,  80, 0, 1],
         [3, 184,  80, 0, 1],
         [3, 480, 112, 0.25, 1],
         [3, 672, 112, 0.25, 1],
         [5, 672, 160, 0.25, 2]],
        # stage5
        [[5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1],
         [5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1]
        ]
    ]

    def __init__(self,num_classes=10,cfgs=None,  width=1.0, dropout=0.2):
        super().__init__()
        if cfgs:
            # Use the caller-supplied configuration instead of the class default.
            self.cfgs = cfgs
        self.dropout = dropout

        # Stem: 3-channel input, stride-2 3x3 convolution.
        output_channel = _make_divisible(width * 16, 4)
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, output_channel, kernel_size=3, padding=1, stride=2, bias=False),
            nn.BatchNorm2d(output_channel),
            nn.ReLU(inplace=True),
        )
        input_channel = output_channel

        stages = []
        exp_size = None  # expansion width of the last configured bottleneck
        for cfg in self.cfgs:
            layers = []
            # Each row unpacks as: kernel size, expansion width, output
            # channels, SE ratio, stride.
            for dw_kersize, expend_channel, c, se_ratio, stride in cfg:
                output_channel = _make_divisible(c * width, 4)
                hidden_channel = _make_divisible(expend_channel * width, 4)
                layers.append(GhostBottleneck(input_channel, hidden_channel, output_channel, dw_kersize, stride,
                              se_ratio=se_ratio))
                input_channel = output_channel
                exp_size = expend_channel
            stages.append(nn.Sequential(*layers))

        if exp_size is None:
            raise ValueError("cfgs must contain at least one bottleneck row")

        # With the default configuration exp_size is 960 at this point.
        output_channel = _make_divisible(exp_size * width, 4)
        stages.append(nn.Sequential(
                        nn.Conv2d(input_channel, output_channel, 1, bias=False),
                        nn.BatchNorm2d(output_channel),
                        nn.ReLU(inplace=True),
                        )
        )
        input_channel = output_channel
        self.block = nn.Sequential(*stages)

        # Head: pool to 1x1, widen to 1280 channels, then classify.
        output_channel = 1280
        self.global_avg = nn.AdaptiveAvgPool2d(1)
        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel, output_channel, 1, bias=True),
            nn.ReLU(inplace=True)
        )
        self.line = nn.Linear(output_channel, num_classes)

    def forward(self,x):
        """Return class scores of shape ``(batch, num_classes)`` for a 3-channel image batch."""
        out = self.conv1(x)
        out = self.block(out)
        out = self.global_avg(out)
        out = self.conv_last(out)
        # Flatten (batch, 1280, 1, 1) to (batch, 1280) for the linear layer.
        out = out.view(out.size(0), -1)
        if self.dropout > 0.:
            out = F.dropout(out, p=self.dropout, training=self.training)
        out = self.line(out)
        return out

In [65]:
# Build the network with its default arguments (num_classes=10, width=1.0).
net=GhostNet()
# The trailing semicolon keeps the notebook from printing the model repr.
net;

In [67]:
# Smoke test: run a random batch of 8 three-channel 224x224 inputs through the network.
data=torch.randn(8,3,224,224)
out=net(data)
# One score per class for each sample, i.e. shape (8, 10) with the default num_classes.
out.shape

torch.Size([8, 10])