In [3]:
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm
import os
print(torch.__version__)

1.10.1


# 模型结构图

t 为扩张系数（expansion factor，即第一个 1x1 升维卷积的通道扩展倍数），c 为输出通道数，n 为该层重复的次数，s 为步长；输入图片大小为 224x224x3。

![pic](mobilenetv2_stru.png)

In [23]:
class ConvBNReLu(nn.Sequential):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and an in-place ``ReLU6``.

    The three layers are registered positionally (indices 0, 1, 2) on the
    underlying ``nn.Sequential``.

    Args:
        in_channel: number of input channels of the convolution.
        out_channel: number of output channels (also the BatchNorm width).
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``,
            which keeps the spatial size for odd kernels at stride 1.
        strides: convolution stride.
        groups: convolution groups (set equal to the channel count for a
            depthwise convolution).

    Attributes:
        out_channel: the ``out_channel`` value passed to the constructor.
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, strides=1, groups=1):
        same_pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(
            in_channel,
            out_channel,
            kernel_size=kernel_size,
            stride=strides,
            padding=same_pad,
            groups=groups,
            bias=False,  # the following BatchNorm supplies the shift term
        )
        norm = nn.BatchNorm2d(out_channel)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)
        self.out_channel = out_channel

class InvertedResidual(nn.Module):
    """MobileNetV2 inverted residual block.

    Layout: optional 1x1 expansion (``ConvBNReLu``) -> 3x3 depthwise
    ``ConvBNReLu`` -> 1x1 linear projection (``Conv2d`` + ``BatchNorm2d``, no
    activation). The input is added back to the output only when the block
    preserves the tensor shape, i.e. stride 1 and equal input/output channels.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels of the projection.
        strides: stride of the depthwise convolution; must be 1 or 2.
        pw_ratio: channel expansion ratio of the first 1x1 convolution. When
            it equals 1 the expansion layer is omitted.

    Attributes:
        use_shortcut: whether ``forward`` adds the identity shortcut.
        InvertedResidual: the ``nn.Sequential`` holding the block's layers.
        out_channel: the ``out_channel`` value passed to the constructor.
    """

    def __init__(self, in_channel, out_channel, strides, pw_ratio=6):
        super().__init__()
        # Only stride 1 or 2 is supported.
        assert strides in [1, 2]
        # The identity shortcut is only shape-compatible when the block neither
        # downsamples (stride 1) nor changes the channel count. The previous
        # condition tested ``strides == 2``, which disabled the shortcut for
        # every valid block and would have produced a shape mismatch for
        # stride-2 blocks with equal channels.
        self.use_shortcut = strides == 1 and in_channel == out_channel
        hidden_channel = int(round(in_channel * pw_ratio))
        layer = []
        if pw_ratio != 1:
            # 1x1 pointwise convolution that expands the channel count.
            layer.append(ConvBNReLu(in_channel, hidden_channel, kernel_size=1))
        layer.extend([
            # 3x3 depthwise convolution (groups == channels); carries the stride.
            ConvBNReLu(hidden_channel, hidden_channel, strides=strides, groups=hidden_channel),
            # 1x1 linear projection back to out_channel; no activation follows.
            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(out_channel),
        ])
        self.InvertedResidual = nn.Sequential(*layer)
        self.out_channel = out_channel

    def forward(self, x):
        """Apply the block, adding ``x`` back when ``use_shortcut`` is set."""
        out = self.InvertedResidual(x)
        if self.use_shortcut:
            return x + out
        return out
        

        

In [43]:
class MobileNetV2(nn.Module):
    """MobileNetV2 backbone with an optional linear classification head.

    ``self.bone`` is a stride-2 3x3 ``ConvBNReLu`` stem, followed by the
    ``InvertedResidual`` blocks described by ``net_stru``, followed by a 1x1
    ``ConvBNReLu`` that widens to ``last_channel``. When the classifier is
    enabled, the feature map is averaged over its two spatial dimensions and
    passed through dropout and a linear layer (``self.classify``).

    ``first_channel``, ``last_channel``, ``round_nearest`` and ``width_mult``
    are not constructor arguments; they are copied onto each instance from the
    class-level ``_defaults`` dictionary.
    """

    # Default network layout. Each row is (t, c, n, s):
    #   t - expansion ratio, forwarded to InvertedResidual as pw_ratio
    #   c - output channels of the stage (before width scaling and rounding)
    #   n - number of consecutive blocks in the stage
    #   s - stride of the first block of the stage (the remaining blocks use 1)
    net_stru=(
    (1, 16, 1, 1),
    (6, 24, 2, 2),
    (6, 32, 3, 2),
    (6, 64, 4, 2),
    (6, 96, 3, 1),
    (6, 160, 3, 2),
    (6, 320, 1, 1),
    )
    # Hyper-parameters copied onto every instance in __init__.
    _defaults={
        "first_channel":32,
        "last_channel":1280,
        "round_nearest":8,
        "width_mult":1.0
    }
    def __init__(self,num_class=10,net_stru=None,full_conn=True):
        '''
        Build the backbone and, optionally, the classification head.

        num_class: number of output classes of the linear head
        net_stru: sequence of (t, c, n, s) rows overriding the class default;
                  None keeps the default layout
        full_conn: whether to append (and apply in forward) the pooled
                   dropout + linear classification head

        Raises ValueError when the structure is empty or any row does not
        have exactly four entries.
        '''
        super().__init__()
        # Copy first_channel / last_channel / round_nearest / width_mult
        # from the class-level defaults onto this instance.
        self.__dict__.update(self._defaults)
        if net_stru is not None:
            # A structure was passed in explicitly: use it instead of the default.
            # (Testing against None rather than truthiness lets an explicitly
            # empty structure reach the validation below instead of being
            # silently replaced by the default.)
            self.net_stru=net_stru
        # Validate every row up front so a malformed layout fails with a clear
        # message rather than an unpacking error half-way through construction.
        if len(self.net_stru)==0 or any(len(row)!=4 for row in self.net_stru):
            raise ValueError("input net struct error %s" % (self.net_stru,))
        self.use_classfy=full_conn
        self.num_class=num_class

        # Keep channel counts divisible by round_nearest.
        self.first_channel=MobileNetV2._make_divisible(self.first_channel,self.round_nearest)
        # max(width_mult, 1.0): the final width is never scaled below the
        # default last_channel, only enlarged when width_mult > 1.
        self.last_channel=MobileNetV2._make_divisible(self.last_channel*max(self.width_mult,1.0),self.round_nearest)
        features=[ConvBNReLu(3,self.first_channel,strides=2)]
        # Running input-channel count for the next block.
        in_channel=self.first_channel
        for t,c,n,s in self.net_stru:
            out_channel=MobileNetV2._make_divisible(c*self.width_mult,self.round_nearest)
            for cnt in range(n):
                # Within a stage only the first block uses the stage stride;
                # the remaining blocks use stride 1.
                strides=s if cnt==0 else 1
                features.append(InvertedResidual(in_channel,out_channel,strides,pw_ratio=t))
                in_channel=out_channel
        # Final 1x1 convolution that widens to last_channel.
        features.append(ConvBNReLu(in_channel,self.last_channel,kernel_size=1))
        self.bone=nn.Sequential(*features)

        if self.use_classfy:
            self.classify=nn.Sequential(
                nn.Dropout(0.2),
                nn.Linear(self.last_channel,self.num_class),
            )
        self.weight_init()

    def weight_init(self):
        '''
        Initialise all submodules in place:
        Conv2d      - Kaiming-normal weights (fan_out), zero bias if present
        BatchNorm2d - weight 1, bias 0
        Linear      - normal(0, 0.01) weights, zero bias
        '''
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self,x):
        '''
        Run the backbone; when the classifier is enabled, average over the two
        spatial dimensions and apply the head.

        Returns the logits of shape (batch, num_class) when the classifier is
        enabled, otherwise the raw feature map produced by self.bone.
        '''
        out=self.bone(x)
        if self.use_classfy:
            # Average over dims 2 and 3 (height, width), e.g. turning a
            # batch x 1280 x 7 x 7 feature map into batch x 1280.
            out=out.mean([2,3])
            out=self.classify(out)
        return out


    @staticmethod
    def _make_divisible(v, divisor, min_value=None):
        '''
        Round v to the nearest multiple of divisor (halves round up), never
        returning less than min_value.

        :param v: requested channel count, e.g. 32 (may be a float after
                  width scaling)
        :param divisor: the result is a multiple of this value, typically 8
        :param min_value: lower bound for the result; defaults to divisor
        :return: the adjusted channel count (an int when divisor and
                 min_value are ints)
        '''
        if min_value is None:
            min_value = divisor
        # int(v + divisor / 2) // divisor * divisor rounds v to the nearest
        # multiple of divisor.
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # Do not let rounding shrink the value by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

        

In [45]:
# Smoke test: build the model with its defaults (num_class=10, classifier head enabled).
net=MobileNetV2()
# Random batch of 4 three-channel 224x224 inputs.
rand_data=torch.randn(4,3,224,224)
out=net(rand_data)
# Displayed as the cell result; expected to be (batch, num_class) = (4, 10).
out.shape

torch.Size([4, 10])