# ShuffleNetV2

# 模型结构

## 基本模块

![pic](shufflenetv2_unit.jpg)

## 模型骨架

![pic](shufflenetv2_stru.jpg)

In [1]:
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm
import os
print(torch.__version__)

1.10.2


In [2]:
# 基本单元    
class ShuffleUnit(nn.Module):
    """Basic ShuffleNetV2 unit: two parallel paths, concatenation, channel shuffle.

    * ``strides == 1``: the input is split in half along the channel axis; one
      half is passed through unchanged and the other half goes through the
      convolution branch. The input must therefore carry ``out_channel``
      channels.
    * ``strides == 2``: both paths receive the full input, each produces
      ``out_channel // 2`` channels and both convolve with stride 2.

    Args:
        in_channel: channel count used to size the layers that read the input.
            It is only used when ``strides == 2``; with ``strides == 1`` all
            layer widths follow from ``out_channel``.
        out_channel: channel count of the output (each path contributes half).
        strides: stride of the depthwise 3x3 convolutions, 1 or 2.
        grps: number of groups used by the final channel shuffle.

    Raises:
        ValueError: if ``strides`` is neither 1 nor 2.
    """

    def __init__(self,in_channel,out_channel,strides,grps=2):
        super().__init__()
        # strides must be 1 or 2
        if strides not in (1,2):
            raise ValueError("input strides %s" % (strides,))

        self.strides=strides
        self.groups=grps

        # Only the 1x1 convolutions change the channel count; the 3x3 ones are
        # depthwise (groups == channels) and keep it unchanged.
        # Both paths end at half of the requested output width.
        mid_channel=out_channel//2
        if strides==1:
            # Identity shortcut for the first half of the split input.
            self.short=nn.Sequential()
            # The branch only sees the other half of the split input, so its
            # first convolution must be sized for half of the channels.
            branch_in=mid_channel
        else:
            self.short=nn.Sequential(
                nn.Conv2d(in_channel, in_channel, 3, stride=self.strides, padding=1, groups=in_channel, bias=False),
                nn.BatchNorm2d(in_channel),
                nn.Conv2d(in_channel, mid_channel, 1, bias=False),
                nn.BatchNorm2d(mid_channel),
                nn.ReLU6(inplace=True),
            )
            # No split when downsampling: the branch reads the full input.
            branch_in=in_channel
        self.branch=nn.Sequential(
            nn.Conv2d(branch_in,mid_channel,1,bias=False),
            nn.BatchNorm2d(mid_channel),
            nn.ReLU6(inplace=True),
            nn.Conv2d(mid_channel,mid_channel,3,stride=self.strides,padding=1,groups=mid_channel,bias=False),
            nn.BatchNorm2d(mid_channel),
            nn.Conv2d(mid_channel,mid_channel,1,bias=False),
            nn.BatchNorm2d(mid_channel),
            nn.ReLU6(inplace=True),
        )

    def forward(self,x):
        """Run both paths on ``x``, concatenate along channels and shuffle."""
        if self.strides==1:
            short,res=ShuffleUnit.channel_split(x)
        else:
            # strides == 2: the shortcut and the branch both read the full input
            short=x
            res=x
        out=torch.cat((self.short(short),self.branch(res)),dim=1)
        # Shuffle with the configured number of groups (2 by default).
        return ShuffleUnit.shuffle(out,self.groups)

    @staticmethod
    def channel_split(x,grps=2):
        """Split ``x`` into ``grps`` chunks along dim 1 (two halves by default)."""
        return x.chunk(grps,dim=1)

    @staticmethod
    def shuffle(x, groups=2):
        """Interleave the channels of a 4-D tensor across ``groups`` groups.

        The channel axis is viewed as (groups, C // groups), the two axes are
        swapped, and the result is flattened back to C channels.
        """
        N, C, H, W = x.size()
        out = x.view(N, groups, C // groups, H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W)
        return out

In [3]:
# 主结构
class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 image classifier assembled from ``ShuffleUnit`` stages.

    Layout: 3x3 stride-2 convolution, 3x3 stride-2 max-pool, three stages of
    shuffle units, a 1x1 convolution, global average pooling and a linear
    classifier. The spatial sizes quoted in the comments are for a 224x224
    input.

    Args:
        scale: width multiplier; must be one of ``ShuffleNetV2.scale``.
        in_channel: number of channels of the input image.
        class_num: number of output classes.
        pre_train: when true, ``init_params`` is skipped (presumably because
            weights are loaded by the caller afterwards).
    """

    # Channel widths per configuration: (conv1, stage2, stage3, stage4, conv5).
    # Rows are in the same order as ``scale`` below.
    output_channel=(
        (24,48,96,192,1024),
        (24,116,232,464,1024),
        (24,176,352,704,1024),
        (24,244,488,976,2048),
    )
    # Number of units in stage2, stage3 and stage4.
    repeat_times=(4,8,4)
    # Supported width multipliers, one per row of ``output_channel``.
    scale=(0.5,1,1.5,2)

    def __init__(self, scale=1,in_channel=3, class_num=10,pre_train=False):
        super().__init__()
        assert scale in self.scale, "scale must be one of %s, got %r" % (self.scale, scale)
        # Row of ``output_channel`` that belongs to the requested scale.
        self.use_cfg=self.scale.index(scale)
        print("==use ShuffleNetV2 mode %s=="%scale)
        widths=self.output_channel[self.use_cfg]
        # Stem: regular convolution with stride 2.
        self.conv1 = nn.Sequential(
                nn.Conv2d(in_channel, widths[0], kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(widths[0]),
                nn.ReLU(inplace=True)
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # stage2 input: 56x56x24
        self.stage2 = self.make_layers(widths[0], widths[1], self.repeat_times[0], 2)
        # stage3 input: 28x28
        self.stage3 = self.make_layers(widths[1], widths[2], self.repeat_times[1], 2)
        # stage4 input: 14x14
        self.stage4 = self.make_layers(widths[2], widths[3], self.repeat_times[2], 2)
        # conv5 output: 7x7x1024 or 7x7x2048 depending on the scale
        self.conv5 = nn.Sequential(
                nn.Conv2d(widths[3], widths[4], 1, bias=False),
                nn.BatchNorm2d(widths[4]),
                nn.ReLU(inplace=True)
        )
        # Global pooling to 1x1 per channel.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(widths[4], class_num)
        if not pre_train:
            self.init_params()

    def make_layers(self, in_channels, output_channel, layers_num, stride):
        """Build one stage of ``layers_num`` shuffle units.

        Only the first unit uses ``stride`` and changes the channel count from
        ``in_channels`` to ``output_channel``; the remaining units use stride 1
        and keep ``output_channel`` channels.
        """
        layers = [ShuffleUnit(in_channels, output_channel, stride)]
        for _ in range(layers_num - 1):
            # A stride-1 unit splits its input in half and feeds only one half
            # to its convolution branch, so that branch reads
            # output_channel // 2 channels.
            layers.append(ShuffleUnit(output_channel // 2, output_channel, 1))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        out = self.conv1(x)
        out = self.maxpool(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)
        out = self.conv5(out)
        out = self.avgpool(out)
        # Flatten everything after the batch dimension.
        out = out.flatten(1)
        out = self.fc(out)
        return out

    def init_params(self):
        """Initialise the weights of all convolution, batch-norm and linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Kaiming (He) initialisation for convolutions.
                nn.init.kaiming_normal_(m.weight)
                # The convolutions in this model are bias-free, but guard anyway.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Small random weights: a constant weight matrix would make
                # every class logit identical at the start of training.
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [4]:
# Smoke test: build the 0.5x model and run one random batch through it.
net=ShuffleNetV2(scale=0.5)
# print(net)
# Random stand-in for a batch of four 3-channel 224x224 images.
data=torch.randn(4,3,224,224)
out=net(data)
# Last expression of the cell, so its value is displayed as the cell output.
out.shape

==use ShuffleNetV2 mode 0.5==


torch.Size([4, 10])

In [5]:
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device='cuda' if torch.cuda.is_available() else "cpu"
# Move the model's parameters and buffers to the selected device.
net.to(device)

ShuffleNetV2(
  (conv1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (stage2): Sequential(
    (0): ShuffleUnit(
      (short): Sequential(
        (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): ReLU6(inplace=True)
      )
      (branch): Sequential(
        (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

In [9]:
# Target image size. torchvision transforms read a 2-tuple size as
# (height, width), whereas PIL reports image sizes as (width, height).
img_size=(224,224)
# Per-channel mean and std used for normalization; adjust them to the dataset in use.
# NOTE(review): these look like the commonly used ImageNet statistics — confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# Mini-batch size handed to the data loaders.
batch=64
# Learning rate handed to the optimizer.
lr=0.04

In [10]:
# Dataset preprocessing pipelines.

# Validation: deterministic resize, tensor conversion and normalization.
transform_val = transforms.Compose(
    [
        transforms.Resize(size=img_size),
        # ToTensor is the one step that cannot be left out.
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Training: same as validation plus a padded random crop and a random
# horizontal flip for augmentation.
transform_train = transforms.Compose(
    [
        transforms.Resize(size=img_size),
        transforms.RandomCrop(size=img_size, padding=4),
        transforms.RandomHorizontalFlip(),
        # ToTensor is the one step that cannot be left out.
        transforms.ToTensor(),
        # NOTE(review): the original note called these CIFAR-10 statistics;
        # confirm that mean/std match the dataset actually used.
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [11]:
# animals10 dataset.
# ImageFolder is torchvision's ready-made dataset class, so no custom dataset
# class has to be written here.
data_dir="D:/data/image/"
if not os.path.exists(data_dir):
    # Fall back to the macOS location. "~" is not expanded by os.path.exists,
    # so it has to be expanded explicitly before the check.
    data_dir=os.path.expanduser("~/data")
    if not os.path.exists(data_dir):
        raise FileNotFoundError("data source not exist!")
# os.path.join inserts the separator when data_dir has no trailing slash.
train_set=datasets.ImageFolder(root=os.path.join(data_dir,'animals10/train'),
                        transform=transform_train)

val_set=datasets.ImageFolder(root=os.path.join(data_dir,'animals10/val'),
                        transform=transform_val)
# Sample counts, reused later as progress-bar totals and for the accuracy.
train_set_len=len(train_set)
val_set_len=len(val_set)
print('train data',train_set_len)
print('val data',val_set_len)
print('train label',train_set.class_to_idx)
print('val label',val_set.class_to_idx)
# Shuffle only the training data; validation order stays fixed.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch,
                                         shuffle=True, num_workers=6)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch,
                                         shuffle=False, num_workers=6)


train data 23556
val data 2623
train label {'cane': 0, 'cavallo': 1, 'elefante': 2, 'farfalla': 3, 'gallina': 4, 'gatto': 5, 'mucca': 6, 'pecora': 7, 'ragno': 8, 'scoiattolo': 9}
val label {'cane': 0, 'cavallo': 1, 'elefante': 2, 'farfalla': 3, 'gallina': 4, 'gatto': 5, 'mucca': 6, 'pecora': 7, 'ragno': 8, 'scoiattolo': 9}


In [12]:
class CalcLoss(nn.Module):
    """Thin ``nn.Module`` wrapper around ``nn.CrossEntropyLoss``.

    ``num_classes`` is accepted by the constructor but not used.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, y_true, y_pred):
        """Return ``self.criterion(y_true, y_pred)``.

        The arguments are forwarded in order, so ``y_true`` is used as the
        criterion's input and ``y_pred`` as its target.
        NOTE(review): the callers in this file pass (model outputs, labels),
        i.e. the parameter names are the reverse of what they receive.
        """
        loss_value = self.criterion(y_true, y_pred)
        return loss_value
    
class TrainModel(object):
    _defaults={
        "eopch":2,
    }
    def __init__(self,net,loss,train_dataloder,optimizer,**kwargs):
        '''
        还支持传入字典参数
        '''
        self.__dict__.update(self._defaults)
        self.net=net
        self.loss=loss
        self.dataloder=train_dataloder
        self.optimizer=optimizer
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.device='cuda' if torch.cuda.is_available() else "cpu"
    def __call__(self,train_total_len,batch_size):
        self.net.train()
        # 需要注意这里写的是train_set的长度,如果写错成train_loader,返回的是数据集一共有多少个batch
        with tqdm(total=train_total_len,desc=f'Train:') as pbar:
            for idx,data in enumerate(self.dataloder):
                data,label=data
                data,label=data.to(self.device),label.to(self.device)
                self.optimizer.zero_grad()
                # forward
                outputs=self.net(data)
                loss=self.loss(outputs,label)
                loss.backward()
                self.optimizer.step()
                # 更新进度条
                pbar.update(batch_size)

class TestModel(object):
    _defaults={
    "eopch":1,
    }
    def __init__(self,net,loss,val_dataloder,**kwargs):
        '''
        还支持传入字典参数
        '''
        self.__dict__.update(self._defaults)
        self.net=net
        self.loss=loss
        self.dataloder=val_dataloder
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.device='cuda' if torch.cuda.is_available() else "cpu"
    def __call__(self,total_num,batch_size):
        # eval 模式下,dropout失效,bn层参数采用之前训练的,不更新
        self.net.eval()
        val_loss=0
        correct=0
        with tqdm(total=total_num,desc=f'Validation:') as pbar:
            # 不计算损失,这样速度更快
            with torch.no_grad():
                for idx,data in enumerate(self.dataloder):
                    x,y=data
                    x,y=x.to(self.device),y.to(self.device)
                    y_pre=self.net(x)
                    val_loss+=self.loss(y_pre,y).item()
                    # max 第一个返回的是元素值,第二个为索引值
                    # 求第一个维度的max,因此结果返回的是batch维度的max
                    # 返回是一个第一个元素为值,第二个元素为idx的tuple
                    pred=torch.max(y_pre,dim=1)[1]

                    # pred维度为batch,每个元素为索引
                    correct+=pred.eq(y).sum().item()
                    # 更新进度条
                    pbar.update(batch_size)
            # 格式化打印直接有% 带f%这种{:.2f%}是错的格式,format这种有点坑
            print("test loss {},accuracy {:.2%}".format(val_loss,correct/total_num))
        # 返回损失和准确率
        return (val_loss,correct/total_num)

In [13]:
# SGD with momentum 0.9 over all model parameters, using the learning rate `lr`.
optimizer=optim.SGD(net.parameters(),lr=lr,momentum=0.9)
# Loss module, moved to the same device as the model.
loss=CalcLoss().to(device)
# One-pass training and validation runners sharing the same model and loss.
train=TrainModel(net,loss,train_loader,optimizer)
test=TestModel(net,loss,val_loader)

In [None]:
# Each epoch is one training pass followed by one validation pass.
total_epoch=2
for i in range(total_epoch):
    # The dataset sizes are the progress-bar totals; `batch` is the step per batch.
    train(train_set_len,batch)
    test(val_set_len,batch)

Train::  38%|██████████████████████████████                                                 | 8960/23556 [00:13<00:09, 1584.50it/s]