In [2]:
import torch
from torch import nn
import torch.nn.functional as F

### ShuffleNet_v1

In [3]:
'''
通道混洗模块 reshape为二维分组 -> 转置 -> 拉平操作
'''


class Shuffleblock(nn.Module):
    """Channel shuffle layer.

    Splits the channel axis into ``groups`` contiguous groups, swaps the
    group axis with the within-group axis, and flattens the two back into a
    single channel axis, so that consecutive output channels come from
    different input groups.

    Args:
        groups: number of groups the channel axis is split into. The channel
            count of the input must be divisible by it, otherwise the first
            reshape in ``forward`` fails.
    """

    def __init__(self, groups):
        super(Shuffleblock, self).__init__()
        self.groups = groups

    def forward(self, x):
        """Shuffle the channels of a 4-D tensor; the output shape equals the input shape."""
        n, c, h, w = x.size()
        per_group = c // self.groups
        # (n, c, h, w) -> (n, groups, per_group, h, w)
        grouped = x.reshape(n, self.groups, per_group, h, w)
        # Swap the group axis and the within-group axis.
        swapped = grouped.permute(0, 2, 1, 3, 4)
        # Merge the two channel axes back into one.
        return swapped.reshape(n, -1, h, w)

In [4]:
'''
shuffle 模块基本单元
'''


class Bottleneck(nn.Module):
    """ShuffleNet v1 unit: 1x1 group conv -> channel shuffle -> 3x3 depthwise conv -> 1x1 group conv.

    With ``stride == 2`` the input is down-sampled by a 3x3 average pool and
    concatenated with the residual branch along the channel axis, so the unit
    emits ``out_planes + in_planes`` channels. With any other stride the
    shortcut is the identity and is added element-wise, which requires
    ``in_planes == out_planes``.

    Args:
        in_planes: number of input channels.
        out_planes: number of channels produced by the residual branch.
        stride: stride of the 3x3 depthwise convolution (2 selects the
            down-sampling / concatenation variant).
        groups: number of groups for the 1x1 group convolutions.
    """

    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        # The middle (bottleneck) layers use 1/4 of the output channel count.
        mid_planes = out_planes // 4
        # The paper's authors skip group convolution on the first pointwise
        # layer of stage 2 because its input has too few channels (only 24).
        g = 1 if in_planes == 24 else groups

        # 1x1 pointwise (group) convolution that reduces the channel count.
        self.conv1x1_1 = self.conv(in_planes,
                                   mid_planes,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0,
                                   groups=g)
        # 3x3 depthwise convolution (one group per channel); carries the stride.
        self.conv3x3 = self.conv(mid_planes,
                                 mid_planes,
                                 kernel_size=3,
                                 stride=stride,
                                 padding=1,
                                 groups=mid_planes)
        # 1x1 pointwise group convolution that expands back to out_planes.
        self.conv1x1_2 = self.conv(mid_planes,
                                   out_planes,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0,
                                   groups=groups)
        self.shuffleblock = Shuffleblock(groups=g)
        # An empty Sequential returns its input unchanged (identity shortcut).
        self.short_cut = nn.Sequential()
        if stride == 2:
            self.short_cut = nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=2, padding=1))

    def conv(self, in_dim, out_dim, kernel_size, stride, padding, groups):
        """Build a bias-free Conv2d -> BatchNorm2d -> in-place ReLU stack."""
        convlayer = nn.Sequential(
            nn.Conv2d(in_dim,
                      out_dim,
                      kernel_size=kernel_size,
                      stride=stride,
                      padding=padding,
                      groups=groups,
                      bias=False), nn.BatchNorm2d(out_dim), nn.ReLU(True))
        return convlayer

    def forward(self, x):
        """Run the residual branch and merge it with the shortcut.

        Returns a tensor with ``out_planes + in_planes`` channels and halved
        spatial size when ``stride == 2``; otherwise a tensor with the same
        shape as ``x``.
        """
        out = self.conv1x1_1(x)
        out = self.shuffleblock(out)
        out = self.conv3x3(out)
        out = self.conv1x1_2(out)
        identity = self.short_cut(x)
        if self.stride == 2:
            # Down-sampling unit: concatenate along the channel axis.
            return F.relu(torch.cat([out, identity], 1))
        # Regular unit: element-wise residual addition.
        return F.relu(out + identity)

![](./shuffle.jpg)

In [27]:
class ShuffleNet_v1(nn.Module):
    """ShuffleNet v1 classifier built from a 1x1 stem and three stages of Bottleneck units.

    The stem keeps the spatial resolution and each of the three stages halves
    it once, so a 32x32 input reaches the 4x4 average pool as a 4x4 map and
    is reduced to a single ``out_planes[3]``-dimensional vector for the
    linear classifier.

    Args:
        groups: number of groups used by the group convolutions in every unit.
        in_channels: number of channels of the input image.
        num_classes: size of the classifier output.
    """

    def __init__(self, groups=3, in_channels=3, num_classes=10):
        super(ShuffleNet_v1, self).__init__()
        # Output channels of stage1..stage4.
        self.out_planes = [24, 240, 480, 960]
        # Units per stage; index 0 is a placeholder (stage1 is a plain conv).
        self.num_blocks = [-1, 4, 8, 4]
        # Running channel count fed to the next unit; updated by _make_layer.
        self.in_planes = self.out_planes[0]
        # stage1: 1x1 conv stem
        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels,
                      self.out_planes[0],
                      kernel_size=1,
                      bias=False), nn.BatchNorm2d(self.out_planes[0]),
            nn.ReLU(True))
        # stage2 - stage4: stacks of Bottleneck units
        self.stage2 = self._make_layer(self.out_planes[1], self.num_blocks[1],
                                       groups)
        self.stage3 = self._make_layer(self.out_planes[2], self.num_blocks[2],
                                       groups)
        self.stage4 = self._make_layer(self.out_planes[3], self.num_blocks[3],
                                       groups)
        self.classifier = nn.Linear(self.out_planes[3], num_classes)

    def _make_layer(self, out_planes, num_blocks, groups):
        """Build one stage: a stride-2 unit followed by ``num_blocks - 1`` stride-1 units.

        Reads and updates ``self.in_planes`` so consecutive stages chain
        their channel counts.
        """
        layers = []
        for i in range(num_blocks):
            if i == 0:
                # The first unit down-samples and concatenates its shortcut,
                # which carries self.in_planes channels. Its residual branch
                # must therefore produce only the remaining channels so the
                # concatenation totals out_planes. (Subtracting the fixed
                # stem width here is only correct for stage2.)
                block = Bottleneck(self.in_planes,
                                   out_planes - self.in_planes,
                                   stride=2,
                                   groups=groups)
            else:
                block = Bottleneck(self.in_planes,
                                   out_planes,
                                   stride=1,
                                   groups=groups)
            layers.append(block)
            self.in_planes = out_planes

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = self.stage4(self.stage3(self.stage2(self.stage1(x))))
        # 4x4 average pool; the classifier needs a 1x1 map here.
        out = F.avg_pool2d(out, 4)
        out = out.reshape(out.shape[0], -1)
        # Return the classifier output, not the pooled features.
        return self.classifier(out)

Reference: https://blog.csdn.net/winycg/article/details/87873946

In [28]:
# Instantiate the network with its default configuration spelled out.
model = ShuffleNet_v1(groups=3, in_channels=3, num_classes=10)

In [29]:
# Bare expression: the notebook displays the module's repr (its layer tree).
model

ShuffleNet_v1(
  (stage1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (stage2): Sequential(
    (0): Bottleneck(
      (conv1x1_1): Sequential(
        (0): Conv2d(24, 54, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(54, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace)
      )
      (conv3x3): Sequential(
        (0): Conv2d(54, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=54, bias=False)
        (1): BatchNorm2d(54, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace)
      )
      (conv1x1_2): Sequential(
        (0): Conv2d(54, 216, kernel_size=(1, 1), stride=(1, 1), groups=3, bias=False)
        (1): BatchNorm2d(216, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace)