<a href="https://colab.research.google.com/github/IANGECHUKI176/deeplearning/blob/main/pytorch/convnets/ShuffleNetV1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.

Original [paper](https://arxiv.org/pdf/1707.01083.pdf)

Code borrowed from [GitHub](https://github.com/kuangliu/pytorch-cifar/blob/master/models/shufflenet.py)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

In [None]:
class ShuffleBlock(nn.Module):
    """Channel shuffle: interleave the channels of ``groups`` channel groups.

    The channel axis is viewed as ``(groups, channels // groups)``, the two
    axes are swapped, and the result is flattened back to a single channel
    axis. The spatial dimensions are left untouched.

    Args:
        groups: number of channel groups; must divide the channel count of
            the input, otherwise the reshape in ``forward`` raises.
    """

    def __init__(self,groups):
        super(ShuffleBlock,self).__init__()
        self.groups = groups

    def forward(self,x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
        # Tensor layout is [N, C, H, W]: height comes before width. Unpacking
        # them the other way round corrupts non-square feature maps.
        batch_size, channels, height, width = x.size()
        channels_per_group = channels // self.groups

        x = x.view(batch_size, self.groups, channels_per_group, height, width)
        # Swap the group axis with the per-group channel axis; the copy made
        # by contiguous() is what lets the final view flatten them again.
        x = x.transpose(1, 2).contiguous()
        return x.view(batch_size, channels, height, width)

In [None]:
class Bottleneck(nn.Module):
    """ShuffleNet unit: 1x1 group conv -> channel shuffle -> 3x3 depthwise conv -> 1x1 group conv.

    With ``stride == 2`` the shortcut is a 3x3 average pool (stride 2) and the
    two paths are concatenated along the channel axis, so the unit emits
    ``out_channels + in_channels`` channels. For any other stride the shortcut
    is the identity and the two paths are summed, which requires
    ``in_channels == out_channels``.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the residual branch.
        stride: stride of the depthwise convolution.
        groups: group count for the pointwise group convolutions.
    """

    def __init__(self,in_channels,out_channels,stride,groups):
        super(Bottleneck,self).__init__()
        self.stride = stride

        # The bottleneck works at a quarter of the residual branch's width.
        mid_channels = int(out_channels / 4)

        # "Note that for Stage 2, we do not apply group convolution on the
        # first pointwise layer because the number of input channels is
        # relatively small." -- detected here by the 24-channel input.
        if in_channels == 24:
            first_groups = 1
        else:
            first_groups = groups

        # 1x1 group convolution reducing to the bottleneck width.
        reduce_conv = nn.Conv2d(
            in_channels, mid_channels, kernel_size=1, groups=first_groups, bias=False
        )
        self.conv1x1_1 = nn.Sequential(
            reduce_conv,
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
        )

        # Channel shuffle uses the same group count as the first pointwise conv.
        self.shuffle = ShuffleBlock(groups=first_groups)

        # 3x3 depthwise convolution (one group per channel), no activation.
        depthwise_conv = nn.Conv2d(
            mid_channels,
            mid_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=mid_channels,
            bias=False,
        )
        self.conv3x3_2 = nn.Sequential(depthwise_conv, nn.BatchNorm2d(mid_channels))

        # 1x1 group convolution expanding back to out_channels.
        expand_conv = nn.Conv2d(
            mid_channels, out_channels, kernel_size=1, groups=groups, bias=False
        )
        self.conv1x1_3 = nn.Sequential(expand_conv, nn.BatchNorm2d(out_channels))

        # An empty Sequential acts as the identity shortcut.
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
        else:
            self.shortcut = nn.Sequential()

    def forward(self,x):
        branch = self.conv1x1_1(x)
        branch = self.shuffle(branch)
        branch = self.conv3x3_2(branch)
        branch = self.conv1x1_3(branch)
        skip = self.shortcut(x)

        if self.stride == 2:
            # Downsampling unit: stack both paths along the channel axis.
            merged = torch.cat([branch, skip], 1)
        else:
            merged = branch + skip
        return F.relu(merged)


In [None]:
# Inspect a single stride-2 unit with 2 groups on a 200-channel 224x224 input.
blk0 = Bottleneck(in_channels=200, out_channels=200, stride=2, groups=2)
summary(blk0, (200, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 50, 224, 224]           5,000
       BatchNorm2d-2         [-1, 50, 224, 224]             100
              ReLU-3         [-1, 50, 224, 224]               0
      ShuffleBlock-4         [-1, 50, 224, 224]               0
            Conv2d-5         [-1, 50, 112, 112]             450
       BatchNorm2d-6         [-1, 50, 112, 112]             100
            Conv2d-7        [-1, 200, 112, 112]           5,000
       BatchNorm2d-8        [-1, 200, 112, 112]             400
         AvgPool2d-9        [-1, 200, 112, 112]               0
Total params: 11,050
Trainable params: 11,050
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 38.28
Forward/backward pass size (MB): 143.55
Params size (MB): 0.04
Estimated Total Size (MB): 181.88
----------------------------------------

In [None]:
class ShuffleNet(nn.Module):
    """ShuffleNet classifier: 1x1 conv stem, three stages of Bottleneck units, pooled linear head.

    Args:
        cfg: dict with keys ``'out_planes'`` (output width of each of the three
            stages), ``'num_blocks'`` (number of units in each stage) and
            ``'groups'`` (group count passed to every unit).
        num_classes: size of the final linear layer's output.
    """

    def __init__(self,cfg,num_classes):
        super(ShuffleNet,self).__init__()
        stage_widths = cfg['out_planes']
        stage_depths = cfg['num_blocks']
        groups = cfg['groups']

        # Running channel count; _make_layer updates it as units are added.
        self.in_channels = 24

        stem_conv = nn.Conv2d(3, 24, 1, bias=False)
        self.conv1 = nn.Sequential(
            stem_conv,
            nn.BatchNorm2d(24),
            nn.ReLU(inplace=True),
        )
        self.layer1 = self._make_layer(stage_widths[0], stage_depths[0], groups)
        self.layer2 = self._make_layer(stage_widths[1], stage_depths[1], groups)
        self.layer3 = self._make_layer(stage_widths[2], stage_depths[2], groups)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        # By now self.in_channels equals the last stage's output width.
        self.fc = nn.Linear(self.in_channels, num_classes)

    def _make_layer(self,out_channels,num_blocks,groups):
        """Build one stage: a stride-2 unit followed by ``num_blocks - 1`` stride-1 units."""
        units = []
        for index in range(num_blocks):
            if index == 0:
                # The first unit concatenates its shortcut, so its residual
                # branch only supplies the channels the shortcut does not.
                stride = 2
                branch_channels = out_channels - self.in_channels
            else:
                stride = 1
                branch_channels = out_channels
            units.append(
                Bottleneck(self.in_channels, branch_channels, stride=stride, groups=groups)
            )
            self.in_channels = out_channels
        return nn.Sequential(*units)

    def forward(self,x):
        features = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        pooled = self.avg_pool(features)
        # Flatten [N, C, 1, 1] to [N, C] for the linear head.
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [None]:
def ShuffleNetG2(num_classes=10):
    """Build a ShuffleNet with 2 groups and stage widths 200/400/800.

    Args:
        num_classes: size of the classifier output; defaults to 10, the value
            that was previously hard-coded.

    Returns:
        A freshly constructed ``ShuffleNet``.
    """
    cfg = {
        'out_planes': [200, 400, 800],
        'num_blocks': [4, 8, 4],
        'groups': 2,
    }
    return ShuffleNet(cfg, num_classes)
def ShuffleNetG3(num_classes=10):
    """Build a ShuffleNet with 3 groups and stage widths 240/480/960.

    Args:
        num_classes: size of the classifier output; defaults to 10, the value
            that was previously hard-coded.

    Returns:
        A freshly constructed ``ShuffleNet``.
    """
    cfg = {
        'out_planes': [240, 480, 960],
        'num_blocks': [4, 8, 4],
        'groups': 3,
    }
    return ShuffleNet(cfg, num_classes)


def test():
    """Run one random 1x3x32x32 input through ShuffleNetG2 and print the output."""
    model = ShuffleNetG2()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))


test()

tensor([[-2.5869, -0.0489, -0.2903,  0.6408, -0.5941, -0.2983, -0.0655,  0.4851,
         -1.1879,  0.6140]], grad_fn=<AddmmBackward0>)


In [None]:
# Layer-by-layer summary of the 2-group network for a 3x224x224 input.
blk3 = ShuffleNetG2()
summary(blk3,(3,224,224))

In [None]:
# Layer-by-layer summary of the 3-group network for a 3x224x224 input.
# `shufflenet` is not defined in this notebook; ShuffleNetG3 is the builder
# with 3 groups and 240/480/960 stage widths.
blk5 = ShuffleNetG3()
summary(blk5,(3,224,224))

in_channels 24
out_channels 240
stride 2
mi_channels 60.0
-------------------------------------------------------
in_channels 240
out_channels 240
stride 1
mi_channels 60.0
-------------------------------------------------------
in_channels 240
out_channels 240
stride 1
mi_channels 60.0
-------------------------------------------------------
in_channels 240
out_channels 240
stride 1
mi_channels 60.0
-------------------------------------------------------
in_channels 240
out_channels 480
stride 2
mi_channels 120.0
-------------------------------------------------------
in_channels 480
out_channels 480
stride 1
mi_channels 120.0
-------------------------------------------------------
in_channels 480
out_channels 480
stride 1
mi_channels 120.0
-------------------------------------------------------
in_channels 480
out_channels 480
stride 1
mi_channels 120.0
-------------------------------------------------------
in_channels 480
out_channels 480
stride 1
mi_channels 120.0
-----------------