# Load the model from Google Drive

In [1]:
from google.colab import drive
# Expose the user's Google Drive inside the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')

# Checkpoint location inside the mounted Drive.
# Edit this to point at wherever your copy of the trained model is stored.
PATH = '/content/gdrive/MyDrive/ShuffleNet/resnet101_shuffleNet.132.h5'

Mounted at /content/gdrive


In [2]:
import torch
import torchvision
import torch.nn as nn

## Load the ResNet101 model with ShuffleNet blocks (CIFAR-10)

In [3]:
def conv1x1(in_channels,
            out_channels,
            stride=1,
            groups=1,
            bias=False):
    """
    Build a pointwise (1x1 kernel) 2D convolution layer.
    Parameters:
    ----------
    in_channels : int
        Channel count of the incoming feature map.
    out_channels : int
        Channel count produced by the layer.
    stride : int or tuple/list of 2 int, default 1
        Step of the convolution window.
    groups : int, default 1
        Number of channel groups the convolution is split into.
    bias : bool, default False
        If True, the layer learns an additive bias.
    Returns
    -------
    nn.Conv2d
        The freshly constructed convolution module.
    """
    pointwise = nn.Conv2d(in_channels, out_channels, 1,
                          stride=stride, groups=groups, bias=bias)
    return pointwise


def conv3x3(in_channels,
            out_channels,
            stride=1,
            padding=1,
            dilation=1,
            groups=1,
            bias=False):
    """
    Convolution 3x3 layer.
    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 1
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    Returns
    -------
    nn.Conv2d
        The constructed 3x3 convolution module.
    """
    # Defined exactly once: an identical second definition used to follow and
    # silently shadowed this one.
    return nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=3,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias)


def depthwise_conv3x3(channels,
                      stride):
    """
    Build a depthwise 3x3 convolution (one filter group per channel).
    Parameters:
    ----------
    channels : int
        Channel count, used for both the input and the output.
    stride : int or tuple/list of 2 int
        Step of the convolution window.
    Returns
    -------
    nn.Conv2d
        Bias-free 3x3 convolution with padding 1 and `groups == channels`.
    """
    # Settings shared by every depthwise layer; only the stride varies per call.
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1,
                       groups=channels, bias=False)
    return nn.Conv2d(channels, channels, **conv_kwargs)

def channel_shuffle(x,
                    groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083.
    The channel axis is split into `groups` blocks, and the blocks are interleaved so that output channel
    `i * groups + g` is input channel `g * (channels // groups) + i`.
    Parameters:
    ----------
    x : Tensor
        Input tensor with four dimensions, unpacked as (batch, channels, height, width).
    groups : int
        Number of groups. Must be positive and divide the channel dimension of `x`.
    Returns
    -------
    Tensor
        Resulted tensor, same shape as `x`, with its channels shuffled.
    Raises
    ------
    ValueError
        If `groups` is not positive or does not divide the number of channels.
    """
    batch, channels, height, width = x.size()
    # Fail early with a readable message instead of an opaque shape error from the reshape below.
    if groups <= 0:
        raise ValueError('groups must be a positive integer')
    if channels % groups != 0:
        raise ValueError('channels must be divisible by groups')
    channels_per_group = channels // groups
    # reshape (not view) so that non-contiguous inputs, e.g. permuted tensors, are accepted.
    x = x.reshape(batch, groups, channels_per_group, height, width)
    # Swapping the (groups, channels_per_group) axes is what interleaves the groups.
    x = torch.transpose(x, 1, 2).contiguous()
    x = x.view(batch, channels, height, width)
    return x

class ChannelShuffle(nn.Module):
    """
    Module form of the channel shuffle operation. It stores the group count so the shuffle can be
    placed inside a network like any other layer.
    Parameters:
    ----------
    channels : int
        Number of channels; only used to validate `groups` at construction time.
    groups : int
        Number of groups passed to `channel_shuffle` on every forward call.
    """
    def __init__(self, channels, groups):
        super().__init__()
        leftover = channels % groups
        if leftover:
            raise ValueError('channels must be divisible by groups')
        self.groups = groups

    def forward(self, x):
        shuffled = channel_shuffle(x, self.groups)
        return shuffled

In [4]:
class ShuffleUnit(nn.Module):
    """
    ShuffleNet unit: 1x1 compress conv -> BN -> ReLU -> channel shuffle -> depthwise 3x3 conv ->
    grouped 1x1 expand conv -> BN, merged with a shortcut branch and followed by ReLU.
    The shortcut is added to the main branch, or, when `downsample` is set, average-pooled and
    concatenated with it along the channel axis.
    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels. The bottleneck width is `out_channels // 4`.
    groups : int
        Number of groups in convolution layers (also used for the channel shuffle).
    downsample : bool
        Whether do downsample. When True the depthwise conv uses stride 2 and the main branch
        produces `out_channels - in_channels` channels, the rest coming from the concatenated shortcut.
    ignore_group : bool
        Whether ignore group value in the first convolution layer (it then uses groups=1).
    expand : bool
        Whether the shortcut is passed through an extra grouped 1x1 convolution (`expandConv`)
        that maps `in_channels` to `in_channels * 4` channels before being merged.

    NOTE(review): `dw_bn2` is constructed in `__init__` but `forward` never calls it, so the
    depthwise output reaches `expand_conv3` un-normalized. Confirm whether this is intentional;
    presumably existing checkpoints were trained with this exact forward pass, so verify before
    changing it.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 groups,
                 downsample,
                 ignore_group, expand):
        super(ShuffleUnit, self).__init__()
        self.downsample = downsample
        self.expand = expand
        # Bottleneck width; computed from the requested out_channels, before the adjustment below.
        mid_channels = out_channels // 4

        if downsample:
            # forward() concatenates the pooled shortcut onto the main branch, so the main branch
            # only has to produce the remaining channels.
            out_channels -= in_channels

        self.compress_conv1 = conv1x1(
            in_channels=in_channels,
            out_channels=mid_channels,
            groups=(1 if ignore_group else groups))
        self.compress_bn1 = nn.BatchNorm2d(num_features=mid_channels)
        self.c_shuffle = ChannelShuffle(
            channels=mid_channels,
            groups=groups)
        self.dw_conv2 = depthwise_conv3x3(
            channels=mid_channels,
            stride=(2 if self.downsample else 1))
        # Registered (and therefore part of the state dict) but not used in forward().
        self.dw_bn2 = nn.BatchNorm2d(num_features=mid_channels)
        self.expand_conv3 = conv1x1(
            in_channels=mid_channels,
            out_channels=out_channels,
            groups=groups)
        if self.expand:
          # Shortcut projection: widens the identity branch to in_channels * 4 channels.
          self.expandConv = conv1x1(in_channels,
            out_channels= in_channels* 4,
            groups = groups)
        self.expand_bn3 = nn.BatchNorm2d(num_features=out_channels)
        if downsample:
            # Pools the shortcut with stride 2, matching the stride-2 depthwise conv.
            self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        # One in-place ReLU instance, applied twice in forward().
        self.activ = nn.ReLU(inplace=True)

    def forward(self, x):
        """
        Run the unit on `x` and return the activated merge of the main and shortcut branches.
        """
        identity = x
        if self.expand:
          identity = self.expandConv(identity)
        x = self.compress_conv1(x)
        x = self.compress_bn1(x)
        x = self.activ(x)
        x = self.c_shuffle(x)
        x = self.dw_conv2(x)
        # dw_bn2 is not applied here; the depthwise output goes straight into the expand conv.
        x = self.expand_conv3(x)
        x = self.expand_bn3(x)
        if self.downsample:
            # Downsampling units merge by channel concatenation.
            identity = self.avgpool(identity)
            x = torch.cat((x, identity), dim=1)
        else:
            # Non-downsampling units merge by element-wise addition.
            x = x + identity
        x = self.activ(x)
        return x

In [5]:
from torchvision.models import resnet101

# Randomly initialised torchvision ResNet-101 with a 10-class head. No pretrained weights are
# requested (the default); the deprecated `pretrained=` flag is therefore not passed.
model = resnet101(num_classes=10)

# Stem adapted for small images: a stride-1 3x3 convolution and no max-pooling, so the stem
# preserves the input's spatial resolution.
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
model.maxpool = nn.Identity()

# Every residual block of the four ResNet stages is replaced by a ShuffleUnit.
# Per stage: (attribute name, input channels of the first unit, stage output channels,
#             whether the first unit downsamples, whether the first unit expands its shortcut).
# All remaining units in a stage map out_channels -> out_channels without downsample/expand.
SHUFFLE_GROUPS = 8
STAGE_SPECS = [
    ("layer1", 64, 256, False, True),
    ("layer2", 256, 512, True, False),
    ("layer3", 512, 1024, True, False),
    ("layer4", 1024, 2048, True, False),
]

for stage_name, stage_in, stage_out, first_downsample, first_expand in STAGE_SPECS:
    stage = getattr(model, stage_name)
    for block_idx in range(len(stage)):
        is_first = block_idx == 0
        stage[block_idx] = ShuffleUnit(
            stage_in if is_first else stage_out,
            stage_out,
            SHUFFLE_GROUPS,
            first_downsample and is_first,
            True,  # ignore_group: the compress conv of every unit uses groups=1
            first_expand and is_first)

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location="cpu" lets a checkpoint saved from a GPU session load on a CPU-only runtime;
# the model itself is on the CPU at this point.
model.load_state_dict(torch.load(PATH, map_location="cpu"))
model.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): ShuffleUnit(
      (compress_conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (compress_bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (c_shuffle): ChannelShuffle()
      (dw_conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
      (dw_bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (expand_conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), groups=8, bias=False)
      (expandConv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), groups=8, bias=False)
      (expand_bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [6]:
from prettytable import PrettyTable

def count_parameters(model):
    """
    Print a table of the model's trainable parameters and return their total size.
    Parameters:
    ----------
    model : nn.Module
        Model whose `named_parameters()` are inspected. Parameters with
        `requires_grad == False` are left out of both the table and the total.
    Returns
    -------
    int
        Total number of elements over all trainable parameters.
    """
    table = PrettyTable(["Modules", "Parameters"])
    # (name, element count) for every parameter that receives gradients, in model order.
    trainable = [(name, tensor.numel())
                 for name, tensor in model.named_parameters()
                 if tensor.requires_grad]
    for entry in trainable:
        table.add_row(list(entry))
    total_params = sum(size for _, size in trainable)
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

# Per-parameter table and trainable total (both printed by the helper itself).
count_parameters(model)

# Grand total over every parameter tensor, whether or not it requires gradients.
pytorch_total_params = sum(weight.numel() for weight in model.parameters())
print("Total Params: ", pytorch_total_params)

+---------------------------------+------------+
|             Modules             | Parameters |
+---------------------------------+------------+
|           conv1.weight          |    1728    |
|            bn1.weight           |     64     |
|             bn1.bias            |     64     |
|  layer1.0.compress_conv1.weight |    4096    |
|   layer1.0.compress_bn1.weight  |     64     |
|    layer1.0.compress_bn1.bias   |     64     |
|     layer1.0.dw_conv2.weight    |    576     |
|      layer1.0.dw_bn2.weight     |     64     |
|       layer1.0.dw_bn2.bias      |     64     |
|   layer1.0.expand_conv3.weight  |    2048    |
|    layer1.0.expandConv.weight   |    2048    |
|    layer1.0.expand_bn3.weight   |    256     |
|     layer1.0.expand_bn3.bias    |    256     |
|  layer1.1.compress_conv1.weight |   16384    |
|   layer1.1.compress_bn1.weight  |     64     |
|    layer1.1.compress_bn1.bias   |     64     |
|     layer1.1.dw_conv2.weight    |    576     |
|      layer1.1.dw_b

# Number of FLOPs

In [7]:
pip install -U fvcore

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 KB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting iopath>=0.1.7
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 KB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker
  Downloading portalocker-2.6.0-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: fvcore, iopath
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61431 sha256=98288627eb4edfb5666ae2f6771d6d0d

In [8]:
from fvcore.nn import FlopCountAnalysis
from fvcore.nn import flop_count_table

# Dummy batch for tracing: one random 3-channel 32x32 image (values do not affect the count).
input = torch.rand((1, 3, 32, 32))

# Analyse the model on the dummy batch, then print the per-module table and the overall total.
flops = FlopCountAnalysis(model, input)
print(flop_count_table(flops))
print("Total number of FLOPS: ", flops.total())

| module                      | #parameters or shape   | #flops     |
|:----------------------------|:-----------------------|:-----------|
| model                       | 10.077M                | 0.675G     |
|  conv1                      |  1.728K                |  1.769M    |
|   conv1.weight              |   (64, 3, 3, 3)        |            |
|  bn1                        |  0.128K                |  0.131M    |
|   bn1.weight                |   (64,)                |            |
|   bn1.bias                  |   (64,)                |            |
|  layer1                     |  49.088K               |  49.873M   |
|   layer1.0                  |   9.536K               |   9.634M   |
|    layer1.0.compress_conv1  |    4.096K              |    4.194M  |
|    layer1.0.compress_bn1    |    0.128K              |    0.131M  |
|    layer1.0.dw_conv2        |    0.576K              |    0.59M   |
|    layer1.0.dw_bn2          |    0.128K              |            |
|    layer1.0.expand