In [1]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the checkpoint cell at the end
# of the notebook writes to a folder below this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet101

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
# TensorBoard summary writer, created with default arguments.
# NOTE(review): no cell visible in this notebook uses `writer` — confirm it is still needed.
writer = SummaryWriter()

import pathlib
import os

In [3]:
# ResNet-101 without pretrained weights and with a 10-class output layer.
model = resnet101(num_classes=10, pretrained=False)

# Swap the stem for a 3x3 / stride-1 convolution and turn the max-pool into a
# pass-through, so neither of the two layers reduces the spatial resolution.
model.conv1 = nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
)
model.maxpool = nn.Identity()



In [5]:
from inspect import isfunction

class Swish(nn.Module):
    """
    Swish activation: the input multiplied element-wise by its own sigmoid.

    From 'Searching for Activation Functions,' https://arxiv.org/abs/1710.05941.
    """
    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate


class HSigmoid(nn.Module):
    """
    Hard sigmoid, a piecewise-linear approximation of the sigmoid computed as
    relu6(x + 3) / 6.

    From 'Searching for MobileNetV3,' https://arxiv.org/abs/1905.02244.
    """
    def forward(self, x):
        # `x + 3.0` is a new tensor, so the in-place ReLU6 below does not
        # overwrite the caller's input.
        shifted = x + 3.0
        clipped = F.relu6(shifted, inplace=True)
        return clipped / 6.0


class HSwish(nn.Module):
    """
    Hard swish activation, computed as x * relu6(x + 3) / 6.

    From 'Searching for MobileNetV3,' https://arxiv.org/abs/1905.02244.
    Parameters:
    ----------
    inplace : bool, default False
        Forwarded to the ReLU6 call, which operates on the temporary
        `x + 3.0` tensor.
    """
    def __init__(self, inplace=False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        gate = F.relu6(x + 3.0, inplace=self.inplace)
        # Multiply first, then divide, keeping the original evaluation order.
        scaled = x * gate
        return scaled / 6.0


def get_activation_layer(activation):
    """
    Create activation layer from string/function/module.
    Parameters:
    ----------
    activation : function, or str, or nn.Module
        - function (including a lambda): called with no arguments and its
          result is returned;
        - str: one of "relu", "relu6", "swish", "hswish", "sigmoid",
          "hsigmoid";
        - nn.Module instance: returned unchanged.
    Returns
    -------
    nn.Module
        Activation layer.
    Raises
    ------
    AssertionError
        If `activation` is None, or is neither a function, a string nor an
        nn.Module instance.
    NotImplementedError
        If `activation` is a string that is not one of the supported names.
    """
    assert (activation is not None)

    if isfunction(activation):
        return activation()

    if isinstance(activation, str):
        if activation == "relu":
            return nn.ReLU(inplace=True)
        if activation == "relu6":
            return nn.ReLU6(inplace=True)
        if activation == "swish":
            return Swish()
        if activation == "hswish":
            return HSwish(inplace=True)
        if activation == "sigmoid":
            return nn.Sigmoid()
        if activation == "hsigmoid":
            return HSigmoid()
        raise NotImplementedError(
            "Unsupported activation name: {!r}".format(activation))

    assert (isinstance(activation, nn.Module))
    # A ready-made module is handed back as is. The previous bare `return`
    # yielded None here, so callers silently ended up without an activation.
    return activation

  

class ConvBlock(nn.Module):
    """
    Convolution followed by optional batch normalization and optional
    activation, applied in that order.
    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    kernel_size : int or tuple/list of 2 int
        Convolution window size.
    stride : int or tuple/list of 2 int
        Strides of the convolution.
    padding : int or tuple/list of 2 int
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the convolution uses a bias vector.
    use_bn : bool, default True
        Whether to add a BatchNorm layer after the convolution.
    bn_eps : float, default 1e-5
        Small float added to variance in Batch norm.
    activation : function or str or None, default a factory returning nn.ReLU(inplace=True)
        Passed to `get_activation_layer` to build the activation; None
        disables the activation stage altogether.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 dilation=1,
                 groups=1,
                 bias=False,
                 use_bn=True,
                 bn_eps=1e-5,
                 activation=(lambda: nn.ReLU(inplace=True))):
        super().__init__()
        self.use_bn = use_bn
        self.activate = activation is not None

        # Submodules are registered as conv -> bn -> activ, which also fixes
        # the order of the keys in the state_dict.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        if use_bn:
            self.bn = nn.BatchNorm2d(out_channels, eps=bn_eps)
        if self.activate:
            self.activ = get_activation_layer(activation)

    def forward(self, x):
        out = self.conv(x)
        if self.use_bn:
            out = self.bn(out)
        if self.activate:
            out = self.activ(out)
        return out

def conv1x1_block(in_channels,
                  out_channels,
                  stride=1,
                  padding=0,
                  groups=1,
                  bias=False,
                  use_bn=True,
                  bn_eps=1e-5,
                  activation=(lambda: nn.ReLU(inplace=True))):
    """
    Build a ConvBlock whose kernel size is fixed to 1; every other argument
    is forwarded unchanged.
    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 0
        Padding value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    use_bn : bool, default True
        Whether to use BatchNorm layer.
    bn_eps : float, default 1e-5
        Small float added to variance in Batch norm.
    activation : function or str or None, default a factory returning nn.ReLU(inplace=True)
        Activation function or name of activation function.
    Returns
    -------
    ConvBlock
        The configured 1x1 convolution block.
    """
    pointwise = ConvBlock(
        in_channels,
        out_channels,
        1,
        stride,
        padding,
        groups=groups,
        bias=bias,
        use_bn=use_bn,
        bn_eps=bn_eps,
        activation=activation)
    return pointwise

In [6]:
class SqnxtUnit(nn.Module):
    """
    SqueezeNext unit.

    The main branch is 1x1 (strided) -> 1x1 -> 1x3 -> 3x1 -> 1x1 convolution
    blocks; its output is added to a shortcut branch and passed through ReLU.
    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int
        Strides of the convolution.
    expand : bool, default False
        When True, the shortcut is passed through an extra 1x1 convolution
        block (`expandConv`) mapping `in_channels` to `out_channels`. Use it
        when the unit widens the feature map without otherwise resizing the
        shortcut (stride != 2 and in_channels <= out_channels).
    Raises
    ------
    ValueError
        If `expand` is combined with a configuration whose shortcut is
        already resized by `identity_conv` (stride == 2 or
        in_channels > out_channels).
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride, expand = False):
        super(SqnxtUnit, self).__init__()
        if stride == 2:
            reduction_den = 1
            self.resize_identity = True
        elif in_channels > out_channels:
            reduction_den = 4
            self.resize_identity = True
        else:
            reduction_den = 2
            self.resize_identity = False

        # identity_conv already emits out_channels; feeding that into
        # expandConv (which expects in_channels and applies the stride again)
        # can never match the main branch, so fail at construction instead of
        # with an opaque shape error inside forward().
        if expand and self.resize_identity:
            raise ValueError(
                "expand=True cannot be combined with a resized shortcut "
                "(stride == 2 or in_channels > out_channels)")
        self.expand = expand

        mid_channels = in_channels // reduction_den
        squeeze_channels = in_channels // (2 * reduction_den)

        self.conv1 = conv1x1_block(
            in_channels=in_channels,
            out_channels=mid_channels,
            stride=stride,
            bias=True)
        self.conv2 = conv1x1_block(
            in_channels=mid_channels,
            out_channels=squeeze_channels,
            bias=True)
        self.conv3 = ConvBlock(
            in_channels=squeeze_channels,
            out_channels=mid_channels,
            kernel_size=(1, 3),
            stride=1,
            padding=(0, 1),
            bias=True)
        self.conv4 = ConvBlock(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=(3, 1),
            stride=1,
            padding=(1, 0),
            bias=True)
        self.conv5 = conv1x1_block(
            in_channels=mid_channels,
            out_channels=out_channels,
            bias=True)

        if self.resize_identity:
            self.identity_conv = conv1x1_block(
                in_channels=in_channels,
                out_channels=out_channels,
                stride=stride,
                bias=True)
        if self.expand:
            # Project to out_channels rather than a hard-coded
            # in_channels * 4: the two agree whenever the old code produced
            # matching shapes (e.g. 64 -> 256), and other widening ratios now
            # work as well.
            self.expandConv = conv1x1_block(
                in_channels=in_channels,
                out_channels=out_channels,
                stride=stride,
                bias=True)
        self.activ = nn.ReLU(inplace=True)

    def forward(self, x):
        if self.resize_identity:
            identity = self.identity_conv(x)
        else:
            identity = x

        if self.expand:
            identity = self.expandConv(identity)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x + identity
        x = self.activ(x)
        return x

In [7]:
def _swap_in_sqnxt_units(net):
    """
    Overwrite the indexed blocks of `net.layer1` .. `net.layer4` with
    SqnxtUnit instances, stage by stage and in ascending index order.
    """
    # (stage, (in_channels, out_channels, stride, expand) of unit 0,
    #  number of following units, each built as SqnxtUnit(out, out, 1))
    plan = (
        ("layer1", (64, 256, 1, True), 2),
        ("layer2", (256, 512, 2, False), 3),
        ("layer3", (512, 1024, 2, False), 22),
        ("layer4", (1024, 2048, 2, False), 2),
    )
    for stage_name, (c_in, c_out, first_stride, first_expand), n_rest in plan:
        stage = getattr(net, stage_name)
        stage[0] = SqnxtUnit(c_in, c_out, first_stride, expand=first_expand)
        for idx in range(1, n_rest + 1):
            stage[idx] = SqnxtUnit(c_out, c_out, 1)


_swap_in_sqnxt_units(model)

In [8]:
# Put the model in evaluation mode. As the last expression of the cell, the
# call's return value is displayed, which is the module tree shown in the output.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): SqnxtUnit(
      (conv1): ConvBlock(
        (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activ): ReLU(inplace=True)
      )
      (conv2): ConvBlock(
        (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
        (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activ): ReLU(inplace=True)
      )
      (conv3): ConvBlock(
        (conv): Conv2d(16, 32, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activ): ReLU(inplace=True)
      )
      (conv4

In [9]:
from prettytable import PrettyTable

def count_parameters(model):
    """
    Print a table with one row per trainable parameter tensor of `model`
    (name and element count), print the grand total, and return that total.

    Parameters whose `requires_grad` is False are left out of both the table
    and the total.
    """
    trainable = [
        (name, tensor.numel())
        for name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    table = PrettyTable(["Modules", "Parameters"])
    for name, n_elements in trainable:
        table.add_row([name, n_elements])
    total_params = sum(n_elements for _, n_elements in trainable)

    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

# Per-tensor table of trainable parameters (printed by the helper).
count_parameters(model)

# Total over all parameter tensors, regardless of requires_grad.
pytorch_total_params = sum([p.numel() for p in model.parameters()])
print("Total Params: ", pytorch_total_params)

+------------------------------------+------------+
|              Modules               | Parameters |
+------------------------------------+------------+
|            conv1.weight            |    1728    |
|             bn1.weight             |     64     |
|              bn1.bias              |     64     |
|     layer1.0.conv1.conv.weight     |    2048    |
|      layer1.0.conv1.conv.bias      |     32     |
|      layer1.0.conv1.bn.weight      |     32     |
|       layer1.0.conv1.bn.bias       |     32     |
|     layer1.0.conv2.conv.weight     |    512     |
|      layer1.0.conv2.conv.bias      |     16     |
|      layer1.0.conv2.bn.weight      |     16     |
|       layer1.0.conv2.bn.bias       |     16     |
|     layer1.0.conv3.conv.weight     |    1536    |
|      layer1.0.conv3.conv.bias      |     32     |
|      layer1.0.conv3.bn.weight      |     32     |
|       layer1.0.conv3.bn.bias       |     32     |
|     layer1.0.conv4.conv.weight     |    3072    |
|      layer

In [11]:
pip install -U fvcore

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 KB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting iopath>=0.1.7
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 KB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker
  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: fvcore, iopath
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61431 sha256=4f574d32c618703e9236dc49eb99e800

In [12]:
from fvcore.nn import FlopCountAnalysis
from fvcore.nn import flop_count_table

# One random 3x32x32 image is enough: the FLOP count is traced from the tensor
# shapes. The name avoids rebinding the builtin `input()` for the rest of the
# notebook, which the previous variable name `input` did.
sample_input = torch.rand(1, 3, 32, 32)

flops = FlopCountAnalysis(model, sample_input)
print(flop_count_table(flops))
print("Total number of FLOPS: ", flops.total())

| module                    | #parameters or shape   | #flops     |
|:--------------------------|:-----------------------|:-----------|
| model                     | 86.939M                | 4.933G     |
|  conv1                    |  1.728K                |  1.769M    |
|   conv1.weight            |   (64, 3, 3, 3)        |            |
|  bn1                      |  0.128K                |  0.131M    |
|   bn1.weight              |   (64,)                |            |
|   bn1.bias                |   (64,)                |            |
|  layer1                   |  0.333M                |  0.339G    |
|   layer1.0                |   33.616K              |   33.784M  |
|    layer1.0.conv1         |    2.144K              |    2.163M  |
|    layer1.0.conv2         |    0.56K               |    0.557M  |
|    layer1.0.conv3         |    1.632K              |    1.638M  |
|    layer1.0.conv4         |    3.168K              |    3.211M  |
|    layer1.0.conv5         |    8.96K          

In [11]:
save_dir = pathlib.Path('/content/drive/MyDrive/SqueezeNext')

# exist_ok=True makes the separate isdir() check unnecessary and removes the
# window between checking for the directory and creating it.
os.makedirs(save_dir, exist_ok=True)

model_name = 'resnet101_squeezeNext.h5'
# Only the state_dict is written; loading it back requires rebuilding the same
# architecture first.
torch.save(model.state_dict(), os.path.join(save_dir, model_name))

# Conclusion
With about 86.9M parameters and 4.93G FLOPs for a single 3×32×32 input (see the FLOP table above), the model is too large to be used for further research.