In [1]:
#!pip install torchsummary
# !pip install pretrainedmodels

In [2]:
import numpy as np
import torch.nn as nn
import torch.functional as F
from torchvision import models
import pretrainedmodels

In [3]:
model = pretrainedmodels.__dict__["resnet34"](pretrained=None)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

^ by looking at the last layer above here, we change it to 3 separate layers based on number of classes in each classfication task.

This is how the last layers of ResNet34 look like:
    
    (avgpool): AvgPool2d(kernel_size=7, stride=7, padding=0)
    (fc): None
    (last_linear): Linear(in_features=512, out_features=1000, bias=True)
        
So we will add 512\*num_of_possible_classes for each head. That is 512\*2


In [4]:
class ResNet34(nn.Module):
    def __init__(self, pretrained):
        super(ResNet34, self).__init__()
        if pretrained is True:
            self.model = pretrainedmodels.__dict__["resnet34"](pretrained="imagenet")
        else:
            self.model = pretrainedmodels.__dict__["resnet34"](pretrained=None)
        
        self.l0 = nn.Linear(512, 2)  # nucleoplasmn
        self.l1 = nn.Linear(512, 2)  # nuclear_membrane
        self.l2 = nn.Linear(512, 2)  # nucleoli
        self.l3 = nn.Linear(512, 2)  # nucleoli_fibrillar_center
        self.l4 = nn.Linear(512, 2)  # nuclear_speckles
        self.l5 = nn.Linear(512, 2)  # nuclear_bodies
        self.l6 = nn.Linear(512, 2)  # endoplasmic_reticulum
        self.l7 = nn.Linear(512, 2)  # golgi_apparatus
        self.l8 = nn.Linear(512, 2)  # peroxisomes
        self.l9 = nn.Linear(512, 2)  # endosomes
        self.l10 = nn.Linear(512, 2)  # lysosomes
        self.l11 = nn.Linear(512, 2)  # intermediate_filaments
        self.l12 = nn.Linear(512, 2)  # actin_filaments
        self.l13 = nn.Linear(512, 2)  # focal_adhesion_sites
        self.l14 = nn.Linear(512, 2)  # microtubules
        self.l15 = nn.Linear(512, 2)  # microtubule_ends
        self.l16 = nn.Linear(512, 2)  # cytokinetic_bridge
        self.l17 = nn.Linear(512, 2)  # mitotic_spindle
        self.l18 = nn.Linear(512, 2)  # microtubule_organizing_center
        self.l19 = nn.Linear(512, 2)  # centrosome
        self.l20 = nn.Linear(512, 2)  # lipid_droplets
        self.l21 = nn.Linear(512, 2)  # plasma_membrane
        self.l22 = nn.Linear(512, 2)  # cell_junctions
        self.l23 = nn.Linear(512, 2)  # mitochondria
        self.l24 = nn.Linear(512, 2)  # aggresome
        self.l25 = nn.Linear(512, 2)  # cytosol
        self.l26 = nn.Linear(512, 2)  # cytoplasmic_bodies
        self.l27 = nn.Linear(512, 2)  # rods_rings
        
    def forward(self, x):
        bs, _, _, _ = x.shape
        x = self.model.features(x)
        # Adaptive pooling supports all image sizes
        x = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
        l0  = self.l0(x)
        l1  = self.l1(x)
        l2  = self.l2(x)
        l3  = self.l3(x)
        l4  = self.l4(x)
        l5  = self.l5(x)
        l6  = self.l6(x)
        l7  = self.l7(x)
        l8  = self.l8(x)
        l9  = self.l9(x)
        l10  = self.l10(x)
        l11  = self.l11(x)
        l12  = self.l12(x)
        l13  = self.l13(x)
        l14  = self.l14(x)
        l15  = self.l15(x)
        l16  = self.l16(x)
        l17  = self.l17(x)
        l18  = self.l18(x)
        l19  = self.l19(x)
        l20  = self.l20(x)
        l21  = self.l21(x)
        l22  = self.l22(x)
        l23  = self.l23(x)
        l24  = self.l24(x)
        l25  = self.l25(x)
        l26  = self.l26(x)
        l27  = self.l27(x)

        return l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, l25, l26, l27

In [5]:
model = ResNet34(pretrained=True)
model

ResNet34(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=

^ This looks good.

__Now let's create next customer model using ResNet50.

In [6]:
model = pretrainedmodels.__dict__["resnet50"](pretrained=None)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

This is how the last layer of the original ResNet50 looks like:
    
    (avgpool): AvgPool2d(kernel_size=7, stride=7, padding=0)
    (fc): None
    (last_linear): Linear(in_features=2048, out_features=1000, bias=True)

So we will add 2048\*num_of_possible_classes for each head. That is 2048\*2

In [7]:
class ResNet50(nn.Module):
    def __init__(self, pretrained):
        super(ResNet50, self).__init__()
        if pretrained is True:
            self.model = pretrainedmodels.__dict__["resnet50"](pretrained="imagenet")
        else:
            self.model = pretrainedmodels.__dict__["resnet50"](pretrained=None)

        self.l0 = nn.Linear(2048, 2)  # nucleoplasmn
        self.l1 = nn.Linear(2048, 2)  # nuclear_membrane
        self.l2 = nn.Linear(2048, 2)  # nucleoli
        self.l3 = nn.Linear(2048, 2)  # nucleoli_fibrillar_center
        self.l4 = nn.Linear(2048, 2)  # nuclear_speckles
        self.l5 = nn.Linear(2048, 2)  # nuclear_bodies
        self.l6 = nn.Linear(2048, 2)  # endoplasmic_reticulum
        self.l7 = nn.Linear(2048, 2)  # golgi_apparatus
        self.l8 = nn.Linear(2048, 2)  # peroxisomes
        self.l9 = nn.Linear(2048, 2)  # endosomes
        self.l10 = nn.Linear(2048, 2)  # lysosomes
        self.l11 = nn.Linear(2048, 2)  # intermediate_filaments
        self.l12 = nn.Linear(2048, 2)  # actin_filaments
        self.l13 = nn.Linear(2048, 2)  # focal_adhesion_sites
        self.l14 = nn.Linear(2048, 2)  # microtubules
        self.l15 = nn.Linear(2048, 2)  # microtubule_ends
        self.l16 = nn.Linear(2048, 2)  # cytokinetic_bridge
        self.l17 = nn.Linear(2048, 2)  # mitotic_spindle
        self.l18 = nn.Linear(2048, 2)  # microtubule_organizing_center
        self.l19 = nn.Linear(2048, 2)  # centrosome
        self.l20 = nn.Linear(2048, 2)  # lipid_droplets
        self.l21 = nn.Linear(2048, 2)  # plasma_membrane
        self.l22 = nn.Linear(2048, 2)  # cell_junctions
        self.l23 = nn.Linear(2048, 2)  # mitochondria
        self.l24 = nn.Linear(2048, 2)  # aggresome
        self.l25 = nn.Linear(2048, 2)  # cytosol
        self.l26 = nn.Linear(2048, 2)  # cytoplasmic_bodies
        self.l27 = nn.Linear(2048, 2)  # rods_rings

    def forward(self, x):
        bs, _, _, _ = x.shape
        x = self.model.features(x)
        print(x.shape) # The dim is: bs * 2048 * 16 * 16 for images of size 3*512*512
        # Adaptive pooling supports all image sizes
        x = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
        print(x.shape) # The dim is: bs * 2048
        l0  = self.l0(x)
        l1  = self.l1(x)
        l2  = self.l2(x)
        l3  = self.l3(x)
        l4  = self.l4(x)
        l5  = self.l5(x)
        l6  = self.l6(x)
        l7  = self.l7(x)
        l8  = self.l8(x)
        l9  = self.l9(x)
        l10  = self.l10(x)
        l11  = self.l11(x)
        l12  = self.l12(x)
        l13  = self.l13(x)
        l14  = self.l14(x)
        l15  = self.l15(x)
        l16  = self.l16(x)
        l17  = self.l17(x)
        l18  = self.l18(x)
        l19  = self.l19(x)
        l20  = self.l20(x)
        l21  = self.l21(x)
        l22  = self.l22(x)
        l23  = self.l23(x)
        l24  = self.l24(x)
        l25  = self.l25(x)
        l26  = self.l26(x)
        l27  = self.l27(x)

        return l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, l25, l26, l27

In [8]:
model = ResNet50(pretrained=True)
model

ResNet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (downsample): Sequential(
          (0): Conv2d

^ This looks good.

Now let's look into model parametrs.

In [9]:
for params in model.parameters():
    print(dir(params))
    break

['__abs__', '__add__', '__and__', '__array__', '__array_priority__', '__array_wrap__', '__bool__', '__class__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__idiv__', '__ilshift__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rdiv__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rfloordiv__', '__rmul__', '__rpow__', '__rshift__', '__rsub__', '__rtruediv__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv

^ We can see `requires_grad` here which would help us freeze or unfreeze the training layers.

In [10]:
def freeze(model: nn.Module):
    """Freeze all model parameters."""
    for param in model.parameters():
        param.requires_grad = False


def unfreeze(model: nn.Module):
    """Unfreeze all model parameters."""
    for param in model.parameters():
        param.requires_grad = True

__Now let's look into how can we apply differntial learning rate__

We can pass options as keyword arguments. They will be used as defaults, in the groups that didn’t override them. This is useful when you only want to vary a single option, while keeping all others consistent between parameter groups. - [PyTorch Optim](https://pytorch.org/docs/stable/optim.html)

In [11]:
# Let's get the name of each trainable layer 
for name, param in model.named_parameters():
    # if requires grad is True then that layer is available for training
    if param.requires_grad:
        print(name)
        #print name, param.data

model.conv1.weight
model.bn1.weight
model.bn1.bias
model.layer1.0.conv1.weight
model.layer1.0.bn1.weight
model.layer1.0.bn1.bias
model.layer1.0.conv2.weight
model.layer1.0.bn2.weight
model.layer1.0.bn2.bias
model.layer1.0.conv3.weight
model.layer1.0.bn3.weight
model.layer1.0.bn3.bias
model.layer1.0.downsample.0.weight
model.layer1.0.downsample.1.weight
model.layer1.0.downsample.1.bias
model.layer1.1.conv1.weight
model.layer1.1.bn1.weight
model.layer1.1.bn1.bias
model.layer1.1.conv2.weight
model.layer1.1.bn2.weight
model.layer1.1.bn2.bias
model.layer1.1.conv3.weight
model.layer1.1.bn3.weight
model.layer1.1.bn3.bias
model.layer1.2.conv1.weight
model.layer1.2.bn1.weight
model.layer1.2.bn1.bias
model.layer1.2.conv2.weight
model.layer1.2.bn2.weight
model.layer1.2.bn2.bias
model.layer1.2.conv3.weight
model.layer1.2.bn3.weight
model.layer1.2.bn3.bias
model.layer2.0.conv1.weight
model.layer2.0.bn1.weight
model.layer2.0.bn1.bias
model.layer2.0.conv2.weight
model.layer2.0.bn2.weight
model.layer2

In [12]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(dir(param))
        #print(param.name)
        break

['__abs__', '__add__', '__and__', '__array__', '__array_priority__', '__array_wrap__', '__bool__', '__class__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__idiv__', '__ilshift__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rdiv__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rfloordiv__', '__rmul__', '__rpow__', '__rshift__', '__rsub__', '__rtruediv__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv

In [13]:
# from torchsummary import summary

In [14]:
# This did not work
# summary(model,(3,224,224))

__Let's play with the default model__

In [15]:
model = models.resnet50(pretrained=True)

In [16]:
for name, param in model.named_parameters():
    if param.requires_grad:
        #print(dir(param))
        print(name)

conv1.weight
bn1.weight
bn1.bias
layer1.0.conv1.weight
layer1.0.bn1.weight
layer1.0.bn1.bias
layer1.0.conv2.weight
layer1.0.bn2.weight
layer1.0.bn2.bias
layer1.0.conv3.weight
layer1.0.bn3.weight
layer1.0.bn3.bias
layer1.0.downsample.0.weight
layer1.0.downsample.1.weight
layer1.0.downsample.1.bias
layer1.1.conv1.weight
layer1.1.bn1.weight
layer1.1.bn1.bias
layer1.1.conv2.weight
layer1.1.bn2.weight
layer1.1.bn2.bias
layer1.1.conv3.weight
layer1.1.bn3.weight
layer1.1.bn3.bias
layer1.2.conv1.weight
layer1.2.bn1.weight
layer1.2.bn1.bias
layer1.2.conv2.weight
layer1.2.bn2.weight
layer1.2.bn2.bias
layer1.2.conv3.weight
layer1.2.bn3.weight
layer1.2.bn3.bias
layer2.0.conv1.weight
layer2.0.bn1.weight
layer2.0.bn1.bias
layer2.0.conv2.weight
layer2.0.bn2.weight
layer2.0.bn2.bias
layer2.0.conv3.weight
layer2.0.bn3.weight
layer2.0.bn3.bias
layer2.0.downsample.0.weight
layer2.0.downsample.1.weight
layer2.0.downsample.1.bias
layer2.1.conv1.weight
layer2.1.bn1.weight
layer2.1.bn1.bias
layer2.1.conv2.we

In [17]:
lr = 0.001

As specified in torch.optim documentation we divide model layers into multiple buckets (in following case 3) and train each bucket with a different learning rate.

In [18]:
param_groups = [
    [model.conv1, model.bn1, model.layer1, model.layer2],
    [model.layer3, model.layer4],
    [model.fc.weight, model.fc.bias]
]

lrs = np.array([lr / 10, lr / 3, lr])

^ This is how we can create 3 separate buckets for 3 separate learning rates.

__Now let's let's try the same for our custom model__

In [19]:
model = ResNet50(pretrained=True)

In [20]:
for name, param in model.named_parameters():
    if param.requires_grad:
        #print(dir(param))
        print(name)

model.conv1.weight
model.bn1.weight
model.bn1.bias
model.layer1.0.conv1.weight
model.layer1.0.bn1.weight
model.layer1.0.bn1.bias
model.layer1.0.conv2.weight
model.layer1.0.bn2.weight
model.layer1.0.bn2.bias
model.layer1.0.conv3.weight
model.layer1.0.bn3.weight
model.layer1.0.bn3.bias
model.layer1.0.downsample.0.weight
model.layer1.0.downsample.1.weight
model.layer1.0.downsample.1.bias
model.layer1.1.conv1.weight
model.layer1.1.bn1.weight
model.layer1.1.bn1.bias
model.layer1.1.conv2.weight
model.layer1.1.bn2.weight
model.layer1.1.bn2.bias
model.layer1.1.conv3.weight
model.layer1.1.bn3.weight
model.layer1.1.bn3.bias
model.layer1.2.conv1.weight
model.layer1.2.bn1.weight
model.layer1.2.bn1.bias
model.layer1.2.conv2.weight
model.layer1.2.bn2.weight
model.layer1.2.bn2.bias
model.layer1.2.conv3.weight
model.layer1.2.bn3.weight
model.layer1.2.bn3.bias
model.layer2.0.conv1.weight
model.layer2.0.bn1.weight
model.layer2.0.bn1.bias
model.layer2.0.conv2.weight
model.layer2.0.bn2.weight
model.layer2

In [22]:
param_groups = [
    [model.model.conv1, model.model.bn1, model.model.layer1, model.model.layer2],
    [model.model.layer3, model.model.layer4],
    [model.model.last_linear]
]

lrs = np.array([lr / 10, lr / 3, lr])

__^ This is how our custom model can use the differential learning rate__

In [50]:
# Another example from torch.optim documentation
import torch.optim as optim

model = ResNet50(pretrained='imagenet').model

optim.SGD([
                {'params': model.base.parameters()},
                {'params': model.classifier.parameters(), 'lr': 1e-3}
            ], lr=1e-2, momentum=0.9)

AttributeError: 'ResNet' object has no attribute 'base'

In [28]:
# This would be equivalent to freezing all layers
# for param in model.features.parameters():
#     param.requires_grad = False

In [30]:
# Let's test if freeze/unfreeze it working
model = ResNet50(pretrained=True)

In [31]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

model.conv1.weight
model.bn1.weight
model.bn1.bias
model.layer1.0.conv1.weight
model.layer1.0.bn1.weight
model.layer1.0.bn1.bias
model.layer1.0.conv2.weight
model.layer1.0.bn2.weight
model.layer1.0.bn2.bias
model.layer1.0.conv3.weight
model.layer1.0.bn3.weight
model.layer1.0.bn3.bias
model.layer1.0.downsample.0.weight
model.layer1.0.downsample.1.weight
model.layer1.0.downsample.1.bias
model.layer1.1.conv1.weight
model.layer1.1.bn1.weight
model.layer1.1.bn1.bias
model.layer1.1.conv2.weight
model.layer1.1.bn2.weight
model.layer1.1.bn2.bias
model.layer1.1.conv3.weight
model.layer1.1.bn3.weight
model.layer1.1.bn3.bias
model.layer1.2.conv1.weight
model.layer1.2.bn1.weight
model.layer1.2.bn1.bias
model.layer1.2.conv2.weight
model.layer1.2.bn2.weight
model.layer1.2.bn2.bias
model.layer1.2.conv3.weight
model.layer1.2.bn3.weight
model.layer1.2.bn3.bias
model.layer2.0.conv1.weight
model.layer2.0.bn1.weight
model.layer2.0.bn1.bias
model.layer2.0.conv2.weight
model.layer2.0.bn2.weight
model.layer2

In [32]:
freeze(model)

In [33]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

^ as expected this prints nothing because all layer are frozen

In [36]:
unfreeze(model)

In [37]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

model.conv1.weight
model.bn1.weight
model.bn1.bias
model.layer1.0.conv1.weight
model.layer1.0.bn1.weight
model.layer1.0.bn1.bias
model.layer1.0.conv2.weight
model.layer1.0.bn2.weight
model.layer1.0.bn2.bias
model.layer1.0.conv3.weight
model.layer1.0.bn3.weight
model.layer1.0.bn3.bias
model.layer1.0.downsample.0.weight
model.layer1.0.downsample.1.weight
model.layer1.0.downsample.1.bias
model.layer1.1.conv1.weight
model.layer1.1.bn1.weight
model.layer1.1.bn1.bias
model.layer1.1.conv2.weight
model.layer1.1.bn2.weight
model.layer1.1.bn2.bias
model.layer1.1.conv3.weight
model.layer1.1.bn3.weight
model.layer1.1.bn3.bias
model.layer1.2.conv1.weight
model.layer1.2.bn1.weight
model.layer1.2.bn1.bias
model.layer1.2.conv2.weight
model.layer1.2.bn2.weight
model.layer1.2.bn2.bias
model.layer1.2.conv3.weight
model.layer1.2.bn3.weight
model.layer1.2.bn3.bias
model.layer2.0.conv1.weight
model.layer2.0.bn1.weight
model.layer2.0.bn1.bias
model.layer2.0.conv2.weight
model.layer2.0.bn2.weight
model.layer2

^ as expected this prints everything because all layer are unfrozen

Now let's write freeze_to in the next part

In [51]:
counter = 0

for param in model.parameters():
    counter += 1
    
print(counter)

counter = 0

for param in model.parameters():
    if param.requires_grad:
        counter += 1
    
print(counter)

161
161


In [40]:
model = ResNet50(pretrained=True).model
for name, param in model.named_parameters():
    print(name, param.size())

conv1.weight torch.Size([64, 3, 7, 7])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
layer1.0.conv1.weight torch.Size([64, 64, 1, 1])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight torch.Size([64])
layer1.0.bn2.bias torch.Size([64])
layer1.0.conv3.weight torch.Size([256, 64, 1, 1])
layer1.0.bn3.weight torch.Size([256])
layer1.0.bn3.bias torch.Size([256])
layer1.0.downsample.0.weight torch.Size([256, 64, 1, 1])
layer1.0.downsample.1.weight torch.Size([256])
layer1.0.downsample.1.bias torch.Size([256])
layer1.1.conv1.weight torch.Size([64, 256, 1, 1])
layer1.1.bn1.weight torch.Size([64])
layer1.1.bn1.bias torch.Size([64])
layer1.1.conv2.weight torch.Size([64, 64, 3, 3])
layer1.1.bn2.weight torch.Size([64])
layer1.1.bn2.bias torch.Size([64])
layer1.1.conv3.weight torch.Size([256, 64, 1, 1])
layer1.1.bn3.weight torch.Size([256])
layer1.1.bn3.bias torch.Size([256])
layer1.2.conv1.weight tor

In [42]:
from torchsummary import summary

In [45]:
summary(model.cuda(),(3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,