In [1]:
import torch
from torch import nn    

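`named_children()` yields only the model's direct (top-level) submodules, i.e. the names of its general logical groups/parts.
Here, `features` is the main part of the model (the convolutional backbone), followed by `avgpool`, which summarizes and compresses the feature maps, and finally the `classifier`.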

In [2]:
from torchvision.models import EfficientNet_B0_Weights, efficientnet_b0

# `pretrained=True` is deprecated since torchvision 0.13 and emits a warning;
# the supported way is to request a weights enum explicitly. IMAGENET1K_V1 is
# the checkpoint that `pretrained=True` used to load, so the model is unchanged.
efb0 = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)

# named_children() only walks the top level of the module tree.
for name, child in efb0.named_children():
    print(name.title())



Features
Avgpool
Classifier




In [14]:
# Look inside one entry of `features` (index 1) to see how a stage is nested.
mbconv_stage = efb0.features[1]
print(mbconv_stage)

Sequential(
  (0): MBConv(
    (block): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): SqueezeExcitation(
        (avgpool): AdaptiveAvgPool2d(output_size=1)
        (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
        (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
        (activation): SiLU(inplace=True)
        (scale_activation): Sigmoid()
      )
      (2): Conv2dNormActivation(
        (0): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (stochastic_depth): StochasticDepth(p=0.0, mode=row)
  )
)


In [3]:
# For every top-level child, list everything nested beneath it.
# named_modules() yields the child itself first (under an empty name) and then
# each descendant, which is why the first line per child has no suffix.
for top_name, top_module in efb0.named_children():
    for nested_name, nested_module in top_module.named_modules():
        print(top_name.title(), nested_name.title())

Features 
Features 0
Features 0.0
Features 0.1
Features 0.2
Features 1
Features 1.0
Features 1.0.Block
Features 1.0.Block.0
Features 1.0.Block.0.0
Features 1.0.Block.0.1
Features 1.0.Block.0.2
Features 1.0.Block.1
Features 1.0.Block.1.Avgpool
Features 1.0.Block.1.Fc1
Features 1.0.Block.1.Fc2
Features 1.0.Block.1.Activation
Features 1.0.Block.1.Scale_Activation
Features 1.0.Block.2
Features 1.0.Block.2.0
Features 1.0.Block.2.1
Features 1.0.Stochastic_Depth
Features 2
Features 2.0
Features 2.0.Block
Features 2.0.Block.0
Features 2.0.Block.0.0
Features 2.0.Block.0.1
Features 2.0.Block.0.2
Features 2.0.Block.1
Features 2.0.Block.1.0
Features 2.0.Block.1.1
Features 2.0.Block.1.2
Features 2.0.Block.2
Features 2.0.Block.2.Avgpool
Features 2.0.Block.2.Fc1
Features 2.0.Block.2.Fc2
Features 2.0.Block.2.Activation
Features 2.0.Block.2.Scale_Activation
Features 2.0.Block.3
Features 2.0.Block.3.0
Features 2.0.Block.3.1
Features 2.0.Stochastic_Depth
Features 2.1
Features 2.1.Block
Features 2.1.Block

`named_modules()` walks the whole module tree recursively: it yields the model itself, each of its children, their children (the grandchildren), and so on at every depth.

In [4]:
# Print the dotted path and class of every module in the tree.
# The root model comes first, under an empty name.
for module_path, submodule in efb0.named_modules():
    module_class = type(submodule)
    print(module_path.title(), '  ', module_class)


    <class 'torchvision.models.efficientnet.EfficientNet'>
Features    <class 'torch.nn.modules.container.Sequential'>
Features.0    <class 'torchvision.ops.misc.Conv2dNormActivation'>
Features.0.0    <class 'torch.nn.modules.conv.Conv2d'>
Features.0.1    <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
Features.0.2    <class 'torch.nn.modules.activation.SiLU'>
Features.1    <class 'torch.nn.modules.container.Sequential'>
Features.1.0    <class 'torchvision.models.efficientnet.MBConv'>
Features.1.0.Block    <class 'torch.nn.modules.container.Sequential'>
Features.1.0.Block.0    <class 'torchvision.ops.misc.Conv2dNormActivation'>
Features.1.0.Block.0.0    <class 'torch.nn.modules.conv.Conv2d'>
Features.1.0.Block.0.1    <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
Features.1.0.Block.0.2    <class 'torch.nn.modules.activation.SiLU'>
Features.1.0.Block.1    <class 'torchvision.ops.misc.SqueezeExcitation'>
Features.1.0.Block.1.Avgpool    <class 'torch.nn.modules.pooling.AdaptiveAvgPool2

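`named_parameters()` is similar to `named_modules()`, but instead of modules it yields the learnable tensors themselves (for example each layer's `weight` and `bias`), named by the path of the module that owns them.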

In [5]:
# Print the dotted path and type of every learnable parameter.
for param_path, param in efb0.named_parameters():
    param_class = type(param)
    print(param_path.title(), '  ', param_class)

Features.0.0.Weight    <class 'torch.nn.parameter.Parameter'>
Features.0.1.Weight    <class 'torch.nn.parameter.Parameter'>
Features.0.1.Bias    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.0.0.Weight    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.0.1.Weight    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.0.1.Bias    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.1.Fc1.Weight    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.1.Fc1.Bias    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.1.Fc2.Weight    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.1.Fc2.Bias    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.2.0.Weight    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.2.1.Weight    <class 'torch.nn.parameter.Parameter'>
Features.1.0.Block.2.1.Bias    <class 'torch.nn.parameter.Parameter'>
Features.2.0.Block.0.0.Weight    <class 'torch.nn.parameter.Parameter'>
Features.2.0.Block.0.1.W

In [6]:
# Print the dotted path and type of every registered buffer
# (state stored on the model that is not a Parameter).
for buffer_path, buffer in efb0.named_buffers():
    buffer_class = type(buffer)
    print(buffer_path.title(), '  ', buffer_class)

Features.0.1.Running_Mean    <class 'torch.Tensor'>
Features.0.1.Running_Var    <class 'torch.Tensor'>
Features.0.1.Num_Batches_Tracked    <class 'torch.Tensor'>
Features.1.0.Block.0.1.Running_Mean    <class 'torch.Tensor'>
Features.1.0.Block.0.1.Running_Var    <class 'torch.Tensor'>
Features.1.0.Block.0.1.Num_Batches_Tracked    <class 'torch.Tensor'>
Features.1.0.Block.2.1.Running_Mean    <class 'torch.Tensor'>
Features.1.0.Block.2.1.Running_Var    <class 'torch.Tensor'>
Features.1.0.Block.2.1.Num_Batches_Tracked    <class 'torch.Tensor'>
Features.2.0.Block.0.1.Running_Mean    <class 'torch.Tensor'>
Features.2.0.Block.0.1.Running_Var    <class 'torch.Tensor'>
Features.2.0.Block.0.1.Num_Batches_Tracked    <class 'torch.Tensor'>
Features.2.0.Block.1.1.Running_Mean    <class 'torch.Tensor'>
Features.2.0.Block.1.1.Running_Var    <class 'torch.Tensor'>
Features.2.0.Block.1.1.Num_Batches_Tracked    <class 'torch.Tensor'>
Features.2.0.Block.3.1.Running_Mean    <class 'torch.Tensor'>
Features

# Difference Between Adaptive Average Pooling and Average Pooling
Average Pooling and Adaptive Average Pooling are both operations used in convolutional neural networks (CNNs) for downsampling feature maps. However, they have key differences in their behavior and use cases.

Average Pooling
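Average Pooling reduces the spatial dimensions (height and width) of the input by sliding a fixed-size rectangular window over it and computing the average of each window. The windows are non-overlapping when the stride equals the kernel size, and overlap when the stride is smaller.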

Characteristics:
Kernel Size: You specify the size of the pooling window (kernel size).
Stride: You typically specify the stride, which determines the step size for sliding the pooling window. In PyTorch's `nn.AvgPool2d` it defaults to the kernel size if omitted.
Output Size: The output size depends on the input size, kernel size, and stride. The output size is not directly specified but is a result of the pooling operation.
Example:
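For example, if you have a 4x4 input and you apply 2x2 average pooling with a stride of 2, you will get a 2x2 output where each value is the average of the corresponding 2x2 block in the input. Note that the code cell below uses a stride of 1 instead, so the 2x2 windows overlap and the output is 3x3.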

In [9]:
# Average Pooling vs. Adaptive Average Pooling

# A single 4x4 feature map holding 1..16, shaped (batch=1, channels=1, 4, 4).
input_tensor = torch.arange(1, 17, dtype=torch.float32).reshape(1, 1, 4, 4)

# 2x2 window moved one pixel at a time: the windows overlap,
# so the 4x4 input produces a 3x3 output.
avg_pool = nn.AvgPool2d(2, stride=1)
output = avg_pool(input_tensor)
print(output)

tensor([[[[ 3.5000,  4.5000,  5.5000],
          [ 7.5000,  8.5000,  9.5000],
          [11.5000, 12.5000, 13.5000]]]])


Adaptive Average Pooling
Adaptive Average Pooling adjusts the pooling operation to ensure that the output has a specific size, regardless of the input size. This is useful when you want to have a fixed-size output, which is common before feeding the data into fully connected layers.

Characteristics:
Output Size: You specify the desired output size, and the pooling operation adjusts the kernel size and stride dynamically to ensure the output has this size.
Input Size: The input size can vary, but the output size is fixed as specified.
Example:
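If you have a 4x4 input and you want a 2x2 output, adaptive average pooling will adjust the kernel size and stride to achieve this. In this case the result is the same as 2x2 average pooling with a stride of 2: each output value is the average of one 2x2 block of the input.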

In [10]:
# Request a fixed 2x2 output; the layer derives the pooling windows from the
# input's spatial size instead of taking a kernel size and stride.
adaptive_avg_pool = nn.AdaptiveAvgPool2d(output_size=(2, 2))
output = adaptive_avg_pool(input_tensor)
print(output)

tensor([[[[ 3.5000,  5.5000],
          [11.5000, 13.5000]]]])
