In [2]:
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transform


In [3]:
class EncoderCNN(nn.Module):
    """Image encoder: frozen ImageNet-pretrained ResNet-50 plus a trainable projection.

    The ResNet-50 classification head (its final ``fc`` layer) is dropped and
    every backbone parameter has ``requires_grad`` set to ``False``, so the
    only trainable parameters belong to ``self.embed``, the linear layer that
    maps the pooled backbone features to ``embed_size`` values per image.

    Args:
        embed_size: Number of output features of the projection layer, i.e.
            the length of the feature vector produced for each image.

    NOTE(review): freezing parameters does not stop the backbone's BatchNorm
    layers from updating their running statistics while the module is in
    training mode. Put the encoder in ``eval()`` mode if the backbone must
    stay completely fixed -- confirm against the training loop.
    """

    def __init__(self, embed_size):
        super().__init__()
        resnet = self._load_pretrained_resnet50()

        # Freeze the backbone: no gradients are computed for these weights
        # during backpropagation.
        for param in resnet.parameters():
            param.requires_grad_(False)

        # Keep every top-level layer except the last one, the ImageNet
        # classifier Linear(in_features=2048, out_features=1000).
        modules = list(resnet.children())[:-1]
        self.resnet = nn.Sequential(*modules)
        # Project the backbone features (the width the dropped classifier
        # used to consume) down to the requested embedding size.
        self.embed = nn.Linear(resnet.fc.in_features, embed_size)

    @staticmethod
    def _load_pretrained_resnet50():
        """Return a ResNet-50 initialised with the ImageNet (V1) weights.

        Uses the ``weights=`` API when the installed torchvision provides
        ``ResNet50_Weights`` and falls back to the legacy ``pretrained=True``
        flag otherwise; both select the same checkpoint.
        """
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is None:
            # Older torchvision releases only understand the boolean flag.
            return models.resnet50(pretrained=True)
        return models.resnet50(weights=weights_enum.IMAGENET1K_V1)

    def forward(self, images):
        """Encode a batch of images into embedding vectors.

        Args:
            images: Batch of image tensors accepted by the ResNet-50 backbone
                (presumably ``(batch, 3, H, W)`` -- confirm against caller).

        Returns:
            Tensor of shape ``(batch, embed_size)``.
        """
        features = self.resnet(images)
        # Collapse everything after the batch dimension into one axis.
        features = features.flatten(start_dim=1)
        return self.embed(features)

In [4]:
# Load a standalone pretrained ResNet-50 so its layer structure can be inspected in the cells below.
resnet = models.resnet50(pretrained=True)



In [5]:
# Collect the top-level child modules of the ResNet-50 and show how many there are.
children = list(resnet.children())
len(children)

10

In [19]:
# Inspect the first top-level child module of the network.
children[0]

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [6]:
# Display the full list of top-level child modules (verbose output).
children

[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False),
 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
 Sequential(
   (0): Bottleneck(
     (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
     (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (relu): ReLU(inplace=True)
     (downsample): Sequential(
       (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
       (1): BatchNorm2d(256, eps=1e-05, momentum