In [None]:
from collections import OrderedDict

import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision
from google.colab.patches import cv2_imshow
from torchsummary import summary

In [None]:
class ImageFeatureExtractor(nn.Module):
    """
    Truncated detector backbone used as an image feature extractor.

    The direct children of ``submodule.backbone.body`` are taken in
    registration order, everything up to and including the child at index
    ``layer`` is kept, and the kept layers are chained into one
    ``nn.Sequential`` stored in ``feature_extrac_net``.
    """
    def __init__(self, submodule, layer, device):
        """
        Build the truncated network.

        Args:
            submodule: module exposing ``backbone.body``; the direct children
                of that body are the candidate layers.
            layer: integer index into the list of child-layer names
                (``layer_list``). Negative values count from the end.
            device: device the truncated network is moved to.

        Raises:
            IndexError: if ``layer`` does not address an existing child layer.
        """
        super(ImageFeatureExtractor, self).__init__()
        self.layer = layer

        named_layers = list(submodule.backbone.body.named_children())
        self.layer_list = [name for name, _ in named_layers]
        print(self.layer_list)

        try:
            output_layer = self.layer_list[layer]
        except IndexError:
            raise IndexError(
                "layer index {} is out of range; available layers: {}".format(
                    layer, self.layer_list)
            ) from None

        # Position of the selected layer; this also resolves negative indices.
        last_position = self.layer_list.index(output_layer)
        self.children_list = [module for _, module in named_layers[:last_position + 1]]
        self.feature_extrac_net = nn.Sequential(*self.children_list).to(device)

        # The full detector is deliberately not kept as a child module; the
        # attribute is still exposed (as None) for code that inspects it.
        self.pretrain_model = None
        print(output_layer)

    def forward(self, image):
        """Run ``image`` through the truncated network and return its output."""
        return self.feature_extrac_net(image)


In [None]:
def extract_image_deep_feature(image, image_annotation, deep_net='resnet50_fpn', layer_no=7):
    """
    Extract a deep feature map for one image from a pretrained detector backbone.

    Args:
        image: numpy array laid out as (height, width, channels). It is cast
            to float32 before being fed to the network.
        image_annotation: currently unused; kept so existing callers keep working.
        deep_net: name of the backbone to use. Only ``'resnet50_fpn'`` is supported.
        layer_no: index of the last backbone-body layer to run, forwarded to
            ``ImageFeatureExtractor``.

    Returns:
        The feature tensor produced by the truncated backbone, computed
        without gradient tracking. With the defaults and a 224 x 224 x 3
        input the notebook records a size of ``[1, 2048, 7, 7]``.

    Raises:
        ValueError: if ``deep_net`` names an unsupported backbone.

    NOTE(review): the pixels are passed to the backbone body exactly as
    given; the detector's own input preprocessing is bypassed here. Confirm
    whether channel order / value scaling should be handled by the caller.
    """
    # Fail early with a clear message; otherwise `model` would be unbound below.
    if deep_net != 'resnet50_fpn':
        raise ValueError(
            "unsupported deep_net {!r}; supported: 'resnet50_fpn'".format(deep_net))

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # (H, W, C) -> (C, H, W), then add a leading batch axis -> (1, C, H, W).
    channels_first = np.transpose(image, (2, 0, 1))
    batch = np.ascontiguousarray(channels_first[np.newaxis], dtype=np.float32)
    image_tensor = torch.from_numpy(batch).to(device)

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.to(device)
    model.eval()

    feature_net = ImageFeatureExtractor(model, layer_no, device)
    # summary() prints the table itself; wrapping it in print() would also
    # print its return value. Use the real input shape, not a fixed 224x224.
    summary(feature_net, input_size=tuple(image_tensor.shape[1:]))

    # Inference only: the model is local to this call, so gradients are never used.
    with torch.no_grad():
        image_feature = feature_net(image_tensor)
    return image_feature

In [None]:
# Load the test image and extract its backbone features.
IMAGE_PATH = 'flowers_resized.jpg'

img2_raw = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img2_raw is None:
    raise FileNotFoundError("could not read image file: {}".format(IMAGE_PATH))

height, width, channels = img2_raw.shape
print(height, width, channels)

# NOTE(review): OpenCV loads pixels in BGR order with values in 0-255 and no
# conversion is applied before feature extraction - confirm this is intended.
img2 = img2_raw.astype('float32')
im2_feature = extract_image_deep_feature(img2, None)

224 224 3
['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4']
['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4']
layer4
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
 FrozenBatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
 FrozenBatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
 FrozenBatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,38

In [None]:
# Show the dimensions of the extracted feature map.
im2_feature.shape

torch.Size([1, 2048, 7, 7])

In [None]:
def roi_align(feature_map, boxes, image_size=(224, 224)):
    """
    Pool one feature vector per box from a single feature map with RoIAlign.

    Args:
        feature_map: feature tensor for one image, registered with the pooler
            as its only level (named ``"0"``).
        boxes: tensor with one box per row, passed to the pooler as the boxes
            of a single image. Presumably (x1, y1, x2, y2) in input-image
            pixels - verify against the caller.
        image_size: (height, width) of the image the boxes refer to.
            Defaults to the 224 x 224 input used in this notebook.

    Returns:
        The pooler output for the given boxes, pooled to a 1 x 1 spatial size
        per box and channel.
    """
    # Boxes must share the feature map's device and dtype, otherwise pooling
    # fails as soon as the features live on the GPU.
    boxes = boxes.to(device=feature_map.device, dtype=feature_map.dtype)

    features = OrderedDict([("0", feature_map)])
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"], output_size=(1, 1), sampling_ratio=1)
    return pooler(features, [boxes], [tuple(image_size)])

In [None]:
from collections import OrderedDict

In [None]:
# Six random boxes as (x1, y1, x2, y2): the top-left corner and the
# width/height are each drawn from [0, 112), so x2/y2 stay below 224.
# A dedicated seeded generator makes the cell reproducible without touching
# the global RNG state.
box_rng = torch.Generator().manual_seed(0)
boxes = torch.rand(6, 4, generator=box_rng) * 112
boxes[:, 2:] += boxes[:, :2]
# Keep the boxes on the same device as the features they are pooled from.
boxes = boxes.to(im2_feature.device)
output = roi_align(im2_feature, boxes)
print(boxes)

tensor([[ 55.6013,  30.7511,  66.4535,  54.8166],
        [ 13.1412,  21.1730,  84.7416,  46.5829],
        [ 63.0367,  23.1422, 164.8728,  55.3115],
        [ 58.1424, 102.5370, 148.1151, 118.3709],
        [ 35.5754, 100.5836, 133.4499, 111.4397],
        [ 56.2560,  79.0254, 128.7704,  82.2473]])


In [None]:
# Summarise the pooled features instead of dumping the whole tensor: its
# shape and how many entries are non-zero, which is what this cell checks.
print(output.shape)
print(torch.count_nonzero(output).item(), "of", output.nelement(), "entries are non-zero")

tensor([[[[0.]],

         [[0.]],

         [[0.]],

         ...,

         [[0.]],

         [[0.]],

         [[0.]]],


        [[[0.]],

         [[0.]],

         [[0.]],

         ...,

         [[0.]],

         [[0.]],

         [[0.]]],


        [[[0.]],

         [[0.]],

         [[0.]],

         ...,

         [[0.]],

         [[0.]],

         [[0.]]],


        [[[0.]],

         [[0.]],

         [[0.]],

         ...,

         [[0.]],

         [[0.]],

         [[0.]]],


        [[[0.]],

         [[0.]],

         [[0.]],

         ...,

         [[0.]],

         [[0.]],

         [[0.]]],


        [[[0.]],

         [[0.]],

         [[0.]],

         ...,

         [[0.]],

         [[0.]],

         [[0.]]]], grad_fn=<ROIAlignFunction>>)
