In [4]:
import torch
from torchvision import models

class FeatureExtractor:
    """Extract intermediate feature maps from a pretrained VGG-19.

    The torchvision VGG-19 is loaded with the ``IMAGENET1K_V1`` weights and
    switched to eval mode. ``max_pooling_indices`` lists positions inside
    ``model.features``; extraction for a given stage stops right before the
    layer at that position, so that layer itself is never applied.
    """

    def __init__(self):
        self.model = models.vgg19(weights='IMAGENET1K_V1')
        # Stop positions inside ``model.features``, one per stage.
        # NOTE(review): index 9 is a MaxPool2d in the printed architecture;
        # the remaining entries are presumably the later pooling layers.
        # The first pooling layer (index 4) is not a stop position — confirm
        # that skipping it is intended.
        self.max_pooling_indices = [9, 18, 27, 36]
        self.model.eval()

    @torch.no_grad()
    def extract_feature_vector(self, x, stage=0, verbose=False):
        """Run ``x`` through the feature layers that precede the stage's stop position.

        Args:
            x: Input handed to the first layer of ``model.features``.
            stage: Index into ``max_pooling_indices`` selecting where to stop.
                Negative values follow normal Python list indexing.
            verbose: When true, print the type of every layer that is
                actually applied to ``x``.

        Returns:
            The output of the last layer applied before the stop position.
            If the stop position lies at or past the end of the layer stack,
            the output of the whole stack is returned.

        Raises:
            IndexError: If ``stage`` does not select an existing stage.
        """
        n_stages = len(self.max_pooling_indices)
        if not -n_stages <= stage < n_stages:
            raise IndexError(
                f'stage must be in [{-n_stages}, {n_stages - 1}], got {stage}'
            )
        stop_index = self.max_pooling_indices[stage]

        for i, feature in enumerate(self.model.features):
            if i == stop_index:
                return x
            # Log only after the stop check so the layer at the stop position,
            # which is never applied, is not reported as passed through.
            if verbose:
                print(f'Passing through layer {type(feature)}')
            x = feature(x)
        # Stop position never reached: return the result explicitly instead of
        # falling off the end of the function with an implicit None.
        return x

In [20]:
# numpy is used below; import it here so the cell runs on a fresh kernel
# instead of relying on an import made in a later cell.
import numpy as np
from PIL import Image
from torchvision import transforms

# Preprocessing applied to every image before it reaches the backbone:
# convert to a float tensor, then normalize each channel with the given
# mean / std (presumably the ImageNet statistics matching the pretrained
# weights — confirm against the torchvision weights documentation).
backbone_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# All-black 255x255 RGB test image.
image = Image.fromarray(np.zeros((255, 255, 3), dtype=np.uint8))
backbone_transforms(image)

tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         ...,
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],

        [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         ...,
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],

        [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
         [-1.8044, -1.8044, -1.8044,  ..., -1

In [None]:
# The full normalized tensor is already displayed by the previous cell;
# show only its shape here instead of dumping every value a second time.
backbone_transforms(image).shape

In [6]:
# Build the extractor once; its constructor loads the pretrained VGG-19
# and puts it in eval mode.
fe = FeatureExtractor()

In [11]:
# `image` is a PIL image at this point in the notebook, which the
# convolutional layers cannot consume. Run it through the preprocessing
# pipeline and add a leading batch dimension before extracting features.
fe.extract_feature_vector(
    backbone_transforms(image).unsqueeze(0),
    stage=3,
    verbose=True,
).shape

Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.activation.ReLU'>
Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.activation.ReLU'>
Passing through layer <class 'torch.nn.modules.pooling.MaxPool2d'>
Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.activation.ReLU'>
Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.activation.ReLU'>
Passing through layer <class 'torch.nn.modules.pooling.MaxPool2d'>
Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.activation.ReLU'>
Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.activation.ReLU'>
Passing through layer <class 'torch.nn.modules.conv.Conv2d'>
Passing through layer <class 'torch.nn.modules.ac

torch.Size([1, 512, 15, 15])

In [3]:
# Inspect the architecture via the model already held by the extractor
# rather than constructing a second pretrained VGG-19 just to print it.
fe.model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [14]:
fvs = []
labels = []
for _ in range(10):
    # Placeholder input: an all-zero batch holding one 3x255x255 image.
    # A separate name is used so the PIL `image` defined earlier is not
    # overwritten by this loop.
    placeholder_image = torch.zeros((1, 3, 255, 255))
    # NOTE(review): `label` was never defined anywhere in the notebook, so
    # this cell raised NameError on a fresh kernel. 0 is a stand-in value —
    # TODO replace with the real label once actual images are loaded.
    label = 0
    fv = fe.extract_feature_vector(placeholder_image, stage=3, verbose=False).flatten().numpy()
    fvs.append(fv)
    labels.append(label)

In [16]:
import numpy as np

# Stack the collected per-image feature vectors into a single array and
# report its dimensions (number of images, features per image).
np.asarray(fvs).shape

(10, 115200)