In [10]:
import torch
import torch.nn as nn
from torchvision.models import vgg16, VGG16_Weights
import torchvision.transforms as transforms

from PIL import Image
import numpy as np

In [2]:
model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)

In [3]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x

In [4]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [5]:
#remove avgpool and classifier
model.avgpool = Identity()
model.classifier = Identity()

In [6]:
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [7]:
#  224x224
data = Image.open('152018.jpg')
data = transforms.Compose([transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.NEAREST), transforms.ToTensor()])(data)
data

tensor([[[0.8745, 0.6588, 0.5882,  ..., 0.9882, 0.9882, 0.9882],
         [0.8667, 0.6392, 0.6039,  ..., 0.9882, 0.9882, 0.9882],
         [0.8588, 0.6196, 0.6235,  ..., 0.9882, 0.9882, 0.9882],
         ...,
         [0.2627, 0.2118, 0.8039,  ..., 0.5882, 0.4667, 0.2000],
         [0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000],
         [0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000]],

        [[0.7804, 0.5725, 0.5216,  ..., 0.9882, 0.9882, 0.9882],
         [0.7725, 0.5529, 0.5373,  ..., 0.9882, 0.9882, 0.9882],
         [0.7647, 0.5294, 0.5569,  ..., 0.9882, 0.9882, 0.9882],
         ...,
         [0.2549, 0.2039, 0.7961,  ..., 0.5804, 0.4588, 0.1922],
         [0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000],
         [0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000]],

        [[0.4902, 0.3490, 0.4039,  ..., 0.9804, 0.9804, 0.9804],
         [0.4902, 0.3294, 0.4196,  ..., 0.9804, 0.9804, 0.9804],
         [0.4902, 0.3255, 0.4471,  ..., 0.9804, 0.9804, 0.

In [8]:
output = model(data)
output.shape

torch.Size([512, 49])

In [11]:
torch.flatten(output)

tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<ReshapeAliasBackward0>)