-
Notifications
You must be signed in to change notification settings - Fork 0
/
features_extraction.py
55 lines (44 loc) · 2.55 KB
/
features_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#------------------------------------------------------------------#
# extract features from an image using the pretrained model vgg16 #
#------------------------------------------------------------------#
#--------------------------------------------------------------------#
# essential libraries: PIL.Image, torch, torchvision #
# step by step: #
# - select device #
# - load an image (numpy ndarray) and convert it to a tensor. #
# - permute [height, width, channel] to [channel, height, width]. #
# - add batch_size to dim=0. #
# - apply preprocessing of vgg16 pretrained model to our input. #
# - load weights and pretrained model #
#--------------------------------------------------------------------#
from PIL import Image
import torchvision
import torch
from core.image_utils import image_from_url
# run on the GPU when cuda is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
# select url and load an image from this url
url = "http://farm1.staticflickr.com/133/330657765_4c19d29015_z.jpg"
# NOTE(review): image_from_url lives in core.image_utils (not visible here);
# presumably it returns a [height, width, channel] numpy ndarray -- confirm,
# including what it returns when the download fails.
img = image_from_url(url=url)
# convert image numpy ndarray to tensor
img_tensor = torch.from_numpy(img)
# permute from [height, width, channel] to [channel, height, width]
img_tensor = img_tensor.permute(dims=[2, 0, 1])  # torch.Size([3, 473, 640])
# add batch_size
img_tensor = img_tensor.unsqueeze(dim=0)  # torch.Size([1, 3, 473, 640])
img_tensor = img_tensor.to(device)
# load weights and pretrained model
weights_vgg16 = torchvision.models.VGG16_Weights.DEFAULT
model_vgg16 = torchvision.models.vgg16(weights=weights_vgg16)
# move params of model to the selected device
model_vgg16 = model_vgg16.to(device)
# switch to inference mode: classifier[0:6] contains Dropout layers, and a
# freshly built module is in training mode, so without eval() the extracted
# features would be randomly masked and differ from run to run
model_vgg16.eval()
# apply the preprocessing that ships with the pretrained weights
transforms_vgg16 = weights_vgg16.transforms()
img_tensor = transforms_vgg16(img_tensor)
# compute output
#---------------------------------------------------------------------------------------------------#
# the FC7 layer of VGG16 extracts a 4096-dimensional vector representation of the input image.      #
# This vector representation contains high-level information about the image, such as the objects   #
# that are present in the image, the relationships between the objects, and the overall scene.      #
# classifier[0:6] stops right before the final classification layer, so the result is the FC7      #
# activation rather than class scores.                                                              #
#---------------------------------------------------------------------------------------------------#
# no gradients are needed for feature extraction, so skip building the autograd graph
with torch.no_grad():
    feature_maps = model_vgg16.features(img_tensor)
    pooled = model_vgg16.avgpool(feature_maps)
    # flatten everything except the batch dimension before the fully connected layers
    flattened = pooled.reshape(pooled.shape[0], -1)
    output = model_vgg16.classifier[0:6](flattened)  # torch.Size([1, 4096])
print(output.shape)