In [2]:
import torch
from torchvision import models, transforms
from PIL import Image

In [None]:
# Choose an existing network: ResNet-101 from torchvision, loaded with its
# pretrained weights (the weights are requested via `pretrained=True`).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# a `weights=` argument — confirm against the installed torchvision version
# before changing it.
resnet = models.resnet101(pretrained=True)
# Last expression of the cell: displays the model's repr (its layer structure).
resnet

In [4]:
# Input formatting: resize, centre-crop to 224x224, convert to a tensor and
# normalise each of the three channels with fixed mean / std values.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
preprocess = transforms.Compose(_preprocess_steps)

In [27]:
# Choose an image and force it to 3-channel RGB: the preprocessing pipeline
# normalises with three per-channel mean/std values, so a grayscale, palette
# or RGBA file would otherwise fail (or be normalised incorrectly).
img = Image.open("dog.jpg").convert("RGB")
# Leaving the image as the cell's last expression renders it inline in the
# notebook instead of launching an external viewer via img.show().
img

In [28]:
# Run the image through the preprocessing pipeline, then prepend a dimension
# of size one at position 0 so the tensor forms a batch containing one image.
img_t = preprocess(img)
batch_t = img_t.unsqueeze(0)

In [29]:
# Run the model.
# Inference (running a trained model on new data) requires eval mode.
resnet.eval()
# Gradients are not needed for inference: disabling autograd avoids building
# the computation graph and keeping intermediate activations in memory.
with torch.no_grad():
    output = resnet(batch_t)

In [None]:
# Get the labels for classification: one entry per line of the file. The
# position in the list is what later cells use as the class index, so no line
# is skipped or reordered here.
# The encoding is given explicitly so decoding does not depend on the platform
# default; iterating the file handle avoids the extra list from readlines().
with open("imagenet_classes.txt", encoding="utf-8") as f:
    labels = [line.strip() for line in f]
# Show only the size and a short preview rather than dumping every label.
len(labels), labels[:5]

In [30]:
# Top-1 prediction: largest raw score along dim=1 together with its index,
# and the softmax of the first batch item scaled to percent.
value, index = output.max(dim=1)
percentage = torch.softmax(output, dim=1)[0] * 100

# (label, confidence in percent) of the best-scoring class
labels[index[0]], percentage[index[0]].item()

('dogsled, dog sled, dog sleigh', 40.75822830200195)

In [31]:
# Order the scores from highest to lowest and list the five best classes of
# the first batch item as (label, confidence in percent) pairs.
_, indices = output.sort(descending=True)
[(labels[i], percentage[i].item()) for i in indices[0, :5]]

[('dogsled, dog sled, dog sleigh', 40.75822830200195),
 ('Eskimo dog, husky', 32.627079010009766),
 ('Siberian husky', 24.379453659057617),
 ('malamute, malemute, Alaskan malamute', 1.090492844581604),
 ('Samoyed, Samoyede', 0.20688243210315704)]