In [5]:
import torch
# Note: a notebook cell echoes only its final expression, so the version on the
# next line is evaluated but not displayed; the shown result is the CUDA check.
torch.__version__
torch.cuda.is_available()

True

Import pre-trained models

In [6]:
from torchvision import models
# dir(models)

# Capitalized names in `models` are model classes: calling one builds the
# network architecture only; no pretrained weights are loaded here.
alexnet = models.AlexNet()  # architecture is specified here


In [7]:
# Lowercase names in `models` are factory functions: this builds the ResNet-101
# architecture and, because pretrained=True, also loads pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favor of
# a `weights=` argument; confirm the installed version before changing this.
resnet = models.resnet101(pretrained=True)  # lowercase: arch. + pretrained weights

In [8]:
# Printing a module lists its submodules by attribute name, nested by hierarchy.
print(resnet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In `torch`, network layers are called "[modules](https://pytorch.org/docs/stable/notes/modules.html)". They represent building-block operations, and they nest: a whole model such as `resnet` above is itself a module composed of smaller modules.

In [9]:
from torchvision import transforms
# `transforms` provides standard transformations on image data; Compose chains
# them so that each step's output is the next step's input.
preprocess = transforms.Compose(
    [
        # Scale the image so that its shorter side is 256 pixels.
        transforms.Resize(256),
        # Keep the central 224x224 region.
        transforms.CenterCrop(224),
        # Convert the PIL image into a float tensor.
        transforms.ToTensor(),
        # Per-channel (x - mean) / std, using the values torchvision documents
        # for its ImageNet-pretrained models.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [10]:
from PIL import Image
# Force three RGB channels: the preprocessing pipeline normalizes with
# 3-element mean/std lists, which does not match grayscale or RGBA input.
img = Image.open("./data/cat.jpg").convert("RGB")

In [11]:
# img

In [17]:
# Run the preprocessing pipeline on the image, then insert a new dimension of
# size 1 at position 0 so the single image forms a batch of one.
img_t = preprocess(img)
batch_t = img_t.unsqueeze(0)

In [19]:
# Evaluation mode: layers such as BatchNorm switch to their inference behavior.
resnet.eval()
# Inference only: disable gradient tracking so the forward pass does not build
# an autograd graph for a result that is never back-propagated.
with torch.no_grad():
    out = resnet(batch_t)
out

tensor([[-6.6371e-01, -1.2233e+00, -3.0677e+00, -2.4489e+00, -2.8887e+00,
          2.9366e-01, -3.6781e+00, -3.4950e+00, -1.3948e+00, -3.3166e+00,
         -2.2427e+00, -3.2068e+00, -2.7910e+00, -4.3084e+00, -3.1309e+00,
         -2.4273e+00, -2.1430e+00, -1.1768e+00, -3.1448e+00, -4.8561e+00,
         -3.8926e+00, -2.7131e+00, -4.3019e+00, -2.8315e+00, -2.8083e+00,
         -1.0943e+00, -1.9731e+00, -2.0351e+00, -2.8238e+00, -1.2295e+00,
         -2.6373e+00, -2.7480e+00, -1.9592e+00, -2.4702e+00, -1.9835e+00,
         -2.8651e+00, -1.1766e+00, -1.3538e+00, -5.0326e-01, -3.3656e-01,
         -2.7935e+00, -1.9881e-01, -7.4370e-01,  8.8353e-02, -1.4657e+00,
         -1.6965e+00, -7.3915e-01, -3.2425e-01, -2.7127e+00, -3.5619e+00,
         -2.0607e+00,  1.8579e+00, -1.7963e+00, -2.9054e+00, -2.1705e+00,
         -3.6613e+00, -3.9566e+00, -3.4509e+00, -3.0760e+00, -5.5103e-01,
         -1.7026e+00, -3.0858e+00, -1.2576e+00, -5.5635e-01, -1.8138e+00,
         -2.1655e+00,  1.2065e+00, -1.

In [20]:
# Read the class names, one per line with surrounding whitespace removed, so
# that a class index can be looked up as labels[index].
with open('./data/imagenet_classes.txt', 'r') as fin:
    labels = [raw.strip() for raw in fin]

In [26]:
# Index of the highest score in each row. argmax returns it directly, without
# the throwaway `_` binding that shadows IPython's "last output" variable.
index = out.argmax(dim=1)
# Softmax over dim 1, first row only, scaled to percentages.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Echo the best label for the first row together with its percentage.
labels[index[0]], percentage[index[0]].item()

('Egyptian cat', 61.92461395263672)

In [27]:
# Indices along the last dimension ordered from highest to lowest score.
# argsort returns only the indices, so no throwaway `_` binding is needed
# (in IPython, assigning `_` shadows the "last output" variable).
indices = torch.argsort(out, descending=True)

In [38]:
# First ten entries of the sorted indices for row 0, each paired with its label
# and percentage.
[(labels[idx], percentage[idx].item()) for idx in indices[0][:10]]

[('Egyptian cat', 61.92461395263672),
 ('tabby, tabby cat', 21.199674606323242),
 ('tiger cat', 11.12000846862793),
 ('Siamese cat, Siamese', 0.6991593837738037),
 ('lynx, catamount', 0.3436599671840668),
 ('paper towel', 0.3314582407474518),
 ('space heater', 0.20457488298416138),
 ('mouse, computer mouse', 0.17155268788337708),
 ('carton', 0.16546174883842468),
 ('German shepherd, German shepherd dog, German police dog, alsatian',
  0.1558435559272766)]