## A pretrained network that recognizes the subject of an image

In [15]:
from torchvision import models
import torch

In [3]:
# Count how many names the torchvision.models namespace exposes
len(dir(models))

72

In [5]:
# Capitalized names such as AlexNet are the model classes themselves.
alexnet = models.AlexNet() # instance of the AlexNet class
# This instance is currently untrained and will produce garbage even with
# correctly formatted input. Calling it would look like:
#     output = alexnet(input)
# Lowercase names (e.g. models.resnet101) are functions that instantiate models
# with predefined layers and units and optionally download and load pretrained
# weights into them.

In [6]:
# pretrained=True downloads the weights trained on the ImageNet dataset
# (1.2 million images, 1000 categories) and loads them into the model.
# NOTE(review): recent torchvision releases prefer a `weights=` argument over
# `pretrained=True` - confirm against the installed version before changing.
resnet = models.resnet101(pretrained=True)
# The model has about 44.5 million parameters - a lot to optimize automatically.

Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to C:\Users\divya/.cache\torch\hub\checkpoints\resnet101-5d3b4d8f.pth


HBox(children=(IntProgress(value=0, max=178728960), HTML(value='')))




In [7]:
# Evaluating the model as the last expression displays its structure (the modules it is built from)
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [8]:
from torchvision import transforms

# Preprocessing pipeline: bring input images to the right size and put their
# colors into a predefined range. transforms.Compose chains basic preprocessing
# functions into a single callable that applies them in order.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),      # scale the image to size 256
        transforms.CenterCrop(224),  # keep a 224-sized crop around the center
        transforms.ToTensor(),       # convert the image to a tensor
        # normalize the three color channels with the given per-channel mean and std
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [11]:
from PIL import Image
# Open the test image from disk.
# NOTE(review): this is an absolute, machine-specific path - adjust it to
# wherever the data directory lives before re-running the notebook.
img = Image.open(r"C:\Users\divya\PyTorchDiaries\data\p1ch2\bobby.jpg")

In [13]:
# Ending the cell with the image itself renders it inline in the notebook;
# img.show() would instead hand the picture to an external viewer program.
img

In [14]:
# Run the image through the preprocessing pipeline so it is resized, cropped,
# converted to a tensor and normalized the way the network expects
img_t = preprocess(img)

In [16]:
# Insert a new leading dimension of size 1 so the single image tensor becomes
# a batch containing one image, which is what gets passed to the network.
batch_t = img_t.unsqueeze(0)

In [17]:
# The process of running a trained model on new data is called inference;
# to do inference we put the network in eval mode.
# (Kept as comments: a bare string placed after the call is not a docstring
# and, being the last expression, would become the cell's output.)
resnet.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [18]:
# Inference only: no gradients are needed here, so disable autograd tracking
# for the forward pass instead of recording a graph that is never used.
with torch.no_grad():
    out = resnet(batch_t)

In [21]:
# Build the list of class labels: each line of the file is treated as one
# label, with surrounding whitespace (including the newline) stripped.
with open(r"C:\Users\divya\PyTorchDiaries\data\p1ch2\imagenet_classes.txt") as class_file:
    labels = [entry.strip() for entry in class_file]

In [22]:
# Along dimension 1, find the highest score and where it occurs;
# only the position (index) is used afterwards.
_, index = out.max(dim=1)

In [24]:
# Softmax over dimension 1 turns the scores into values that sum to 1;
# take the first (only) row of the batch and scale it to percentages.
percentage = torch.softmax(out, dim=1)[0] * 100
# index here is not a Python int but a one-dimensional tensor, specifically
# tensor([207]), so index[0] is used to pick out the single entry.
labels[index[0]], percentage[index[0]].item()

('golden retriever', 96.29334259033203)

In [26]:
# getting top outputs
_, indices = torch.sort(out, descending = True)
[(labels[idx], percentage[idx].item()) for idx in indices[0][:5]]

[('golden retriever', 96.29334259033203),
 ('Labrador retriever', 2.80812668800354),
 ('cocker spaniel, English cocker spaniel, cocker', 0.28267428278923035),
 ('redbone', 0.20863059163093567),
 ('tennis ball', 0.11621591448783875)]

In [27]:
# The ResNet architecture used above won the ImageNet image classification challenge in 2015

## Loading Models from Torch Hub

In [29]:
from torch import hub
resnet18_model = hub.load('pytorch/vision:master','resnet18',pretrained= True)
# Name and branch of github repo, name of entry point function, keyword arguements

Downloading: "https://github.com/pytorch/vision/archive/master.zip" to C:\Users\divya/.cache\torch\hub\master.zip
Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\divya/.cache\torch\hub\checkpoints\resnet18-5c106cde.pth


HBox(children=(IntProgress(value=0, max=46827520), HTML(value='')))


