In [None]:
# Download a sample image to test the inference
!wget -O cat.jpg https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg
# DOwnload imagenet labels json
!wget - O imagenet_labels.json https: // raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json

In [1]:
# Load the labels
import json
with open('imagenet_labels.json') as f:
    labels = json.load(f)

In [2]:
import torch
from torchvision import models
import time

In [3]:
# Getting the model

model = models.inception_v3(weights=True)



In [4]:
# Load the image, resize it and convert it to a tensor and display it
from PIL import Image
from torchvision import transforms

img = Image.open('cat.jpg')
img = transforms.Resize((299, 299))(img)
img = transforms.ToTensor()(img)


## CPU

In [5]:
model.eval()

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [7]:
img.size()

torch.Size([3, 299, 299])

In [8]:
model(img)

ValueError: expected 4D input (got 3D input)

In [10]:
img.unsqueeze(0).size()

torch.Size([1, 3, 299, 299])

In [9]:
model(img.unsqueeze(0))

tensor([[ 6.3392e-02,  1.3842e-01, -1.0551e+00, -4.2040e-01, -4.4763e-01,
         -5.2219e-01, -6.4090e-01, -6.6549e-01, -2.0210e-01, -6.8641e-02,
         -1.7541e+00, -3.8522e-01,  4.7412e-02, -1.7103e+00, -9.3095e-01,
          5.9680e-01, -6.6260e-01, -7.3043e-01, -6.2631e-01, -2.1400e+00,
         -1.1112e+00,  2.3347e-01,  6.5075e-02, -6.8009e-01, -4.0422e-01,
          1.1774e-01, -1.4651e-01, -4.6028e-01, -1.0499e+00, -4.7615e-01,
          3.9497e-01,  2.4963e-01, -7.6607e-01,  1.2975e-01, -6.2330e-01,
         -3.8985e-02,  1.9995e-01, -1.3208e+00, -6.4955e-01, -9.7720e-01,
         -8.9921e-01, -1.2975e+00, -2.1008e-01, -8.0320e-01, -5.3831e-01,
         -7.4888e-01, -7.1160e-01, -9.6493e-01, -1.3782e+00, -1.6308e+00,
         -1.1421e+00, -6.3365e-01, -1.9313e-01, -6.5053e-01, -8.9459e-01,
         -9.0946e-01, -2.2303e-01, -9.4507e-01,  1.6818e-01, -5.0270e-02,
         -5.7709e-01, -5.1203e-01, -1.1104e+00, -1.3300e+00, -6.4939e-01,
         -5.5449e-01, -5.1022e-01, -6.

In [5]:
# Running Inference once

start = time.time()
out = model(img.unsqueeze(0))
end = time.time()
print('Inference time: ', end - start)

Inference time:  0.12563204765319824


In [6]:
# Running inference 100 times
start = time.time()
for _ in range(100):
    output = model(img.unsqueeze(0))
end = time.time()
print(f'Time: {end - start} seconds')

Time: 8.394792556762695 seconds


## GPU

In [8]:
# Same in GPU
model = model.cuda()
img = img.cuda()
model.eval()

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [10]:
# Single image inference
start = time.time()
out = model(img.unsqueeze(0))
end = time.time()
print('Inference time: ', end - start)

Inference time:  0.023438692092895508


In [11]:

start = time.time()
for _ in range(100):
    output = model(img.unsqueeze(0))
end = time.time()
print(f'Time: {end - start} seconds')


Time: 1.2342636585235596 seconds


## Inference Results

In [13]:
# Getting the top 5 predictions
_, indices = torch.sort(out, descending=True)
[(labels[idx], out[0, idx].item()) for idx in indices[0][:5]]


[('Egyptian Mau', 14.871809959411621),
 ('tiger cat', 14.212199211120605),
 ('tabby cat', 13.434951782226562),
 ('lynx', 12.389911651611328),
 ('carton', 8.164073944091797)]

## Conclusion

Its a known fact that GPUs make AI inference faster. In this case, it gave about 3 - 4x speedup. But we can go more!

See you in the next notebook :)
