Implement an image classification model using a pretrained ResNeXt-50 (32x4d), a ResNet variant, in PyTorch.

In [None]:
# import required libraries
import torch
import torchvision
# Fetch the ImageNet-pretrained ResNeXt-50 (32x4d) network from torchvision and
# switch it to inference mode in one step (eval() returns the module itself).
model = torchvision.models.resnext50_32x4d(pretrained=True).eval()
# Keep the model as the cell's last expression so its architecture is displayed.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1

In [None]:
# download sample test image from the internet
# Import the submodule explicitly: a bare `import urllib` does not load
# `urllib.request`, so relying on it only works if another library imported it first.
import urllib.request

url, filename = ("https://images.boats.com/resize/wp/2/files/2021/03/Bowrider.jpg", "Boat.jpg")

# Save the image at `url` to the local file `filename`.
# The legacy Python 2 `urllib.URLopener` path and the bare `except:` that masked its
# failure are removed; a failed download now raises instead of being silently retried.
urllib.request.urlretrieve(url, filename)

In [None]:
from PIL import Image
from torchvision import transforms

# Force three RGB channels: grayscale, palette, RGBA or CMYK files would otherwise
# produce a tensor whose channel count does not match the 3-channel Normalize below.
input_image = Image.open(filename).convert("RGB")

# Preprocessing pipeline: resize the shorter side to 256, take the central 224x224
# crop, convert to a float tensor, then normalize each channel with the given
# mean/std values.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_tensor = preprocess(input_image)
# Add a leading batch dimension: the model expects a mini-batch, not a single image.
input_batch = input_tensor.unsqueeze(0)

# move the input image and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

# Inference only, so skip gradient tracking.
with torch.no_grad():
    output = model(input_batch)

# output[0] holds unnormalized confidence scores over ImageNet's 1000 classes;
# softmax over that single dimension turns them into probabilities.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

tensor([7.01959e-09, 1.08613e-08, 9.25720e-06, 2.20847e-06, 1.51166e-06, 2.43790e-07, 7.01983e-08, 9.87981e-09, 1.21196e-08, 2.54432e-09, 5.11534e-09, 8.62341e-09, 1.47552e-08, 1.05232e-08, 6.95999e-09, 4.89054e-09, 2.75892e-08, 5.06259e-09, 8.57004e-09, 3.73438e-09, 9.79035e-09, 6.23639e-09, 5.87251e-09, 8.30264e-09,
        2.54258e-10, 3.00239e-09, 1.04926e-08, 2.66959e-08, 2.99233e-09, 2.57263e-08, 3.48165e-09, 2.27732e-08, 3.93091e-08, 2.25424e-07, 1.33835e-07, 3.52924e-09, 3.73714e-08, 4.39840e-09, 3.17497e-08, 2.63790e-08, 3.94039e-08, 2.34748e-08, 4.57249e-09, 9.29903e-09, 1.70750e-08, 9.34031e-09, 1.77471e-08, 4.66809e-09,
        3.89809e-09, 1.14007e-08, 2.86369e-09, 8.02955e-10, 3.79929e-08, 2.56830e-09, 5.60664e-09, 2.26309e-08, 2.85500e-09, 3.78673e-09, 3.26409e-08, 5.47412e-08, 1.92263e-08, 2.53141e-09, 3.35889e-09, 4.24597e-09, 1.92358e-08, 1.15574e-07, 3.22670e-08, 1.43621e-09, 1.66023e-08, 3.95352e-09, 1.09779e-08, 8.32567e-09,
        3.23326e-09, 3.19880e-08, 3.5815

In [None]:
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

--2021-12-15 03:50:14--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt.1’


2021-12-15 03:50:15 (73.0 MB/s) - ‘imagenet_classes.txt.1’ saved [10472/10472]



In [None]:
# Load the class names: one label per line, surrounding whitespace removed
with open("imagenet_classes.txt", "r") as label_file:
    categories = [line.strip() for line in label_file]

# Print the five most probable classes with their confidence as a percentage
top5_prob, top5_catid = torch.topk(probabilities, 5)
for score, class_idx in zip(top5_prob.tolist(), top5_catid.tolist()):
    print(categories[class_idx], round(score * 100, 2))

speedboat 99.35
lifeboat 0.47
fireboat 0.11
trimaran 0.02
catamaran 0.01
