In [1]:
import torch
import torchvision
from PIL import Image
from torchvision import datasets, transforms, models

In [2]:
# Build AlexNet initialised from the ImageNet-1k (V1) checkpoint; this is the
# checkpoint the legacy `pretrained=True` flag selected.
alexnet_weights = models.AlexNet_Weights.IMAGENET1K_V1
alexnet = models.alexnet(weights=alexnet_weights)
print(alexnet)  # show the layer-by-layer architecture

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to C:\Users\4311779/.cache\torch\hub\checkpoints\alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [06:34<00:00, 619kB/s]  


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [3]:
# Per-channel RGB statistics of the ImageNet training set, used by Normalize below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline applied to a PIL image before it is fed to the models
transform = transforms.Compose([
    # With an int argument, Resize scales the shorter side to 256 px and keeps
    # the aspect ratio (the result is not forced to 256x256)
    transforms.Resize(256),
    transforms.CenterCrop(224),  # Cut out the central 224x224 region
    transforms.ToTensor(),  # PIL image -> float tensor (channels, height, width) scaled to [0, 1]
    # Standardize each channel as (x - mean) / std; this shifts and rescales
    # the values, it does not force the image to have that mean/std
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

In [None]:
# Open the input image from disk as a PIL image (pre-processing is applied in the next cell).
# `Image` comes from the notebook's top import cell.
cat = Image.open("val_108.JPEG")

In [None]:
# Run the preprocessing pipeline on the PIL image.
cat_t = transform(cat)
# The model consumes batches, so prepend a batch axis of size 1:
# (channels, height, width) -> (batch_size, channels, height, width).
catbatch_t = cat_t.unsqueeze(0)

In [None]:
# Model inference.
# eval() puts the network in inference mode (AlexNet's Dropout layers become no-ops).
alexnet.eval()
# No backward pass is needed here, so skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    out = alexnet(catbatch_t)
print(out.shape)

torch.Size([1, 1000])


In [8]:
# Read the class-label file: one entry per line, with surrounding whitespace removed.
with open('imagenet_classes.txt') as labels_file:
    classes = [raw_line.strip() for raw_line in labels_file]

print(classes[0])  # quick look at the first entry

0, tench


In [9]:
# Softmax over the class dimension turns the raw scores into probabilities; scale to percent.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Order the class indices by descending score; the sorted values themselves are not needed.
_, indices = torch.sort(out, descending=True)
# Five best (label, confidence) pairs for the single image in the batch.
[
    (classes[idx], f"{percentage[idx].item():.2f}%")
    for idx in indices[0][:5]
]

[('282, tiger_cat', '23.91%'),
 ('285, Egyptian_cat', '20.19%'),
 ('287, lynx', '19.79%'),
 ('281, tabby', '10.94%'),
 ('283, Persian_cat', '5.60%')]

# ResNet

In [10]:
# Load ResNet-101 with the ImageNet-1k V1 checkpoint.
# Uses the explicit `weights=` API (as done for AlexNet above) instead of the
# deprecated `pretrained=True` flag, which selected this same checkpoint.
resnet = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V1)
print(resnet)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to C:\Users\4311779/.cache\torch\hub\checkpoints\resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:22<00:00, 7.79MB/s] 

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 




In [None]:
# eval() makes the BatchNorm layers use their stored running statistics.
resnet.eval()
# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    out = resnet(catbatch_t)
print(out.shape)

torch.Size([1, 1000])


In [12]:
# Class probabilities in percent for the one image in the batch.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ranked from highest to lowest score (sorted values discarded).
_, indices = torch.sort(out, descending=True)
# Report the top five predictions as (label, confidence) pairs.
[
    (classes[idx], f"{percentage[idx].item():.2f}%")
    for idx in indices[0][:5]
]

[('285, Egyptian_cat', '90.60%'),
 ('281, tabby', '7.50%'),
 ('282, tiger_cat', '1.73%'),
 ('287, lynx', '0.04%'),
 ('280, grey_fox', '0.03%')]

# Trying both models on different images

## Butterfly

In [13]:
# Open the butterfly image, run it through the preprocessing pipeline,
# then add a leading batch axis so the models can consume it.
butterfly = Image.open("val_42.JPEG")
butterfly_t = transform(butterfly)
butterflybatch_t = butterfly_t.unsqueeze(0)

In [14]:
# Make sure the model is in inference mode before the forward pass.
resnet.eval()
# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    out = resnet(butterflybatch_t)
print(out.shape)

torch.Size([1, 1000])


In [15]:
# Class probabilities in percent for the one image in the batch.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ranked from highest to lowest score (sorted values discarded).
_, indices = torch.sort(out, descending=True)
# Report the top five predictions as (label, confidence) pairs.
[
    (classes[idx], f"{percentage[idx].item():.2f}%")
    for idx in indices[0][:5]
]

[('323, monarch', '98.69%'),
 ('326, lycaenid', '0.92%'),
 ('325, sulphur_butterfly', '0.11%'),
 ('322, ringlet', '0.07%'),
 ('43, frilled_lizard', '0.02%')]

In [20]:
# Make sure the model is in inference mode before the forward pass.
alexnet.eval()

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    out = alexnet(butterflybatch_t)

In [21]:
# Class probabilities in percent for the one image in the batch.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ranked from highest to lowest score (sorted values discarded).
_, indices = torch.sort(out, descending=True)
# Report the top five predictions as (label, confidence) pairs.
[
    (classes[idx], f"{percentage[idx].item():.2f}%")
    for idx in indices[0][:5]
]

[('323, monarch', '69.95%'),
 ('325, sulphur_butterfly', '3.67%'),
 ('25, European_fire_salamander', '2.79%'),
 ('699, panpipe', '2.54%'),
 ('846, table_lamp', '1.54%')]

## Ladies

In [16]:
# Open the third test image, run it through the preprocessing pipeline,
# then add a leading batch axis so the models can consume it.
ladies = Image.open("val_20.JPEG")
ladies_t = transform(ladies)
ladiesbatch_t = ladies_t.unsqueeze(0)

In [17]:
# Make sure the model is in inference mode before the forward pass.
resnet.eval()
# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    out = resnet(ladiesbatch_t)

In [18]:
# Class probabilities in percent for the one image in the batch.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ranked from highest to lowest score (sorted values discarded).
_, indices = torch.sort(out, descending=True)
# Report the top five predictions as (label, confidence) pairs.
[
    (classes[idx], f"{percentage[idx].item():.2f}%")
    for idx in indices[0][:5]
]

[('678, neck_brace', '81.06%'),
 ('917, comic_book', '8.32%'),
 ('639, maillot', '1.97%'),
 ('921, book_jacket', '1.27%'),
 ('689, overskirt', '0.92%')]

In [25]:
# Make sure the model is in inference mode before the forward pass.
alexnet.eval()

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    out = alexnet(ladiesbatch_t)

In [26]:
# Class probabilities in percent for the one image in the batch.
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ranked from highest to lowest score (sorted values discarded).
_, indices = torch.sort(out, descending=True)
# Report the top five predictions as (label, confidence) pairs.
[
    (classes[idx], f"{percentage[idx].item():.2f}%")
    for idx in indices[0][:5]
]

[('906, Windsor_tie', '23.84%'),
 ('683, oboe', '13.74%'),
 ('577, gong', '11.87%'),
 ('678, neck_brace', '4.31%'),
 ('699, panpipe', '4.10%')]