In [1]:
# https://pyimagesearch.com/2021/07/26/pytorch-image-classification-with-pre-trained-networks/

In [47]:
# import the necessary packages
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
from torchvision import models

print("Torch:", torch.__version__)
print("Torchvision:", torchvision.__version__)
print("GPU:", torch.cuda.device_count(), torch.cuda.get_device_name(0)) if torch.cuda.is_available() else print("NO GPU")

Torch: 1.8.2+cu102
Torchvision: 0.9.2+cu102
GPU: 1 Quadro RTX 3000 with Max-Q Design


In [2]:
# specify image dimension
IMAGE_SIZE = 224

# specify ImageNet mean and standard deviation
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# determine the device we will be using for inference
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [10]:
def preprocess_image(image):
    # swap the color channels from BGR to RGB, resize it, 
    # and scale the pixel values to [0, 1] range
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    image = image.astype("float32") / 255.0
    
    # subtract ImageNet mean, divide by ImageNet standard deviation, 
    # set "channels first" ordering, and add a batch dimension
    image -= MEAN
    image /= STD
    image = np.transpose(image, (2, 0, 1))
    image = np.expand_dims(image, 0)
    
    # return the preprocessed image
    return image

In [5]:
# define a dictionary that maps model names to their classes inside torchvision
MODELS = {
    "vgg16": models.vgg16(pretrained=True),
    "vgg19": models.vgg19(pretrained=True),
    "inception": models.inception_v3(pretrained=True),
    "densenet": models.densenet121(pretrained=True),
    "resnet": models.resnet50(pretrained=True)
}

# load our the network weights from disk, flash it to the current device, and set it to evaluation mode
print("[INFO] loading model...")
model = MODELS["vgg16"].to(DEVICE)
model.eval()

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\zhanghe/.cache\torch\hub\checkpoints\vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to C:\Users\zhanghe/.cache\torch\hub\checkpoints\vgg19-dcbb9e9d.pth


  0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth" to C:\Users\zhanghe/.cache\torch\hub\checkpoints\inception_v3_google-1a9a5a14.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to C:\Users\zhanghe/.cache\torch\hub\checkpoints\densenet121-a639ec97.pth


  0%|          | 0.00/30.8M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to C:\Users\zhanghe/.cache\torch\hub\checkpoints\resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

[INFO] loading model...


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [18]:
# load the image from disk, clone it (so we can draw on it later), and preprocess it
print("[INFO] loading image...")
image = cv2.imread("images/n04548280_wall_clock.jpg")
orig = image.copy()
image = preprocess_image(image)
print(image.shape)

[INFO] loading image...
(1, 3, 224, 224)


In [20]:
# convert the preprocessed image to a torch tensor and flash it to the current device
image = torch.from_numpy(image)
image = image.to(DEVICE)
image.shape

torch.Size([1, 3, 224, 224])

In [22]:
# classify the image and extract the predictions
print("[INFO] classifying image...")
logits = model(image)
logits.shape

[INFO] classifying image...


torch.Size([1, 1000])

In [56]:
# covnert to probabilities and rank
probabilities = torch.nn.Softmax(dim=-1)(logits)

probs = probabilities.cpu().detach().numpy()[0]
pd.Series(probs).sort_values(ascending=False)[:5]

799    0.324463
892    0.150873
553    0.072144
760    0.050447
894    0.044143
dtype: float32

In [58]:
# check index to class name matching list: 
# https://deeplearning.cms.waikato.ac.nz/user-guide/class-maps/IMAGENET/
# 799 = "sliding door"
# 892 = "wall clock"
# 553 = "file, file cabinet, filing cabinet"
# 760 = "refrigerator, icebox"
# 894 = "wardrobe, closet, press"