In [1]:
import json

import numpy as np
import torch
from torchvision.io import read_image
from torchvision import transforms, datasets
import torchvision.transforms.functional as TF

from torchvision.models import (
    vgg16_bn,
    googlenet,
    resnet50,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
with open("imagenet_labels.json", "r") as read_file:
    imagenet_labels = json.load(read_file)

In [4]:
data_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.CenterCrop(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [5]:
imagenet_part_dataset = datasets.ImageFolder(root='data', transform=data_transform)
imagenet_dataloader = torch.utils.data.DataLoader(imagenet_part_dataset, batch_size=20, shuffle=False)

In [6]:
class_to_idx = imagenet_part_dataset.class_to_idx
idx_to_class = inv_map = {v: k for k, v in class_to_idx.items()}

In [7]:
y_true = imagenet_part_dataset.targets
y_true_classes = [idx_to_class[i] for i in y_true]

In [8]:
def evaluate(model, dataloader, k=5):
    with torch.no_grad():
        predictions = []
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)
            pred = model(x).softmax(1)
            class_id = torch.topk(pred, k).indices.cpu().detach().numpy().tolist()
            predictions.extend(class_id)
    return predictions

In [9]:
def calculate_accuracy(y_true, y_pred):
    top1 = topk = 0
    for true_class, topk_class in zip(y_true, y_pred):
        if true_class in topk_class[0]:
            top1 += 1
        class_in_topk = [pred_class for pred_class in topk_class if true_class.lower() in pred_class.lower()]
        if len(class_in_topk) > 0:
            topk += 1
    return top1 / len(y_pred), topk / len(y_pred)

## VGG16

In [10]:
model = vgg16_bn(pretrained=True)
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [11]:
if device.type == "cuda":
    t = torch.cuda.get_device_properties(0).total_memory
    r = torch.cuda.memory_reserved(0)
    a = torch.cuda.memory_allocated(0)
    print("Total memory:", round(t / (1024 ** 3), 2))
    print("Reserved memory:", round(r / (1024 ** 3), 2))
    print("Allocated memory:", round(a / (1024 ** 3), 2))

Total memory: 4.0
Reserved memory: 0.52
Allocated memory: 0.52


In [12]:
%time predictions = evaluate(model, imagenet_dataloader)
predictions_classes = [[imagenet_labels[str(i)] for i in sub_list] for sub_list in predictions]

CPU times: total: 13 s
Wall time: 5.78 s


In [13]:
calculate_accuracy(y_true_classes, predictions_classes)

(0.8163265306122449, 0.9591836734693877)

In [14]:
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()



## GoogLeNet

In [15]:
model = googlenet(pretrained=True)
model.to(device)

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [16]:
if device.type == "cuda":
    t = torch.cuda.get_device_properties(0).total_memory
    r = torch.cuda.memory_reserved(0)
    a = torch.cuda.memory_allocated(0)
    print("Total memory:", round(t / (1024 ** 3), 2))
    print("Reserved memory:", round(r / (1024 ** 3), 2))
    print("Allocated memory:", round(a / (1024 ** 3), 2))

Total memory: 4.0
Reserved memory: 0.55
Allocated memory: 0.54


In [17]:
%time predictions = evaluate(model, imagenet_dataloader)
predictions_classes = [[imagenet_labels[str(i)] for i in sub_list] for sub_list in predictions]

CPU times: total: 6.31 s
Wall time: 1.06 s


In [18]:
calculate_accuracy(y_true_classes, predictions_classes)

(0.673469387755102, 0.8571428571428571)

In [19]:
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()

## ResNet50

In [20]:
model = resnet50(pretrained=True)
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [21]:
if device.type == "cuda":
    t = torch.cuda.get_device_properties(0).total_memory
    r = torch.cuda.memory_reserved(0)
    a = torch.cuda.memory_allocated(0)
    print("Total memory:", round(t / (1024 ** 3), 2))
    print("Reserved memory:", round(r / (1024 ** 3), 2))
    print("Allocated memory:", round(a / (1024 ** 3), 2))

Total memory: 4.0
Reserved memory: 0.65
Allocated memory: 0.64


In [22]:
%time predictions = evaluate(model, imagenet_dataloader)
predictions_classes = [[imagenet_labels[str(i)] for i in sub_list] for sub_list in predictions]

CPU times: total: 7.23 s
Wall time: 1.22 s


In [23]:
calculate_accuracy(y_true_classes, predictions_classes)

(0.6530612244897959, 0.8775510204081632)

In [24]:
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()