In [1]:
import torchvision
# Fetch the two pretrained MobileNetV2 variants compared in this notebook:
# the quantized build first, then the regular one.
model_quantized = torchvision.models.quantization.mobilenet_v2(
    pretrained=True,
    quantize=True,
)
model = torchvision.models.mobilenet_v2(
    pretrained=True,
)

Downloading: "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
100%|██████████| 3.42M/3.42M [00:00<00:00, 59.6MB/s]
  device=storage.device,
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 72.3MB/s]


**Compare model sizes**

In [2]:
import os
import torch

def print_model_size(mdl):
    """Print the serialized size of a model's ``state_dict`` in megabytes.

    The state dict is written with ``torch.save`` to a scratch file and the size
    of that file is printed as ``"<size> MB"`` (size in bytes divided by 1e6).

    Args:
        mdl: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
    """
    import tempfile  # local import: only this helper needs it

    # Save into a private temporary directory so that an existing "tmp.pt" in the
    # working directory is never overwritten or deleted, and so the scratch file
    # is removed even if saving raises. The file name itself stays "tmp.pt".
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_path = os.path.join(scratch_dir, "tmp.pt")
        torch.save(mdl.state_dict(), scratch_path)
        print("%.2f MB" % (os.path.getsize(scratch_path) / 1e6))

# Report the saved size of the regular model first, then the quantized one.
for candidate in (model, model_quantized):
    print_model_size(candidate)

14.24 MB
3.62 MB


**Load the ImageNet validation split with the preprocessing the pretrained models expect (resize, center crop, normalization).**

In [3]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from google.colab import drive
from torch.utils.data import Subset
# Mount Google Drive so the dataset folder below is reachable from the runtime.
drive.mount('/content/drive')

data_path = "/content/drive/My Drive/colab_files/imagenet/"

# Preprocessing applied to every image: resize to 256, center-crop to 224,
# convert to a tensor, then normalize each channel with the given mean/std.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# ImageNet 'val' split read from the mounted Drive folder.
imagenet_val = datasets.ImageNet(root=data_path, split='val', transform=val_transform)

Mounted at /content/drive


**Create a dataset subset and create a dataloader**

In [4]:
from random import randint
#
# Create a subset of 300 images
#
maxSample = 300

#
# Randomly generate maxSample indexes over the whole dataset.
# randint() includes BOTH endpoints, so valid positions are 0 .. len - 1;
# using 1 .. len would allow an out-of-range index and would never pick item 0.
# NOTE: indexes are drawn independently, so the same image may be picked more
# than once, and no seed is set in this cell, so the subset differs between runs.
#
indexes = [randint(0, len(imagenet_val) - 1) for _ in range(maxSample)]
dataset_subset = Subset(imagenet_val, indexes)

data_loader_subset = torch.utils.data.DataLoader(dataset_subset,
                                          batch_size=4,
                                          shuffle=True,
                                          num_workers=2)

**Evaluation function that assesses a model against a dataset.**

In [5]:
import time
import numpy as np
def evaluate(model, data_loader, loss_history):
    """Run ``model`` over ``data_loader`` and print loss, accuracy and timing.

    The model is switched to eval mode and executed under ``torch.no_grad()``.
    Each batch output is passed through ``log_softmax`` and scored with a summed
    NLL loss; the predicted class is the arg-max along ``dim=1``.

    Args:
        model: Module with an ``eval()`` method; ``model(data)`` must return
            per-class scores with the class dimension at ``dim=1``.
        data_loader: Iterable yielding ``(data, target)`` batches.
        loss_history: List that receives the average per-sample loss of this run.

    Raises:
        ValueError: If ``data_loader`` yields no samples.

    Note:
        The reported inference time is the mean wall-clock time of one forward
        pass (including ``log_softmax``) per *batch*, in milliseconds.
    """
    model.eval()

    total_samples = 0
    correct_samples = 0
    total_loss = 0.0
    times = []
    with torch.no_grad():
        for data, target in data_loader:
            # perf_counter() is the clock intended for measuring short intervals.
            start_time = time.perf_counter()
            output = torch.nn.functional.log_softmax(model(data), dim=1)
            end_time = time.perf_counter()
            #
            # Converts to milliseconds
            #
            times.append(1000 * (end_time - start_time))

            loss = torch.nn.functional.nll_loss(output, target, reduction='sum')
            _, pred = torch.max(output, dim=1)

            total_loss += loss.item()
            # .item() keeps the counter a plain Python int rather than a 0-dim tensor.
            correct_samples += pred.eq(target).sum().item()
            # Count the samples actually evaluated; this stays correct when the
            # loader skips part of the dataset (e.g. drop_last or a custom sampler).
            total_samples += target.size(0)

    if total_samples == 0:
        raise ValueError("data_loader yielded no samples to evaluate")

    avg_inference = np.mean(times)
    avg_loss = total_loss / total_samples
    loss_history.append(avg_loss)
    print(('\tAverage test loss: {:.4f}'
           '\tAccuracy:{:5}/{:5} ({:4.2f}%)'
           '\tAverage inference time: {:.4f}ms\n').format(
               avg_loss,
               correct_samples,
               total_samples,
               100.0 * correct_samples / total_samples,
               avg_inference))


In [6]:
# Evaluate both models on the same subset loader; the quantized model goes first.
# `loss` is reset before each run, so afterwards it holds only the last run's value.
for heading, candidate in (("Quantized Model Metrics:\n", model_quantized),
                           ("Non-Quantized Model Metrics:\n", model)):
    loss = []
    print(heading)
    evaluate(candidate, data_loader_subset, loss)

Quantized Model Metrics:

	Average test loss: 1.0949	Accuracy:  223/  300 (74.33%)	Average inference time: 248.0277ms

Non-Quantized Model Metrics:

	Average test loss: 1.1311	Accuracy:  221/  300 (73.67%)	Average inference time: 312.4161ms

