In [2]:
import torch
import copy
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from src.vgg import *
from src.util import *

def quantize_model(model):
    """Return a quantized, ``QuantWrapper``-wrapped copy of ``model``.

    The input is deep-copied first, so ``model`` itself is left untouched.
    The copy gets the default qconfig, is wrapped in
    ``torch.quantization.QuantWrapper``, and is then run through ``prepare``
    and ``convert`` in place.

    No data is fed through the network between ``prepare`` and ``convert``,
    so this function performs no calibration of its own. In this notebook the
    result is used as the module skeleton into which a saved quantized
    ``state_dict`` is loaded.

    Args:
        model: the float ``torch.nn.Module`` to quantize.

    Returns:
        The converted ``QuantWrapper`` instance.
    """
    tq = torch.quantization

    # Work on a private copy so the caller's float model stays usable.
    float_copy = copy.deepcopy(model)
    float_copy.qconfig = tq.get_default_qconfig()

    wrapped = tq.QuantWrapper(float_copy)
    # Both steps mutate `wrapped` in place (inplace=True).
    tq.prepare(wrapped, inplace=True)
    tq.convert(wrapped, inplace=True)
    return wrapped

# Directory that holds both the float and the quantized checkpoints.
model_path = "./models"


def _load_checkpoint(net, filename):
    """Load the state dict stored at ``{model_path}/{filename}`` into ``net``.

    ``map_location='cpu'`` is passed because the measurement cells of this
    notebook run the models on ``'cpu'``; it allows a checkpoint whose tensors
    were saved on another device to be loaded on a host without that device.

    Args:
        net: module whose ``load_state_dict`` receives the checkpoint.
        filename: checkpoint file name, relative to ``model_path``.

    Returns:
        ``net`` itself, so the call can be chained.
    """
    state_dict = torch.load(f'{model_path}/{filename}', map_location='cpu')
    net.load_state_dict(state_dict)
    return net


# Float baselines, switched to eval mode right after their weights are loaded.
vgg11 = _load_checkpoint(VGG11(), 'VGG11.pth').eval()
vgg13 = _load_checkpoint(VGG13(), 'VGG13.pth').eval()
vgg16 = _load_checkpoint(VGG16(), 'VGG16.pth').eval()
vgg19 = _load_checkpoint(VGG19(), 'VGG19.pth').eval()

# Quantized counterparts: quantize_model() builds the converted module
# structure from the float model, then the saved quantized weights are loaded
# into that structure.
q_vgg11 = _load_checkpoint(quantize_model(vgg11), 'quantized_VGG11.pth')
q_vgg13 = _load_checkpoint(quantize_model(vgg13), 'quantized_VGG13.pth')
q_vgg16 = _load_checkpoint(quantize_model(vgg16), 'quantized_VGG16.pth')
q_vgg19 = _load_checkpoint(quantize_model(vgg19), 'quantized_VGG19.pth')

# Name -> model registry iterated by the measurement cells; the insertion
# order below is the order in which results are printed.
models = {
    'vgg11': vgg11,
    'vgg13': vgg13,
    'vgg16': vgg16,
    'vgg19': vgg19,
    'q_vgg11': q_vgg11,
    'q_vgg13': q_vgg13,
    'q_vgg16': q_vgg16,
    'q_vgg19': q_vgg19,
}

  device=storage.device,


In [3]:
# Measure the inference latency of every registered model on the CPU.
# NOTE(review): the unit of the value returned by measure_inference_latency()
# is not visible in this notebook (presumably seconds per run) — confirm in
# the helper's definition.

for model_name, model in models.items():
    elapsed_time = measure_inference_latency(model, 'cpu')
    # `.6f` states the six-decimal precision explicitly. The previous spec
    # `4f` was a minimum field *width* of 4 with the default precision, which
    # reads like "4 decimals" but is not; the printed text is the same for
    # finite values.
    print(f'{model_name} elapsed time:\t {elapsed_time:.6f}')

vgg11 elapsed time:	 0.001895
vgg13 elapsed time:	 0.002358
vgg16 elapsed time:	 0.003296
vgg19 elapsed time:	 0.004157
q_vgg11 elapsed time:	 0.000844
q_vgg13 elapsed time:	 0.001038
q_vgg16 elapsed time:	 0.001449
q_vgg19 elapsed time:	 0.002144


In [4]:
# Report the size of every registered model, divided by 1024**2 and labelled MB.
# NOTE(review): measure_model_size() is assumed to return a byte count —
# confirm in the helper's definition. The recorded output shows the quantized
# models several hundred times smaller than their float counterparts, far more
# than int8 weights alone would explain; presumably the helper does not see
# the packed quantized weights — verify before quoting these numbers.
scale = 1024 ** 2
for model_name in models:
    model = models[model_name]
    model_size = measure_model_size(model)
    print(f'{model_name} model size:\t {model_size/scale:.4f} MB')

vgg11 model size:	 35.2350 MB
vgg13 model size:	 35.9418 MB
vgg16 model size:	 56.2162 MB
vgg19 model size:	 76.4906 MB
q_vgg11 model size:	 0.0422 MB
q_vgg13 model size:	 0.0451 MB
q_vgg16 model size:	 0.0647 MB
q_vgg19 model size:	 0.0843 MB


In [5]:
# Measure the accuracy of every registered model on the CIFAR-10 test split.

data_path = "/workspace/shared/data"

# train=False selects the test split; the only transform applied is
# ToTensor(). download=True lets torchvision fetch the data into `data_path`.
test_dataset = datasets.CIFAR10(
    root=data_path,
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
# Fixed order (shuffle=False), 32 images per batch.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

for model_name in models:
    model = models[model_name]
    accuracy = measure_accuracy(model, test_loader, 'cpu')
    print(f'{model_name} accuracy:\t {accuracy:.4f}')

Files already downloaded and verified
vgg11 accuracy:	 0.8568
vgg13 accuracy:	 0.8719
vgg16 accuracy:	 0.8728
vgg19 accuracy:	 0.8762
q_vgg11 accuracy:	 0.8566
q_vgg13 accuracy:	 0.8699
q_vgg16 accuracy:	 0.8745
q_vgg19 accuracy:	 0.8770
