https://visualstudiomagazine.com/articles/2020/09/10/pytorch-dataloader.aspx

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
import os
import time
import sys
import matplotlib.pyplot as plt
import torch.quantization

In [2]:
# # Setup warnings
import warnings
warnings.filterwarnings(
    action='ignore',
    category=DeprecationWarning,
    module=r'.*'
)
warnings.filterwarnings(
    action='default',
    module=r'torch.quantization'
)

# Specify random seed for repeatable results
torch.manual_seed(191009)

<torch._C.Generator at 0x7fee98039350>

In [3]:
import os
import requests
from requests.adapters import HTTPAdapter

import torch
from torch import nn
from torch.nn import functional as F
from torch.quantization import QuantStub, DeQuantStub

from facenet_pytorch.models.utils.download import download_url_to_file


class BasicConv2d(nn.Module):

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=kernel_size, stride=stride,
            padding=padding, bias=False
        ) # verify bias false
        self.bn = nn.BatchNorm2d(
            out_planes,
            eps=0.001, # value found in tensorflow
            momentum=0.1, # default pytorch value
            affine=True
        )
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Block35(nn.Module):

    def __init__(self, scale=1.0):
        super().__init__()

        self.scale = scale

        self.branch0 = BasicConv2d(256, 32, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(256, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(256, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
        )

        self.conv2d = nn.Conv2d(96, 256, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)
        self.ff1 = nn.quantized.FloatFunctional()
        self.ff2 = nn.quantized.FloatFunctional()

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)
         # out = out * self.scale + x        
        # out = self.ff1.mul(torch.tensor(out), torch.tensor(self.scale))
        # out = self.ff2.add(out, x)
        out = self.conv2d(out)
        out = self.ff2.add(self.ff1.mul_scalar(out, self.scale), x) 
        out = self.relu(out)
        return out


class Block17(nn.Module):

    def __init__(self, scale=1.0):
        super().__init__()

        self.scale = scale

        self.branch0 = BasicConv2d(896, 128, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(896, 128, kernel_size=1, stride=1),
            BasicConv2d(128, 128, kernel_size=(1,7), stride=1, padding=(0,3)),
            BasicConv2d(128, 128, kernel_size=(7,1), stride=1, padding=(3,0))
        )

        self.conv2d = nn.Conv2d(256, 896, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)
        self.ff1 = nn.quantized.FloatFunctional()
        self.ff2 = nn.quantized.FloatFunctional()

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        out = torch.cat((x0, x1), 1)
        out = self.conv2d(out)
        out = self.ff2.add(self.ff1.mul_scalar(out, self.scale), x) 
        # out = out * self.scale + x
        out = self.relu(out)
        return out


class Block8(nn.Module):

    def __init__(self, scale=1.0, noReLU=False):
        super().__init__()

        self.scale = scale
        self.noReLU = noReLU

        self.branch0 = BasicConv2d(1792, 192, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(1792, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 192, kernel_size=(1,3), stride=1, padding=(0,1)),
            BasicConv2d(192, 192, kernel_size=(3,1), stride=1, padding=(1,0))
        )

        self.conv2d = nn.Conv2d(384, 1792, kernel_size=1, stride=1)
        if not self.noReLU:
            self.relu = nn.ReLU(inplace=False)
        self.ff1 = nn.quantized.FloatFunctional()
        self.ff2 = nn.quantized.FloatFunctional()

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        out = torch.cat((x0, x1), 1)
        out = self.conv2d(out)
        out = self.ff2.add(self.ff1.mul_scalar(out, self.scale), x) 
        # out = out * self.scale + x
        if not self.noReLU:
            out = self.relu(out)
        return out


class Mixed_6a(nn.Module):

    def __init__(self):
        super().__init__()

        self.branch0 = BasicConv2d(256, 384, kernel_size=3, stride=2)

        self.branch1 = nn.Sequential(
            BasicConv2d(256, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 192, kernel_size=3, stride=1, padding=1),
            BasicConv2d(192, 256, kernel_size=3, stride=2)
        )

        self.branch2 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)
        return out


class Mixed_7a(nn.Module):

    def __init__(self):
        super().__init__()

        self.branch0 = nn.Sequential(
            BasicConv2d(896, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2)
        )

        self.branch1 = nn.Sequential(
            BasicConv2d(896, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 256, kernel_size=3, stride=2)
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(896, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
            BasicConv2d(256, 256, kernel_size=3, stride=2)
        )

        self.branch3 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        out = torch.cat((x0, x1, x2, x3), 1)
        return out


class InceptionResnetV1(nn.Module):
    """Inception Resnet V1 model with optional loading of pretrained weights.
    Model parameters can be loaded based on pretraining on the VGGFace2 or CASIA-Webface
    datasets. Pretrained state_dicts are automatically downloaded on model instantiation if
    requested and cached in the torch cache. Subsequent instantiations use the cache rather than
    redownloading.
    Keyword Arguments:
        pretrained {str} -- Optional pretraining dataset. Either 'vggface2' or 'casia-webface'.
            (default: {None})
        classify {bool} -- Whether the model should output classification probabilities or feature
            embeddings. (default: {False})
        num_classes {int} -- Number of output classes. If 'pretrained' is set and num_classes not
            equal to that used for the pretrained model, the final linear layer will be randomly
            initialized. (default: {None})
        dropout_prob {float} -- Dropout probability. (default: {0.6})
    """
    def __init__(self, pretrained=None, classify=False, num_classes=None, dropout_prob=0.6, device=None):
        super().__init__()

        # Set simple attributes
        self.pretrained = pretrained
        self.classify = classify
        self.num_classes = num_classes

        if pretrained == 'vggface2':
            tmp_classes = 8631
        elif pretrained == 'casia-webface':
            tmp_classes = 10575
        elif pretrained is None and self.classify and self.num_classes is None:
            raise Exception('If "pretrained" is not specified and "classify" is True, "num_classes" must be specified')


        # Define layers
        self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
        self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool_3a = nn.MaxPool2d(3, stride=2)
        self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
        self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
        self.conv2d_4b = BasicConv2d(192, 256, kernel_size=3, stride=2)
        self.repeat_1 = nn.Sequential(
            Block35(scale=0.17),
            Block35(scale=0.17),
            Block35(scale=0.17),
            Block35(scale=0.17),
            Block35(scale=0.17),
        )
        self.mixed_6a = Mixed_6a()
        self.repeat_2 = nn.Sequential(
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
            Block17(scale=0.10),
        )
        self.mixed_7a = Mixed_7a()
        self.repeat_3 = nn.Sequential(
            Block8(scale=0.20),
            Block8(scale=0.20),
            Block8(scale=0.20),
            Block8(scale=0.20),
            Block8(scale=0.20),
        )
        self.block8 = Block8(noReLU=True)
        self.avgpool_1a = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_prob)
        self.last_linear = nn.Linear(1792, 512, bias=False)
        self.last_bn = nn.BatchNorm1d(512, eps=0.001, momentum=0.1, affine=True)
        self.quant = QuantStub()
        self.dequant = DeQuantStub()

        if pretrained is not None:
            self.logits = nn.Linear(512, tmp_classes)
            load_weights(self, pretrained)

        if self.classify and self.num_classes is not None:
            self.logits = nn.Linear(512, self.num_classes)

        self.device = torch.device('cpu')
        if device is not None:
            self.device = device
            self.to(device)

    def forward(self, x):
        """Calculate embeddings or logits given a batch of input image tensors.
        Arguments:
            x {torch.tensor} -- Batch of image tensors representing faces.
        Returns:
            torch.tensor -- Batch of embedding vectors or multinomial logits.
        """
        x = self.quant(x)
        x = self.conv2d_1a(x)
        x = self.conv2d_2a(x)
        x = self.conv2d_2b(x)
        x = self.maxpool_3a(x)
        x = self.conv2d_3b(x)
        x = self.conv2d_4a(x)
        x = self.conv2d_4b(x)
        x = self.repeat_1(x)
        x = self.mixed_6a(x)
        x = self.repeat_2(x)
        x = self.mixed_7a(x)
        x = self.repeat_3(x)
        x = self.block8(x)
        x = self.avgpool_1a(x)
        x = self.dropout(x)
        x = self.last_linear(x.view(x.shape[0], -1))
        x = self.last_bn(x)        
        if self.classify:
            x = self.logits(x)
            x = self.dequant(x)
        else:
            x = self.dequant(x)
            x = F.normalize(x, p=2, dim=1)
        # x = self.dequant(x)
        return x
    
    def fuse_model(self):
        for m in self.modules():
            if type(m) == BasicConv2d:
                torch.quantization.fuse_modules(m, ['conv', 'bn', 'relu'], inplace=True)
        torch.quantization.fuse_modules(self, [["last_linear", "last_bn"]], inplace=True)


def load_weights(mdl, name):
    """Download pretrained state_dict and load into model.
    Arguments:
        mdl {torch.nn.Module} -- Pytorch model.
        name {str} -- Name of dataset that was used to generate pretrained state_dict.
    Raises:
        ValueError: If 'pretrained' not equal to 'vggface2' or 'casia-webface'.
    """
    if name == 'vggface2':
        path = 'https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180402-114759-vggface2.pt'
    elif name == 'casia-webface':
        path = 'https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180408-102900-casia-webface.pt'
    else:
        raise ValueError('Pretrained models only exist for "vggface2" and "casia-webface"')

    model_dir = os.path.join(get_torch_home(), 'checkpoints')
    os.makedirs(model_dir, exist_ok=True)

    cached_file = os.path.join(model_dir, os.path.basename(path))
    if not os.path.exists(cached_file):
        download_url_to_file(path, cached_file)

    state_dict = torch.load(cached_file)
    mdl.load_state_dict(state_dict)


def get_torch_home():
    torch_home = os.path.expanduser(
        os.getenv(
            'TORCH_HOME',
            os.path.join(os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')
        )
    )
    return torch_home

In [16]:
# for module_name, module in model_inception_resnet.named_children():
#     if module_name == 'last_linear':
#         print(module_name, module)

last_linear QuantizedLinear(in_features=1792, out_features=512, scale=0.018916359171271324, zero_point=59, qscheme=torch.per_tensor_affine)


In [5]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


def evaluate(model, criterion, data_loader, neval_batches):
    model.eval()
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    cnt = 0
    with torch.no_grad():
        for image, target in data_loader:
            # output = model(image)
            output = model_inception_resnet(transforms.ToTensor()(image).unsqueeze(0))
            loss = criterion(output, torch.tensor([target]))
            cnt += 1
            acc1, acc5 = accuracy(output, torch.tensor([target]), topk=(1, 5))
            print('.', end = '')
            top1.update(acc1[0], transforms.ToTensor()(image).unsqueeze(0).size(0))
            # top1.update(acc1[0], image.size(0))
            # top5.update(acc5[0], image.size(0))
            if cnt >= neval_batches:
                 return top1, top5

    return top1, top5

def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

In [6]:
criterion = nn.CrossEntropyLoss()
model_inception_resnet = InceptionResnetV1(pretrained='vggface2', classify=True).eval().to('cpu')

In [7]:
data_path = '/dataset/imagenet/'
saved_model_dir = '/home/avishek/Quantization/model_weights/'
float_model_file = 'mobilenet_pretrained_float.pth'
scripted_float_model_file = 'mobilenet_quantization_scripted.pth'
scripted_quantized_model_file = 'mobilenet_quantization_scripted_quantized.pth'

train_batch_size = 30
eval_batch_size = 50

In [8]:
def collate_fn(x):
    return x[0]

# test_dataset = datasets.ImageFolder('/dataset/VGG_Face2//test_images_aligned')
# test_dataset = datasets.ImageFolder('/home/avishek/Quantization/data/test_images_aligned2')
# test_dataset.idx_to_class = {i:c for c, i in test_dataset.class_to_idx.items()}
# test_loader = DataLoader(test_dataset, collate_fn=collate_fn, num_workers=32)

train_dataset = datasets.ImageFolder('/home/avishek/Quantization/data/train_images_aligned2')
train_dataset.idx_to_class = {i:c for c, i in train_dataset.class_to_idx.items()}
train_loader = DataLoader(train_dataset, collate_fn=collate_fn, num_workers=32, batch_size=32)

In [11]:
len(train_loader.dataset.classes)

4

In [12]:
len(train_loader.dataset.classes)

4

In [9]:
train_loader.dataset.targets[1000:1009]

[3, 3, 3, 3, 3, 3, 3, 3, 3]

In [10]:
train_loader.dataset.targets[-1]

3

In [41]:
# aligned = []
# names = []

# for x, y in test_loader:
#     # x_aligned, prob = mtcnn(x, return_prob=True)
#     # if x_aligned is not None:
#     #     print('Face detected with probability: {:8f}'.format(prob))
#     aligned.append(transforms.ToTensor()(x))
#     names.append(test_loader.dataset.idx_to_class[y])
#     # class_id.append(test_loader.dataset.samples[y][1])

In [9]:
# device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
# print('Running on device: {}'.format(device))
device = 'cpu'

https://stackoverflow.com/questions/57237381/runtimeerror-expected-4-dimensional-input-for-4-dimensional-weight-32-3-3-but

In [25]:
# feat_vector = []
# names = []

# for x, y in test_loader:
#     feat_vector.append(model_inception_resnet(transforms.ToTensor()(x).unsqueeze(0)).detach().numpy().reshape(8631, ))
#     names.append(test_loader.dataset.idx_to_class[y])

In [10]:
# print('\n Block: Before fusion \n\n', model_inception_resnet.conv2d_1a.conv)
# model_inception_resnet.eval()
# print('\n Model size before fusion : ', print_size_of_model( model_inception_resnet))

# # Fuses modules
# model_inception_resnet.fuse_model()

# # Note fusion of Conv+BN+Relu and Conv+Relu
# print('\n After fusion\n\n',model_inception_resnet.conv2d_1a.conv)
# # model_inception_resnet.eval()
# print('\n Model size after fusion : ', print_size_of_model( model_inception_resnet))


 Block: Before fusion 

 Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
Size (MB): 112.018331

 Model size before fusion :  None

 After fusion

 ConvReLU2d(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (1): ReLU()
)
Size (MB): 111.714175

 Model size after fusion :  None


In [63]:
# with torch.no_grad():
#     output = model_inception_resnet(transforms.ToTensor()(image).unsqueeze(0))
#     print(output)
#     loss = criterion(output, torch.tensor([target]))

tensor([[-4.1048, 12.8072,  4.7573,  ..., -3.3688, -1.6255, -2.5136]])


In [62]:
# # input = torch.randn(3, 5, requires_grad=True)
# # target = torch.tensor([1, 0, 4])
# # output = F.nll_loss(F.log_softmax(input), target)

# input = torch.randn(1, 5, requires_grad=True)
# target = torch.tensor([1])
# output = F.nll_loss(F.log_softmax(input), target)
# outputc = criterion(F.log_softmax(input), target)

  import sys


In [9]:
num_eval_batches = 100
model_inception_resnet.eval()

print("Size of baseline model")
print_size_of_model(model_inception_resnet)

top1, top5 = evaluate(model_inception_resnet, criterion, train_loader, neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))
# torch.jit.save(torch.jit.script(float_model), saved_model_dir + scripted_float_model_file)

Size of baseline model
Size (MB): 112.006249


[W NNPACK.cpp:79] Could not initialize NNPACK! Reason: Unsupported hardware.


...................................Evaluation accuracy on 5000 images, 85.71


In [10]:
model_inception_resnet.fuse_model()

top1, top5 = evaluate(model_inception_resnet, criterion, train_loader, neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))

...................................Evaluation accuracy on 5000 images, 85.71


In [11]:
num_calibration_batches = 32

# # myModel = load_model(saved_model_dir + float_model_file).to('cpu')
# # myModel.eval()

# # Fuse Conv, bn and relu
# model_inception_resnet.fuse_model()

# Specify quantization configuration
# Start with simple min/max range estimation and per-tensor quantization of weights
model_inception_resnet.qconfig = torch.quantization.default_qconfig
print(model_inception_resnet.qconfig)
torch.quantization.prepare(model_inception_resnet, inplace=True)

# Calibrate first
print('Post Training Quantization Prepare: Inserting Observers')
print('\n After observer insertion \n\n', model_inception_resnet.conv2d_1a.conv)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})




Post Training Quantization Prepare: Inserting Observers

 After observer insertion 

 ConvReLU2d(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (1): ReLU()
  (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
)


In [11]:
print_size_of_model(model_inception_resnet)

Size (MB): 112.014939


In [12]:
# Calibrate with the training set
evaluate(model_inception_resnet, criterion, train_loader, neval_batches=num_calibration_batches)
print('Post Training Quantization: Calibration done')

# Convert to quantized model
torch.backends.quantized.engine = 'qnnpack'
torch.quantization.convert(model_inception_resnet, inplace=True)
print('Post Training Quantization: Convert done')
print('\n Block: After fusion and quantization, note fused modules: \n\n',model_inception_resnet.conv2d_1a.conv)

print("Size of model after quantization")
print_size_of_model(model_inception_resnet)

................................Post Training Quantization: Calibration done




Post Training Quantization: Convert done

 Block: After fusion and quantization, note fused modules: 

 QuantizedConvReLU2d(3, 32, kernel_size=(3, 3), stride=(2, 2), scale=0.05539536103606224, zero_point=0)
Size of model after quantization
Size (MB): 28.247461




In [14]:
### Accuracy of quantized model on validation Set
top1, top5 = evaluate(model_inception_resnet, criterion, train_loader, neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))
torch.jit.save(torch.jit.script(model_inception_resnet), saved_model_dir + scripted_quantized_model_file)

...................................Evaluation accuracy on 5000 images, 80.00


RuntimeError: 
Module 'Block8' has no attribute 'relu' :
  File "/tmp/ipykernel_48096/3826597506.py", line 137
        # out = out * self.scale + x
        if not self.noReLU:
            out = self.relu(out)
                  ~~~~~~~~~ <--- HERE
        return out


In [13]:
### Accuracy of quantized model on validation Set
top1, top5 = evaluate(model_inception_resnet, criterion, train_loader, neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))

RuntimeError: a Tensor with 73984 elements cannot be converted to Scalar