# Quantised SqueezeNet

In [1]:
import math
import numpy as np
import torch
import torch.nn as nn
import torch.jit as jit
import torch.backends.cudnn as cudnn

from torch.autograd import Variable

In [2]:
# Remember to:
!export PYTHONPATH=$(readlink -m ./pytorch-playground):$PYTHONPATH
from utee import misc, quant, selector
from imagenet import squeezenet
from collections import OrderedDict

In [3]:
gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=1, selected_gpus='0')
ngpu = len(gpu)
input_size = 224

batch_size = 100
data_root='/tmp/public_dataset/pytorch/'

assert torch.cuda.is_available(), 'no cuda'
torch.manual_seed(117)
torch.cuda.manual_seed(117)

# load model and dataset fetcher
model_raw, ds_fetcher, is_imagenet = selector.select('squeezenet_v1', model_root='~/.torch/models')

Setting GPU: ['0']
Building and initializing squeezenet_v1 parameters


In [40]:
# eval model
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader, 50000 for train, 50000 for test


=> Done (6.2928 s)

  0%|          | 0/500 [00:00<?, ?it/s][A
  0%|          | 1/500 [00:00<01:35,  5.22it/s][A
  0%|          | 2/500 [00:00<01:31,  5.47it/s][A
  1%|          | 3/500 [00:00<01:27,  5.66it/s][A
  1%|          | 4/500 [00:00<01:24,  5.86it/s][A
  1%|          | 5/500 [00:00<01:22,  6.01it/s][A
  1%|          | 6/500 [00:00<01:20,  6.11it/s][A
  1%|▏         | 7/500 [00:01<01:19,  6.19it/s][A
  2%|▏         | 8/500 [00:01<01:18,  6.25it/s][A
  2%|▏         | 9/500 [00:01<01:17,  6.30it/s][A
  2%|▏         | 10/500 [00:01<01:17,  6.34it/s][A
  2%|▏         | 11/500 [00:01<01:16,  6.36it/s][A
  2%|▏         | 12/500 [00:01<01:16,  6.38it/s][A
  3%|▎         | 13/500 [00:02<01:16,  6.40it/s][A
  3%|▎         | 14/500 [00:02<01:15,  6.42it/s][A
  3%|▎         | 15/500 [00:02<01:15,  6.44it/s][A
  3%|▎         | 16/500 [00:02<01:14,  6.45it/s][A
  3%|▎         | 17/500 [00:02<01:14,  6.47it/s][A
  4%|▎         | 18/500 [00:02<01:14,  6.48it/s][A
  4%|▍    

 62%|██████▏   | 310/500 [00:46<00:28,  6.62it/s][A
 62%|██████▏   | 311/500 [00:46<00:28,  6.62it/s][A
 62%|██████▏   | 312/500 [00:47<00:28,  6.62it/s][A
 63%|██████▎   | 313/500 [00:47<00:28,  6.62it/s][A
 63%|██████▎   | 314/500 [00:47<00:28,  6.62it/s][A
 63%|██████▎   | 315/500 [00:47<00:27,  6.62it/s][A
 63%|██████▎   | 316/500 [00:47<00:27,  6.62it/s][A
 63%|██████▎   | 317/500 [00:47<00:27,  6.62it/s][A
 64%|██████▎   | 318/500 [00:48<00:27,  6.62it/s][A
 64%|██████▍   | 319/500 [00:48<00:27,  6.62it/s][A
 64%|██████▍   | 320/500 [00:48<00:27,  6.62it/s][A
 64%|██████▍   | 321/500 [00:48<00:27,  6.62it/s][A
 64%|██████▍   | 322/500 [00:48<00:26,  6.62it/s][A
 65%|██████▍   | 323/500 [00:48<00:26,  6.62it/s][A
 65%|██████▍   | 324/500 [00:48<00:26,  6.62it/s][A
 65%|██████▌   | 325/500 [00:49<00:26,  6.62it/s][A
 65%|██████▌   | 326/500 [00:49<00:26,  6.62it/s][A
 65%|██████▌   | 327/500 [00:49<00:26,  6.62it/s][A
 66%|██████▌   | 328/500 [00:49<00:25,  6.62it

In [41]:
# This is the accuracy at fp32
acc1, acc5

(0.55948, 0.7913)

# Collect activation statistics

In [25]:
class Logger(nn.Module):
    def __init__(self, name):
        super(Logger, self).__init__()
        print(f'Creating logger for "{name}"')
        self.name = name
        self.log_items = np.zeros((0, 2))
        #self.percentiles = torch.zeros(1, 5).cuda()
        #self.indices = None
        
    def forward(self, x):
        # Slowwwwwww (7s/it)
        #log_item = np.percentile(x.data.cpu().numpy(), q=[float(x) for x in [0, 5, 95, 100]])
        
        # (~4.9it/s)
        # NOTE this takes the min/max of the entire batch (100 images) 
        log_item = [ torch.min(x).data.cpu().numpy()[0],
                     torch.max(x).data.cpu().numpy()[0] ]
        self.log_items = np.append(self.log_items, log_item)
        
        # histc not implemented on cuda
        #log_item = torch.histc(x, bins=100).cpu().numpy()
        
        #if self.indices is None:
        #    length = x.data.view(-1).size()[0]
        #    self.indices = torch.LongTensor([int(float(p)/1000*length) for p in range(0,1001)]).cuda()
        #sorted, _indices = torch.sort(x.data.view(-1))
        #prow = torch.index_select(sorted, 0, self.indices)
        #self.percentiles = torch.cat((self.percentiles, prow))
        #del sorted
        #del prow
                
        return x
    
import copy
def duplicate_model_with_logging(model, indent_level=0, prefix=''):
    """
    Inserts logging nodes into a NN model.
    
    Assumes that original model has at least a nn.Sequential.
    """
    if indent_level == 0:
        l = OrderedDict()
        input_logger = Logger("input")
        l["input/log"] = input_logger
        l["/"], subloggers = duplicate_model_with_logging(copy.deepcopy(model), 1, prefix)
        m = nn.Sequential(l)
        loggers = [input_logger]
        loggers.extend(subloggers)
        return m, loggers
    loggers = []
    indent = " " * indent_level
    if isinstance(model, nn.Sequential):
        print(f'{indent}Sequential')
        l = OrderedDict()
        for k, v in model._modules.items():
            print(f'{indent} Looking at : {k}')
            prefix_ = f'{prefix}/{k}'
            if isinstance(v, (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)):
                l[k] = v
                logger = Logger(prefix_)
                l[f'{prefix_}/log'] = logger
                loggers.append(logger)
            else:
                l[k], subloggers = duplicate_model_with_logging(v, indent_level + 1, prefix_)
                loggers.extend(subloggers)
        m = nn.Sequential(l)
        return m, loggers
    else:
        print(f'{indent}Not Sequential')
        for k, v in model._modules.items():
            prefix_ = f'{prefix}/{k}'
            model._modules[k], subloggers = duplicate_model_with_logging(v, indent_level + 1, prefix_)
            loggers.extend(subloggers)
        return model, loggers

In [26]:
model_log, loggers = duplicate_model_with_logging(model_raw)

Creating logger for "input"
 Not Sequential
  Sequential
   Looking at : 0
Creating logger for "/features/0"
   Looking at : 1
   Not Sequential
   Looking at : 2
   Not Sequential
   Looking at : 3
   Not Sequential
    Sequential
     Looking at : squeeze
Creating logger for "/features/3/group1/squeeze"
     Looking at : squeeze_activation
     Not Sequential
    Sequential
     Looking at : expand1x1
Creating logger for "/features/3/group2/expand1x1"
     Looking at : expand1x1_activation
     Not Sequential
    Sequential
     Looking at : expand3x3
Creating logger for "/features/3/group3/expand3x3"
     Looking at : expand3x3_activation
     Not Sequential
   Looking at : 4
   Not Sequential
    Sequential
     Looking at : squeeze
Creating logger for "/features/4/group1/squeeze"
     Looking at : squeeze_activation
     Not Sequential
    Sequential
     Looking at : expand1x1
Creating logger for "/features/4/group2/expand1x1"
     Looking at : expand1x1_activation
     Not Seque

In [27]:
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1, acc5 = misc.eval_model(model_log, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader, 50000 for train, 50000 for test


=> Done (5.6219 s)

  0%|          | 0/500 [00:00<?, ?it/s][A
  0%|          | 1/500 [00:00<01:51,  4.49it/s][A
  0%|          | 2/500 [00:00<01:46,  4.69it/s][A
  1%|          | 3/500 [00:00<01:42,  4.86it/s][A
  1%|          | 4/500 [00:00<01:39,  5.00it/s][A
  1%|          | 5/500 [00:00<01:37,  5.10it/s][A
  1%|          | 6/500 [00:01<01:35,  5.18it/s][A
  1%|▏         | 7/500 [00:01<01:34,  5.23it/s][A
  2%|▏         | 8/500 [00:01<01:33,  5.27it/s][A
  2%|▏         | 9/500 [00:01<01:32,  5.30it/s][A
  2%|▏         | 10/500 [00:01<01:31,  5.33it/s][A
  2%|▏         | 11/500 [00:02<01:31,  5.35it/s][A
  2%|▏         | 12/500 [00:02<01:30,  5.37it/s][A
  3%|▎         | 13/500 [00:02<01:30,  5.39it/s][A
  3%|▎         | 14/500 [00:02<01:30,  5.40it/s][A
  3%|▎         | 15/500 [00:02<01:29,  5.41it/s][A
  3%|▎         | 16/500 [00:02<01:29,  5.42it/s][A
  3%|▎         | 17/500 [00:03<01:28,  5.43it/s][A
  4%|▎         | 18/500 [00:03<01:28,  5.44it/s][A
  4%|▍    

In [29]:
# We now have statistics for all the activations. Reduce to a scale factor for each one.
quantization_levels = 2 ** 8 - 1 # 8 bit signed fixed-point without (-2**7)
max_q_level = (quantization_levels - 1) / 2
def scale_factor_from_stats(ranges):
    min_min = np.min(ranges[0])
    max_max = np.max(ranges[1])
    pos_range = max(abs(min_min), abs(max_max))
    # val = q_val * scale_factor; so:
    scale_factor = pos_range / max_q_level
    return scale_factor
scale_factors = { logger.name : scale_factor_from_stats(np.asarray(logger.log_items))
                  for logger in loggers }

In [30]:
scale_factors

{'/classifier/1': 2.1598191148652806,
 '/classifier/3': 0.34802837822380966,
 '/features/0': 0.14690978883758304,
 '/features/10/group1/squeeze': 6.981395811546506,
 '/features/10/group2/expand1x1': 3.6382376603254185,
 '/features/10/group3/expand3x3': 6.351740799550935,
 '/features/11/group1/squeeze': 10.655975101500983,
 '/features/11/group2/expand1x1': 4.7391491987573815,
 '/features/11/group3/expand3x3': 7.224267193651575,
 '/features/12/group1/squeeze': 8.164420060285433,
 '/features/12/group2/expand1x1': 6.2214600570558565,
 '/features/12/group3/expand3x3': 9.469892847256398,
 '/features/3/group1/squeeze': 0.3836523641751507,
 '/features/3/group2/expand1x1': 0.2993703526774729,
 '/features/3/group3/expand3x3': 0.4190451554426058,
 '/features/4/group1/squeeze': 0.4447247362512303,
 '/features/4/group2/expand1x1': 0.43653551236851007,
 '/features/4/group3/expand3x3': 0.6296296007051243,
 '/features/6/group1/squeeze': 1.2370265450064593,
 '/features/6/group2/expand1x1': 0.7592978890

In [25]:
%matplotlib notebook
import matplotlib.pyplot as plt
import seaborn as sbs
data = np.asarray(loggers[1].log_items)
sbs.distplot(data[0], hist=True, rug=True)
#sbs.distplot(list(scale_factors.values()), hist=True, rug=True)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [53]:
np.percentile(data, q=[0, 1, 5, 95, 99, 100], axis=0)

array([[-64.68961334,  37.50022125],
       [-59.58321198,  37.99289818],
       [-57.89531536,  39.38270359],
       [-48.49136219,  49.74721947],
       [-47.57300343,  51.59901909],
       [-46.29519653,  56.11297226]])

# Modifying for quantised inference

In [42]:
# model.apply(f) calls f for each Module in the model.
#  But it doesn't handle things like torch.concat
model_raw.apply(lambda m: print(type(m)))

# Can copy a model with:
import copy
copy.deepcopy(model_raw)
# Or just recreate the model:
model_copy = create_model()
model_copy.lead_state_dict(original_model.state_dict())



<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.pooling.MaxPool2d'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.container.Sequential'>
<class 'imagenet.squeezenet.Fire'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.container.Sequential'>
<class 'imagenet.squeezenet.Fire'>
<class 'torch.nn.module

SqueezeNet(
  (features): Sequential(
    (0): Conv2d (3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (3): Fire(
      (group1): Sequential(
        (squeeze): Conv2d (64, 16, kernel_size=(1, 1), stride=(1, 1))
        (squeeze_activation): ReLU(inplace)
      )
      (group2): Sequential(
        (expand1x1): Conv2d (16, 64, kernel_size=(1, 1), stride=(1, 1))
        (expand1x1_activation): ReLU(inplace)
      )
      (group3): Sequential(
        (expand3x3): Conv2d (16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (expand3x3_activation): ReLU(inplace)
      )
    )
    (4): Fire(
      (group1): Sequential(
        (squeeze): Conv2d (128, 16, kernel_size=(1, 1), stride=(1, 1))
        (squeeze_activation): ReLU(inplace)
      )
      (group2): Sequential(
        (expand1x1): Conv2d (16, 64, kernel_size=(1, 1), stride=(1, 1))
        (expand1x1_activation): ReLU(inplace)
  

In [58]:
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
(data, target) = next(val_ds)
data = Variable(torch.FloatTensor(data)).cuda()
model_cuda = torch.nn.DataParallel(model_raw.eval(), device_ids=range(ngpu)).cuda()
trace, out = jit.trace(model_cuda, data)

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader, 50000 for train, 50000 for test


=> Done (6.3597 s)


In [65]:
trace.graph()

graph(%1 : Float(100, 3, 224, 224)
      %2 : Float(64, 3, 3, 3)
      %3 : Float(64)
      %4 : Float(16, 64, 1, 1)
      %5 : Float(16)
      %6 : Float(64, 16, 1, 1)
      %7 : Float(64)
      %8 : Float(64, 16, 3, 3)
      %9 : Float(64)
      %10 : Float(16, 128, 1, 1)
      %11 : Float(16)
      %12 : Float(64, 16, 1, 1)
      %13 : Float(64)
      %14 : Float(64, 16, 3, 3)
      %15 : Float(64)
      %16 : Float(32, 128, 1, 1)
      %17 : Float(32)
      %18 : Float(128, 32, 1, 1)
      %19 : Float(128)
      %20 : Float(128, 32, 3, 3)
      %21 : Float(128)
      %22 : Float(32, 256, 1, 1)
      %23 : Float(32)
      %24 : Float(128, 32, 1, 1)
      %25 : Float(128)
      %26 : Float(128, 32, 3, 3)
      %27 : Float(128)
      %28 : Float(48, 256, 1, 1)
      %29 : Float(48)
      %30 : Float(192, 48, 1, 1)
      %31 : Float(192)
      %32 : Float(192, 48, 3, 3)
      %33 : Float(192)
      %34 : Float(48, 384, 1, 1)
      %35 : Float(48)
      %36 : Float(192, 48, 1, 1)
      

In [62]:
print(trace)

graph(%1 : Float(100, 3, 224, 224)
      %2 : Float(64, 3, 3, 3)
      %3 : Float(64)
      %4 : Float(16, 64, 1, 1)
      %5 : Float(16)
      %6 : Float(64, 16, 1, 1)
      %7 : Float(64)
      %8 : Float(64, 16, 3, 3)
      %9 : Float(64)
      %10 : Float(16, 128, 1, 1)
      %11 : Float(16)
      %12 : Float(64, 16, 1, 1)
      %13 : Float(64)
      %14 : Float(64, 16, 3, 3)
      %15 : Float(64)
      %16 : Float(32, 128, 1, 1)
      %17 : Float(32)
      %18 : Float(128, 32, 1, 1)
      %19 : Float(128)
      %20 : Float(128, 32, 3, 3)
      %21 : Float(128)
      %22 : Float(32, 256, 1, 1)
      %23 : Float(32)
      %24 : Float(128, 32, 1, 1)
      %25 : Float(128)
      %26 : Float(128, 32, 3, 3)
      %27 : Float(128)
      %28 : Float(48, 256, 1, 1)
      %29 : Float(48)
      %30 : Float(192, 48, 1, 1)
      %31 : Float(192)
      %32 : Float(192, 48, 3, 3)
      %33 : Float(192)
      %34 : Float(48, 384, 1, 1)
      %35 : Float(48)
      %36 : Float(192, 48, 1, 1)
      

In [32]:
# We do dumb quantisation; no fine-tuning.

weights_bw    = 8
biases_bw     = 32
activation_bw = 8
overflow_rate = 0.0
n_sample      = 20
quant_method  = "linear"
    
def duplicate_model_with_quant(model, bits, overflow_rate=0.0, counter=10, type='linear'):
    """assume that original model has at least a nn.Sequential"""
    assert type in ['linear', 'minmax', 'log', 'tanh']
    if isinstance(model, nn.Sequential):
        print(f'> Sequential')
        l = OrderedDict()
        for k, v in model._modules.items():
            print(f'Looking at : {k}')
            if isinstance(v, (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)):
                parameters = list(v.parameters())
                if isinstance(v, nn.Conv2d):
                    print(f"Found Conv2D:")
                    print(f"  weights: [o,i,k,k] {parameters[0].shape}")
                    print(f"  biases : [o] {parameters[1].shape}")
                l[k] = v
                
            else:
                l[k] = duplicate_model_with_quant(v, bits, overflow_rate, counter, type)
        m = nn.Sequential(l)
        return m
    else:
        print(f'> Not Sequential')
        for k, v in model._modules.items():
            model._modules[k] = duplicate_model_with_quant(v, bits, overflow_rate, counter, type)
        return model

model_quant = duplicate_model_with_quant(model_raw, bits=activation_bw, overflow_rate=overflow_rate,
                                               counter=n_sample, type=quant_method)

> Not Sequential
> Sequential
Looking at : 0
Found Conv2D:
  weights: [o,i,k,k] torch.Size([64, 3, 3, 3])
  biases : [o] torch.Size([64])
Looking at : 0_linear_quant
> Not Sequential
Looking at : 1
> Not Sequential
Looking at : 2
> Not Sequential
Looking at : 3
> Not Sequential
> Sequential
Looking at : squeeze
Found Conv2D:
  weights: [o,i,k,k] torch.Size([16, 64, 1, 1])
  biases : [o] torch.Size([16])
Looking at : squeeze_linear_quant
> Not Sequential
Looking at : squeeze_activation
> Not Sequential
> Sequential
Looking at : expand1x1
Found Conv2D:
  weights: [o,i,k,k] torch.Size([64, 16, 1, 1])
  biases : [o] torch.Size([64])
Looking at : expand1x1_linear_quant
> Not Sequential
Looking at : expand1x1_activation
> Not Sequential
> Sequential
Looking at : expand3x3
Found Conv2D:
  weights: [o,i,k,k] torch.Size([64, 16, 3, 3])
  biases : [o] torch.Size([64])
Looking at : expand3x3_linear_quant
> Not Sequential
Looking at : expand3x3_activation
> Not Sequential
Looking at : 4
> Not Sequ

In [None]:
# eval model
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1q, acc5q = misc.eval_model(model_quant, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)

In [25]:
acc1q, acc5q

NameError: name 'acc1q' is not defined

# Exporting model
There is the existing ONNX exporter. Either modify that, or manually add functions to each type of Module and call them recursively.

# Modifying for quantised training
Easiest would be to modify SqN source.

## MobileNet
Search GitHub; there seem to be plenty of implementations. E.g. https://github.com/marvis/pytorch-mobilenet
There are also implementations for v2.

## MobileNet-SSD
Try modifying https://github.com/amdegroot/ssd.pytorch:
  - Use MobileNet feature extractor
  - Use dw convolutions in SSD (SSDLite)

In [8]:
model_raw.state_dict().keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.3.group1.squeeze.weight', 'features.3.group1.squeeze.bias', 'features.3.group2.expand1x1.weight', 'features.3.group2.expand1x1.bias', 'features.3.group3.expand3x3.weight', 'features.3.group3.expand3x3.bias', 'features.4.group1.squeeze.weight', 'features.4.group1.squeeze.bias', 'features.4.group2.expand1x1.weight', 'features.4.group2.expand1x1.bias', 'features.4.group3.expand3x3.weight', 'features.4.group3.expand3x3.bias', 'features.6.group1.squeeze.weight', 'features.6.group1.squeeze.bias', 'features.6.group2.expand1x1.weight', 'features.6.group2.expand1x1.bias', 'features.6.group3.expand3x3.weight', 'features.6.group3.expand3x3.bias', 'features.7.group1.squeeze.weight', 'features.7.group1.squeeze.bias', 'features.7.group2.expand1x1.weight', 'features.7.group2.expand1x1.bias', 'features.7.group3.expand3x3.weight', 'features.7.group3.expand3x3.bias', 'features.9.group1.squeeze.weight', 'features.9.group1.squeeze.bias', 'featu

In [1]:
[1,2,3][:1]

[1]