# Quantised SqueezeNet

### Imports

In [1]:
import math
import builtins
import numpy as np
import torch
import torch.nn as nn
import torch.jit as jit
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

# Remember to:
!export PYTHONPATH=$(readlink -m ./pytorch-playground):$PYTHONPATH
from utee import misc, quant, selector
from imagenet import squeezenet
from collections import OrderedDict

### Setup GPU and load the original model

In [2]:
gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=1, selected_gpus='0')
ngpu = len(gpu)
input_size = 224

batch_size = 100
data_root='/tmp/public_dataset/pytorch/'

assert torch.cuda.is_available(), 'no cuda'
torch.manual_seed(117)
torch.cuda.manual_seed(117)

# load model and dataset fetcher
model_orig, ds_fetcher, is_imagenet = selector.select('squeezenet_v1', model_root='~/.torch/models')

Setting GPU: ['0']
Building and initializing squeezenet_v1 parameters


### Evaluate the original model with float32

In [3]:
# Eval model (this is the accuracy at fp32)
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1, acc5 = misc.eval_model(model_orig, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)
acc1, acc5

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader: 50,000 for train; 50,000 for test


=> Done (5.6688 s)


(0.55948, 0.7913)

## Infrastructure for changing models

In [4]:
from copy import deepcopy
from inspect import getfullargspec
def model_apply(model, fn, clone=True):
    """
    Recursively map a Module.
    """
    def _apply(m, path):
        if len(getfullargspec(fn).args) > 1:
            um = fn(m, path or '/')
        else:
            um = fn(m)
            
        if um is not None:
            return um
        else:
            for (k, v) in list(m._modules.items()):
                m._modules[k] = _apply(v, f'{path}/{k}')
            return m
    return _apply(deepcopy(model) if clone else model, '')

### Split convolutions into conv+add, and TODO fold batchnorm

In [8]:
class AddBias(nn.Module):
    def __init__(self, num_features):
        super(AddBias, self).__init__()
        self.num_features = num_features
        self.bias = nn.Parameter(torch.Tensor(num_features))
        
    def forward(self, x):
        return x + self.bias.view([1, self.num_features, 1, 1])
    
def split_conv(m):
    if isinstance(m, nn.Conv2d):
        conv = nn.Conv2d(m.in_channels, m.out_channels, m.kernel_size,
                         stride=m.stride, padding=m.padding,
                         dilation=m.dilation, groups=m.groups, bias=False)
        conv.weight = m.weight
        addbias = AddBias(m.out_channels)
        addbias.bias = m.bias
        return nn.Sequential(OrderedDict([('conv', conv), ('addbias', addbias)]))
model_raw = model_apply(model_orig, split_conv)

# Collect activation statistics

### Function for inserting loggers

In [9]:
class Logger(nn.Module):
    def __init__(self, name):
        super(Logger, self).__init__()
        #print(f'Creating logger for "{name}"')
        self.name = name
        self.log_items = np.zeros((0, 2))
        
    def forward(self, x):
        # (~4.9it/s)
        # NOTE this takes the min/max of the entire batch (100 images) 
        log_item = [ torch.min(x).data.cpu().numpy()[0],
                     torch.max(x).data.cpu().numpy()[0] ]
        self.log_items = np.append(self.log_items, log_item)                
        return x
    
def duplicate_model_with_logging(model):
    loggers = {}
    def insert_logger(m, path):
        if isinstance(m, (nn.Conv2d, AddBias, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)):
            log_in  = Logger(f'{path}/log_in')
            log_out = Logger(f'{path}/log_out')
            loggers[log_in.name]  = log_in
            loggers[log_out.name] = log_out
            return nn.Sequential(log_in, m, log_out)
    loggers['input'] = Logger('input')
    r = nn.Sequential(loggers['input'],
                      model_apply(model, insert_logger))
    return r, loggers

### Create copy of model with logging, and collect stats over the test set

In [10]:
# Insert loggers
model_log, loggers = duplicate_model_with_logging(model_raw)
# Run over the test set to collect stats
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1s, acc5s = misc.eval_model(model_log, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)
print(f'Test accuracy of ({acc1s},{acc5s}), a drop of ({acc1-acc1s},{acc5-acc5s})')

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader: 50,000 for train; 50,000 for test


=> Done (5.6529 s)


Test accuracy of (0.55948,0.7913), a drop of (0.0,0.0)


### Get scale factors from the collected stats

In [12]:
# We now have statistics for all the activations. Reduce to a scale factor for each one.
def scale_factor_from_range(min, max, max_q_level=(2**7-1)):
    pos_range = builtins.max(abs(min), abs(max))
    # val = q_val * scale_factor; so:
    scale_factor = pos_range / max_q_level
    return scale_factor
def scale_factor_from_stats(ranges, max_q_level=(2**7-1)):
    min_min = np.min(ranges[0])
    max_max = np.max(ranges[1])
    return scale_factor_from_range(min_min, max_max, max_q_level)
scale_factors = { k : scale_factor_from_stats(np.asarray(v.log_items))
                  for (k,v) in loggers.items() }
scale_factors

{'/classifier/1/addbias/log_in': 2.1596730149637056,
 '/classifier/1/addbias/log_out': 2.1598191148652806,
 '/classifier/1/conv/log_in': 4.711094653512549,
 '/classifier/1/conv/log_out': 2.1596730149637056,
 '/classifier/3/log_in': 2.1598191148652806,
 '/classifier/3/log_out': 0.34802837822380966,
 '/features/0/addbias/log_in': 0.14483134953055796,
 '/features/0/addbias/log_out': 0.14690978883758304,
 '/features/0/conv/log_in': 0.020787402400820273,
 '/features/0/conv/log_out': 0.14483134953055796,
 '/features/10/group1/squeeze/addbias/log_in': 6.9815669022207185,
 '/features/10/group1/squeeze/addbias/log_out': 6.981395811546506,
 '/features/10/group1/squeeze/conv/log_in': 3.7115569828063486,
 '/features/10/group1/squeeze/conv/log_out': 6.9815669022207185,
 '/features/10/group2/expand1x1/addbias/log_in': 3.638255201925443,
 '/features/10/group2/expand1x1/addbias/log_out': 3.6382376603254185,
 '/features/10/group2/expand1x1/conv/log_in': 6.981395811546506,
 '/features/10/group2/expand1x

### Visualise scale factors

In [13]:
%matplotlib notebook
import matplotlib.pyplot as plt
import seaborn as sbs
sbs.distplot(list(scale_factors.values()), hist=True, rug=True)

#np.percentile(data, q=[0, 1, 5, 95, 99, 100], axis=0)

  (fname, cnt))
  (fname, cnt))


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f24ef4dd128>

# Modifying for quantised inference

### Functions to quantise a model

In [16]:
def quantise(x, scale_factor):
    # Quantise the activation
    x.div_(scale_factor)
    x.round_()
    # FIXME for now we scale back up
    x.mul_(scale_factor)
def quantise_const(x, scale_factor=None, max_q_level=(2**7-1)):
    if scale_factor is None:
        min = x.min().data.cpu().numpy()[0]
        max = x.max().data.cpu().numpy()[0]
        scale_factor = scale_factor_from_range(min, max, max_q_level)
    quantise(x.data, scale_factor)
    return scale_factor

class Quantise(nn.Module):
    def __init__(self, name, scale_factor):
        super(Quantise, self).__init__()
        self.name = name
        self.scale_factor = scale_factor
        
    def forward(self, x):
        x = x.clone()
        quantise(x, self.scale_factor)
        return x
    
# This just locks values to quantization levels after these ops.
# Also need to:
#  - Quantise weights + biases
#  - Merge Conv2d and BatchNorm weights before quantising
#  - Requantize after convolutions to lower bitwidth
#  - 
def duplicate_model_with_quantisation(model):
    def insert_quantise(m, path):
        # (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)
        if isinstance(m, (nn.Conv2d)):
            sf_in  = scale_factors[f'{path}/log_in']
            sf_out = scale_factors[f'{path}/log_out']
            quantise_const(m.weight)
            if m.bias is not None:
                quantise_const(m.bias, sf_out, max_q_level=(2**32-1))
            return nn.Sequential(m, Quantise(path, sf_out))
        elif isinstance(m, (AddBias)):
            sf_in  = scale_factors[f'{path}/log_in']
            sf_out = scale_factors[f'{path}/log_out']
            quantise_const(m.weight)
            if m.bias is not None:
                quantise_const(m.bias, sf_out, max_q_level=(2**32-1))
            return nn.Sequential(m, Quantise(path, sf_out))
    return nn.Sequential(Quantise('input', scale_factors['input']),
                         model_apply(model, insert_quantise))

In [17]:
# Quantise model
model_quant = duplicate_model_with_quantisation(model_raw)
# Evaluate accuracy
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1q, acc5q = misc.eval_model(model_quant, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)
print(f'Test accuracy of ({acc1q},{acc5q}), a drop of ({acc1-acc1q},{acc5-acc5q})')

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader: 50,000 for train; 50,000 for test


=> Done (5.6731 s)





Exception in thread Thread-5:
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration




Test accuracy of (0.55396,0.78662), a drop of (0.005519999999999969,0.0046800000000000175)


In [58]:
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
(data, target) = next(val_ds)
data = Variable(torch.FloatTensor(data)).cuda()
model_cuda = torch.nn.DataParallel(model_raw.eval(), device_ids=range(ngpu)).cuda()
trace, out = jit.trace(model_cuda, data)

Loading pickle object from /tmp/public_dataset/pytorch/imagenet-data/val224.pkl


Building IMAGENET data loader, 50000 for train, 50000 for test


=> Done (6.3597 s)


In [65]:
trace.graph()

graph(%1 : Float(100, 3, 224, 224)
      %2 : Float(64, 3, 3, 3)
      %3 : Float(64)
      %4 : Float(16, 64, 1, 1)
      %5 : Float(16)
      %6 : Float(64, 16, 1, 1)
      %7 : Float(64)
      %8 : Float(64, 16, 3, 3)
      %9 : Float(64)
      %10 : Float(16, 128, 1, 1)
      %11 : Float(16)
      %12 : Float(64, 16, 1, 1)
      %13 : Float(64)
      %14 : Float(64, 16, 3, 3)
      %15 : Float(64)
      %16 : Float(32, 128, 1, 1)
      %17 : Float(32)
      %18 : Float(128, 32, 1, 1)
      %19 : Float(128)
      %20 : Float(128, 32, 3, 3)
      %21 : Float(128)
      %22 : Float(32, 256, 1, 1)
      %23 : Float(32)
      %24 : Float(128, 32, 1, 1)
      %25 : Float(128)
      %26 : Float(128, 32, 3, 3)
      %27 : Float(128)
      %28 : Float(48, 256, 1, 1)
      %29 : Float(48)
      %30 : Float(192, 48, 1, 1)
      %31 : Float(192)
      %32 : Float(192, 48, 3, 3)
      %33 : Float(192)
      %34 : Float(48, 384, 1, 1)
      %35 : Float(48)
      %36 : Float(192, 48, 1, 1)
      

In [62]:
print(trace)

graph(%1 : Float(100, 3, 224, 224)
      %2 : Float(64, 3, 3, 3)
      %3 : Float(64)
      %4 : Float(16, 64, 1, 1)
      %5 : Float(16)
      %6 : Float(64, 16, 1, 1)
      %7 : Float(64)
      %8 : Float(64, 16, 3, 3)
      %9 : Float(64)
      %10 : Float(16, 128, 1, 1)
      %11 : Float(16)
      %12 : Float(64, 16, 1, 1)
      %13 : Float(64)
      %14 : Float(64, 16, 3, 3)
      %15 : Float(64)
      %16 : Float(32, 128, 1, 1)
      %17 : Float(32)
      %18 : Float(128, 32, 1, 1)
      %19 : Float(128)
      %20 : Float(128, 32, 3, 3)
      %21 : Float(128)
      %22 : Float(32, 256, 1, 1)
      %23 : Float(32)
      %24 : Float(128, 32, 1, 1)
      %25 : Float(128)
      %26 : Float(128, 32, 3, 3)
      %27 : Float(128)
      %28 : Float(48, 256, 1, 1)
      %29 : Float(48)
      %30 : Float(192, 48, 1, 1)
      %31 : Float(192)
      %32 : Float(192, 48, 3, 3)
      %33 : Float(192)
      %34 : Float(48, 384, 1, 1)
      %35 : Float(48)
      %36 : Float(192, 48, 1, 1)
      

In [32]:
# We do dumb quantisation; no fine-tuning.

weights_bw    = 8
biases_bw     = 32
activation_bw = 8
overflow_rate = 0.0
n_sample      = 20
quant_method  = "linear"
    
def duplicate_model_with_quant(model, bits, overflow_rate=0.0, counter=10, type='linear'):
    """assume that original model has at least a nn.Sequential"""
    assert type in ['linear', 'minmax', 'log', 'tanh']
    if isinstance(model, nn.Sequential):
        print(f'> Sequential')
        l = OrderedDict()
        for k, v in model._modules.items():
            print(f'Looking at : {k}')
            if isinstance(v, (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)):
                parameters = list(v.parameters())
                if isinstance(v, nn.Conv2d):
                    print(f"Found Conv2D:")
                    print(f"  weights: [o,i,k,k] {parameters[0].shape}")
                    print(f"  biases : [o] {parameters[1].shape}")
                l[k] = v
                
            else:
                l[k] = duplicate_model_with_quant(v, bits, overflow_rate, counter, type)
        m = nn.Sequential(l)
        return m
    else:
        print(f'> Not Sequential')
        for k, v in model._modules.items():
            model._modules[k] = duplicate_model_with_quant(v, bits, overflow_rate, counter, type)
        return model

model_quant = duplicate_model_with_quant(model_raw, bits=activation_bw, overflow_rate=overflow_rate,
                                               counter=n_sample, type=quant_method)

> Not Sequential
> Sequential
Looking at : 0
Found Conv2D:
  weights: [o,i,k,k] torch.Size([64, 3, 3, 3])
  biases : [o] torch.Size([64])
Looking at : 0_linear_quant
> Not Sequential
Looking at : 1
> Not Sequential
Looking at : 2
> Not Sequential
Looking at : 3
> Not Sequential
> Sequential
Looking at : squeeze
Found Conv2D:
  weights: [o,i,k,k] torch.Size([16, 64, 1, 1])
  biases : [o] torch.Size([16])
Looking at : squeeze_linear_quant
> Not Sequential
Looking at : squeeze_activation
> Not Sequential
> Sequential
Looking at : expand1x1
Found Conv2D:
  weights: [o,i,k,k] torch.Size([64, 16, 1, 1])
  biases : [o] torch.Size([64])
Looking at : expand1x1_linear_quant
> Not Sequential
Looking at : expand1x1_activation
> Not Sequential
> Sequential
Looking at : expand3x3
Found Conv2D:
  weights: [o,i,k,k] torch.Size([64, 16, 3, 3])
  biases : [o] torch.Size([64])
Looking at : expand3x3_linear_quant
> Not Sequential
Looking at : expand3x3_activation
> Not Sequential
Looking at : 4
> Not Sequ

In [None]:
# eval model
val_ds = ds_fetcher(batch_size, data_root=data_root, train=False, input_size=input_size)
acc1q, acc5q = misc.eval_model(model_quant, val_ds, ngpu=ngpu, is_imagenet=is_imagenet)

In [25]:
acc1q, acc5q

NameError: name 'acc1q' is not defined

# Exporting model
There is the existing ONNX exporter. Either modify that, or manually add functions to each type of Module and call them recursively.

# Modifying for quantised training
Easiest would be to modify SqN source.

## MobileNet
Search GitHub; there seem to be plenty of implementations. E.g. https://github.com/marvis/pytorch-mobilenet
There are also implementations for v2.

## MobileNet-SSD
Try modifying https://github.com/amdegroot/ssd.pytorch:
  - Use MobileNet feature extractor
  - Use dw convolutions in SSD (SSDLite)

In [8]:
model_raw.state_dict().keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.3.group1.squeeze.weight', 'features.3.group1.squeeze.bias', 'features.3.group2.expand1x1.weight', 'features.3.group2.expand1x1.bias', 'features.3.group3.expand3x3.weight', 'features.3.group3.expand3x3.bias', 'features.4.group1.squeeze.weight', 'features.4.group1.squeeze.bias', 'features.4.group2.expand1x1.weight', 'features.4.group2.expand1x1.bias', 'features.4.group3.expand3x3.weight', 'features.4.group3.expand3x3.bias', 'features.6.group1.squeeze.weight', 'features.6.group1.squeeze.bias', 'features.6.group2.expand1x1.weight', 'features.6.group2.expand1x1.bias', 'features.6.group3.expand3x3.weight', 'features.6.group3.expand3x3.bias', 'features.7.group1.squeeze.weight', 'features.7.group1.squeeze.bias', 'features.7.group2.expand1x1.weight', 'features.7.group2.expand1x1.bias', 'features.7.group3.expand3x3.weight', 'features.7.group3.expand3x3.bias', 'features.9.group1.squeeze.weight', 'features.9.group1.squeeze.bias', 'featu

In [1]:
[1,2,3][:1]

[1]