In [1]:
import sys
sys.path.append('../')

from model_utils import load_model

# MODEL_NAME = 'vgg16_imagenet'
MODEL_NAME = 'resnet50_imagenet'
# MODEL_NAME = 'resnet18_imagenet'

# MODEL_NAME = 'faster_rcnn_vgg16
# MODEL_NAME = 'faster_rcnn_resnet50'


# # Uncomment if MODEL_NAME = 'faster_rcnn_resnet50'
# sys.path.append('/workspace/home/jgusak/maxvol_objects/facebook_frcnn/')
# import maskrcnn_benchmark

model = load_model(MODEL_NAME)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

### Get  all  layers

Function  **get_layer_names()** returns names of model layers (convolutional and fully connected) and boolean mask for convolutional layers. 

In [5]:
from model_utils import get_layer_names

layer_names, conv_layer_mask = get_layer_names(model)

fc_layer_mask = (1 - conv_layer_mask).astype(bool)

print(layer_names[conv_layer_mask])
print(layer_names[fc_layer_mask])

['conv1' 'layer1.0.conv1' 'layer1.0.conv2' 'layer1.0.conv3'
 'layer1.0.downsample.0' 'layer1.1.conv1' 'layer1.1.conv2' 'layer1.1.conv3'
 'layer1.2.conv1' 'layer1.2.conv2' 'layer1.2.conv3' 'layer2.0.conv1'
 'layer2.0.conv2' 'layer2.0.conv3' 'layer2.0.downsample.0' 'layer2.1.conv1'
 'layer2.1.conv2' 'layer2.1.conv3' 'layer2.2.conv1' 'layer2.2.conv2'
 'layer2.2.conv3' 'layer2.3.conv1' 'layer2.3.conv2' 'layer2.3.conv3'
 'layer3.0.conv1' 'layer3.0.conv2' 'layer3.0.conv3' 'layer3.0.downsample.0'
 'layer3.1.conv1' 'layer3.1.conv2' 'layer3.1.conv3' 'layer3.2.conv1'
 'layer3.2.conv2' 'layer3.2.conv3' 'layer3.3.conv1' 'layer3.3.conv2'
 'layer3.3.conv3' 'layer3.4.conv1' 'layer3.4.conv2' 'layer3.4.conv3'
 'layer3.5.conv1' 'layer3.5.conv2' 'layer3.5.conv3' 'layer4.0.conv1'
 'layer4.0.conv2' 'layer4.0.conv3' 'layer4.0.downsample.0' 'layer4.1.conv1'
 'layer4.1.conv2' 'layer4.1.conv3' 'layer4.2.conv1' 'layer4.2.conv2'
 'layer4.2.conv3']
['fc']


In [6]:
# auxiliary function
import numpy as np
def split_resnet_layers_by_blocks(lnames):
    starts = ['body.stem.conv1'] + ['body.layer{}'.format(i) for i in range(1,5)]

    start_idx = 0
    blocks_idxs = []
    layer_names_by_blocks = []

    for s in starts:
        curr_block =  [l for l in lnames if l.startswith(s)]
        layer_names_by_blocks.append(curr_block)

        blocks_idxs.append(np.arange(start_idx, start_idx+len(curr_block)))
        start_idx += len(curr_block)

    return blocks_idxs

### Compress selected layers

For **convolutional** layers
- Set **decomposition**: 'tucker2', 'cp3' or 'cp4'
- Set  decomposition **ranks** for convolutional layers (namely, ranks we use to decompose convolutional weight tensors). 
  - In Tucker2 case, for one layer 
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = 0**, then  VBMF method with **vbmf_weaken_factor**  will be used to select (rank_cout, rank_cin).
      - Elif **rank = (-scalar) < 0**, then values (rank_cout, rank_cin) will be choosen as maximal values which allow **(sacalar x) layer parameter reduction**.
      - Else **rank = tuple** and determines absolute ranks values (rank_cout, rank_cin)
  - In CP case, rank for one layer is a scalar
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = (-scalar) < 0** then value for rank will be choosen as maximal rank which allows **(sacalar x) layer parameter reduction**.
      - Else **rank = scalar > 0** and determines absolute rank value.
      
For **fully connected** layers
- Set **decomposition** = 'svd'
- Set decomposition for linear layers (namely, ranks we use to factorize weight matrices)
    - In SVD case, rank for one layer is a scalar
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = 0**, then  VBMF method with **vbmf_weaken_factor**  will be used to select rank.
      - Elif **rank = (-scalar) < 0** then value for rank will be choosen as maximal rank which allows **(sacalar x) layer parameter reduction**.
      - Else **rank = scalar > 0** and determines absolute rank value.

In [10]:
from tensor_compression import get_compressed_model
import copy
import numpy as np

# decomposition_conv = 'cp4'
# decomposition_conv = 'cp3'
decomposition_conv = 'tucker2'

decomposition_fc = 'svd'

# RANK_SELECTION = 'vbmf'
RANK_SELECTION = 'nx'
# RANK_SELECTION = 'custom'

if RANK_SELECTION == 'vbmf':
    WEAKEN_FACTOR = 1.0
    X_FACTOR = 0
    rank_selection_suffix = "/wf:{}".format(WEAKEN_FACTOR)
elif RANK_SELECTION == 'nx':
    WEAKEN_FACTOR = None  
    X_FACTOR = 40
    rank_selection_suffix = "/{}x".format(X_FACTOR)
    
    
if MODEL_NAME == 'vgg16_imagenet':
    ranks_conv = [None] + [-X_FACTOR]*(len(layer_names[conv_layer_mask])-1)
    ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
elif MODEL_NAME == 'resnet50_imagenet':
    ranks_conv = [None if not name.endswith('conv2') else -X_FACTOR
                  for name in layer_names[conv_layer_mask]]
    ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
elif MODEL_NAME == 'resnet18_imagenet':
    ranks_conv = [None if name == 'conv1' or not (name.endswith('conv2') or
                                                  name.endswith('conv1')) else -X_FACTOR
              for name in layer_names[conv_layer_mask]]
    ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
elif MODEL_NAME ==  'faster_rcnn_resnet50':
    ranks_conv = [None if not name.endswith('body.conv2') else -X_FACTOR
                  for name in layer_names[conv_layer_mask]]
    ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
    
    

ranks = np.array([None]*len(layer_names))
ranks[conv_layer_mask] = ranks_conv
ranks[fc_layer_mask] = ranks_fc

decompositions = np.array([None]*len(layer_names))
decompositions[conv_layer_mask] = decomposition_conv
decompositions[fc_layer_mask] = decomposition_fc

CONV_SPLIT = 3
FC_SPLIT = 1
n_layers = len(layer_names)

RESNET_SPLIT = False
if MODEL_NAME in ['resnet50_imagenet', 'resnet18_imagenet',  'faster_rcnn_resnet50'] and RESNET_SPLIT:
    split_tuples = split_resnet_layers_by_blocks(layer_names[conv_layer_mask])[::-1]
else:
    split_tuples = np.array_split(np.arange(n_layers)[conv_layer_mask], CONV_SPLIT)[::-1]
split_tuples.append(np.array_split(np.arange(n_layers)[fc_layer_mask], FC_SPLIT))

In [11]:
split_tuples

[array([36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52]),
 array([18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17]),
 [array([53])]]

In [12]:
compressed_model = copy.deepcopy(model)
for tupl in split_tuples:
    lname, rank, decomposition = layer_names[tupl], ranks[tupl], decompositions[tupl]
    print(lname, rank)
    compressed_model = get_compressed_model(compressed_model,
                                            ranks=rank,
                                            layer_names=lname,
                                            decompositions = decomposition,
                                            vbmf_weaken_factor=WEAKEN_FACTOR)

['layer3.3.conv3' 'layer3.4.conv1' 'layer3.4.conv2' 'layer3.4.conv3'
 'layer3.5.conv1' 'layer3.5.conv2' 'layer3.5.conv3' 'layer4.0.conv1'
 'layer4.0.conv2' 'layer4.0.conv3' 'layer4.0.downsample.0' 'layer4.1.conv1'
 'layer4.1.conv2' 'layer4.1.conv3' 'layer4.2.conv1' 'layer4.2.conv2'
 'layer4.2.conv3'] [None None -40 None None -40 None None -40 None None None -40 None None -40
 None]


KeyboardInterrupt: 

In [None]:
compressed_model

# Count parameters

In [14]:
from collections import defaultdict

def count_params(model):
    n_params = 0
    
    for name, param in model.named_parameters():
        n_params += param.numel()
    return n_params

In [15]:
params_count_dict_m = count_params(model)
params_count_dict_cm = count_params(compressed_model)

params_count_dict_m / params_count_dict_cm

2.0411385093278325

In [16]:
split_tuples

[array([36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52]),
 array([18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17]),
 [array([53])]]

# Compute FLOPS

In [2]:
# compressed_model = model

In [17]:
import sys
sys.path.append("../")

from flopco import FlopCo

In [25]:
flopco_m = FlopCo(model)
flopco_cm = FlopCo(compressed_model)

In [27]:
flopco_m.total_flops / flopco_cm.total_flops

1.782351684812708

In [28]:
flopco_m.flops

defaultdict(list,
            {'conv1': [236027904],
             'layer1.0.conv1': [25690112],
             'layer1.0.conv2': [231211008],
             'layer1.0.conv3': [102760448],
             'layer1.0.downsample.0': [102760448],
             'layer1.1.conv1': [102760448],
             'layer1.1.conv2': [231211008],
             'layer1.1.conv3': [102760448],
             'layer1.2.conv1': [102760448],
             'layer1.2.conv2': [231211008],
             'layer1.2.conv3': [102760448],
             'layer2.0.conv1': [205520896],
             'layer2.0.conv2': [231211008],
             'layer2.0.conv3': [102760448],
             'layer2.0.downsample.0': [205520896],
             'layer2.1.conv1': [102760448],
             'layer2.1.conv2': [231211008],
             'layer2.1.conv3': [102760448],
             'layer2.2.conv1': [102760448],
             'layer2.2.conv2': [231211008],
             'layer2.2.conv3': [102760448],
             'layer2.3.conv1': [102760448],
          