## Accuracy

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3" 

In [2]:
from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import EarlyStoppingCallback, AccuracyCallback
from catalyst.dl.core import Callback

from collections import OrderedDict, defaultdict

import torch
import numpy as np

In [3]:
loaders = OrderedDict()

In [4]:
import sys
sys.path.append('../')
import dataloaders

import torchvision.datasets as datasets
from torchvision import transforms

# DATA_ROOT = "/workspace/raid/data/datasets"
DATA_ROOT = "/gpfs/gpfs0/e.ponomarev/"
dataset_name = 'imagenet'

bs = 128
num_workers = 16

if dataset_name == 'cifar10':
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    DATA_PATH = "{}/cifar10".format(DATA_ROOT)

    loaders["valid"] = torch.utils.data.DataLoader(
        datasets.CIFAR10(root=DATA_PATH, train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),  download = True),
        batch_size=bs, shuffle=False,
        num_workers=num_workers, pin_memory=True)
elif dataset_name == 'imagenet':
    loaders["valid"] = dataloaders.get_loader(batch_size=bs,
                                        data_name = 'imagenet',
                                        data_root = DATA_ROOT,
                                        num_workers = num_workers, 
                                        pin_memory = True)['val']

Building imagenet data loader with 16 workers


In [5]:
import sys
sys.path.append('../')

from model_utils import load_model
from torchvision.models import resnet34, resnet18

# MODEL_NAME = 'vgg16_imagenet'
# MODEL_NAME = 'resnet50_imagenet'
MODEL_NAME = 'resnet18_imagenet'
# MODEL_NAME = 'resnet34_imagenet'

# MODEL_NAME = 'faster_rcnn_vgg16
# MODEL_NAME = 'faster_rcnn_resnet50'


# # Uncomment if MODEL_NAME = 'faster_rcnn_resnet50'
# sys.path.append('/workspace/home/jgusak/maxvol_objects/facebook_frcnn/')
# import maskrcnn_benchmark

# model = load_model(MODEL_NAME)

model = resnet18(pretrained = True)
model = model.eval()

In [6]:
for p in model.parameters():
#     print(p.requires_grad)
    p.requires_grad = False

In [7]:
from catalyst.dl.callbacks import InferCallback
from collections import OrderedDict

runner = SupervisedRunner()

model = model.to('cuda')
runner.infer(
    model=model,
    loaders = OrderedDict([("infer", loaders["valid"])]),
    callbacks=[InferCallback(),
               AccuracyCallback(accuracy_args=[1, 3, 5])],
    verbose = True
)

0/1 * Epoch (infer): 100% 391/391 [11:11<00:00,  3.10it/s, _timers/_fps=461.658, accuracy01=42.500, accuracy03=73.750, accuracy05=77.500] 


### Get  all  layers

Function  **get_layer_names()** returns names of model layers (convolutional and fully connected) and boolean mask for convolutional layers. 

In [8]:
from model_utils import get_layer_names

layer_names, conv_layer_mask = get_layer_names(model)


fc_layer_mask = (1 - conv_layer_mask).astype(bool)

print(layer_names[conv_layer_mask])
print(layer_names[fc_layer_mask])

['conv1' 'layer1.0.conv1' 'layer1.0.conv2' 'layer1.1.conv1'
 'layer1.1.conv2' 'layer2.0.conv1' 'layer2.0.conv2'
 'layer2.0.downsample.0' 'layer2.1.conv1' 'layer2.1.conv2'
 'layer3.0.conv1' 'layer3.0.conv2' 'layer3.0.downsample.0'
 'layer3.1.conv1' 'layer3.1.conv2' 'layer4.0.conv1' 'layer4.0.conv2'
 'layer4.0.downsample.0' 'layer4.1.conv1' 'layer4.1.conv2']
['fc']


In [9]:
# auxiliary function
import numpy as np
def split_resnet_layers_by_blocks(lnames):
#     starts = ['body.stem.conv1'] + ['body.layer{}'.format(i) for i in range(1,5)]
    starts = ['conv1'] + ['layer{}'.format(i) for i in range(1,5)]


    start_idx = 0
    blocks_idxs = []
    layer_names_by_blocks = []

    for s in starts:
        curr_block =  [l for l in lnames if l.startswith(s)]
        layer_names_by_blocks.append(curr_block)

        blocks_idxs.append(np.arange(start_idx, start_idx+len(curr_block)))
        start_idx += len(curr_block)

    return blocks_idxs

### Compress selected layers

For **convolutional** layers
- Set **decomposition**: 'tucker2', 'cp3' or 'cp4'
- Set  decomposition **ranks** for convolutional layers (namely, ranks we use to decompose convolutional weight tensors). 
  - In Tucker2 case, for one layer 
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = 0**, then  VBMF method with **vbmf_weaken_factor**  will be used to select (rank_cout, rank_cin).
      - Elif **rank = (-scalar) < 0**, then values (rank_cout, rank_cin) will be choosen as maximal values which allow **(sacalar x) layer parameter reduction**.
      - Else **rank = tuple** and determines absolute ranks values (rank_cout, rank_cin)
  - In CP case, rank for one layer is a scalar
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = (-scalar) < 0** then value for rank will be choosen as maximal rank which allows **(sacalar x) layer parameter reduction**.
      - Else **rank = scalar > 0** and determines absolute rank value.
      
For **fully connected** layers
- Set **decomposition** = 'svd'
- Set decomposition for linear layers (namely, ranks we use to factorize weight matrices)
    - In SVD case, rank for one layer is a scalar
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = 0**, then  VBMF method with **vbmf_weaken_factor**  will be used to select rank.
      - Elif **rank = (-scalar) < 0** then value for rank will be choosen as maximal rank which allows **(sacalar x) layer parameter reduction**.
      - Else **rank = scalar > 0** and determines absolute rank value.

In [7]:
!pip install  tensorly --user



In [21]:
from tensor_compression import get_compressed_model
import copy
import numpy as np

# decomposition_conv = 'cp4'
# decomposition_conv = 'cp3'
decomposition_conv = 'tucker2'

decomposition_fc = 'svd'

RANK_SELECTION = 'vbmf'
# RANK_SELECTION = 'nx'
# RANK_SELECTION = 'custom'

if RANK_SELECTION == 'vbmf':
    WEAKEN_FACTOR = 0.1
    X_FACTOR = 0
    rank_selection_suffix = "/wf:{}".format(WEAKEN_FACTOR)
elif RANK_SELECTION == 'nx':
    WEAKEN_FACTOR = None  
    X_FACTOR = 10
    rank_selection_suffix = "/{}x".format(X_FACTOR)
    
    
if MODEL_NAME == 'vgg16_imagenet':
    ranks_conv = [None] + [-X_FACTOR]*(len(layer_names[conv_layer_mask])-1)

elif MODEL_NAME == 'resnet50_imagenet':
    ranks_conv = [None if not name.endswith('conv2') else -X_FACTOR
                  for name in layer_names[conv_layer_mask]]

elif MODEL_NAME in ['resnet18_imagenet', 'resnet34_imagenet']:
    ranks_conv = [None if name == 'conv1' or not (name.endswith('conv2') or
                                                  name.endswith('conv1')) else -X_FACTOR
              for name in layer_names[conv_layer_mask]]

elif MODEL_NAME ==  'faster_rcnn_resnet50':
    ranks_conv = [None if not name.endswith('body.conv2') else -X_FACTOR
                  for name in layer_names[conv_layer_mask]]

# ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
ranks_fc = [None]*(len(layer_names[fc_layer_mask]))

    
    

ranks = np.array([None]*len(layer_names))
ranks[conv_layer_mask] = ranks_conv
ranks[fc_layer_mask] = ranks_fc

decompositions = np.array([None]*len(layer_names))
decompositions[conv_layer_mask] = decomposition_conv
decompositions[fc_layer_mask] = decomposition_fc

CONV_SPLIT = 1
FC_SPLIT = 1
n_layers = len(layer_names)

RESNET_SPLIT = False
if MODEL_NAME in ['resnet50_imagenet', 'resnet34_imagenet', 'resnet18_imagenet',  'faster_rcnn_resnet50'] and RESNET_SPLIT:
    split_tuples = split_resnet_layers_by_blocks(layer_names[conv_layer_mask])[::-1]
else:
    split_tuples = np.array_split(np.arange(n_layers)[conv_layer_mask], CONV_SPLIT)[::-1]
split_tuples.append(np.array_split(np.arange(n_layers)[fc_layer_mask], FC_SPLIT))

In [22]:
for tupl in split_tuples:
    print(layer_names[tupl])

['conv1' 'layer1.0.conv1' 'layer1.0.conv2' 'layer1.1.conv1'
 'layer1.1.conv2' 'layer2.0.conv1' 'layer2.0.conv2'
 'layer2.0.downsample.0' 'layer2.1.conv1' 'layer2.1.conv2'
 'layer3.0.conv1' 'layer3.0.conv2' 'layer3.0.downsample.0'
 'layer3.1.conv1' 'layer3.1.conv2' 'layer4.0.conv1' 'layer4.0.conv2'
 'layer4.0.downsample.0' 'layer4.1.conv1' 'layer4.1.conv2']
['fc']



Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.



In [25]:
import time

scores = defaultdict(list)

compressed_model = copy.deepcopy(model.cpu())
for tupl in split_tuples:
    lname, rank, decomposition = layer_names[tupl], ranks[tupl], decompositions[tupl]
    print(lname, rank)
    start = time.time()
    compressed_model = get_compressed_model(compressed_model,
                                            ranks=rank,
                                            layer_names=lname,
                                            decompositions = decomposition,
                                            vbmf_weaken_factor=WEAKEN_FACTOR)
    end = time.time()
    print('================ time : {}'.format(end - start))
    score = run(copy.deepcopy(compressed_model))
    print(score)
#     scores[layer_names[tupl[0]][0]] = score
    

['conv1' 'layer1.0.conv1' 'layer1.0.conv2' 'layer1.1.conv1'
 'layer1.1.conv2' 'layer2.0.conv1' 'layer2.0.conv2'
 'layer2.0.downsample.0' 'layer2.1.conv1' 'layer2.1.conv2'
 'layer3.0.conv1' 'layer3.0.conv2' 'layer3.0.downsample.0'
 'layer3.1.conv1' 'layer3.1.conv2' 'layer4.0.conv1' 'layer4.0.conv2'
 'layer4.0.downsample.0' 'layer4.1.conv1' 'layer4.1.conv2'] [None 0 0 0 0 0 0 None 0 0 0 0 None 0 0 0 0 None 0 0]
-------------------- SVD time:  0.019542694091796875
------------------- VBMF time :  0.02077317237854004
-------------------- SVD time:  0.01649022102355957
------------------- VBMF time :  0.01769709587097168
-------------------- SVD time:  0.016348838806152344
------------------- VBMF time :  0.01729607582092285
-------------------- SVD time:  0.016177892684936523
------------------- VBMF time :  0.01706409454345703
-------------------- SVD time:  0.0170290470123291
------------------- VBMF time :  0.017976045608520508
-------------------- SVD time:  0.018669843673706055
------

ModuleNotFoundError: No module named 'catalyst.dl.metrics'

# Count parameters

In [28]:
from collections import defaultdict

def count_params(model):
    n_params = 0
    
    for name, param in model.named_parameters():
        n_params += param.numel()
    return n_params

In [29]:
params_count_dict_m = count_params(model)
params_count_dict_cm = count_params(compressed_model)

params_count_dict_m / params_count_dict_cm

7.232299048353415

In [None]:
split_tuples

# Compute FLOPS

In [30]:
# compressed_model = model

In [15]:
import sys
sys.path.append("../")

from flopco import FlopCo

In [32]:
model.to('cpu')
compressed_model.to('cpu')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [33]:
flopco_m = FlopCo(model)
flopco_cm = FlopCo(compressed_model)

In [34]:
flopco_m.total_flops / flopco_cm.total_flops

3.2223035384295993

In [35]:
flopco_m.flops

defaultdict(list,
            {'conv1': [236027904],
             'bn1': [3211264],
             'relu': [802816],
             'maxpool': [1806336],
             'layer1.0.conv1': [231211008],
             'layer1.0.bn1': [802816],
             'layer1.0.relu': [200704, 200704],
             'layer1.0.conv2': [231211008],
             'layer1.0.bn2': [802816],
             'layer1.1.conv1': [231211008],
             'layer1.1.bn1': [802816],
             'layer1.1.relu': [200704, 200704],
             'layer1.1.conv2': [231211008],
             'layer1.1.bn2': [802816],
             'layer1.2.conv1': [231211008],
             'layer1.2.bn1': [802816],
             'layer1.2.relu': [200704, 200704],
             'layer1.2.conv2': [231211008],
             'layer1.2.bn2': [802816],
             'layer2.0.conv1': [115605504],
             'layer2.0.bn1': [401408],
             'layer2.0.relu': [100352, 100352],
             'layer2.0.conv2': [231211008],
             'layer2.0.bn2': [401

3x3xcxc

params: 9cc
flops: 9cchw

1x1xcxr, 3x3x1xr, 1x1xrxc

params: cr + 9r + cr= r(2c + 9)
flops: crhw + 9rhw + crhw = r(2c + 9)hw

paramx_nx = 9cc / r(2c + 9)
flops_nx = 9cchw / r(2c + 9)hw

# Time

In [36]:
model.to('cuda')
compressed_model.to('cuda')

x = torch.randn(32, 3, 224, 224).cuda()

In [37]:
%timeit y = model(x)

14.7 ms ± 7.41 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [38]:
%timeit y = compressed_model(x)

10.4 ms ± 284 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [39]:
model.to('cpu')
compressed_model.to('cpu')

x = torch.randn(1, 3, 224, 224)

In [40]:
%timeit y = model(x)

4.47 s ± 113 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
%timeit y = compressed_model(x)

20.2 s ± 1.94 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
