In [1]:
import os, sys
import torch
import torch.nn as nn
from torchvision import models
from py_auto_fact import auto_fact

In [2]:
def count_param(module, trainable=False):
    """Return the number of scalar parameters held by ``module``.

    Args:
        module: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        trainable: When truthy, only parameters whose ``requires_grad`` flag
            is set are counted; otherwise every parameter is counted.

    Returns:
        int: Sum of ``numel()`` over the selected parameters (0 if none).
    """
    total = 0
    for param in module.parameters():
        # Skip frozen parameters only when the caller asked for trainable ones.
        if trainable and not param.requires_grad:
            continue
        total += param.numel()
    return total

# Init Model

In [3]:
# Build torchvision's VGG16 with pretrained weights. This `model` object is the
# input passed to every auto_fact(...) call further down in the notebook.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing.
model = models.vgg16(pretrained=True)
# Bare trailing expression: displays the module tree as the cell output.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

# Factorize Model

In [4]:
# Baseline: total parameter count of the unfactorized model (trainable=False default).
count_param(model)

138357544

### Apply absolute rank

In [25]:
%%time
# Absolute rank (rank=256) with solver='random'; %%time reports how long the
# factorization plus the parameter count take.
# NOTE(review): presumably deepcopy=True leaves `model` itself unmodified so it
# can be reused by the following cells — confirm in the py_auto_fact docs.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='random')
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 4 ms, sys: 4 ms, total: 8 ms
Wall time: 6.87 ms


25596712

In [6]:
%%time
# Absolute rank (rank=256) with solver='svd'; overwrites `fact_model` from the
# previous cell.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='svd')
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)



CPU times: user 32.7 s, sys: 1.83 s, total: 34.6 s
Wall time: 3.67 s


25596712

In [7]:
%%time
# Absolute rank (rank=256) with solver='snmf'; overwrites `fact_model` from the
# previous cell.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='snmf')
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 1min 10s, sys: 4.72 s, total: 1min 14s
Wall time: 9.35 s


25596712

In [8]:
%%time
# Same as the rank=256 / 'random' run, but with num_iter=50 passed explicitly.
# NOTE(review): the effect of num_iter on the 'random' solver is not visible
# here — check the py_auto_fact documentation.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='random', num_iter=50)
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 436 ms, sys: 4 ms, total: 440 ms
Wall time: 94.1 ms


25596712

In [9]:
%%time
# Absolute rank (rank=256), solver='svd', with num_iter=50 passed explicitly.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='svd', num_iter=50)
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 3min 2s, sys: 11.8 s, total: 3min 14s
Wall time: 23.7 s


25596712

In [10]:
%%time
# Absolute rank (rank=256), solver='snmf', with num_iter=50 passed explicitly.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='snmf', num_iter=50)
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 5min 20s, sys: 22.6 s, total: 5min 43s
Wall time: 41.4 s


25596712

### Apply percentage rank

In [11]:
%%time
# Percentage rank: a float rank (0.4) instead of an absolute integer,
# solver='random', num_iter=50.
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='random', num_iter=50)
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 868 ms, sys: 32 ms, total: 900 ms
Wall time: 335 ms


52613904

In [12]:
%%time
# Percentage rank (rank=0.4), solver='svd', num_iter=50.
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='svd', num_iter=50)
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 11min 34s, sys: 1min 15s, total: 12min 50s
Wall time: 1min 27s


52613904

In [13]:
%%time
# Percentage rank (rank=0.4), solver='snmf', num_iter=50.
# This is the last assignment to `fact_model` in cell order, so it is the
# factorized model the benchmark cells below use on a top-to-bottom run.
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='snmf', num_iter=50)
# Trailing expression: parameter count of the factorized model.
count_param(fact_model)

CPU times: user 13min 30s, sys: 2min 2s, total: 15min 32s
Wall time: 1min 44s


52613904

# Test on CPU

### Test Inference CPU

In [14]:
%%timeit
# Times a forward pass of the original model with autograd disabled, on a
# zero-filled batch of 16 inputs of size 3x224x224.
# The input tensor is created inside the timed body, so its allocation is part
# of the measurement.
# NOTE(review): no .eval() call is visible in this notebook — confirm the
# module mode intended for an inference benchmark.
with torch.no_grad():
    y = model(torch.zeros(16,3,224,224, dtype=torch.float))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


832 ms ± 141 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit
# Times a forward pass of the factorized model with autograd disabled, on a
# zero-filled batch of 16 inputs of size 3x224x224.
# The input tensor is created inside the timed body, so its allocation is part
# of the measurement.
# NOTE(review): no .eval() call is visible in this notebook — confirm the
# module mode intended for an inference benchmark.
with torch.no_grad():
    y = fact_model(torch.zeros(16,3,224,224, dtype=torch.float))

493 ms ± 96.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Test Forward-Backward CPU

In [16]:
%%timeit
y = model(torch.zeros(8,3,224,224, dtype=torch.float))
y.sum().backward()

1.65 s ± 612 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit
# Times one forward + backward pass of the factorized model on a zero-filled
# batch of 8 inputs; the scalar y.sum() serves only as something to call
# backward() on.
# Gradients are never cleared between timeit iterations in this cell, so the
# .grad values accumulate and should not be used afterwards.
y = fact_model(torch.zeros(8,3,224,224, dtype=torch.float))
y.sum().backward()

641 ms ± 142 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Test on GPU

### Move models to GPU

In [18]:
# Move both networks to the GPU for the benchmarks that follow. The names are
# rebound to the return value of .cuda(), so from here on `model` and
# `fact_model` refer to the GPU-resident modules.
model = model.cuda()
fact_model = fact_model.cuda()

### Test Inference GPU

In [19]:
# Zero-filled float32 batch of 16 inputs (3x224x224) for the GPU inference
# benchmark. It is allocated directly on the CUDA device rather than created on
# the host and copied over with .cuda(), which avoids a throwaway CPU tensor.
x = torch.zeros(16, 3, 224, 224, dtype=torch.float, device='cuda')

In [20]:
%%timeit
with torch.no_grad():
    y = model(x)

41.3 ms ± 188 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%timeit
with torch.no_grad():
    y = fact_model(x)

25.8 ms ± 5.84 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Test Forward-Backward GPU

In [22]:
# Zero-filled float32 batch of 8 inputs (3x224x224) for the GPU
# forward-backward benchmark. It is allocated directly on the CUDA device
# rather than created on the host and copied over with .cuda().
x = torch.zeros(8, 3, 224, 224, dtype=torch.float, device='cuda')

In [23]:
%%timeit
y = model(x)
y.sum().backward()

72.1 ms ± 109 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [24]:
%%timeit
y = fact_model(x)
y.sum().backward()

46.3 ms ± 28.8 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
