In [1]:
import os, sys
import torch
import torch.nn as nn
from torchvision import models
from py_auto_fact import auto_fact

In [2]:
def count_param(module, trainable=False):
    """Return the total number of scalar parameters held by ``module``.

    Args:
        module: Object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).
        trainable: When truthy, count only parameters whose
            ``requires_grad`` flag is set; otherwise count every parameter.

    Returns:
        int: Sum of ``numel()`` over the selected parameters (0 if none).
    """
    total = 0
    for param in module.parameters():
        # Frozen parameters are skipped only when the caller asked for
        # trainable ones; otherwise requires_grad is never consulted.
        if trainable and not param.requires_grad:
            continue
        total += param.numel()
    return total

# Init Model

In [3]:
# Load torchvision's VGG16 with pretrained weights; this is the baseline that
# every factorized variant below is derived from.
# NOTE(review): `pretrained=` is deprecated in newer torchvision releases in
# favour of `weights=` — confirm the pinned torchvision version before changing.
model = models.vgg16(pretrained=True)
# Bare last expression so the notebook displays the module structure.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

# Factorize Model

In [5]:
# Parameter count of the unfactorized model (all parameters, trainable or not).
count_param(model)

138357544

### Apply absolute rank

In [6]:
%%time
# Absolute rank 256, 'random' solver, eigen_threshold=0.6.
# deepcopy=True is passed and the result is bound to `fact_model`, so `model`
# remains available as the baseline (presumably auto_fact works on a copy —
# confirm against py_auto_fact docs).
# NOTE(review): eigen_threshold semantics are not visible here; verify in py_auto_fact.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='random', eigen_threshold=0.6)
# Last expression: total parameter count after factorization.
count_param(fact_model)



CPU times: user 39.5 s, sys: 2.55 s, total: 42.1 s
Wall time: 4.59 s


130323240

In [7]:
%%time
# Absolute rank 256, 'svd' solver, eigen_threshold=0.6.
# Overwrites the previous `fact_model`; `model` is passed with deepcopy=True.
# NOTE(review): eigen_threshold semantics are not visible here; verify in py_auto_fact.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='svd', eigen_threshold=0.6)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 49.4 s, sys: 3.6 s, total: 53 s
Wall time: 5.72 s


130323240

In [8]:
%%time
# Absolute rank 256, 'snmf' solver, eigen_threshold=0.6.
# Overwrites the previous `fact_model`; `model` is passed with deepcopy=True.
# NOTE(review): eigen_threshold semantics are not visible here; verify in py_auto_fact.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='snmf', eigen_threshold=0.6)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 47.7 s, sys: 3.08 s, total: 50.8 s
Wall time: 5.45 s


130323240

In [9]:
%%time
# Absolute rank 256, 'random' solver, now with num_iter=50 and eigen_threshold=0.0.
# NOTE(review): presumably num_iter is the solver iteration budget and a 0.0
# threshold disables eigen-based filtering — confirm against py_auto_fact docs.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='random', num_iter=50, eigen_threshold=0.0)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 40 s, sys: 3.41 s, total: 43.4 s
Wall time: 4.69 s


20353832

In [10]:
%%time
# Absolute rank 256, 'svd' solver, num_iter=50, eigen_threshold=0.0.
# NOTE(review): presumably num_iter is the solver iteration budget and a 0.0
# threshold disables eigen-based filtering — confirm against py_auto_fact docs.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='svd', num_iter=50, eigen_threshold=0.0)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 2min 38s, sys: 13.9 s, total: 2min 52s
Wall time: 19.5 s


20353832

In [11]:
%%time
# Absolute rank 256, 'snmf' solver, num_iter=50, eigen_threshold=0.0.
# NOTE(review): presumably num_iter is the solver iteration budget and a 0.0
# threshold disables eigen-based filtering — confirm against py_auto_fact docs.
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='snmf', num_iter=50, eigen_threshold=0.0)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 4min 1s, sys: 13.3 s, total: 4min 14s
Wall time: 25.7 s


20353832

### Apply percentage rank

In [12]:
%%time
# Fractional rank (0.4) with the 'random' solver; this section uses a float
# rank as a percentage rather than an absolute value.
# eigen_threshold is not passed here, so the library default applies.
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='random', num_iter=50)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 692 ms, sys: 8 ms, total: 700 ms
Wall time: 302 ms


69476355

In [13]:
%%time
# Fractional rank (0.4) with the 'svd' solver; this section uses a float rank
# as a percentage rather than an absolute value.
# eigen_threshold is not passed here, so the library default applies.
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='svd', num_iter=50)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 10min 40s, sys: 1min 12s, total: 11min 52s
Wall time: 1min 15s


69476355

In [14]:
%%time
# Fractional rank (0.4) with the 'snmf' solver; eigen_threshold left at the
# library default.
# This is the last assignment to `fact_model` before the benchmark cells, so
# the CPU/GPU timings that follow measure this variant.
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='snmf', num_iter=50)
# Last expression: total parameter count after factorization.
count_param(fact_model)

CPU times: user 12min 47s, sys: 3min 15s, total: 16min 2s
Wall time: 1min 42s


69476355

# Test on CPU

### Test Inference CPU

In [15]:
%%timeit
# Forward pass only (autograd disabled) of the baseline model on a zero tensor
# of shape (16, 3, 224, 224).
# The input is built inside the timed body, so its allocation is part of the
# measurement.
with torch.no_grad():
    y = model(torch.zeros(16,3,224,224, dtype=torch.float))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


471 ms ± 106 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit
# Forward pass only (autograd disabled) of the factorized model on a zero
# tensor of shape (16, 3, 224, 224).
# The input is built inside the timed body, so its allocation is part of the
# measurement.
with torch.no_grad():
    y = fact_model(torch.zeros(16,3,224,224, dtype=torch.float))

411 ms ± 609 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Test Forward-Backward CPU

In [17]:
%%timeit
# Forward + backward through the baseline model on a (8, 3, 224, 224) zero input.
# The scalar y.sum() is used only as something to call backward() on.
# Gradients are never zeroed between loops, so .grad accumulates; only the
# timing is of interest here.
y = model(torch.zeros(8,3,224,224, dtype=torch.float))
y.sum().backward()

681 ms ± 662 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit
# Forward + backward through the factorized model on a (8, 3, 224, 224) zero input.
# The scalar y.sum() is used only as something to call backward() on.
# Gradients are never zeroed between loops, so .grad accumulates; only the
# timing is of interest here.
y = fact_model(torch.zeros(8,3,224,224, dtype=torch.float))
y.sum().backward()

591 ms ± 995 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Test on GPU

### Move models to GPU

In [19]:
# Move both models to the GPU (requires a CUDA device). From here on `model`
# and `fact_model` refer to the CUDA-resident modules, so the CPU benchmark
# cells cannot be re-run without reloading the models.
model = model.cuda()
fact_model = fact_model.cuda()

### Test Inference GPU

In [20]:
# Inference batch, allocated once on the current CUDA device so the timed
# cells below do not include input creation.
x = torch.zeros(16, 3, 224, 224, dtype=torch.float, device="cuda")

In [21]:
%%timeit
with torch.no_grad():
    y = model(x)

41.1 ms ± 178 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
%%timeit
with torch.no_grad():
    y = fact_model(x)

31.8 ms ± 44.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Test Forward-Backward GPU

In [23]:
# Forward-backward batch, allocated once on the current CUDA device; replaces
# the larger inference batch bound to `x` earlier.
x = torch.zeros(8, 3, 224, 224, dtype=torch.float, device="cuda")

In [24]:
%%timeit
y = model(x)
y.sum().backward()

71.9 ms ± 41.8 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
%%timeit
y = fact_model(x)
y.sum().backward()

57 ms ± 70.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
