In [1]:
import torch
from torchvision import models
from py_auto_fact import auto_fact

# Init Model

In [2]:
# Build torchvision's VGG-16 using the constructor's default arguments.
# NOTE(review): no weights are requested here, so the parameters are presumably
# randomly initialised — fine for timing, but confirm if accuracy ever matters.
model = models.vgg16()
# Bare trailing expression: the notebook displays the module's layer listing.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

# Factorize Model

In [3]:
# Factorize the network with py_auto_fact at rank 64. In the printed result,
# factorized convolutions show up as `CED` blocks: a bias-free 1x1 convolution
# down to 64 channels followed by a convolution with the original kernel size.
# NOTE(review): deepcopy=True presumably leaves `model` untouched so both
# versions can be benchmarked side by side — confirm against py_auto_fact docs.
fact_model = auto_fact(model, rank=64, deepcopy=True)
# Bare trailing expression: display the factorized architecture.
fact_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): CED(
      (ced_unit): Sequential(
        (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): CED(
      (ced_unit): Sequential(
        (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
    (11): ReLU(inplace=True)
    (1

# Test on CPU

### Test Inference CPU

In [4]:
%%timeit
with torch.no_grad():
    y = model(torch.zeros(16,3,224,224, dtype=torch.float))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


1.38 s ± 77.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit
with torch.no_grad():
    y = fact_model(torch.zeros(16,3,224,224, dtype=torch.float))

827 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Test Forward-Backward CPU

In [6]:
%%timeit
y = model(torch.zeros(4,3,224,224, dtype=torch.float))
y.sum().backward()

928 ms ± 177 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
y = fact_model(torch.zeros(4,3,224,224, dtype=torch.float))
y.sum().backward()

483 ms ± 22.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Test on GPU

### Move models to GPU

In [8]:
# Move both networks to the GPU, rebinding each name to the value that
# .cuda() returns (original model first, then the factorized one).
model, fact_model = model.cuda(), fact_model.cuda()

### Test Inference GPU

In [9]:
# All-zeros (16, 3, 224, 224) float input for the GPU inference benchmark.
# It is created directly on the CUDA device, so no host-side tensor has to be
# allocated and copied over first.
x = torch.zeros(16, 3, 224, 224, dtype=torch.float, device="cuda")

In [10]:
%%timeit
with torch.no_grad():
    y = model(x)

41.3 ms ± 137 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
%%timeit
with torch.no_grad():
    y = fact_model(x)

28.6 ms ± 37.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Test Forward-Backward GPU

In [12]:
# All-zeros (16, 3, 224, 224) float input for the GPU forward-backward
# benchmark. It is created directly on the CUDA device, so no host-side tensor
# has to be allocated and copied over first.
x = torch.zeros(16, 3, 224, 224, dtype=torch.float, device="cuda")

In [13]:
%%timeit
y = model(x)
y.sum().backward()

126 ms ± 152 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
%%timeit
y = fact_model(x)
y.sum().backward()

90.5 ms ± 58.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
