In [1]:
import os, sys
import torch
from transformers import BertModel, BertConfig
from py_auto_fact import auto_fact
from itertools import chain

from os import path
import sys

In [2]:
def count_param(module, trainable=False):
    if trainable:
        return sum(p.numel() for p in module.parameters() if p.requires_grad)
    else:
        return sum(p.numel() for p in module.parameters())

# Init Model

In [3]:
config = BertConfig.from_pretrained('bert-base-uncased')
model = BertModel(config=config)
model = BertModel.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
count_param(model)

109482240

# Factorize Model

### Apply absolute rank

In [5]:
%%time
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='random')
count_param(fact_model)

CPU times: user 604 ms, sys: 164 ms, total: 768 ms
Wall time: 238 ms


66818304

In [6]:
%%time
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='svd', eigen_threshold=0.6)
count_param(fact_model)

TypeError: auto_fact() got an unexpected keyword argument 'eigen_threshold'

In [7]:
%%time
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='snmf', eigen_threshold=0.6)
count_param(fact_model)

TypeError: auto_fact() got an unexpected keyword argument 'eigen_threshold'

In [8]:
%%time
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='random', num_iter=50, eigen_threshold=0.0)
count_param(fact_model)

TypeError: auto_fact() got an unexpected keyword argument 'eigen_threshold'

In [9]:
%%time
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='svd', num_iter=50, eigen_threshold=0.0)
count_param(fact_model)

TypeError: auto_fact() got an unexpected keyword argument 'eigen_threshold'

In [10]:
%%time
fact_model = auto_fact(model, rank=256, deepcopy=True, solver='snmf', num_iter=50, eigen_threshold=0.0)
count_param(fact_model)

TypeError: auto_fact() got an unexpected keyword argument 'eigen_threshold'

### Apply percentage rank

In [11]:
%%time
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='random', num_iter=50)
count_param(fact_model)

CPU times: user 640 ms, sys: 120 ms, total: 760 ms
Wall time: 214 ms


58052352

In [12]:
%%time
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='svd', num_iter=50)
count_param(fact_model)

CPU times: user 3min 31s, sys: 6.43 s, total: 3min 37s
Wall time: 23.2 s


58052352

In [13]:
%%time
fact_model = auto_fact(model, rank=0.4, deepcopy=True, solver='snmf', num_iter=50)
count_param(fact_model)

CPU times: user 10min 6s, sys: 18.4 s, total: 10min 24s
Wall time: 1min 19s


58052352

In [14]:
%%time
fact_model = auto_fact(model, rank=0.2, deepcopy=True, solver='random', num_iter=50)
count_param(fact_model)

CPU times: user 4.76 s, sys: 24 ms, total: 4.79 s
Wall time: 681 ms


40921344

In [15]:
%%time
fact_model = auto_fact(model, rank=0.2, deepcopy=True, solver='svd', num_iter=50)
count_param(fact_model)

CPU times: user 2min 38s, sys: 6.81 s, total: 2min 45s
Wall time: 18.3 s


40921344

In [16]:
%%time
fact_model = auto_fact(model, rank=0.2, deepcopy=True, solver='snmf', num_iter=50)
count_param(fact_model)

CPU times: user 5min 41s, sys: 6.26 s, total: 5min 47s
Wall time: 44.5 s


40921344

# Test on CPU

### Test Inference CPU

In [17]:
%%timeit
with torch.no_grad():
    y = model(torch.zeros(32,128, dtype=torch.long))

1.95 s ± 243 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
690 ms ± 2.76 ms
452 ms ± 0.88 ms

SyntaxError: invalid syntax (3098913484.py, line 1)

In [None]:
%%timeit
with torch.no_grad():
    y = fact_model(torch.zeros(32,128, dtype=torch.long))

### Test Forward-Backward CPU

In [None]:
%%timeit
y = model(torch.zeros(8,128, dtype=torch.long))
y.last_hidden_state.sum().backward()

In [None]:
%%timeit
y = fact_model(torch.zeros(8,128, dtype=torch.long))
y.last_hidden_state.sum().backward()

# Test on GPU

### Move models to GPU

In [None]:
model = model.cuda()
fact_model = fact_model.cuda()

### Test Inference GPU

In [None]:
x = torch.zeros(64,128, dtype=torch.long).cuda()

In [None]:
%%timeit
with torch.no_grad():
    y = model(x)

In [None]:
%%timeit
with torch.no_grad():
    y = fact_model(x)

### Test Forward-Backward GPU

In [None]:
x = torch.zeros(16,128, dtype=torch.long).cuda()

In [None]:
%%timeit
y = model(x)
y.last_hidden_state.sum().backward()

In [None]:
%%timeit
y = fact_model(x)
y.last_hidden_state.sum().backward()