In [1]:
import torch
from transformers import BertModel, BertConfig
from py_auto_fact import auto_fact

ModuleNotFoundError: No module named 'pymf3'

# Init Model

In [None]:
config = BertConfig.from_pretrained('bert-base-uncased')
model = BertModel(config=config)
model

# Factorize Model

In [3]:
fact_model = auto_fact(model, rank=128, deepcopy=True, solver='snmf')
fact_model

AttributeError: 'int' object has no attribute 'T'

In [6]:
import pymf3
import numpy as np
mdl = pymf3.semiNMF(np.zeros((100,100)), 10)

In [10]:
mdl.__dict__

{'data': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'ndims': (100, 100),
 'W': 10,
 'H': None,
 '_num_bases': 2,
 '_logger': <Logger pymf (DEBUG)>}

# Test on CPU

### Test Inference CPU

In [4]:
%%timeit
with torch.no_grad():
    y = model(torch.zeros(32,128, dtype=torch.long))

1.76 s ± 233 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit
with torch.no_grad():
    y = fact_model(torch.zeros(32,128, dtype=torch.long))

854 ms ± 33.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Test Forward-Backward CPU

In [6]:
%%timeit
y = model(torch.zeros(8,128, dtype=torch.long))
y.last_hidden_state.sum().backward()

1.32 s ± 203 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
y = fact_model(torch.zeros(8,128, dtype=torch.long))
y.last_hidden_state.sum().backward()

599 ms ± 55.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Test on GPU

### Move models to GPU

In [8]:
model = model.cuda()
fact_model = fact_model.cuda()

### Test Inference GPU

In [9]:
x = torch.zeros(32,128, dtype=torch.long).cuda()

In [10]:
%%timeit
with torch.no_grad():
    y = model(x)

113 ms ± 251 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [11]:
%%timeit
with torch.no_grad():
    y = fact_model(x)

61.3 ms ± 32.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Test Forward-Backward GPU

In [12]:
x = torch.zeros(8,128, dtype=torch.long).cuda()

In [13]:
%%timeit
y = model(x)
y.last_hidden_state.sum().backward()

92.3 ms ± 252 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
%%timeit
y = fact_model(x)
y.last_hidden_state.sum().backward()

58.1 ms ± 200 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
