In [None]:
from copy import deepcopy

import tltorch
import torch

torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Linear decomposition

In [None]:
random_tensor = torch.randn(128, 1000).to(device)
linear_layer = torch.nn.Linear(1000, 100).to(device)
linear_layer

In [None]:
tl_linear = tltorch.FactorizedLinear.from_linear(linear_layer, factorization='tucker')
tl_linear

In [None]:
tl_linear.weight.to_tensor()

In [None]:
torch.norm(linear_layer(random_tensor) - tl_linear(random_tensor)) / torch.norm(linear_layer(random_tensor))

In [None]:
print(tl_linear(random_tensor))

In [None]:
print(linear_layer(random_tensor))

# Convolutional decomposition

In [None]:
random_tensor = torch.randn(128, 512, 7, 7).to(device)
conv_layer = torch.nn.Conv2d(512, 256, 3, padding=1).to(device)
conv_layer

In [None]:
tl_conv = tltorch.FactorizedConv.from_conv(conv_layer, factorization='tucker')
tl_conv

In [None]:
tl_conv.weight.to_tensor()

In [None]:
torch.norm(conv_layer(random_tensor) - tl_conv(random_tensor)) / torch.norm(conv_layer(random_tensor))

In [None]:
%%timeit
tl_conv(random_tensor)

In [None]:
%%timeit
conv_layer(random_tensor)

## Optimum ranks by Tensorly
### Tucker
total_params * x ** order + sum_of_squared_ranks * x + fixed_params * x - 1 * total_params = 0

solve for x

where x is coef from rank
### CP
x = total_prams / sum_params

where x is coef from rank

In [None]:
[512, 256, 3, 3]
512*256*3*3/(512+256+3+3)

# Test model_compressor

In [4]:
from src.model_compressor.model_compressor import compress_model
from copy import deepcopy
import torch

model = torch.nn.Sequential(
    torch.nn.ConvTranspose2d(3, 64, 3, padding=1),
    torch.nn.ConvTranspose2d(64, 128, 3, padding=1),
    torch.nn.ConvTranspose2d(128, 256, 3, padding=1),
    torch.nn.ConvTranspose2d(256, 512, 3, padding=1)
)

copy_model = deepcopy(model)

compress_model(copy_model, conv_compression_method='TKD')

differential_evolution step 1: f(x)= 0.011269003715610518
differential_evolution step 2: f(x)= 0.011269003715610518
differential_evolution step 3: f(x)= 0.011269003715610518
differential_evolution step 4: f(x)= 0.011269003715610518
differential_evolution step 5: f(x)= 0.011269003715610518
differential_evolution step 6: f(x)= 0.011269003715610518
differential_evolution step 7: f(x)= 0.011269003715610518
Polishing solution with 'L-BFGS-B'
2.3529609280012664
[np.int64(3), np.int64(27), np.int64(9)]
Starting iteration 1
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
reconstruction error=0.7755113840103149
Starting iteration 2
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
iteration 1, reconstruction error: 0.6957712769508362, decrease = 0.07974010705947876, unnormalized = 0.341970294713974
Starting iteration 3
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
iteration 2, reconstruction error: 0.6660742163658142, decrease = 0.029697060585021973, unnormalized = 0.32737424969673157
Starting iteration 4
Mode 0 of 3
Mode 1 of 3


  warn("rank_tkd[0] is bigger then in_channels. Setting it equal to in_channels")


differential_evolution step 1: f(x)= 0.543242644498741
differential_evolution step 2: f(x)= 0.5394687118680715
differential_evolution step 3: f(x)= 0.537807200507699
differential_evolution step 4: f(x)= 0.5376705208932676
Polishing solution with 'L-BFGS-B'
4.433310083000833
[np.int64(55), np.int64(77), np.int64(9)]
Starting iteration 1
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
reconstruction error=0.982111930847168
Starting iteration 2
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
iteration 1, reconstruction error: 0.974830150604248, decrease = 0.007281780242919922, unnormalized = 3.5694756507873535
Starting iteration 3
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
iteration 2, reconstruction error: 0.9715883135795593, decrease = 0.0032418370246887207, unnormalized = 3.557605266571045
Starting iteration 4
Mode 0 of 3
Mode 1 of 3
Mode 2 of 3
iteration 3, reconstruction error: 0.9697669744491577, decrease = 0.0018213391304016113, unnormalized = 3.550935983657837
Starting iteration 5
Mode 0 of 3
Mode 1 of 3
Mode 2

In [5]:
copy_model

Sequential(
  (0): Sequential(
    (0): ConvTranspose2d(3, 3, kernel_size=(1, 1), stride=(1, 1))
    (1): Sequential(
      (0): ConvTranspose2d(3, 3, kernel_size=(1, 1), stride=(1, 1))
      (1): ConvTranspose2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3)
      (2): ConvTranspose2d(3, 3, kernel_size=(1, 1), stride=(1, 1))
    )
    (2): ConvTranspose2d(3, 64, kernel_size=(1, 1), stride=(1, 1))
  )
  (1): Sequential(
    (0): ConvTranspose2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): Sequential(
      (0): ConvTranspose2d(64, 9, kernel_size=(1, 1), stride=(1, 1))
      (1): ConvTranspose2d(9, 9, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=9)
      (2): ConvTranspose2d(9, 55, kernel_size=(1, 1), stride=(1, 1))
    )
    (2): ConvTranspose2d(55, 128, kernel_size=(1, 1), stride=(1, 1))
  )
  (2): Sequential(
    (0): ConvTranspose2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
    (1): Sequential(
      (0): ConvTranspose2d(128, 9, kernel_size

In [6]:
random_tensor = torch.randn(128, 3, 32, 32)
out1 = model(random_tensor)
out2 = copy_model(random_tensor)

In [8]:
torch.mean(torch.abs(out1 - out2))

tensor(0.4155, grad_fn=<MeanBackward0>)