In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import os

def select_hardware(
    cuda: str = None,
    cpu: str = None,
) -> None:
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'

    if cuda is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda

    if cpu is not None:
        os.environ["MKL_NUM_THREADS"] = cpu
        os.environ["NUMEXPR_NUM_THREADS"] = cpu
        os.environ["OMP_NUM_THREADS"] = cpu

select_hardware(
    cuda="3",
    cpu="4"
)

In [5]:
import tensorly
import tensorly as tl
tl.set_backend('pytorch')
from tensorly.decomposition import Tucker, CP, TensorTrain
from tltorch import FactorizedConv

In [6]:
import timm

model_name='efficientnet_b2'
# checkpoint_path=''
pretrained=True

model = timm.create_model(
    model_name,
    num_classes=1000,
    in_chans=3,
    pretrained=pretrained,
    # checkpoint_path=checkpoint_path,
)

In [7]:
model.cuda()

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
   

In [8]:
model.blocks[0][0].conv_dw

Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)

In [9]:
module = model.blocks[0][0].conv_dw
module

Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)

In [39]:
import tltorch


factorization='tucker'

decomposition_kwargs = {'init': 'random'} if factorization == 'cp' else {}
fixed_rank_modes = 'spatial' if factorization == 'tucker' else None

fact_module = tltorch.FactorizedConv.from_conv(
    module, 
    rank=0.5, 
    decompose_weights=True, 
    factorization=factorization,
    fixed_rank_modes=fixed_rank_modes,
    decomposition_kwargs=decomposition_kwargs,
    # **kwargs,
)

In [40]:
fact_module

FactorizedConv(
  in_channels=1, out_channels=32, kernel_size=(3, 3), rank=(4, 1, 3, 3), order=2, padding=[1, 1], bias=False
  (weight): TuckerTensor(shape=(32, 1, 3, 3), rank=(4, 1, 3, 3))
)

In [24]:
import tensorly as tl 
tl.set_backend('numpy')

In [55]:

from tddl.factorizations import number_layers
from tddl.factorizations import factorize_network

numbered_layers = number_layers(model)
numbered_layers

{'conv_stem': (0,
  Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)),
 'bn1': (1,
  BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 'act1': (2, SiLU(inplace=True)),
 'blocks': (3,
  {'0': (4,
    {'0': (5,
      {'conv_dw': (6,
        Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)),
       'bn1': (7,
        BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
       'act1': (8, SiLU(inplace=True)),
       'se': (9,
        {'conv_reduce': (10, Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))),
         'act1': (11, SiLU(inplace=True)),
         'conv_expand': (12, Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))),
         'gate': (13, Sigmoid())}),
       'conv_pw': (14,
        Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)),
       'bn2': (15,
        BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 

In [26]:
import copy

fact_model =  copy.deepcopy(model)

# conv_dw_by_str_layers = ['conv_dw']
# conv_dw_layers = [6, 19, 34, 48, 62, 77, 91, 105, 120, 134, 148, 162, 177, 191, 205, 219, 234, 248, 262, 276, 290, 305, 319]
conv_dw_layers = [6]

factorize_network(
    fact_model,
    layers=conv_dw_layers,
    verbose=False,
    return_error=False,
)

In [27]:
model

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
   

In [56]:
numbered_layers = number_layers(model)

In [11]:
tensor = tl.tensor(np.arange(24).reshape((3, 4, 2)), dtype=tl.float64)
unfolded = tl.unfold(tensor, mode=0)
tl.fold(unfolded, mode=0, shape=tensor.shape)

AttributeError: module 'tensorly' has no attribute 'float64'

https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

groups controls the connections between inputs and outputs. in_channels and out_channels must both be divisible by groups. For example,

- At groups=1, all inputs are convolved to all outputs.

- At groups=2, the operation becomes equivalent to having two conv layers side by side, each seeing half the input channels and producing half the output channels, and both subsequently concatenated.

- At groups= in_channels, each input channel is convolved with its own set of filters (of size out_channelsin_channels\frac{\text{out\_channels}}{\text{in\_channels}}in_channelsout_channels​).

In [18]:
[m.numel() for m in model.conv_stem.parameters()]

[864]

In [19]:
[m.numel() for m in model.conv_head.parameters()]

[495616]

In [57]:
def listify_numbered_layers(numbered_layers, layers):
    output = []

    def parse_errors(d, layers):
        
        nonlocal output
        for k, v in d.items():
            # print(v)
            if isinstance(v[1], dict):
                parse_errors(v[1], layers)
            elif k in layers:
                output.append(
                    (
                        k, # layer_name
                        v[0], # layer_nr
                        v[1], # layer
                        # float(v[1].detach().cpu()), # approx.error wrt pretrained
                        # str(v[2]), # layer
                    )
                )
    
    parse_errors(numbered_layers, layers)
    
    return output

In [58]:
conv_pw_layers = listify_numbered_layers(numbered_layers, layers=['conv_pw'])
conv_pw_numbers = [x[1] for x in conv_pw_layers]
print(conv_pw_numbers)

[14, 26, 31, 45, 59, 74, 88, 102, 117, 131, 145, 159, 174, 188, 202, 216, 231, 245, 259, 273, 287, 302, 316]


In [59]:
conv_dw_layers = listify_numbered_layers(numbered_layers, layers=['conv_dw'])
conv_dw_numbers = [x[1] for x in conv_dw_layers]
print(conv_dw_numbers)

[6, 18, 34, 48, 62, 77, 91, 105, 120, 134, 148, 162, 177, 191, 205, 219, 234, 248, 262, 276, 290, 305, 319]


In [60]:
conv_reduce_layers = listify_numbered_layers(numbered_layers, layers=['conv_reduce'])
conv_reduce_numbers = [x[1] for x in conv_reduce_layers]
print(conv_reduce_numbers)

[10, 22, 38, 52, 66, 81, 95, 109, 124, 138, 152, 166, 181, 195, 209, 223, 238, 252, 266, 280, 294, 309, 323]


In [61]:
conv_expand_layers = listify_numbered_layers(numbered_layers, layers=['conv_expand'])
conv_expand_numbers = [x[1] for x in conv_expand_layers]
print(conv_expand_numbers)

[12, 24, 40, 54, 68, 83, 97, 111, 126, 140, 154, 168, 183, 197, 211, 225, 240, 254, 268, 282, 296, 311, 325]


In [62]:
conv_pwl_layers = listify_numbered_layers(numbered_layers, layers=['conv_pwl'])
conv_pwl_numbers = [x[1] for x in conv_pwl_layers]
print(conv_pwl_numbers)

[42, 56, 70, 85, 99, 113, 128, 142, 156, 170, 185, 199, 213, 227, 242, 256, 270, 284, 298, 313, 327]


In [64]:
k1x1 = conv_pwl_numbers + conv_expand_numbers + conv_reduce_numbers + conv_pw_numbers
print(k1x1)

[42, 56, 70, 85, 99, 113, 128, 142, 156, 170, 185, 199, 213, 227, 242, 256, 270, 284, 298, 313, 327, 12, 24, 40, 54, 68, 83, 97, 111, 126, 140, 154, 168, 183, 197, 211, 225, 240, 254, 268, 282, 296, 311, 325, 10, 22, 38, 52, 66, 81, 95, 109, 124, 138, 152, 166, 181, 195, 209, 223, 238, 252, 266, 280, 294, 309, 323, 14, 26, 31, 45, 59, 74, 88, 102, 117, 131, 145, 159, 174, 188, 202, 216, 231, 245, 259, 273, 287, 302, 316]


In [65]:
fact_model =  copy.deepcopy(model)

factorize_network(
    fact_model,
    layers=k1x1,
    verbose=False,
    return_error=False,
)

In [66]:
fact_model

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): FactorizedConv(
            in_channels=32, out_channels=8, kernel_size=(1, 1), rank=(1, 4, 1, 1), order=2, 
            (weight): TuckerTensor(shape=(8, 32, 1, 1), rank=(1, 4, 1, 1))
          )
          (act1): SiLU(inplace=True)
          (conv_expand): FactorizedConv(
            in_channels=8, out_channels=32, kernel_size=(1, 1), rank=(4, 1, 1, 1), order=2, 
            (weight): TuckerTensor(sh