In [2]:
import os
from typing import Optional, Union

def select_hardware(
    cuda: Optional[Union[str, int]] = None,
    cpu: Optional[Union[str, int]] = None,
) -> None:
    """Select GPUs and cap CPU math-library threads via environment variables.

    ``CUDA_DEVICE_ORDER`` is always set to ``PCI_BUS_ID``. The other
    variables are only written when the matching argument is given.

    NOTE(review): these variables are normally read when the consuming
    library initialises, so this presumably has to run before torch / numpy
    are imported -- confirm for your stack.

    Args:
        cuda: Value for ``CUDA_VISIBLE_DEVICES`` (e.g. ``"2"`` or ``"0,1"``).
            An int is accepted and stored as its string form. ``None``
            leaves the variable untouched.
        cpu: Thread count written to ``MKL_NUM_THREADS``,
            ``NUMEXPR_NUM_THREADS`` and ``OMP_NUM_THREADS``. An int is
            accepted and stored as its string form. ``None`` leaves all
            three untouched.
    """
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'

    if cuda is not None:
        # os.environ only accepts str values; coerce so ints work too.
        os.environ['CUDA_VISIBLE_DEVICES'] = str(cuda)

    if cpu is not None:
        thread_count = str(cpu)
        for variable in ("MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS", "OMP_NUM_THREADS"):
            os.environ[variable] = thread_count

# Expose only GPU "2" to this kernel and set the CPU thread-count variables to "4".
select_hardware(cuda="2", cpu="4")

In [3]:
import timm

# Model configuration: EfficientNet-B2 with pretrained weights requested.
model_name = 'efficientnet_b2'
checkpoint_path = ''  # empty string, i.e. no checkpoint path supplied
pretrained = True

# Build the network through timm's factory: 1000 output classes, 3 input channels.
model = timm.create_model(
    model_name,
    pretrained=pretrained,
    checkpoint_path=checkpoint_path,
    num_classes=1000,
    in_chans=3,
)

In [4]:
# Display the module tree of the model built in the previous cell.
model

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
   

In [5]:
from tddl.factorizations import number_layers
from tddl.factorizations import factorize_network

# Number the sub-modules of `model`. Per the output shown in the next cell,
# the result is a nested dict keyed by sub-module name whose values are
# (running index, module-or-nested-dict) pairs.
numbered_layers = number_layers(model)



In [6]:
# Inspect the numbering; the integer `layers` list used for factorization
# further down appears to refer to these indices (e.g. 6 -> conv_dw).
numbered_layers

{'conv_stem': (0,
  Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)),
 'bn1': (1,
  BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 'act1': (2, SiLU(inplace=True)),
 'blocks': (3,
  {'0': (4,
    {'0': (5,
      {'conv_dw': (6,
        Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)),
       'bn1': (7,
        BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
       'act1': (8, SiLU(inplace=True)),
       'se': (9,
        {'conv_reduce': (10, Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))),
         'act1': (11, SiLU(inplace=True)),
         'conv_expand': (12, Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))),
         'gate': (13, Sigmoid())}),
       'conv_pw': (14,
        Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)),
       'bn2': (15,
        BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 

In [7]:
# Display only the `blocks` sub-module (the nested Sequential of conv blocks).
model.blocks

Sequential(
  (0): Sequential(
    (0): DepthwiseSeparableConv(
      (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): SiLU(inplace=True)
      (se): SqueezeExcite(
        (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
        (act1): SiLU(inplace=True)
        (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
        (gate): Sigmoid()
      )
      (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): Identity()
    )
    (1): DepthwiseSeparableConv(
      (conv_dw): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): SiLU(inplace=Tr

In [15]:
import copy

# Indices of the layers to factorize, following the numbering printed by
# number_layers(model). The visible verbose log shows 6, 18 and 177 are
# `conv_dw` layers; presumably the remaining indices are too -- TODO confirm.
# (An earlier draft of this cell tried a name-based selection, ['conv_dw'].)
layers = [
    6, 18, 34, 48, 62, 77, 91, 105,
    120, 134, 148, 162, 177, 191, 205, 219,
    234, 248, 262, 276, 290, 305, 319,
]

# Hand a deep copy of `model` to the factorization call instead of `model` itself.
fact_model = copy.deepcopy(model)

# Tucker factorization with rank=0.5; request the error structure back and
# log every visited layer.
error = factorize_network(
    fact_model,
    factorization='tucker',
    rank=0.5,
    layers=layers,
    verbose=True,
    return_error=True,
)

0 conv_stem <class 'torch.nn.modules.conv.Conv2d'>
1 bn1 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
2 act1 <class 'torch.nn.modules.activation.SiLU'>
3 blocks <class 'torch.nn.modules.container.Sequential'>
4 0 <class 'torch.nn.modules.container.Sequential'>
5 0 <class 'timm.models.efficientnet_blocks.DepthwiseSeparableConv'>
6 conv_dw <class 'torch.nn.modules.conv.Conv2d'>
7 bn1 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
8 act1 <class 'torch.nn.modules.activation.SiLU'>
9 se <class 'timm.models.efficientnet_blocks.SqueezeExcite'>
10 conv_reduce <class 'torch.nn.modules.conv.Conv2d'>
11 act1 <class 'torch.nn.modules.activation.SiLU'>
12 conv_expand <class 'torch.nn.modules.conv.Conv2d'>
13 gate <class 'torch.nn.modules.activation.Sigmoid'>
14 conv_pw <class 'torch.nn.modules.conv.Conv2d'>
15 bn2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
16 act2 <class 'torch.nn.modules.linear.Identity'>
17 1 <class 'timm.models.efficientnet_blocks.DepthwiseSeparableConv'>
18 conv_dw 



163 bn2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
164 act2 <class 'torch.nn.modules.activation.SiLU'>
165 se <class 'timm.models.efficientnet_blocks.SqueezeExcite'>
166 conv_reduce <class 'torch.nn.modules.conv.Conv2d'>
167 act1 <class 'torch.nn.modules.activation.SiLU'>
168 conv_expand <class 'torch.nn.modules.conv.Conv2d'>
169 gate <class 'torch.nn.modules.activation.Sigmoid'>
170 conv_pwl <class 'torch.nn.modules.conv.Conv2d'>
171 bn3 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
172 4 <class 'torch.nn.modules.container.Sequential'>
173 0 <class 'timm.models.efficientnet_blocks.InvertedResidual'>
174 conv_pw <class 'torch.nn.modules.conv.Conv2d'>
175 bn1 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
176 act1 <class 'torch.nn.modules.activation.SiLU'>
177 conv_dw <class 'torch.nn.modules.conv.Conv2d'>
178 bn2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
179 act2 <class 'torch.nn.modules.activation.SiLU'>
180 se <class 'timm.models.efficientnet_blocks.SqueezeExcite'>


In [18]:
import torch

# Sanity check: show the fully qualified class of a Conv2d instance, the same
# form that appears in the verbose layer listing above.
type(torch.nn.Conv2d(in_channels=3, out_channels=512, kernel_size=1))

torch.nn.modules.conv.Conv2d

In [9]:
# Inspect the structure returned by factorize_network(..., return_error=True).
# In the output below each entry is (index, error-or-None, module-or-nested-dict).
# NOTE(review): the unfactorized siblings bn1/act1/se show the same
# tensor(0.1943) as conv_dw -- looks like a stale value carried over inside
# tddl; verify there before relying on per-layer errors from this dict.
error

{'conv_stem': (0,
  None,
  Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)),
 'bn1': (1,
  None,
  BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 'act1': (2, None, SiLU(inplace=True)),
 'blocks': (3,
  None,
  {'0': (4,
    None,
    {'0': (5,
      None,
      {'conv_dw': (6,
        tensor(0.1943),
        Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)),
       'bn1': (7,
        tensor(0.1943),
        BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
       'act1': (8, tensor(0.1943), SiLU(inplace=True)),
       'se': (9,
        tensor(0.1943),
        {'conv_reduce': (10,
          None,
          Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))),
         'act1': (11, None, SiLU(inplace=True)),
         'conv_expand': (12,
          None,
          Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))),
         'gate': (13, None, Sigmoid())}),
 

In [10]:
from tddl.factorizations import list_errors


# Extract the entries for the selected `layers` from the nested `error`
# structure (presumably a flat list -- confirm in tddl). Element [1] of each
# entry is a plain float, as the cells below show.
list_of_errors = list_errors(error, layers=layers)

In [11]:
# Keep only the second field of every entry (the error value shown in the cells below).
relative_errors = [entry[1] for entry in list_of_errors]

In [12]:
# Spot-check the error value of the first entry.
list_of_errors[0][1]

0.19434179365634918

In [13]:
# Show every extracted error value, one per entry of list_of_errors.
relative_errors

[0.19434179365634918,
 0.3921471834182739,
 0.45865166187286377,
 0.3504534959793091,
 0.42756468057632446,
 0.11148995161056519,
 0.14238274097442627,
 0.16468842327594757,
 0.10330334305763245,
 0.35429617762565613,
 0.37860140204429626,
 0.38208067417144775,
 0.10906480997800827,
 0.18084409832954407,
 0.18137454986572266,
 0.19196642935276031,
 0.06355753540992737,
 0.19842730462551117,
 0.19831618666648865,
 0.20576049387454987,
 0.20409443974494934,
 0.10619490593671799,
 0.21607007086277008]

In [2]:
import os

import torchvision

# Root directory of the ImageNet dataset. Set the IMAGENET_ROOT environment
# variable to point somewhere else; the original cluster path stays as the
# default, so existing runs behave exactly as before.
IMAGENET_ROOT = os.environ.get("IMAGENET_ROOT", "/scratch/jetzeschuurman/imagenet/")

imagenet_data = torchvision.datasets.ImageNet(IMAGENET_ROOT)

In [14]:
# Display `model` again. The factorization call was given the deep copy
# `fact_model`, so this tree is expected to match the earlier display --
# compare the two outputs to confirm.
model

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
   