In [2]:
import torch
from torch import nn
from torch import Tensor
import requests
import os
from pathlib import Path
from typing import Callable
from functools import partial


# Location of a ResNet-18 checkpoint file (.pth) on download.pytorch.org.
# NOTE(review): no cell visible in this notebook reads URL — confirm it is still needed.
URL = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'

In [3]:
# Load IPython's autoreload extension so that edited modules can be picked up
# without restarting the kernel.
%load_ext autoreload

# Mode 2: reload imported modules automatically before each cell's code runs.
%autoreload 2

In [3]:
# Inject a CSS rule that forces elements with class `container` to use the
# full page width, so wide model printouts are easier to read.
# `display` and `HTML` are imported from the public `IPython.display` module:
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
from efficientnet_pytorch import EfficientNet
from torchsummary import summary
from glasses.utils.Tracker import Tracker
from glasses.utils.ModuleTransfer import ModuleTransfer

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build EfficientNet-B0 through efficientnet_pytorch's `from_pretrained`
# (the cell output reports "Loaded pretrained weights for efficientnet-b0").
model_src = EfficientNet.from_pretrained('efficientnet-b0')
# nn.Module.to moves the parameters in place and returns the module itself,
# so rebinding the name keeps pointing at the same object.
model_src = model_src.to(device)

# Print torchsummary's per-layer table for input size (3, 224, 224).
summary(model_src, (3, 224, 224))

Loaded pretrained weights for efficientnet-b0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         ZeroPad2d-1          [-1, 3, 226, 226]               0
Conv2dStaticSamePadding-2         [-1, 32, 112, 112]             864
       BatchNorm2d-3         [-1, 32, 112, 112]              64
MemoryEfficientSwish-4         [-1, 32, 112, 112]               0
         ZeroPad2d-5         [-1, 32, 114, 114]               0
Conv2dStaticSamePadding-6         [-1, 32, 112, 112]             288
       BatchNorm2d-7         [-1, 32, 112, 112]              64
MemoryEfficientSwish-8         [-1, 32, 112, 112]               0
          Identity-9             [-1, 32, 1, 1]               0
Conv2dStaticSamePadding-10              [-1, 8, 1, 1]             264
MemoryEfficientSwish-11              [-1, 8, 1, 1]               0
         Identity-12              [-1, 8, 1, 1]               0
Conv2dStaticSamePadding-13        

In [5]:
# Display the GlobalParams record stored on the efficientnet_pytorch model
# (width/depth coefficients, image size, dropout rate, ... as shown in the output).
model_src._global_params

GlobalParams(width_coefficient=1.0, depth_coefficient=1.0, image_size=224, dropout_rate=0.2, num_classes=1000, batch_norm_momentum=0.99, batch_norm_epsilon=0.001, drop_connect_rate=0.2, depth_divisor=8, min_depth=None, include_top=True)

In [6]:
from glasses.nn.models.classification.efficientnet import EfficientNet, MemoryEfficientSwish
from torchsummary import summary
# Choose the compute device: CUDA when torch reports one, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Instantiate the glasses EfficientNet, passing MemoryEfficientSwish as its
# `activation` argument.
model_dst = EfficientNet(activation=MemoryEfficientSwish)

# Move the model onto `device` and print torchsummary's per-layer table for
# input size (3, 224, 224).
summary(model_dst.to(device), (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Conv2dPad-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
MemoryEfficientSwish-3         [-1, 32, 112, 112]               0
   DepthWiseConv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
MemoryEfficientSwish-6         [-1, 32, 112, 112]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
MemoryEfficientSwish-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
     SEModuleConv-12         [-1, 32, 112, 112]               0
        Conv2dPad-13         [-1, 16, 112, 112]             512
      BatchNorm2d-14         [-1,

In [22]:
from dataclasses import dataclass
import numpy as np

@dataclass
class CompoundScaling:
    """Scale per-stage depths and widths by fixed multiplicative coefficients.

    Attributes:
        alpha: factor multiplied into every entry of ``width``.
        beta: factor multiplied into every entry of ``depths``.
        gamma: stored on the instance but not read by ``__call__``.
    """

    alpha: float = 1.2
    beta: float = 1.1
    gamma: float = 1.15

    def __call__(self, model, depths, width):
        """Return ``(depths, width)`` after scaling and rounding up.

        Args:
            model: accepted for the call signature but not used here.
            depths: sequence of numbers; each entry is multiplied by ``beta``.
            width: sequence of numbers; each entry is multiplied by ``alpha``.

        Returns:
            A 2-tuple of NumPy arrays
            ``(ceil(depths * beta), ceil(width * alpha))``. The entries are
            whole numbers but keep a floating dtype, because ``np.ceil`` does
            not convert to integers.
        """
        scaled_depths = np.ceil(np.array(depths) * self.beta)
        scaled_width = np.ceil(np.array(width) * self.alpha)
        return scaled_depths, scaled_width
        
# Apply the default coefficients to a list of 7 depths and 9 widths and display
# the scaled result. `EfficientNet` is passed as `model`, which __call__ ignores.
# Presumably these lists are the EfficientNet-B0 stage depths/widths — TODO confirm.
CompoundScaling()(EfficientNet, [1, 2, 2, 3, 3, 4, 1], [32, 16, 24, 40, 80, 112, 192, 320, 1280])

(array([2., 3., 3., 4., 4., 5., 2.]),
 array([  39.,   20.,   29.,   48.,   96.,  135.,  231.,  384., 1536.]))

In [27]:
# Rebind `EfficientNet` to the efficientnet_pytorch class (other cells import
# the same name from glasses, so the binding depends on execution order).
from efficientnet_pytorch import EfficientNet

# Build EfficientNet-B1 via `from_pretrained` and display its module tree.
EfficientNet.from_pretrained('efficientnet-b1')

Loaded pretrained weights for efficientnet-b1


EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=

In [23]:
# Scratch arithmetic: 224 scaled by 1.15.
# Presumably the B0 image size times CompoundScaling's default gamma — TODO confirm.
224 * 1.15

257.59999999999997

In [20]:
# Scratch arithmetic: 1.1 raised to the 4th power, times 1.1, times 1.
# NOTE(review): the intent of this expression is not evident from the notebook — confirm or remove.
1.1 **4 * 1.1 * 1

1.6105100000000006

In [8]:
# Display the module tree (repr) of the glasses model held in `model_dst`.
model_dst

EfficientNet(
  (encoder): EfficientNetEncoder(
    (gate): ConvBnAct(
      (conv): Conv2dPad(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (act): MemoryEfficientSwish()
    )
    (blocks): ModuleList(
      (0): EfficientNetLayer(
        (block): Sequential(
          (0): EfficientNetBasicBlock(
            (block): Residual(
              (block): Sequential(
                (conv): Sequential(
                  (0): ConvBnAct(
                    (conv): DepthWiseConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
                    (bn): BatchNorm2d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
                    (act): MemoryEfficientSwish()
                  )
                  (1): Sequential(
                    (0): SEModuleConv(
                      (avg_pool): AdaptiveAvgPool2d(output_size

In [8]:
# Rebind `EfficientNet` to the glasses implementation (the same name is also
# imported from efficientnet_pytorch in other cells of this notebook).
from glasses.nn.models.classification.efficientnet import EfficientNet
# Fresh glasses model built with its default arguments, moved to `device`
# (`device`, `model_src` and `ModuleTransfer` come from earlier cells).
model_dst  = EfficientNet().to(device)

# All-ones batch of shape (1, 3, 224, 224) used as the probe input.
x = torch.ones((1, 3, 224, 224)).to(device)
# Call ModuleTransfer(model_src, model_dst) on the probe input; the return
# value is discarded. Presumably this copies the source weights into
# model_dst — TODO confirm against glasses.utils.ModuleTransfer.
_ = ModuleTransfer(model_src, model_dst)(x)

# Switch each model to eval mode and run it on the same input.
# NOTE(review): the y_dst / y_src values displayed in the following cells are
# not equal, so the two models do not yet produce matching outputs.
# NOTE(review): these forward passes are not wrapped in torch.no_grad().
y_dst = model_dst.eval()(x)
y_src = model_src.eval()(x)

diocane
diocane
diocane
diocane
diocane
diocane
diocane
diocane
diocane


In [9]:
# Display the output tensor that model_dst produced for the all-ones input.
y_dst

tensor([[-9.3545e-01,  4.4536e-01,  4.2809e-02, -7.1374e-01,  3.6706e-01,
          2.3940e-01, -1.3259e+00, -3.2074e-01, -1.6859e+00, -7.2495e-01,
         -1.2006e+00, -5.1331e-01, -7.6875e-01, -1.4668e+00, -8.2695e-01,
         -4.0084e-02,  3.0943e-01, -7.2798e-01, -4.7880e-01, -8.8295e-01,
         -1.6215e+00,  7.9493e-01,  3.6127e-02, -5.1066e-01, -4.0636e-01,
         -8.5400e-01, -1.2117e+00, -2.1563e-01, -1.1900e+00, -5.3588e-01,
         -9.6766e-01, -8.8796e-01, -1.2135e+00, -6.0439e-01, -7.0122e-01,
         -1.4644e+00, -1.1432e+00, -9.4588e-01, -9.1610e-01, -1.1081e+00,
         -7.2389e-01, -8.2417e-01, -8.6236e-01, -1.3655e+00, -1.6005e+00,
         -1.3012e-01, -1.6388e+00, -7.9500e-01, -1.2640e+00, -1.1749e+00,
         -1.2268e+00, -4.6094e-01, -7.8901e-01, -1.4785e+00, -2.4503e+00,
         -5.8356e-01, -1.4344e+00, -6.2406e-01, -1.2202e+00, -3.8103e-01,
         -1.2667e+00, -7.2553e-01, -1.5403e+00, -2.9658e-01, -7.7501e-01,
         -4.5203e-01, -1.2975e+00, -1.

In [10]:
# Display the output tensor that model_src produced for the same all-ones input.
y_src

tensor([[-8.3332e-01,  1.3565e-01,  1.8682e-01, -9.6878e-01,  3.0802e-01,
         -1.0456e-01, -1.3901e+00, -7.8224e-01, -1.4066e+00, -6.7172e-01,
         -1.2094e+00, -1.6737e-01, -3.9705e-01, -1.2694e+00, -5.5863e-01,
          8.4872e-02,  5.3619e-01, -2.5127e-01, -2.3983e-01, -4.9079e-01,
         -1.5709e+00,  1.3914e+00,  3.4888e-01, -2.6297e-01, -6.4338e-01,
         -5.9679e-01, -1.1755e+00, -5.5661e-01, -8.8801e-01, -8.4676e-01,
         -9.5552e-01, -1.0848e+00, -1.5485e+00, -8.1680e-01, -6.2627e-01,
         -1.0898e+00, -9.7406e-01, -6.9361e-01, -1.4542e+00, -9.9279e-01,
         -9.3710e-01, -6.9748e-01, -1.0204e+00, -1.5217e+00, -1.2848e+00,
         -2.5608e-01, -8.9295e-01, -9.4482e-01, -9.1420e-01, -1.8948e+00,
         -1.4947e+00, -8.9701e-01, -5.1282e-01, -8.7356e-01, -1.7619e+00,
         -6.9843e-01, -1.2738e+00, -4.2122e-01, -1.0504e+00, -6.8182e-01,
         -1.2162e+00, -5.6925e-01, -1.4510e+00, -1.7719e-01, -4.2607e-01,
         -2.0107e-01, -1.4580e+00, -1.

In [11]:
# Display the module tree (repr) of the efficientnet_pytorch model in `model_src`.
model_src

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=