In [1]:
from torchsummary import summary
import torch
import torch.nn as nn
import torchvision.models as models

In [2]:
import seaborn as sns
# Matplotlib rc overrides: transparent axes on a white figure, no grid,
# no top/right spines, grey labels and ticks, tick marks hidden.
plot_rc = {
    'axes.axisbelow': False,
    'axes.edgecolor': 'lightgrey',
    'axes.facecolor': 'None',
    'axes.grid': False,
    'axes.labelcolor': 'dimgrey',
    'axes.spines.right': False,
    'axes.spines.top': False,
    'figure.facecolor': 'white',
    'lines.solid_capstyle': 'round',
    'patch.edgecolor': 'w',
    'patch.force_edgecolor': True,
    'text.color': 'dimgrey',
    'xtick.bottom': False,
    'xtick.color': 'dimgrey',
    'xtick.direction': 'out',
    'xtick.top': False,
    'ytick.color': 'dimgrey',
    'ytick.direction': 'out',
    'ytick.left': False,
    'ytick.right': False,
}

# Font sizes applied on top of seaborn's "notebook" context.
context_rc = {
    "font.size": 16,
    "axes.titlesize": 20,
    "axes.labelsize": 18,
}

sns.set(font='Franklin Gothic Book', rc=plot_rc)
sns.set_context("notebook", rc=context_rc)


In [3]:
class MLP(nn.Module):
    """Linear head mapping ``input_dim`` features to ``output_dim`` features.

    With ``hidden_dim == 0`` the head is a single ``nn.Linear``. Otherwise it
    is two ``nn.Linear`` layers chained through ``hidden_dim`` units; no
    activation is placed between them.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        if hidden_dim == 0:
            # Direct projection, no hidden layer.
            self.mlp = nn.Linear(input_dim, output_dim)
        else:
            first = nn.Linear(input_dim, hidden_dim)
            second = nn.Linear(hidden_dim, output_dim)
            self.mlp = nn.Sequential(first, second)

    def forward(self, inputs):
        """Apply the head to ``inputs`` and return the result."""
        return self.mlp(inputs)
    
class Identity(nn.Module):
    """Parameter-free module whose forward pass returns its input unchanged."""

    def forward(self, inputs):
        """Return ``inputs`` as-is (the same object, not a copy)."""
        return inputs

# ResNets (R18 to R152)


In [3]:
# Head widths to sweep; 0 makes MLP build a single linear layer.
hidden_dims = [
    0,
    512,
    1024,
    1280,
    2048,
]
# torchvision ResNet constructor names, derived from the network depths.
archs = ['resnet{}'.format(depth) for depth in (18, 34, 50, 101, 152)]

In [5]:
# For each torchvision ResNet, replace the `fc` classifier with an MLP head of
# every width in `hidden_dims` and print the total parameter count divided by
# 1024 * 1024.
output_dim = 128  # output width of the head; now actually passed to MLP below
for arch in archs:
    model = getattr(models, arch)()
    # Read the backbone feature width once, before `fc` is overwritten.
    input_dim = model.fc.in_features
    for hd in hidden_dims:
        print('{} {}'.format(arch, hd))
        model.fc = MLP(input_dim=input_dim, hidden_dim=hd, output_dim=output_dim)
        print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

resnet18 0
10.72137451171875
resnet18 512
10.97186279296875
resnet18 1024
11.28485107421875
resnet18 1280
11.44134521484375
resnet18 2048
11.91082763671875
resnet34 0
20.36126708984375
resnet34 512
20.61175537109375
resnet34 1024
20.92474365234375
resnet34 1280
21.08123779296875
resnet34 2048
21.55072021484375
resnet50 0
22.66912841796875
resnet50 512
23.48211669921875
resnet50 1024
24.54510498046875
resnet50 1280
25.07659912109375
resnet50 2048
26.67108154296875
resnet101 0
40.78143310546875
resnet101 512
41.59442138671875
resnet101 1024
42.65740966796875
resnet101 1280
43.18890380859375
resnet101 2048
44.78338623046875
resnet152 0
55.70037841796875
resnet152 512
56.51336669921875
resnet152 1024
57.57635498046875
resnet152 1280
58.10784912109375
resnet152 2048
59.70233154296875


In [13]:
# Backbone-only size: replace the `fc` classifier with a parameter-free
# Identity and print the remaining parameter count divided by 1024 * 1024.
# (The unused `input_dim` / `output_dim` assignments were dropped.)
for arch in archs:
    model = getattr(models, arch)()
    print('{}'.format(arch))
    model.fc = Identity()
    print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

resnet18
10.65875244140625
resnet34
20.29864501953125
resnet50
22.41900634765625
resnet101
40.53131103515625
resnet152
55.45025634765625


In [6]:
from backbone.mobilenetv3 import mobilenetv3_large_100
from backbone.efficientnet import efficientnet_b0
from backbone.efficientnet import efficientnet_b1

In [46]:
# Per-layer summary of EfficientNet-B0 for a 3x224x224 input, with the
# classifier replaced by a pass-through module.
model = efficientnet_b0()
model.classifier = Identity()
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
            Conv2d-7              [-1, 8, 1, 1]             264
              SiLU-8              [-1, 8, 1, 1]               0
            Conv2d-9             [-1, 32, 1, 1]             288
    SqueezeExcite-10         [-1, 32, 112, 112]               0
           Conv2d-11         [-1, 16, 112, 112]             512
      BatchNorm2d-12         [-1, 16, 112, 112]              32
         Identity-13         [-1, 16, 112, 112]               0
DepthwiseSeparableConv-14         [-1, 

In [8]:
def cal_model(model, name, output_dim=128):
    """Print the parameter count of ``model`` for every head width.

    For each value in the module-level ``hidden_dims`` list, ``model.classifier``
    is replaced by an ``MLP`` head of that hidden width, and the total number of
    parameter elements, divided by 1024 * 1024, is printed after a
    ``"<name> <hidden_dim>"`` label line.

    Args:
        model: Module exposing a ``classifier`` attribute with a 2-D ``weight``;
            the second weight dimension is used as the head's input width.
            The model is modified in place: on return its ``classifier`` is the
            last MLP head built.
        name: Label printed in front of each hidden width.
        output_dim: Output width of the MLP head. Defaults to 128, the value
            that was previously hard-coded.

    Returns:
        None. Results are only printed.
    """
    # Capture the feature width before the classifier is overwritten.
    input_dim = model.classifier.weight.shape[1]
    for hd in hidden_dims:
        print('{} {}'.format(name, hd))
        model.classifier = MLP(input_dim=input_dim, hidden_dim=hd, output_dim=output_dim)
        print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

In [60]:
# Parameter counts for a fresh mobilenetv3_large_100 with each MLP head width.
cal_model(mobilenetv3_large_100(), 'mob')

mob 0
4.1637420654296875
mob 512
4.6954803466796875
mob 1024
5.3834686279296875
mob 1280
5.7274627685546875
mob 2048
6.7594451904296875


In [61]:
# Parameter counts for a fresh efficientnet_b0 with each MLP head width.
cal_model(efficientnet_b0(), 'effib0')

effib0 0
3.9782676696777344
effib0 512
4.510005950927734
effib0 1024
5.197994232177734
effib0 1280
5.541988372802734
effib0 2048
6.573970794677734


In [62]:
# Parameter counts for a fresh efficientnet_b1 with each MLP head width.
cal_model(efficientnet_b1(), 'effib1')

effib1 0
6.367828369140625
effib1 512
6.899566650390625
effib1 1024
7.587554931640625
effib1 1280
7.931549072265625
effib1 2048
8.963531494140625


In [18]:
cd backbone

/Users/lincolnzjx/Downloads/tencent/acmm/backbone


In [25]:
from swav_resnet50 import resnet50w2

r50w2 = resnet50w2()
# Count the parameters of the network just built. The original summed over
# `model`, a leftover variable from an earlier cell, so it reported the size of
# a different network.
print(sum(p.numel() for p in r50w2.parameters()) / 1024 / 1024)

In [21]:
# Display the module tree of the wide ResNet-50; `r5` is not a defined name.
r50w2

ResNet(
  (padding): ConstantPad2d(padding=(1, 1, 1, 1), value=0.0)
  (conv1): Conv2d(3, 128, kernel_size=(7, 7), stride=(2, 2), padding=(2, 2), bias=False)
  (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Seq

# Effib0, R101, R50

In [14]:
def _trainable_params(module):
    """Return the total element count of ``module`` parameters with requires_grad set."""
    return sum(p.numel() for p in module.parameters() if p.requires_grad)


# Sizes are printed as parameter count / 1024 / 1024. All models keep their
# original classifier heads here.
model = efficientnet_b0()
effb0_param = _trainable_params(model)
print('effib0', effb0_param/1024/1024)

mob = mobilenetv3_large_100()
mob_param = _trainable_params(mob)
print('mob', mob_param/1024/1024)

r101 = models.resnet101()
r101_param = _trainable_params(r101)
print('r101', r101_param/1024/1024)

# The original cell built and printed resnet50 twice (copy-paste); once is enough.
r50 = models.resnet50()
r50_param = _trainable_params(r50)
print('r50', r50_param/1024/1024)

r152 = models.resnet152()
r152_param = _trainable_params(r152)
print('r152', r152_param/1024/1024)

# Size of each lightweight model relative to each ResNet.
print("Effib0/R152", effb0_param/r152_param)
print("Effib0/R101", effb0_param/r101_param)
print("Effib0/R50", effb0_param/r50_param)

print("Mob/R152", mob_param/r152_param)
print("Mob/R101", mob_param/r101_param)
print("Mob/R50", mob_param/r50_param)

effib0 3.9782676696777344
mob 5.229026794433594
r101 42.485389709472656
r50 24.373085021972656
r50 24.373085021972656
r152 57.404335021972656
Effib0/R152 0.06930256518353488
Effib0/R101 0.09363848835758071
Effib0/R50 0.16322380470470907
Mob/R152 0.0910911482979827
Mob/R101 0.12307823536964557
Mob/R50 0.21454103121207502


In [15]:
# Display the module tree of the torchvision ResNet-101 built in the comparison cell.
r101

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# ViT

In [5]:
from timm.models import create_model
import collections.abc as container_abcs

## Tiny

In [25]:
# Build the timm 'vit_tiny_patch16_224' architecture without pretrained weights;
# only its structure and parameter count are inspected afterwards.
model = create_model(
    model_name='vit_tiny_patch16_224',
    pretrained=False,
    num_classes=1000,
    drop_rate=0.0,
    drop_path_rate=0.1,
    drop_block_rate=None,
)

In [26]:
# Swap the final norm, pre-logits and head for pass-through modules so they no
# longer contribute parameters, then print the model and its remaining
# parameter count divided by 1024 * 1024.
for head_attr in ('norm', 'pre_logits', 'head'):
    setattr(model, head_attr, Identity())
print(model)
print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=192, out_features=576, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=192, out_features=192, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=192, out_features=768, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=768, out_features=192, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): 

## Small

In [27]:
# Build the timm 'vit_small_patch16_224' architecture without pretrained weights;
# only its structure and parameter count are inspected afterwards.
model = create_model(
    model_name='vit_small_patch16_224',
    pretrained=False,
    num_classes=1000,
    drop_rate=0.0,
    drop_path_rate=0.1,
    drop_block_rate=None,
)

In [28]:
# Print the full model first, then swap the final norm, pre-logits and head for
# pass-through modules and print the stripped model with its remaining
# parameter count divided by 1024 * 1024.
print(model)
for head_attr in ('norm', 'pre_logits', 'head'):
    setattr(model, head_attr, Identity())
print(model)
print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn

# XCiT

## Tiny

In [6]:
# Build the timm 'xcit_tiny_12_p16_224' architecture without pretrained weights;
# only its structure and parameter count are inspected afterwards.
model = create_model(
    model_name='xcit_tiny_12_p16_224',
    pretrained=False,
    num_classes=1000,
    drop_rate=0.0,
    drop_path_rate=0.1,
    drop_block_rate=None,
)

In [7]:
# Print the full model first, then set `norm`, `pre_logits` and `head` to
# pass-through modules and print the stripped model with its remaining
# parameter count divided by 1024 * 1024.
print(model)
for head_attr in ('norm', 'pre_logits', 'head'):
    setattr(model, head_attr, Identity())
print(model)
print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

XCiT(
  (patch_embed): ConvPatchEmbed(
    (proj): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): GELU()
      (2): Sequential(
        (0): Conv2d(24, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (3): GELU()
      (4): Sequential(
        (0): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (5): GELU()
      (6): Sequential(
        (0): Conv2d(96, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (pos_embed

## Small

In [9]:
# Build the timm 'xcit_small_12_p16_224' architecture without pretrained weights;
# only its structure and parameter count are inspected afterwards.
model = create_model(
    model_name='xcit_small_12_p16_224',
    pretrained=False,
    num_classes=1000,
    drop_rate=0.0,
    drop_path_rate=0.1,
    drop_block_rate=None,
)

In [10]:
# Print the full model first, then set `norm`, `pre_logits` and `head` to
# pass-through modules and print the stripped model with its remaining
# parameter count divided by 1024 * 1024.
print(model)
for head_attr in ('norm', 'pre_logits', 'head'):
    setattr(model, head_attr, Identity())
print(model)
print(sum(p.numel() for p in model.parameters()) / 1024 / 1024)

XCiT(
  (patch_embed): ConvPatchEmbed(
    (proj): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): GELU()
      (2): Sequential(
        (0): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (3): GELU()
      (4): Sequential(
        (0): Conv2d(96, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (5): GELU()
      (6): Sequential(
        (0): Conv2d(192, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (pos_em