In [2]:
import timm

In [4]:
# List the timm model names matching 'efficientnet*', restricted to entries with pretrained weights.
timm.list_models('efficientnet*', pretrained=True)

['efficientnet_b0.ra_in1k',
 'efficientnet_b1.ft_in1k',
 'efficientnet_b1_pruned.in1k',
 'efficientnet_b2.ra_in1k',
 'efficientnet_b2_pruned.in1k',
 'efficientnet_b3.ra2_in1k',
 'efficientnet_b3_pruned.in1k',
 'efficientnet_b4.ra2_in1k',
 'efficientnet_b5.sw_in12k',
 'efficientnet_b5.sw_in12k_ft_in1k',
 'efficientnet_el.ra_in1k',
 'efficientnet_el_pruned.in1k',
 'efficientnet_em.ra2_in1k',
 'efficientnet_es.ra_in1k',
 'efficientnet_es_pruned.in1k',
 'efficientnet_lite0.ra_in1k',
 'efficientnetv2_rw_m.agc_in1k',
 'efficientnetv2_rw_s.ra2_in1k',
 'efficientnetv2_rw_t.ra2_in1k']

In [6]:
# Build the MobileNetV4 hybrid-medium model named below and download its pretrained weights.
model = timm.create_model('mobilenetv4_hybrid_medium.e500_r224_in1k', pretrained=True)
# NOTE(review): the model is not switched to eval mode here; call model.eval() before running inference.

model.safetensors:   0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [7]:
# Echo the module tree of the freshly created model.
model

MobileNetV3(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): ReLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): EdgeResidual(
        (conv_exp): Conv2d(32, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (aa): Identity()
        (se): Identity()
        (conv_pwl): Conv2d(128, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (1): Sequential(
      (0): Uni

In [5]:
# Check which class backs the current `model`.
# NOTE(review): the recorded result is VisionTransformer, yet the only create_model cell in view
# builds a MobileNetV4 model — the cell that created this ViT appears to be missing; TODO restore it
# so the notebook survives Restart & Run All.
type(model)

timm.models.vision_transformer.VisionTransformer

In [6]:
# Inspect the classification head (recorded output: Linear with 512 inputs and 20 outputs).
model.head

Linear(in_features=512, out_features=20, bias=True)

In [19]:
from torch import nn

In [31]:
# nn.Softmax only accepts `dim`; it cannot wrap another layer through a keyword argument.
# Chain the linear projection and the softmax with nn.Sequential instead.
softmax_head = nn.Sequential(
    nn.Linear(512, 20),
    nn.Softmax(dim=1),
)
softmax_head

In [32]:
import torch.nn.functional as F

In [33]:
# Replace the classifier with a fresh 512 -> 20 linear layer that emits raw logits.
# No Softmax is appended: the nn.CrossEntropyLoss used later in this notebook applies
# log-softmax itself, so feeding it probabilities would normalise twice and weaken the
# training signal. Use `output.softmax(dim=1)` when probabilities are needed for inspection.
model.head = nn.Linear(512, 20)

In [7]:
# Put the model in training mode; the call returns the module, so the cell echoes its structure.
model.train()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 512, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=512, out_features=1536, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=512, out_features=512, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=512, out_features=2048, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identit

In [8]:
from urllib.request import urlopen
from PIL import Image

In [9]:
# Download the sample image. The context manager closes the HTTP response as soon as
# the image has been decoded, and the timeout keeps the cell from hanging forever if
# the host is unreachable.
IMAGE_URL = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
with urlopen(IMAGE_URL, timeout=30) as response:
    img = Image.open(response)
    img.load()  # decode now so nothing is deferred past the closed response

In [10]:
# Derive the preprocessing settings from the model and build the training-time pipeline
# (the repr shown later lists a random resized crop, horizontal flip and colour jitter).
# NOTE(review): the name `transforms` is rebound by the later `from torchvision import transforms`
# cell; after that import, `transforms(img)` no longer refers to this pipeline.
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=True)

In [11]:
# Preprocess the image, add a batch dimension and run a forward pass.
output = model(transforms(img).unsqueeze(0))  # output is a (batch_size, num_classes) tensor of raw scores — (1, 20) in the recorded run

In [12]:
# Show the raw scores returned by the forward pass.
output

tensor([[ 1.1495,  0.4777,  0.0709, -0.1181,  5.0403,  0.8764, -2.2838, -0.7508,
          0.2900, -1.1607,  2.8216, -0.6053,  0.6105, -0.9580, -1.0377, -1.0684,
          0.3275, -0.5722,  0.6820, -0.8778]], grad_fn=<AddmmBackward0>)

In [16]:
# Normalise the scores along dim 1 (the class dimension) to view them as probabilities.
output.softmax(dim=1)

tensor([[1.6570e-02, 8.4641e-03, 5.6353e-03, 4.6649e-03, 8.1115e-01, 1.2611e-02,
         5.3492e-04, 2.4778e-03, 7.0155e-03, 1.6446e-03, 8.8215e-02, 2.8658e-03,
         9.6659e-03, 2.0140e-03, 1.8598e-03, 1.8035e-03, 7.2838e-03, 2.9622e-03,
         1.0383e-02, 2.1823e-03]], grad_fn=<SoftmaxBackward0>)

In [19]:
import torch.optim as optim
from torch import nn

In [18]:
# Adam over every parameter of the model, with a fixed learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [22]:
# Cross-entropy loss averaged over the batch (the reduction is the library default, spelled out).
criterion = nn.CrossEntropyLoss(reduction="mean")

In [25]:
import torch

In [27]:
import numpy as np

In [28]:
# Class index 4 is the target for the single image in the batch. torch.tensor on a list
# of Python ints always yields int64, the dtype CrossEntropyLoss expects for class-index
# targets; the NumPy round-trip could produce int32 on platforms where that is NumPy's
# default integer width.
loss = criterion(output, torch.tensor([4]))

In [29]:
# Show the scalar loss computed for the single-image batch.
loss

tensor(0.2093, grad_fn=<NllLossBackward0>)

In [23]:
# Show the steps of the preprocessing pipeline built earlier.
transforms

Compose(
    RandomResizedCropAndInterpolation(size=(256, 256), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bicubic)
    RandomHorizontalFlip(p=0.5)
    ColorJitter(brightness=(0.6, 1.4), contrast=(0.6, 1.4), saturation=(0.6, 1.4), hue=None)
    ToTensor()
    Normalize(mean=tensor([0.5000, 0.5000, 0.5000]), std=tensor([0.5000, 0.5000, 0.5000]))
)

In [24]:
from torchvision import transforms

In [25]:
# Rebuild the same training pipeline under the name `transforms1` — presumably because
# `transforms` now refers to the torchvision module imported in the previous cell.
data_config = timm.data.resolve_model_data_config(model)
transforms1 = timm.data.create_transform(**data_config, is_training=True)

In [26]:
# Show the rebuilt pipeline; the recorded repr matches the one built earlier.
transforms1

Compose(
    RandomResizedCropAndInterpolation(size=(256, 256), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bicubic)
    RandomHorizontalFlip(p=0.5)
    ColorJitter(brightness=(0.6, 1.4), contrast=(0.6, 1.4), saturation=(0.6, 1.4), hue=None)
    ToTensor()
    Normalize(mean=tensor([0.5000, 0.5000, 0.5000]), std=tensor([0.5000, 0.5000, 0.5000]))
)

In [27]:
# Wrap the timm pipeline in a torchvision Compose. As the recorded repr shows, this nests
# one Compose inside another rather than extending the existing list of steps.
transforms.Compose([transforms1])

Compose(
    Compose(
    RandomResizedCropAndInterpolation(size=(256, 256), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bicubic)
    RandomHorizontalFlip(p=0.5)
    ColorJitter(brightness=(0.6, 1.4), contrast=(0.6, 1.4), saturation=(0.6, 1.4), hue=None)
    ToTensor()
    Normalize(mean=tensor([0.5000, 0.5000, 0.5000]), std=tensor([0.5000, 0.5000, 0.5000]))
)
)

In [3]:
from torchvision.models import efficientnet_b6, EfficientNet_B6_Weights

In [4]:
# Load torchvision's EfficientNet-B6 with the IMAGENET1K_V1 weights.
# NOTE(review): this rebinds `model`, replacing the timm model used in the cells above.
model = efficientnet_b6(weights=EfficientNet_B6_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth" to /home/dudu/.cache/torch/hub/checkpoints/efficientnet_b6_lukemelas-c76e70fd.pth
100%|████████████████████████████████████████████████████████████████████████████████| 165M/165M [00:07<00:00, 23.0MB/s]


In [5]:
# Echo the module tree of the torchvision EfficientNet.
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(56, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(56, 56, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=56, bias=False)
            (1): BatchNorm2d(56, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(56, 14, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(14, 56, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormAct

In [9]:
# Input width of the layer at index 1 of the classifier (2304 in the recorded run).
model.classifier[1].in_features

2304