# PyTorch Image Models

GitHub: https://github.com/huggingface/pytorch-image-models

Doc: https://huggingface.co/docs/hub/timm

#### Installation

In [None]:
!pip install timm

In [None]:
import timm
import torch
import torch.nn as nn

#### Statistics

In [None]:
# number of model names matching the catch-all pattern "*"
len(timm.list_models(filter="*"))

In [None]:
# same count, restricted to entries listed with pretrained=True
len(timm.list_models(filter="*", pretrained=True))

In [None]:
# pretrained entries whose name matches the pattern "res*"
timm.list_models(filter="res*", pretrained=True)

#### Usage

In [None]:
# architecture name passed to every timm.create_model call in this notebook
MODEL_NAME = "resnet50"

In [None]:
# build the architecture named by MODEL_NAME, requesting pretrained weights
model = timm.create_model(MODEL_NAME, pretrained=True)

In [None]:
# display the model; its repr is rendered as the cell output
model

In [None]:
# configuration stored on the model under its `default_cfg` attribute
# NOTE(review): other cells in this notebook read `model.pretrained_cfg` — presumably the same data; confirm and use one name
model.default_cfg

In [None]:
# inference
# eval() puts layers such as BatchNorm and Dropout into their inference
# behaviour; no_grad() skips autograd bookkeeping, which a forward-only
# pass does not need.
model.eval()
inputs = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(inputs)
outputs.shape

In [None]:
# classifier head: the `fc` attribute alongside the result of get_classifier()
model.fc, model.get_classifier()

Custom num_classes

In [None]:
# pretrained model whose classifier is requested with 3 output classes
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=3)

In [None]:
# classifier head as returned by the model's get_classifier() accessor
model.get_classifier()

Without classifier: embeddings only

In [None]:
# num_classes=0 requests a pretrained model without a classifier
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=0)

# show what get_classifier() returns for such a model
model.get_classifier()

In [None]:
# Forward-only pass to obtain embeddings: eval() puts layers such as
# BatchNorm and Dropout into their inference behaviour, and no_grad()
# avoids building an autograd graph that is never used.
model.eval()
inputs = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(inputs)
outputs.shape

#### Custom Classifier

In [None]:
# Reload the pretrained model, then read the input width of its classifier
# (i.e. the size of the features the encoder hands to the head).
model = timm.create_model(MODEL_NAME, pretrained=True)

classifier = model.get_classifier()
num_features = classifier.in_features
num_features

In [None]:
# Replace the `fc` head with a small stack:
# dropout -> linear to 64 -> layer norm -> ReLU -> linear to 10 outputs
head_layers = [
    nn.Dropout(p=0.2),
    nn.Linear(in_features=num_features, out_features=64),
    nn.LayerNorm(normalized_shape=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
]
model.fc = nn.Sequential(*head_layers)

In [None]:
# Shape check of the model with its replaced head. eval() disables the
# head's Dropout (and puts other mode-dependent layers into inference
# behaviour) so the pass is deterministic; no_grad() skips autograd.
# Call model.train() again before any fine-tuning.
model.eval()
inputs = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(inputs)
outputs.shape

#### Feature Extraction

In [None]:
# get last layer of features via forward_features()
# eval() + no_grad(): this is a forward-only pass, so mode-dependent layers
# should use their inference behaviour and no autograd graph is needed.
model.eval()
inputs = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    outputs = model.forward_features(inputs)
outputs.shape  # (N, cout, H, W)

Multi-stage features (not supported by all models)

In [None]:
# features_only=True: the model's output is iterated as a sequence of feature maps below
model = timm.create_model(MODEL_NAME, pretrained=True, features_only=True)

In [None]:
# Forward-only pass: eval() puts mode-dependent layers into inference
# behaviour and no_grad() avoids building an unused autograd graph.
model.eval()
inputs = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(inputs)

# multi-stage features: print the shape of each returned feature map
for o in outputs:
    print(o.shape)

#### Image Transform

In [None]:
# basic transform built from an explicit (channels, height, width) input size
timm.data.create_transform(input_size=(3, 224, 224))

In [None]:
# Reload the pretrained model and show the configuration stored on it;
# the following cells derive the image transform from this configuration.
model = timm.create_model(MODEL_NAME, pretrained=True)
model.pretrained_cfg

In [None]:
# resolve only the data configuration from the model's pretrained_cfg
# (handed over as the function's first parameter, `args`)
timm.data.resolve_data_config(args=model.pretrained_cfg)

In [None]:
# Two steps: resolve the data configuration from the model's pretrained_cfg,
# then expand it as keyword arguments to build the matching transform.
data_cfg = timm.data.resolve_data_config(args=model.pretrained_cfg)
transform = timm.data.create_transform(**data_cfg)
transform

#### Use pretrained model to predict

In [None]:
from io import BytesIO

import requests
from PIL import Image

url = 'https://datasets-server.huggingface.co/assets/imagenet-1k/--/default/test/12/image/image.jpg'

# Download the whole body with a timeout and fail loudly on an HTTP error,
# rather than handing PIL the raw socket stream of a possibly failed response.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Decode from the downloaded bytes; convert to RGB so the image always has
# three channels, matching the 3-channel inputs used throughout this notebook.
image = Image.open(BytesIO(response.content)).convert("RGB")
image

In [None]:
# Pretrained model, switched to eval mode for prediction
model = timm.create_model(MODEL_NAME, pretrained=True)
model = model.eval()

# Preprocessing transform built from the data configuration resolved
# from the model's pretrained_cfg
preprocess_cfg = timm.data.resolve_data_config(model.pretrained_cfg)
transform = timm.data.create_transform(**preprocess_cfg)

In [None]:
# transform image to tensor
# (`transform` and `image` come from the preceding cells)
image_tensor = transform(image)
image_tensor.shape

In [None]:
# inference
# unsqueeze(0) adds the leading batch dimension; no_grad() avoids building
# an autograd graph, which a prediction-only pass never uses.
with torch.no_grad():
    output = model(image_tensor.unsqueeze(0))
output.shape

In [None]:
# turn the logits of the first (only) batch element into probabilities
probabilities = torch.softmax(output[0], dim=0)
probabilities.shape

In [None]:
# values and indices of the 5 largest probabilities
values, indices = probabilities.topk(5)
indices

In [None]:
# get imagenet 1000 class names
# mapping class idxs to names
IMAGENET_1k_URL = 'https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt'

# Use a timeout and raise on an HTTP error so a failed download is not
# silently parsed as if it were the label list.
labels_response = requests.get(IMAGENET_1k_URL, timeout=30)
labels_response.raise_for_status()
# splitlines() also copes with "\r\n" line endings
IMAGENET_1k_LABELS = labels_response.text.strip().splitlines()

# tolist() yields plain Python floats/ints, so the list is indexed with an
# int rather than a 0-d tensor
[
    {'label': IMAGENET_1k_LABELS[idx], 'value': val}
    for val, idx in zip(values.tolist(), indices.tolist())
]