In [1]:
import torch
import timm
import torch.nn as nn
from thop import profile
from torchvision.models import resnet50
import open_clip
from transformers import CLIPModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
input1 = torch.randn(1, 3, 224, 224) 

In [5]:
# for resnet50
resnet = resnet50()
flops, params = profile(resnet, inputs=(input1, )) #vision_model
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
FLOPs = 4.133742592G
Params = 25.557032M


In [3]:
#for dinov2
dinov2 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14')
dinov2.head = nn.Identity()
flops, params = profile(dinov2, inputs=(input1, )) #vision_model
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

Using cache found in /home/ligaojie/.cache/torch/hub/facebookresearch_dinov2_main


[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
FLOPs = 21.963549696G
Params = 85.508352M


In [4]:
#for clip
clip, _ ,preprocess_val = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion2b_s34b_b88k')
clip_visual = clip.visual
flops, params = profile(clip_visual, inputs=(input1, )) #vision_model
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
FLOPs = 11.270356992G
Params = 57.259008M


In [6]:
#for biomedclip
biomedclip, _, _ = open_clip.create_model_and_transforms('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
biomedclip = biomedclip.visual
flops, params = profile(biomedclip, inputs=(input1, )) #vision_model
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
FLOPs = 16.863255552G
Params = 86.039808M


In [7]:
#for plip
plip = CLIPModel.from_pretrained("vinid/plip")
plip = plip.vision_model
flops, params = profile(plip, inputs=(input1, )) #vision_model
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
FLOPs = 4.366181376G
Params = 87.416832M


In [5]:
#for gigapath
#gigapath = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True)