In [1]:
#@title Importing open_clip
try : 
  import open_clip
except ImportError:
  !pip install open_clip_torch
  import open_clip

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting open_clip_torch
  Downloading open_clip_torch-2.0.2-py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 2.1 MB/s 
Collecting ftfy
  Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.0 MB/s 
Collecting huggingface-hub
  Downloading huggingface_hub-0.10.0-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 56.0 MB/s 
Installing collected packages: huggingface-hub, ftfy, open-clip-torch
Successfully installed ftfy-6.1.1 huggingface-hub-0.10.0 open-clip-torch-2.0.2


In [4]:
#@title Other Imports

# Handling all importing error 
try:
  import torch
except ImportError:
  !pip install torch>=1.9.0
  import torch

try:
  import PIL
  from PIL import Image
except:
  !pip install pillow
  import PIL
  from PIL import Image

In [5]:
#@title Available CLIP Models
# Lists every available pretrained CLIP checkpoint as (model name, pretrained tag) pairs.
# Choose one of these pairs and pass it to open_clip.create_model_and_transforms.
open_clip.list_pretrained()

[('RN50', 'openai'),
 ('RN50', 'yfcc15m'),
 ('RN50', 'cc12m'),
 ('RN50-quickgelu', 'openai'),
 ('RN50-quickgelu', 'yfcc15m'),
 ('RN50-quickgelu', 'cc12m'),
 ('RN101', 'openai'),
 ('RN101', 'yfcc15m'),
 ('RN101-quickgelu', 'openai'),
 ('RN101-quickgelu', 'yfcc15m'),
 ('RN50x4', 'openai'),
 ('RN50x16', 'openai'),
 ('RN50x64', 'openai'),
 ('ViT-B-32', 'openai'),
 ('ViT-B-32', 'laion400m_e31'),
 ('ViT-B-32', 'laion400m_e32'),
 ('ViT-B-32', 'laion2b_e16'),
 ('ViT-B-32', 'laion2b_s34b_b79k'),
 ('ViT-B-32-quickgelu', 'openai'),
 ('ViT-B-32-quickgelu', 'laion400m_e31'),
 ('ViT-B-32-quickgelu', 'laion400m_e32'),
 ('ViT-B-16', 'openai'),
 ('ViT-B-16', 'laion400m_e31'),
 ('ViT-B-16', 'laion400m_e32'),
 ('ViT-B-16-plus-240', 'laion400m_e31'),
 ('ViT-B-16-plus-240', 'laion400m_e32'),
 ('ViT-L-14', 'openai'),
 ('ViT-L-14', 'laion400m_e31'),
 ('ViT-L-14', 'laion400m_e32'),
 ('ViT-L-14', 'laion2b_s32b_b82k'),
 ('ViT-L-14-336', 'openai'),
 ('ViT-H-14', 'laion2b_s32b_b79k'),
 ('ViT-g-14', 'laion2b_s12b_

In [6]:
#@title Loading CLIP model
# Build the CLIP model and its image preprocessing transform; the second
# element of the returned triple is discarded.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
# The model is only used for inference, so put it in eval mode explicitly.
# This keeps results deterministic if a different (model, pretrained) pair is
# chosen whose layers (e.g. batch norm, if present) behave differently in
# train mode. Written as an assignment so the cell does not echo the model repr.
model = model.eval()

100%|███████████████████████████████████████| 354M/354M [00:03<00:00, 92.0MiB/s]


In [29]:
#@title Run Example
import pathlib

# The example image is read from an `images` folder located in the parent of
# the current working directory.
image_path = pathlib.Path.cwd().parent / 'images' / 'download_blue.png'
# Candidate captions; the image is scored against each of them.
txt_list = ["pixel art", "photography"]

# Open the image in a context manager so the file handle is released as soon
# as the preprocessed tensor has been built. unsqueeze(0) adds a leading
# dimension so the single image forms a batch of one.
with Image.open(image_path) as raw_image:
    image = preprocess(raw_image).unsqueeze(0)
text = open_clip.tokenize(txt_list)

# torch.cuda.amp.autocast only affects CUDA ops; enabling it only when CUDA is
# available avoids the "CUDA is not available" warning on CPU-only machines
# without changing behaviour where CUDA is present.
with torch.no_grad(), torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # Scale both feature sets to unit length so that the dot product below is
    # the cosine similarity between the image and each caption.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    # `@` is matrix multiplication and `.T` is the transpose. The scaled
    # similarities are turned into probabilities over the captions by softmax.
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

# Row 0 holds the probabilities for the single image. argmax gives the
# position of the most likely caption and .item() converts it to a plain int
# suitable for indexing the Python list.
best_index = text_probs[0].argmax().item()
print(f'Matched Text :: { txt_list[best_index] } ')

# One row per image, one probability per entry of txt_list (two values here).
print("Label probs:", text_probs)

Matched Text :: pixel art 
Label probs: tensor([[1.0000e+00, 1.2521e-07]])
