# Installing open-clip library

In [43]:
%%capture
!pip install open_clip_torch

# Cloning GitHub Repository

In [44]:
%%capture
!git clone https://github.com/kk-digital/kcg-ml.git

# OpenClip Examples

## Imports

In [45]:
import torch
from PIL import Image
import open_clip

## Listing All Pre-Trained Models.

In [46]:
open_clip.list_pretrained()

[('RN50', 'openai'),
 ('RN50', 'yfcc15m'),
 ('RN50', 'cc12m'),
 ('RN50-quickgelu', 'openai'),
 ('RN50-quickgelu', 'yfcc15m'),
 ('RN50-quickgelu', 'cc12m'),
 ('RN101', 'openai'),
 ('RN101', 'yfcc15m'),
 ('RN101-quickgelu', 'openai'),
 ('RN101-quickgelu', 'yfcc15m'),
 ('RN50x4', 'openai'),
 ('RN50x16', 'openai'),
 ('RN50x64', 'openai'),
 ('ViT-B-32', 'openai'),
 ('ViT-B-32', 'laion400m_e31'),
 ('ViT-B-32', 'laion400m_e32'),
 ('ViT-B-32', 'laion2b_e16'),
 ('ViT-B-32', 'laion2b_s34b_b79k'),
 ('ViT-B-32-quickgelu', 'openai'),
 ('ViT-B-32-quickgelu', 'laion400m_e31'),
 ('ViT-B-32-quickgelu', 'laion400m_e32'),
 ('ViT-B-16', 'openai'),
 ('ViT-B-16', 'laion400m_e31'),
 ('ViT-B-16', 'laion400m_e32'),
 ('ViT-B-16', 'laion2b_s34b_b88k'),
 ('ViT-B-16-plus-240', 'laion400m_e31'),
 ('ViT-B-16-plus-240', 'laion400m_e32'),
 ('ViT-L-14', 'openai'),
 ('ViT-L-14', 'laion400m_e31'),
 ('ViT-L-14', 'laion400m_e32'),
 ('ViT-L-14', 'laion2b_s32b_b82k'),
 ('ViT-L-14-336', 'openai'),
 ('ViT-H-14', 'laion2b_s32b_

## Text-Image Matching Example.
Showing the probaility distribution of a list of texts for single image.

In [47]:
MODEL_NAME = 'ViT-L-14'
PRETRAINED = 'laion2b_s32b_b82k'

In [48]:
model, _, preprocess = open_clip.create_model_and_transforms(model_name=MODEL_NAME, pretrained=PRETRAINED)
tokenizer = open_clip.get_tokenizer(MODEL_NAME)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

In [50]:
image = preprocess(Image.open('./kcg-ml/datasets/test_images/test_image_000.jpeg')).unsqueeze(0)

text = tokenizer(["pixel art", "painting", "digital art"]) # List of texts which will be compared.

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

## Getting CLIP Image Embeddings for Single Image

In [None]:
with torch.no_grad():
    image = preprocess(Image.open('./kcg-ml/datasets/test_images/test_image_000.jpeg')).unsqueeze(0).to(device)
    emb = model.encode_image(image).cpu().detach().numpy()

print(f"[INFO] CLIP embedding size: {emb.shape}")

## Checking the Similarity Between Two Image Using CLIP

In [None]:
image1 = preprocess(Image.open('./kcg-ml/datasets/test_images/test_image_001.jpeg')).unsqueeze(0).to(device)
image2 = preprocess(Image.open('./kcg-ml/datasets/test_images/test_image_002.jpeg')).unsqueeze(0).to(device)

image_features = model.encode_image(image1)
image_2_features = model.encode_image(image2)

image_features /= image_features.norm(dim=-1, keepdim=True)
image_2_features /= image_2_features.norm(dim=-1, keepdim=True)
similarity = image_2_features.detach() @ image_features.detach().T
print(f'Similarit: {similarity.cpu().detach().numpy()[0][0]:.4f}')

## ClipModel Examples
▶ ClipModel : Module built over OpenClip function, check: https://github.com/kk-digital/kcg-ml/blob/main/examples/ClipTools.py

In [None]:
!pip install patool

In [None]:
import sys
sys.path.insert(0, './kcg-ml/image_classifier_pipeline/')
sys.path.insert(0, './kcg-ml/')

from examples.ClipTools import ClipModel

## Creating an Instance of ClipModel Class

In [None]:
clip_model_instance = ClipModel(clip_model=MODEL_NAME, pretrained=PRETRAINED)

## Downloading Model Method Example.

In [None]:
clip_model_instance.download_model(MODEL_NAME, PRETRAINED)

## Encoding Image File Using It's Path

In [None]:
emb = clip_model_instance.encode_image_from_image_file('./kcg-ml/datasets/test_images/test_image_000.jpeg')
print(f"Embedding vector : {emb}")
print(f"Embedding size : {emb.shape}")

## Encoding Image Byte Object

In [None]:
def image_to_bytes(image_path):
    with open(image_path, 'rb') as image_file:
        bytes_array = bytearray(image_file.read())
    return bytes_array

emb = clip_model_instance.encode_image_from_image_data(image_to_bytes('./kcg-ml/datasets/test_images/test_image_000.jpeg'))
print(f"Embedding vector : {emb}")
print(f"Embedding size : {emb.shape}")

## Encoding a List of Images

In [None]:
emb_list = clip_model_instance.encode_image_list(['./kcg-ml/datasets/test_images/test_image_000.jpeg', './kcg-ml/datasets/test_images/test_image_001.jpeg', './kcg-ml/datasets/test_images/test_image_002.jpeg'])
for emb in emb_list:
  print(f"Embedding vector : {emb}")
  print(f"Embedding size : {emb.shape}")
  print("#"*50)

## Encoding a Dictionary of Zip Files

In [None]:
clip_model_instance.encode_data_directory('/content/kcg-ml/datasets/test_zip_files')