## Precompute CLIP embeddings on the THINGS dataset

In [1]:
import torch
import open_clip
from PIL import Image
from torchvision import transforms
import os
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### CLIP

In [None]:
MODEL_NAME = "ViT-L-14"
PRETRAINED_DATASET = "openai"  # Use "laion2b_s32b_b82k" for OpenCLIP versions

# Run on the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# create_model_and_transforms returns three values:
# (model, preprocess_train, preprocess_val). Unpacking into two names raises
# ValueError; only the inference-time (val) transform is needed here.
model, _, preprocess = open_clip.create_model_and_transforms(
    MODEL_NAME, pretrained=PRETRAINED_DATASET
)
model.to(device)
# Switch to evaluation mode for inference.
model.eval()

### DATA

In [None]:
# Directory that is scanned for input images (placeholder; set to a local path).
IMAGE_DIR: str = "path/to/THINGS_dataset"
# File the embeddings dictionary is written to with torch.save.
OUTPUT_FILE: str = "things_clip_embeddings.pt"

In [None]:
def get_image_embedding(image_path):
    """Encode a single image file with the loaded model's image encoder.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The result of ``model.encode_image`` for a batch containing this one
        image, moved to the CPU, or ``None`` if any step fails (the error is
        printed instead of raised).
    """
    try:
        # Image.open is lazy and may keep the file handle open; the context
        # manager closes it deterministically. convert() returns an
        # independent, fully loaded image that stays valid afterwards.
        with Image.open(image_path) as img:
            rgb_image = img.convert("RGB")
        # Preprocess and add the batch dimension expected by the encoder.
        batch = preprocess(rgb_image).unsqueeze(0).to(device)
        with torch.no_grad():  # inference only; no gradients needed
            return model.encode_image(batch).cpu()
    except Exception as e:
        # Deliberately best-effort: one unreadable image must not abort a
        # long extraction run, so report it and let the caller skip it.
        print(f"Error processing {image_path}: {e}")
        return None

### PROCESS ALL IMAGES

In [None]:
# Maps image file name -> embedding returned by get_image_embedding.
embeddings = {}

# os.listdir returns entries in arbitrary order; sorting gives the saved
# dictionary a reproducible insertion order across runs and machines.
# NOTE(review): only files directly inside IMAGE_DIR are scanned (no
# recursion into sub-folders) — confirm this matches the dataset layout.
image_files = sorted(
    f for f in os.listdir(IMAGE_DIR) if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

for img_file in tqdm(image_files, desc="Extracting embeddings"):
    img_path = os.path.join(IMAGE_DIR, img_file)
    embedding = get_image_embedding(img_path)
    # Images that failed to process return None and are skipped.
    if embedding is not None:
        embeddings[img_file] = embedding

# Save embeddings to disk
torch.save(embeddings, OUTPUT_FILE)
print(f"Saved {len(embeddings)} embeddings to {OUTPUT_FILE}")