In [1]:
import torch
import clip
from PIL import Image
import os
from tqdm import tqdm

In [2]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Load the ViT-B/32 CLIP checkpoint onto the chosen device. The call yields the
# model plus the preprocessing callable applied to images before encoding.
model, preprocess = clip.load("ViT-B/32", device=device)

cuda


100%|███████████████████████████████████████| 338M/338M [00:28<00:00, 12.4MiB/s]


In [3]:
@torch.no_grad()
def image_to_vec(image_path: str) -> torch.Tensor:
    """Embed one image file as a unit-length CLIP feature vector.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A 1-D tensor on the CPU holding the L2-normalised image embedding, so
        the dot product of two such vectors is their cosine similarity.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels have been converted; otherwise every image embedded from a
    # large folder keeps its handle open until the garbage collector runs.
    with Image.open(image_path) as raw:
        batch = preprocess(raw.convert("RGB")).unsqueeze(0).to(device)
    feat = model.encode_image(batch)               # shape: [1, d]
    feat = feat / feat.norm(dim=-1, keepdim=True)  # normalize for cosine similarity
    return feat.squeeze(0).cpu()

In [8]:
# Embeddings are cached on disk so the slow encoding pass only has to run once.
# NOTE: the cache is not invalidated when the contents of the dataset folder
# change -- delete the index file to force a rebuild.
index_path = "clip_index.pt"

if os.path.exists(index_path):
    print("data exists")
    # NOTE(review): torch.load unpickles the file; only load an index that was
    # written by this notebook or comes from a trusted source.
    data = torch.load(index_path, map_location="cpu")

    paths = data["paths"]
    vecs  = data["vecs"]
else:
    print("generating data")
    path = "dataset"

    # os.listdir returns entries in arbitrary order, so sort them to make row i
    # of the index reproducible across runs. Sub-directories are skipped
    # because they cannot be opened as images.
    paths = [
        os.path.join(path, f)
        for f in sorted(os.listdir(path))
        if os.path.isfile(os.path.join(path, f))
    ]
    if not paths:
        # torch.stack on an empty list fails with an opaque message; be explicit.
        raise RuntimeError(f"no files found in {path!r}; nothing to embed")
    vecs = torch.stack([image_to_vec(p) for p in tqdm(paths, desc="Embedding images")])

    data = {
        "paths": paths,
        "vecs": vecs,
    }
    torch.save(data, index_path)

data exists


In [44]:
# Pairwise cosine similarity: the stored vectors are unit length, so the Gram
# matrix holds the cosine of every image pair.
# Cast to float32 first -- the cached embeddings may be half precision when
# they were produced on a GPU (TODO confirm dtype of the stored index); float32
# gives a more precise ranking and is a no-op when the data already is float32.
vecs_f32 = vecs.float()
scores = vecs_f32 @ vecs_f32.T

# Exclude every image from its own neighbour list. -inf is used rather than -1
# because -1 is itself a valid cosine value and could tie with a real neighbour.
scores.fill_diagonal_(float("-inf"))

# torch.topk raises when k exceeds the row length, so clamp the requested six
# neighbours to the number of *other* images available.
top_k = min(6, max(scores.shape[0] - 1, 0))
vals, idxs = torch.topk(scores, k=top_k, dim=1)

In [45]:
# Write one row per image: its file name, then the space-separated file names
# of its nearest neighbours in ranked order.
# NOTE: fields are written verbatim, so a file name containing a comma or a
# space would make the row ambiguous for a reader of this CSV.
names = [os.path.basename(p) for p in paths]

# Mode "w" already truncates an existing file, so no separate truncate/append
# step is needed. The encoding is pinned so non-ASCII file names are written
# the same way on every platform.
with open("result.csv", "w", encoding="utf-8") as file:
    file.write("filename,ranking\n")
    for query, neighbours in zip(names, idxs.tolist()):
        results = [names[j] for j in neighbours]
        file.write(f"{query},{' '.join(results)}\n")