<a href="https://colab.research.google.com/github/TensorCruncher/animal-image-search/blob/main/embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount Drive, get image paths

In [1]:
from google.colab import drive

# Where Google Drive is attached inside the Colab VM; the dataset paths used
# later in the notebook are rooted under this directory.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [2]:
from pathlib import Path

# Dataset root on the mounted Drive; the pattern below matches .jpg files
# exactly one sub-folder beneath it.
root_dir = Path("/content/drive/MyDrive/animals")

# Sort the Path objects first so the order is deterministic, then keep the
# paths as plain strings.
image_paths = [str(jpg_path) for jpg_path in sorted(root_dir.glob("*/*.jpg"))]

In [3]:
# Spot-check the first ten collected paths.
image_paths[:10]

['/content/drive/MyDrive/animals/antelope/02f4b3be2d.jpg',
 '/content/drive/MyDrive/animals/antelope/03d7fc0888.jpg',
 '/content/drive/MyDrive/animals/antelope/058fa9a60f.jpg',
 '/content/drive/MyDrive/animals/antelope/0a37838e99.jpg',
 '/content/drive/MyDrive/animals/antelope/0b1a3af197.jpg',
 '/content/drive/MyDrive/animals/antelope/0b688923b0.jpg',
 '/content/drive/MyDrive/animals/antelope/0c16ef86c0.jpg',
 '/content/drive/MyDrive/animals/antelope/0e17715606.jpg',
 '/content/drive/MyDrive/animals/antelope/0ee903ea13.jpg',
 '/content/drive/MyDrive/animals/antelope/0fb2e9aa81.jpg']

In [4]:
# Total number of image files matched by the glob.
len(image_paths)

5400

# Create Embeddings

In [5]:
!pip install open-clip-torch torchvision -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m124.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m95.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m58.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [6]:
import torch
import open_clip
import numpy as np

from PIL import Image
from tqdm import tqdm
from google.colab import files

In [7]:
# Run on the GPU when one is visible to PyTorch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [8]:
# Architecture name and pretrained-weights tag passed to open_clip.
clip_architecture = 'ViT-B-32'
clip_pretrained_tag = 'laion2b_s34b_b79k'

# Three values are returned; only the first (the model) and the third (bound to
# `preprocess`, applied to every image before encoding) are used in this notebook.
model, _, preprocess = open_clip.create_model_and_transforms(
    clip_architecture,
    pretrained=clip_pretrained_tag,
)

open_clip_model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

In [9]:
from tqdm import tqdm

batch_size = 64  # number of images encoded per forward pass


def _load_image_tensor(path):
    """Open one image file and return its preprocessed tensor (no batch dimension).

    The file is opened in a context manager so its handle is closed
    deterministically. `convert("RGB")` and `preprocess` both run while the
    file is still open, so nothing reads from a closed file.
    """
    with Image.open(path) as img:
        return preprocess(img.convert("RGB"))


# Fail early with a clear message; otherwise the final torch.cat would raise
# on an empty list with a far less helpful error.
if not image_paths:
    raise RuntimeError("image_paths is empty; nothing to embed")

model.to(device)
model.eval()

# One CPU tensor of embeddings per batch, in the same order as image_paths.
image_embeddings_list = []

# NOTE(review): the recorded run took about 37 minutes for 85 batches; reading
# the images from Drive is the likely bottleneck, but that has not been measured.
with torch.no_grad():
    for start in tqdm(range(0, len(image_paths), batch_size)):
        batch_paths = image_paths[start:start + batch_size]

        # Stack the per-image tensors along a new leading batch dimension.
        image_input = torch.stack(
            [_load_image_tensor(p) for p in batch_paths]
        ).to(device)

        batch_embeddings = model.encode_image(image_input)
        # Scale each row to unit L2 norm, so a dot product between two rows
        # is their cosine similarity.
        batch_embeddings = batch_embeddings / batch_embeddings.norm(dim=-1, keepdim=True)

        # Move results off the device so they do not accumulate in GPU memory.
        image_embeddings_list.append(batch_embeddings.cpu())

image_embeddings = torch.cat(image_embeddings_list, dim=0)

# Row i must correspond to image_paths[i].
assert image_embeddings.shape[0] == len(image_paths), "embedding/path count mismatch"

image_embeddings_np = image_embeddings.numpy()


100%|██████████| 85/85 [37:03<00:00, 26.16s/it]


In [12]:
# Row i of the saved array is the embedding of image_paths[i]; the paths
# themselves are not written by this cell.
embeddings_file = "image_embeddings.npy"
np.save(embeddings_file, image_embeddings_np)
# Hand the saved file to Colab's download helper.
files.download(embeddings_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>