In [2]:
# ==== Cell 1: Install ====
# Install ImageBind directly from its GitHub repository; the package is not
# installed from a PyPI name here, so pip clones and builds it.
!pip install git+https://github.com/facebookresearch/ImageBind.git
# Install the other packages named by this notebook: torch is imported
# directly and scikit-learn provides KMeans / silhouette_score.
# NOTE(review): timm and torchvision are not imported by this notebook itself;
# presumably they are ImageBind dependencies -- confirm before trimming.
!pip install timm torch torchvision scikit-learn

# ==== Cell 2: Imports ====
import os
import glob
from PIL import Image

import torch
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType
from imagebind import data as imagebind_data

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


# ==== Cell 3: Upload images into the image folder ====
# Images are collected in /content/images (*.jpg, *.jpeg or *.png).
import shutil

from google.colab import files

image_dir = "/content/images"
os.makedirs(image_dir, exist_ok=True)

# Opens the Colab upload widget; the keys of the returned mapping are used
# below as the names of the uploaded files.
uploaded = files.upload()   # select a few .jpg/.png images

# The uploaded names are treated as paths relative to the current working
# directory (assumes Colab saved them there -- TODO confirm if cwd is changed).
for fname in uploaded:
    # shutil.move falls back to copy + delete when image_dir lives on another
    # filesystem (e.g. a mounted drive), where os.replace would raise OSError.
    shutil.move(fname, os.path.join(image_dir, fname))

print("Uploaded images:", os.listdir(image_dir))


# ==== Cell 4: Collect image paths ====
image_dir = "/content/images"  # change if needed

# Match extensions case-insensitively: glob patterns such as "*.jpg" are
# case-sensitive on Linux, so "photo.JPG" or "diagram.PNG" would otherwise be
# skipped without any warning.
image_exts = {".jpg", ".jpeg", ".png"}

# Sorted so that the order of paths (and therefore of embedding rows and
# cluster labels computed from them) is stable between runs.
image_paths = sorted(
    path
    for path in glob.glob(os.path.join(image_dir, "*"))
    if os.path.splitext(path)[1].lower() in image_exts
)

print("Found", len(image_paths), "images")
if not image_paths:
    # Fail early: the following cells cannot embed or cluster an empty list.
    raise ValueError("No images found. Put some .jpg/.png files into /content/images first.")


# ==== Cell 5: Select device and load ImageBind model ====
# Run on the GPU when torch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# Build the "huge" ImageBind model (requested with pretrained=True), put it in
# eval mode and move it to the selected device.
model = imagebind_model.imagebind_huge(pretrained=True)
model = model.eval().to(device)

# ==== Cell 6: Embed images with ImageBind ====
def load_image(path):
    """Open the image file at ``path`` and return it converted to RGB mode.

    The file is opened inside a ``with`` block so its handle is released
    before returning instead of being left for garbage collection.
    ``convert`` returns a separate image object, so the result stays usable
    after the source image has been closed.
    """
    with Image.open(path) as img:
        return img.convert("RGB")

# (optional) sanity check: make sure the first two files open and resize
# before handing the whole list to ImageBind.
_ = [load_image(sample).resize((64, 64)) for sample in image_paths[:2]]

# Preprocessing comes from the imagebind *data* module (not imagebind_model);
# the helper is given the file paths plus the target device.
batch = {
    ModalityType.VISION: imagebind_data.load_and_transform_vision_data(
        image_paths,
        device,
    ),
}

# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    vision_out = model(batch)[ModalityType.VISION]

# Bring the result back to the CPU as a NumPy array for scikit-learn.
image_emb = vision_out.cpu().numpy()
print("Embeddings shape:", image_emb.shape)


# ==== Cell 7: Cluster embeddings with KMeans and list images per cluster ====
# choose k based on how many distinct image types you have
n_samples = len(image_paths)
k = max(1, min(3, n_samples))  # at most 3 clusters, never more than images

# n_init is pinned explicitly because scikit-learn's default for it changed
# between releases; fixing it (with random_state) keeps runs comparable.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
labels = kmeans.fit_predict(image_emb)

# silhouette_score is only defined for 2 <= n_labels <= n_samples - 1.
# Checking just "more than one label" still raises ValueError when every
# image ends up in its own cluster (e.g. 2 or 3 images), so bound both sides.
n_found = len(set(labels))
if 1 < n_found < n_samples:
    sil = silhouette_score(image_emb, labels)
    print(f"Silhouette score: {sil:.3f}")
else:
    print("Silhouette score: n/a (requires 2 to n_samples - 1 clusters)")

# Print every image file name under the cluster it was assigned to.
for c in range(k):
    print(f"\nCluster {c}:")
    for path, lab in zip(image_paths, labels):
        if lab == c:
            print("  ", os.path.basename(path))



Collecting git+https://github.com/facebookresearch/ImageBind.git
  Cloning https://github.com/facebookresearch/ImageBind.git to /tmp/pip-req-build-4sqvpfft
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/ImageBind.git /tmp/pip-req-build-4sqvpfft
  Resolved https://github.com/facebookresearch/ImageBind.git to commit 53680b02d7e37b19b124fa37bae4b6c98c38f5be
  Preparing metadata (setup.py) ... done
Collecting pytorchvideo@ git+https://github.com/facebookresearch/pytorchvideo.git@6cdc929315aab1b5674b6dcf73b16ec99147735f (from imagebind==0.1.0)
  Using cached pytorchvideo-0.1.5-py3-none-any.whl


Saving Application UI.jpeg to Application UI.jpeg
Saving closeup-scarlet-macaw-from-side-view-scarlet-macaw-closeup-head.jpg to closeup-scarlet-macaw-from-side-view-scarlet-macaw-closeup-head.jpg
Saving fried-chicken-breast-cheese-tomato-french-fries-ketchup-green-salad-side-view-jpg.jpg to fried-chicken-breast-cheese-tomato-french-fries-ketchup-green-salad-side-view-jpg.jpg
Saving Kubernetes Running State.jpeg to Kubernetes Running State.jpeg
Saving RAG.png to RAG.png
Uploaded images: ['Kubernetes Running State.jpeg', 'fried-chicken-breast-cheese-tomato-french-fries-ketchup-green-salad-side-view-jpg.jpg', 'RAG.png', 'Application UI.jpeg', 'closeup-scarlet-macaw-from-side-view-scarlet-macaw-closeup-head.jpg']
Found 5 images
Using device: cuda
Embeddings shape: (5, 1024)
Silhouette score: 0.032

Cluster 0:
   Kubernetes Running State.jpeg
   RAG.png

Cluster 1:
   closeup-scarlet-macaw-from-side-view-scarlet-macaw-closeup-head.jpg
   fried-chicken-breast-cheese-tomato-french-fries-ketch