In [1]:
import os
from pathlib import Path
from dotenv import load_dotenv

In [2]:
BASE_DIR =Path().resolve()

In [3]:
images_root = BASE_DIR/"images"
images_root

WindowsPath('D:/Semantic-Image_Search/semantic_image_search/notebooks/images')

In [4]:
Model_id = "ViT-B-32__laion2b-s34b-b79k"

In [13]:
load_dotenv()

True

In [6]:
from langchain_experimental.open_clip import OpenCLIPEmbeddings
embedder = OpenCLIPEmbeddings(model = Model_id, device = "cpu")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [8]:
img_embedding = embedder.embed_image([str(images_root/"animal"/"cat.jpeg")])

In [9]:
len(img_embedding[0])

1024

In [14]:
url = os.getenv('API_ENDPOINT')
url

'https://a998d728-32b1-41bc-b8dc-e0ff42436d72.us-east-1-1.aws.cloud.qdrant.io:6333'

In [15]:
api_key = os.getenv("QDRANT_API_KEY")
api_key

'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.ueVVrH-Yo1-SsQr4VoIELs8nKdOJnGoI0_60yWANTQU'

In [16]:
from qdrant_client import QdrantClient
qdrant_client = QdrantClient(url=url,api_key=api_key)

In [20]:
collections = qdrant_client.get_collections().collections
collections

[]

In [21]:
COLLECTION_NAME ="semantic-image-search"
VECTOR_SIZE = 1024

In [22]:
from qdrant_client.http import models

In [None]:
qdrant_client.create_collection(
    collection_name= COLLECTION_NAME,
    vectors_config = models.VectorParams(
         size = VECTOR_SIZE,
         distance = models.Distance.COSINE
        )
)

In [29]:
collections = qdrant_client.get_collections().collections
existing_names = {c.name for c in collections}
existing_names

{'semantic-image-search'}

In [37]:
if COLLECTION_NAME not in existing_names:
    print(f"Creating collection: {COLLECTION_NAME}")
    qdrant_client.recreate_collection(
    collection_name= COLLECTION_NAME,
    vectors_config = models.VectorParams(
         size = VECTOR_SIZE,
         distance = models.Distance.COSINE
        )
    )
else:
    print(f"Collection already exists: {COLLECTION_NAME} (reusing)")

Collection already exists: semantic-image-search (reusing)


In [32]:
import numpy as np
import os
from PIL import Image
from uuid import uuid4

In [38]:
def index_image(image_path, category =None):
    img_embed =  embedder.embed_image([image_path])[0]
    emb = np.array(img_embed).tolist()
    
    payload = {
        "filename":os.path.basename(image_path),
        "path":image_path,
        "category":category
    }
    
    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[
            models.PointStruct(
                id = str(uuid4()),
                vector=emb,
                payload=payload
            )
        ]
    )
    print(f"Indexed → {image_path}")

In [None]:
cat_image_path = str(images_root/"animal"/"cat.jpeg")
cat_image_path

'D:\\Semantic-Image_Search\\semantic_image_search\\notebooks\\images\\animal\\cat.jpeg'

In [40]:
index_image(cat_image_path, category="animal")

Indexed → D:\Semantic-Image_Search\semantic_image_search\notebooks\images\animal\cat.jpeg


In [41]:
def index_folder(root_folder):
    exts = (".jpg",".jpeg", ".png", ".webp")
    for dir_path, _,files in os.walk(root_folder):
        category = os.path.basename(dir_path)
        for f in files:
            if f.lower().endswith(exts):
                img_path = os.path.join(dir_path, f)
                index_image(img_path, category=category)

In [42]:
index_folder("images")

Indexed → images\animal\cat.jpeg
Indexed → images\animal\crocodile.jpeg
Indexed → images\animal\crocodile_1.png
Indexed → images\animal\dog.jpeg
Indexed → images\animal\elephant.jpeg
Indexed → images\animal\giraffe.webp
Indexed → images\animal\horse.webp
Indexed → images\animal\lion.jpeg
Indexed → images\animal\panda.jpg
Indexed → images\animal\tiger.jpeg
Indexed → images\animal\zebra.jpeg
Indexed → images\flower\lavender.jpeg
Indexed → images\flower\lily.jpeg
Indexed → images\flower\lotus.jpg
Indexed → images\flower\marigold.jpeg
Indexed → images\flower\rose.jpg
Indexed → images\flower\sunflower.jpeg
Indexed → images\flower\tulip.webp
Indexed → images\furniture\table.jpeg
Indexed → images\general\bottle.jpeg
Indexed → images\general\car.webp
Indexed → images\general\chair.jpeg
Indexed → images\general\cycle.webp
Indexed → images\general\laptop.jpeg
Indexed → images\general\pen.webp
Indexed → images\general\phone.jpeg
Indexed → images\general\table.jpeg
Indexed → images\uncategorized\a

## Retrieval

#### Text-to-Image

In [43]:
def search_text(query, k:5):
    results = qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        query=query,
        limit =k,
        with_payload=True
    )
    return results

In [44]:
query = " image of a cat with angry face"

In [47]:
results = search_text(embedder.embed_query(query),k=3)

In [48]:
for point in results.points:
    print(point.payload, "score =", point.score)

{'filename': 'cat.jpeg', 'path': 'D:\\Semantic-Image_Search\\semantic_image_search\\notebooks\\images\\animal\\cat.jpeg', 'category': 'animal'} score = 0.2748592
{'filename': 'cat.jpeg', 'path': 'images\\animal\\cat.jpeg', 'category': 'animal'} score = 0.2748592
{'filename': 'tiger.jpeg', 'path': 'images\\animal\\tiger.jpeg', 'category': 'animal'} score = 0.19327226


In [49]:
results = search_text(embedder.embed_query("active crocodile"),k=3)
for point in results.points:
    print(point.payload, "score =", point.score)

{'filename': 'crocodile_1.png', 'path': 'images\\uncategorized\\crocodile_1.png', 'category': 'uncategorized'} score = 0.31047267
{'filename': 'crocodile_1.png', 'path': 'images\\animal\\crocodile_1.png', 'category': 'animal'} score = 0.31047267
{'filename': 'crocodile_1.png', 'path': 'images\\weapon\\crocodile_1.png', 'category': 'weapon'} score = 0.31047267


In [50]:
results = search_text(embedder.embed_query("YELLOW flower"),k=3)
for point in results.points:
    print(point.payload, "score =", point.score)

{'filename': 'sunflower.jpeg', 'path': 'images\\flower\\sunflower.jpeg', 'category': 'flower'} score = 0.24796422
{'filename': 'marigold.jpeg', 'path': 'images\\flower\\marigold.jpeg', 'category': 'flower'} score = 0.24039708
{'filename': 'lily.jpeg', 'path': 'images\\flower\\lily.jpeg', 'category': 'flower'} score = 0.15264145


#### Image-to-Image

In [51]:
def search_by_image(image_path, k=5):
    emb = embedder.embed_image([image_path])[0]
    results = qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        query = emb,
        limit = k,
        with_payload = True
    )
    return results

In [52]:
cat_image_path = str(images_root/"animal"/"cat.jpeg")
cat_image_path

'D:\\Semantic-Image_Search\\semantic_image_search\\notebooks\\images\\animal\\cat.jpeg'

In [54]:
query_path = cat_image_path

In [55]:
results = search_by_image(query_path, k=3)

In [57]:
for point in results.points:
    print(point.payload, "score=", point.score)

{'filename': 'cat.jpeg', 'path': 'D:\\Semantic-Image_Search\\semantic_image_search\\notebooks\\images\\animal\\cat.jpeg', 'category': 'animal'} score= 1.0000002
{'filename': 'cat.jpeg', 'path': 'images\\animal\\cat.jpeg', 'category': 'animal'} score= 1.0000002
{'filename': 'tiger.jpeg', 'path': 'images\\animal\\tiger.jpeg', 'category': 'animal'} score= 0.59761214
