## In this notebook we create embeddings of the images in our fashion dataset using the CLIP model and store them in a Qdrant cloud vector database. The database is then queried to perform text-to-image search.

In [None]:
#!pip install qdrant-client

#!pip install clip

#!pip install transformers

#!pip install protobuf==3.20.0

In [38]:
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image
import IPython.display
import matplotlib.pyplot as plt
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
import clip

### Create Qdrant client

In [39]:
from qdrant_client import QdrantClient
from qdrant_client.http.exceptions import ApiException

# Connect to the Qdrant cluster.
# The URL and API key are taken from the QDRANT_URL / QDRANT_API_KEY environment
# variables when they are set, so real credentials never have to be written into
# the notebook; the fallbacks are the original placeholder values.
try:
    client = QdrantClient(
        url=os.environ.get("QDRANT_URL", ""),
        api_key=os.environ.get("QDRANT_API_KEY", "your_api_key"),
        timeout=None
    )
    print("client created successfully")

except ApiException as e:
    # Handle errors reported by the Qdrant API. (The previous handler named an
    # undefined class, which turned any failure here into a NameError.)
    print("Error creating Qdrant client:", e)
except Exception as e:
    # Handle any other unexpected errors
    print("An unexpected error occurred:", e)

client created successfully


### Create dataframe of articles consisting of their respective ids, captions and image paths

In [5]:
# Load the selected articles; the first CSV column is used as the row index.
articles = pd.read_csv(
    "C:/Users/dell/Desktop/selected_articles.csv",
    index_col=0,
)

In [6]:
# Article ids are missing their leading zero: cast them to strings and put the
# "0" back in front so they match the format of the original dataset.
articles['article_id'] = "0" + articles['article_id'].astype(str)

In [7]:
# Keep only the id and caption columns. The explicit copy makes `df` an
# independent frame, so adding columns to it later does not trigger
# SettingWithCopyWarning or depend on whether `df` is a view of `articles`.
df = articles[['article_id', 'caption']].copy()

In [8]:
# File names (not full paths) found in each of the two image folders.
files_1, files_2 = (
    os.listdir(folder)
    for folder in (
        "C:/Users/dell/Desktop/selected_images",
        "C:/Users/dell/Desktop/selected_images_1",
    )
)

In [None]:
# Map each article id (the part of a file name before its first '.') to the file
# name. files_2 is iterated last, so its entry wins when an id occurs in both folders.
id_to_file = {name.partition('.')[0]: name for name in [*files_1, *files_2]}

# Ids without a matching file get NaN in the new column.
df['image_path'] = df['article_id'].map(id_to_file)

# The lookup table is not needed once the column has been filled in.
del id_to_file

In [10]:
# Drop rows with any missing value and renumber the remaining rows 0..n-1
# (the new index is later used as the point id of each record).
# `drop=True` discards the old index directly rather than turning it into a
# column that then has to be removed by name, which failed with a KeyError
# whenever the old index had a name other than "index".
df = df.dropna().reset_index(drop=True)

### Preprocess data to create image embeddings using CLIP model

In [14]:
# Function to extract image from the given image path as PIL object
def get_image(image_path):
    """Open the image file named ``image_path`` as a PIL image.

    The name is looked up in the directory listing ``files_1`` first and then in
    ``files_2``; the file is opened from the folder whose listing contains it.

    Args:
        image_path: File name of the image (a bare name, not a full path).

    Returns:
        The object returned by ``Image.open`` for the matching file.

    Raises:
        FileNotFoundError: If the name is in neither directory listing.
    """
    if image_path in files_1:
        return Image.open('C:/Users/dell/Desktop/selected_images/' + image_path)

    if image_path in files_2:
        return Image.open('C:/Users/dell/Desktop/selected_images_1/' + image_path)

    # Previously this case reached `return image` with `image` never assigned,
    # which surfaced as an UnboundLocalError with no hint about the file.
    raise FileNotFoundError(f"{image_path!r} not found in either image directory")

In [15]:
# Function to get CLIP model, processor and tokenizer
def get_model_info(model_ID, device):
    """Load the pretrained CLIP model, processor and tokenizer for ``model_ID``.

    Args:
        model_ID: Checkpoint identifier passed to each ``from_pretrained`` call.
        device: Device the model is moved to via ``.to(device)``.

    Returns:
        Tuple ``(model, processor, tokenizer)``; only the model is moved to ``device``.
    """
    clip_model = CLIPModel.from_pretrained(model_ID)
    clip_model = clip_model.to(device)

    return (
        clip_model,
        CLIPProcessor.from_pretrained(model_ID),
        CLIPTokenizer.from_pretrained(model_ID),
    )

In [None]:
# Choose the GPU when torch can see one, otherwise fall back to the CPU, then
# load the CLIP checkpoint (model, processor and tokenizer) for that device.
model_ID = "openai/clip-vit-base-patch32"

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model, processor, tokenizer = get_model_info(model_ID, device)

In [19]:
from qdrant_client.http import models

# Get image embeddings and create records to be added into the collection.
# Each record uses the row's index in `df` as its id, the CLIP image embedding
# as its vector and the article caption as its payload.
records = []

# Inference only: no_grad avoids building an autograd graph for every image.
with torch.no_grad():
    for idx, row in df.iterrows():
        pixel_values = processor(
            text=None,
            images=get_image(row['image_path']),
            return_tensors="pt",
            padding=True,
        )['pixel_values']

        # The model may live on the GPU, so the input has to be moved to the
        # model's device and the result brought back to the CPU before the
        # conversion to numpy (which only works on CPU tensors).
        img_embds = model.get_image_features(pixel_values=pixel_values.to(model.device))
        img_embds = img_embds.cpu().numpy().tolist()[0]

        records.append(models.Record(id=idx, vector=img_embds, payload={"caption": row['caption']}))

In [20]:
# Number of records built by the embedding loop (one is appended per row of df).
len(records)

54914

### Create a collection in Qdrant

The code below is commented out to prevent modification.

In [21]:
# # Create a collection
# from qdrant_client.http.models import VectorParams, Distance

# # Create a new collection
# client.create_collection(
#    collection_name="image_embeddings_by_original_clip",
#    vectors_config={
#        "image": VectorParams(
#            size=512,
#            distance=Distance.COSINE,
#        ),
#    }
# )

True

### Insert data into collection

The code below is commented out to prevent modification.

In [22]:
# batch_size=100
# batches=np.array_split(records, len(records) / batch_size)

In [37]:
# # Insert points into collection in batches
# from qdrant_client.http.models import PointStruct

# for batch in batches:
#     client.upsert(
#         collection_name="image_embeddings_by_original_clip",
#         points=models.Batch(
#                 ids=[record.id for record in batch],
#                 vectors= {"image":[record.vector for record in batch]},
#                 payloads=[record.payload for record in batch]
#             ),
#     )

In [40]:
# Ask the Qdrant server for the collection's current status and configuration.
client.get_collection(
    collection_name="image_embeddings_by_original_clip",
)

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=54914, indexed_vectors_count=50514, points_count=54914, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors={'image': VectorParams(size=512, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None)}, shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=1), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None), p

### Perform text-to-image search

In [41]:
# Create input text (caption) embedding and pass it as a query vector to perform search
text = "blue shirt"

# The tokenized inputs must be on the same device as the model; without the
# move this cell fails whenever the model was loaded onto the GPU.
inp = tokenizer(text, return_tensors="pt").to(model.device)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    text_embeddings = model.get_text_features(**inp).cpu().numpy().tolist()[0]

# Query the named vector "image" and keep the 5 best-scoring points.
hits = client.search(
    collection_name="image_embeddings_by_original_clip",
    query_vector=("image", text_embeddings),
    limit=5,
)

In [44]:
# Search results: display the points returned by the search for the text query
hits

[ScoredPoint(id=50547, version=506, score=0.29783112, payload={'caption': 'womens Denim Blue Shirt, Blouse in soft, washed Tencel™ lyocell denim with a collar, buttons down the front and short, voluminous puff sleeves with narrow cuffs and covered buttons.'}, vector=None, shard_key=None),
 ScoredPoint(id=38193, version=382, score=0.29590243, payload={'caption': 'mens Solid Dark Blue Shirt, Straight-cut, short-sleeved shirt in a cotton weave with a turn-down collar and classic front. Yoke with a pleat and hanger at the back, and an open chest pocket.'}, vector=None, shard_key=None),
 ScoredPoint(id=14539, version=146, score=0.2925178, payload={'caption': 'mens Treatment Dark Blue T-shirt, Wide T-shirt in cotton jersey with gently dropped shoulders and a chest pocket.'}, vector=None, shard_key=None),
 ScoredPoint(id=19144, version=192, score=0.29228914, payload={'caption': 'mens Melange Blue T-shirt, Short-sleeved sports top in fast-drying functional fabric. Some of the polyester content