In [None]:
!pip install -qU pymongo voyageai google-genai sentence-transformers PyMuPDF Pillow tqdm tenacity


In [None]:
!pip install google-cloud-storage==2.10.0

In [None]:
import getpass
import os


In [None]:
from pymongo import MongoClient
import urllib.parse

# Prompt for the Atlas credentials instead of typing them into the cell, in the
# same way the API keys are collected with getpass further down; this keeps
# secrets out of the saved notebook. quote_plus percent-encodes characters that
# may not appear verbatim in a MongoDB connection string.
username = urllib.parse.quote_plus(input("Enter your MongoDB username: "))
password = urllib.parse.quote_plus(getpass.getpass("Enter your MongoDB password: "))

# NOTE(review): the host below belongs to one specific Atlas cluster — replace
# it with your own cluster's host when running this elsewhere.
MONGODB_URI = f"mongodb+srv://{username}:{password}@ragappcluster1.blhstjs.mongodb.net/?retryWrites=true&w=majority&appName=ragappcluster1"

client = MongoClient(MONGODB_URI)
db = client["mongo_rag_app"]   # database name
collection = db["ragappcluster1"]  # collection name

In [None]:
# Prompt for the Voyage AI key and expose it as the VOYAGE_API_KEY environment
# variable for this kernel process.
# NOTE(review): the Voyage client is created later with no arguments, so it
# presumably reads the key from this variable — confirm against the voyageai docs.
os.environ["VOYAGE_API_KEY"] = getpass.getpass("Enter your Voyage AI API key: ")

In [None]:
# Prompt for the Gemini key and keep it in a Python variable (not the environment).
# NOTE(review): GEMINI_API_KEY is not referenced in the visible part of the
# notebook — presumably it is passed to a Gemini client further down; confirm.
GEMINI_API_KEY = getpass.getpass("Enter your Gemini API key: ")

In [None]:
from io import BytesIO

import pymupdf
import requests

In [None]:
# Download the DeepSeek paper.
# A timeout is set explicitly: requests waits indefinitely by default, which
# would leave the cell hanging if arXiv does not respond.
response = requests.get("https://arxiv.org/pdf/2501.12948", timeout=60)
if response.status_code != 200:
    raise ValueError(f"Failed to download PDF. Status code: {response.status_code}")
# Load the response as an in-memory file-like object
pdf_stream = BytesIO(response.content)
# Open the object as a PDF document
pdf = pymupdf.open(stream=pdf_stream, filetype="pdf")

In [None]:
def upload_image_to_gcs(key: str, data: bytes) -> None:
    """
    Store an image as a new document in MongoDB.

    Despite the "gcs" in its name, this function only writes to the
    module-level `collection`; each call inserts one document of the form
    {"key": <key>, "image": <data>}.

    Args:
        key (str): Unique identifier for the image.
        data (bytes): Image bytes to upload.
    """
    image_record = {"key": key, "image": data}
    collection.insert_one(image_record)


In [None]:
# Accumulator for the rendered PDF pages: the page-rendering loop appends one
# dict per page (image bytes, MongoDB key, width, height).
docs = []

In [None]:
from tqdm import tqdm

In [None]:
# Remove every document left in the page-image collection by earlier runs.
# Note that `collection_name` holds a collection handle, not a string.
collection_name = db["ragappcluster1"]
collection_name.delete_many(filter={})
print("Collection cleared!")

In [None]:
# Render at 3x the default resolution so small text and figures stay legible
zoom = 3.0
mat = pymupdf.Matrix(zoom, zoom)

# Start from an empty list so that re-running this cell rebuilds `docs`
# instead of appending a second copy of every page.
docs = []

# Iterate through the pages of the PDF
for n in tqdm(range(pdf.page_count)):
    # Render the page as an image
    pix = pdf[n].get_pixmap(matrix=mat)
    # Convert the image to in-memory bytes in PNG format
    img_bytes = pix.tobytes("png")

    # Keys are 1-based page numbers
    mongo_key = f"multimodal-rag/{n+1}.png"
    # Save the image bytes to MongoDB
    upload_image_to_gcs(mongo_key, img_bytes)

    # Keep the image together with its key and pixel dimensions for embedding
    docs.append(
        {
            "image": img_bytes,
            "mongo_key": mongo_key,
            "width": pix.width,
            "height": pix.height,
        }
    )


In [None]:
from PIL import Image
import io
from IPython.display import display

# Preview the stored images: pull at most 15 documents from MongoDB,
# requesting only the "image" field of each.
preview_cursor = collection.find({}, {"image": 1, "_id": 0}).limit(15)
for stored in preview_cursor:
    # Decode the stored bytes and render the picture inline in the notebook
    display(Image.open(io.BytesIO(stored["image"])))


In [None]:
from typing import List

from PIL import Image
from sentence_transformers import SentenceTransformer
from voyageai import Client

In [None]:
# Instantiate the Voyage AI client used by get_voyage_embedding.
# No API key is passed here; presumably the client picks it up from the
# VOYAGE_API_KEY environment variable set earlier — TODO confirm.
voyageai_client = Client()

In [None]:
# Instantiate the CLIP model through sentence-transformers. get_clip_embedding
# uses it to embed both the page images and the text queries.
clip_model = SentenceTransformer("clip-ViT-B-32")

In [None]:
def get_voyage_embedding(data: Image.Image | str, input_type: str) -> List:
    """
    Embed a single image or piece of text with Voyage AI's multimodal model.

    The input is sent as a one-item request to the "voyage-multimodal-3"
    model through the module-level `voyageai_client`.

    Args:
        data (Image.Image | str): An image or text to embed.
        input_type (str): Input type, either "document" or "query".

    Returns:
        List: The embedding of `data`.
    """
    # The API takes a batch of inputs, each itself a list of content pieces;
    # wrap the single item accordingly and read back the only embedding.
    response = voyageai_client.multimodal_embed(
        inputs=[[data]],
        model="voyage-multimodal-3",
        input_type=input_type,
    )
    return response.embeddings[0]

In [None]:

def get_clip_embedding(data: Image.Image | str) -> List:
    """
    Embed a single image or piece of text with the CLIP model.

    Args:
        data (Image.Image | str): An image or text to embed.

    Returns:
        List: The embedding of `data`, converted to a plain Python list.
    """
    encoded = clip_model.encode(data)
    # Convert to a plain list so the vector can be stored in a MongoDB document
    return encoded.tolist()

In [None]:

# Will hold the page dicts once their embeddings have been attached.
# NOTE(review): redundant — the embedding cell that follows re-initializes
# this list itself before filling it.
embedded_docs = []

In [None]:
import time
from tqdm import tqdm
from PIL import Image
from io import BytesIO

# Pause between Voyage AI calls to respect the free-tier rate limit
# (3 requests/min → 20 seconds)
VOYAGE_REQUEST_INTERVAL_S = 20

embedded_docs = []

# Only pages that actually carry image bytes can be embedded
pages_to_embed = [doc for doc in docs if "image" in doc]

for position, doc in enumerate(tqdm(pages_to_embed), start=1):
    called_voyage = False

    # Pages that already carry both embeddings (e.g. from a run that failed
    # part-way through) are reused as-is, so re-running this cell resumes
    # instead of spending the rate-limited API calls again.
    if "voyage_embedding" not in doc or "clip_embedding" not in doc:
        # Open the image from in-memory bytes
        img = Image.open(BytesIO(doc["image"]))

        if "voyage_embedding" not in doc:
            doc["voyage_embedding"] = get_voyage_embedding(img, "document")
            called_voyage = True
        if "clip_embedding" not in doc:
            doc["clip_embedding"] = get_clip_embedding(img)

    embedded_docs.append(doc)

    # Throttle only when a Voyage request was made and more pages remain;
    # there is nothing to wait for after the final page.
    if called_voyage and position < len(pages_to_embed):
        time.sleep(VOYAGE_REQUEST_INTERVAL_S)


In [None]:
# Sanity check: show which fields the first embedded page carries
# (raises IndexError if no page was embedded).
embedded_docs[0].keys()

In [None]:
# Where the embedded pages are stored and the name of the vector index built on them
DB_NAME = "mongodb_multimodal_new"  # database holding the embedded pages
COLLECTION_NAME = "pdf_images_embeddings"  # collection receiving the embedded page documents
VS_INDEX_NAME = "vector_index_new"  # name of the vector search index created on that collection

In [None]:
from pymongo import MongoClient

# Open a client for the embeddings store and point `collection` at it.
# NOTE: this rebinds the global `collection`, which until now referred to the
# raw page-image collection. `upload_image_to_gcs` and the image preview cell
# read that global, so running them after this cell targets the embeddings
# collection instead.
mongodb_client = MongoClient(MONGODB_URI)
collection = mongodb_client[DB_NAME][COLLECTION_NAME]


In [None]:
#Run for safe side if garbage is there
from pymongo import MongoClient

# Use the existing `mongodb_client` (created in the previous cell) rather than
# opening yet another connection pool to the same cluster.

# Get the database object
db = mongodb_client[DB_NAME]  # DB_NAME is the database string

# Get the collection object
collection_name = db[COLLECTION_NAME]  # COLLECTION_NAME is the collection string

# Remove documents left over from earlier runs so the upcoming insert starts
# from an empty collection
collection_name.delete_many({})
print("Collection cleared!")

In [None]:

# Persist every embedded page (image bytes, metadata and both embeddings) as
# its own document in the embeddings collection.
# NOTE(review): insert_many is expected to reject an empty list — make sure the
# embedding cell actually produced results before running this.
collection.insert_many(embedded_docs)


In [None]:
# Definition of the vector search index: one cosine-similarity vector field
# per embedding stored on each page document. The dimension given for a path
# has to match the length of the vectors written under that path.
model = {
    "name": VS_INDEX_NAME,
    "type": "vectorSearch",
    "definition": {
        "fields": [
            {
                "type": "vector",
                "path": embedding_path,
                "numDimensions": dimensions,
                "similarity": "cosine",
            }
            for embedding_path, dimensions in (
                ("voyage_embedding", 1024),
                ("clip_embedding", 512),
            )
        ]
    },
}




In [None]:
import time

# How long to wait for the index to become usable, and how often to check
INDEX_READY_TIMEOUT_S = 300
INDEX_POLL_INTERVAL_S = 5

# Create the vector search index unless one with this name already exists, so
# that re-running the cell does not fail on a duplicate index.
# NOTE: an existing index is kept as-is, even if `model` has changed since.
if not list(collection.list_search_indexes(VS_INDEX_NAME)):
    collection.create_search_index(model=model)

# The index is built asynchronously on the server. Wait until it is reported
# as queryable; a $vectorSearch issued before that has nothing to search.
deadline = time.monotonic() + INDEX_READY_TIMEOUT_S
while True:
    index_status = next(iter(collection.list_search_indexes(VS_INDEX_NAME)), None)
    if index_status is not None and index_status.get("queryable"):
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Search index '{VS_INDEX_NAME}' was not queryable after "
            f"{INDEX_READY_TIMEOUT_S} seconds."
        )
    time.sleep(INDEX_POLL_INTERVAL_S)

print(f"Search index '{VS_INDEX_NAME}' is ready.")

In [None]:
from PIL import Image
from io import BytesIO
from typing import List

# Function to get image bytes from MongoDB instead of GCS
def get_image_from_mongo(mongo_key: str) -> bytes:
    """
    Look up the stored image bytes for a given key.

    Queries the module-level `collection` for the first document whose
    "mongo_key" field equals `mongo_key`, fetching only its "image" field.

    Args:
        mongo_key (str): Identifier for the image in the collection.

    Returns:
        bytes: Image bytes.

    Raises:
        ValueError: If no document matches or the match has no "image" field.
    """
    record = collection.find_one({"mongo_key": mongo_key}, {"image": 1, "_id": 0})
    # Guard clause: covers both "no such document" and "document without image"
    if not record or "image" not in record:
        raise ValueError(f"Image with mongo_key '{mongo_key}' not found.")
    return record["image"]


# Function to perform vector search on MongoDB collection
def vector_search(
    user_query: str,
    model: str,
    display_images: bool = True,
    limit: int = 5,
    num_candidates: int = 150,
) -> List[str]:
    """
    Perform vector search on MongoDB and display images, returning mongo_keys.

    The query is embedded with the chosen model and matched against the
    "<model>_embedding" field through the `VS_INDEX_NAME` vector index of the
    module-level `collection`.

    Args:
        user_query (str): User query. Annotated as text; it is passed unchanged
            to the embedding helper of the chosen model.
        model (str): 'voyage' or 'clip' embedding model.
        display_images (bool): Whether to print each score and display the
            matching image.
        limit (int): Maximum number of results to return. Defaults to 5.
        num_candidates (int): Number of nearest-neighbour candidates the search
            considers before keeping the best `limit`. Defaults to 150.

    Returns:
        List[str]: List of mongo_keys of matching documents, in result order.

    Raises:
        ValueError: If `model` is not 'voyage' or 'clip', if `limit` is below 1,
            if `num_candidates` is below `limit`, or (when displaying) if a
            result's image cannot be found.
    """
    # Validate all arguments first so a bad call fails before any embedding
    # request (the Voyage one is rate-limited) is made.
    if model not in ("voyage", "clip"):
        raise ValueError("Model must be 'voyage' or 'clip'")
    if limit < 1:
        raise ValueError("limit must be at least 1")
    if num_candidates < limit:
        raise ValueError("num_candidates must be greater than or equal to limit")

    # Get query embedding
    if model == "voyage":
        query_embedding = get_voyage_embedding(user_query, "query")
    else:
        query_embedding = get_clip_embedding(user_query)

    # MongoDB vector search pipeline
    pipeline = [
        {
            "$vectorSearch": {
                "index": VS_INDEX_NAME,
                "queryVector": query_embedding,
                # Each model's vectors live in their own field of the document
                "path": f"{model}_embedding",
                "numCandidates": num_candidates,
                "limit": limit,
            }
        },
        {
            # Return only lightweight metadata plus the similarity score; the
            # image bytes are fetched separately and only when displayed.
            "$project": {
                "_id": 0,
                "mongo_key": 1,
                "width": 1,
                "height": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]

    results = collection.aggregate(pipeline)
    mongo_keys = []

    for result in results:
        mongo_key = result["mongo_key"]

        # Display image if requested
        if display_images:
            img_bytes = get_image_from_mongo(mongo_key)
            img = Image.open(BytesIO(img_bytes))
            print(f"Score: {result['score']}\n")
            display(img)

        mongo_keys.append(mongo_key)

    return mongo_keys


In [None]:
# Try the search end to end with the Voyage AI embeddings: the matching pages
# are displayed and their mongo_keys are returned as the cell output.
vector_search(
    user_query="Summarize the Pass@1 accuracy of Deepseek R1 against other models.",
    model="voyage",
    display_images=True,
)