In [None]:
import os
from pathlib import Path
from byaldi import RAGMultiModalModel
from transformers import BitsAndBytesConfig
import torch

# os.environ["HF_TOKEN"] = "YOUR_HF_TOKEN"

In [None]:
# Choose a quantization strategy for the model weights. Supported values:
#   None   -> no BitsAndBytesConfig is built (bnb_config is None)
#   "8bit" -> BitsAndBytesConfig with load_in_8bit=True
#   "4bit" -> BitsAndBytesConfig with load_in_4bit=True, NF4 quant type, bfloat16 compute dtype

quant_strategy = None

if quant_strategy is None:
    bnb_config = None
elif quant_strategy == "8bit":
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )
elif quant_strategy == "4bit":
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
else:
    # Fail right here: without this branch a typo would either leave bnb_config
    # undefined (NameError in a later cell) or silently keep the value from a
    # previous run of this cell.
    raise ValueError(
        f"Unknown quant_strategy {quant_strategy!r}; expected None, '8bit' or '4bit'."
    )

In [None]:
# Build the RAGMultiModalModel from the ColQwen2 checkpoint, forwarding the bnb_config chosen above (may be None)
model = RAGMultiModalModel.from_pretrained(
    "vidore/colqwen2-v1.0",
    quantization_config=bnb_config,
)

In [None]:
# Let's get everyone's favourite paper in here
!wget https://arxiv.org/pdf/1706.03762
!mkdir docs
!mv 1706.03762 docs/attention.pdf
!cp -r docs/attention.pdf docs/attention_with_a_mustache.pdf

In [None]:
# Indexing smoke test: one metadata dict per entry of docs/, recording its file name
metadata = [{"filename": entry} for entry in os.listdir("docs")]

index_name = "attention_index"
model.index(
    input_path=Path("docs/"),
    index_name=index_name,
    metadata=metadata,
    store_collection_with_index=False,
    overwrite=True,
)

# The BLEU tables sit on pages 8 and 9. Both the pdf and its evil mustached twin are indexed,
# so every relevant page should show up twice with similar scores.
query = "what's the BLEU score of this new strange method?"
results = model.search(query, k=5)

print(f"Search results for '{query}':")
for hit in results:
    print(f"Doc ID: {hit.doc_id}, Page: {hit.page_num}, Score: {hit.score}")

print("Test completed successfully!")

Added page 12 of document 3 to index.


In [None]:
%%timeit
# Time a top-3 search; relies on `model` and `query` left in the kernel by the earlier cells
model.search(query, k=3)

In [None]:
# Re-create the model from the saved index (by name) so we can check the results are still the same.
from byaldi import RAGMultiModalModel

model = RAGMultiModalModel.from_index(
    "attention_index",
    quantization_config=bnb_config,
)

In [None]:
# Repeat the earlier top-5 query against the reloaded model
results = model.search(query, k=5)

print(f"Search results for '{query}':")
for hit in results:
    print(f"Doc ID: {hit.doc_id}, Page: {hit.page_num}, Score: {hit.score}")

## FILTER BASED ON METADATA

In [None]:
# Same query again, this time passing a metadata filter on the file name
results = model.search(
    query,
    k=5,
    filter_metadata={"filename": "attention.pdf"},
)

# Show the doc_id -> metadata mapping held by the underlying model, then the hits
print("Metadata information: ", model.model.doc_id_to_metadata)
print(f"Search results for '{query}':")
for hit in results:
    print(f"Doc ID: {hit.doc_id}, Page: {hit.page_num}, Score: {hit.score}")

In [None]:
# Second scenario: store the collection together with the index. This makes VLM integration
# simpler, at the cost of memory/storage.
from pathlib import Path
from byaldi import RAGMultiModalModel

model = RAGMultiModalModel.from_pretrained(
    "vidore/colpali",
    quantization_config=bnb_config,
)

# With base64 kept in the collection, RAG should be completely seamless.
pdf_path = Path("docs/attention.pdf")

# Build the index
index_name = "attention_index_with_collection"
model.index(
    input_path=pdf_path,
    index_name=index_name,
    overwrite=True,
    store_collection_with_index=True,
)

# Query it -- the answer is on page 6
query = "How does the positional encoding thing work?"
results = model.search(query, k=3)

print(f"Search results for '{query}':")
base_64s = set()
for hit in results:
    print(f"Doc ID: {hit.doc_id}, Page: {hit.page_num}, Score: {hit.score}")
    print(f"Base64: {hit.base64[:50]}...")
    # No two results may carry the same base64 payload
    assert hit.base64 not in base_64s
    print("Base64 is unique!")
    base_64s.add(hit.base64)
print("Test completed successfully!")

In [None]:
# Add one more document to the existing index. It happens to be the very same PDF,
# but the model doesn't need to be told that!

model.add_to_index(
    pdf_path,
    store_collection_with_index=True,
)

In [None]:
# Run the same top-3 query again now that the document has been added a second time
results = model.search(query, k=3)

print(f"Search results for '{query}':")
for hit in results:
    print(f"Doc ID: {hit.doc_id}, Page: {hit.page_num}, Score: {hit.score}")
    print(f"Base64: {hit.base64[:50]}...")
print("Test completed successfully!")