In [1]:
import os
from pathlib import Path
from byaldi import RAGMultiModalModel
from together import Together
import time

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Initialize the ColPali Model
model = RAGMultiModalModel.from_pretrained("vidore/colqwen2-v0.1")

Verbosity is set to 1 (active). Pass verbose=0 to make quieter.


adapter_config.json:   0%|          | 0.00/728 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/56.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/74.0M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/4.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/392 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

In [9]:
knowledge_base_path = "/content/drive/MyDrive/gii"
index_name = "dir_doc_index"

# List all PDF files in the knowledge base directory
files = [f for f in os.listdir(knowledge_base_path) if f.endswith('.pdf')]

# Check if there are any PDF files to index
if not files:
    print("No PDF files found in the specified directory.")
else:
    print("Documents found for indexing:")
    for file in files:
        print(f"- {file}")

    # Index the first document
    first_file_path = Path(knowledge_base_path) / files[0]
    print(f"\nIndexing document: {files[0]}")
    model.index(input_path=first_file_path, index_name=index_name, store_collection_with_index=True, overwrite=False)

    # Add the remaining documents to the same index
    for file in files[1:]:  # Start from the second file
        file_path = Path(knowledge_base_path) / file
        print(f"Indexing document: {file}")
        model.add_to_index(input_item=file_path, store_collection_with_index=True)

    print("All documents indexed successfully!")


Documents found for indexing:
- wipo-pub-2000-2023-section1-en-gii-2023-at-a-glance-global-innovation-index-2023.pdf
- wipo-pub-2000-2022-section1-en-gii-2022-at-a-glance-global-innovation-index-2022-15th-edition.pdf

Indexing document: wipo-pub-2000-2023-section1-en-gii-2023-at-a-glance-global-innovation-index-2023.pdf
Added page 1 of document 1 to index.
Added page 2 of document 1 to index.
Added page 3 of document 1 to index.
Added page 4 of document 1 to index.
Added page 5 of document 1 to index.
Added page 6 of document 1 to index.
Added page 7 of document 1 to index.
Index exported to .byaldi/dir_doc_index
Index exported to .byaldi/dir_doc_index
Indexing document: wipo-pub-2000-2022-section1-en-gii-2022-at-a-glance-global-innovation-index-2022-15th-edition.pdf
Added page 1 of document 2 to index.
Added page 2 of document 2 to index.
Added page 3 of document 2 to index.
Added page 4 of document 2 to index.
Added page 5 of document 2 to index.
Added page 6 of document 2 to index.


In [10]:
# Lets query our index and retrieve the page that has content with the highest similarity to the query

query = "find how india has performed in the rankings in 2022 and 2023"
results = model.search(query, k=5)  # Retrieve more results to cover multiple documents

# Group results by document and find the page with the highest score in each document
best_results = {}
for result in results:
    doc_id = result.doc_id
    if doc_id not in best_results or result.score > best_results[doc_id].score:
        best_results[doc_id] = result
        # Print the document and page information when a new best result is found
        print(f"Document: {result.doc_id}, Page: {result.page_num}, Score: {result.score}")

Document: 1, Page: 2, Score: 24.5
Document: 2, Page: 6, Score: 24.0


In [11]:
# Prepare pages for Llama
pages_to_pass = []
for doc_id, result in best_results.items():
    base64_page = result.base64
    pages_to_pass.append({
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_page}"
        },
    })

In [12]:
os.environ["TOGETHER_API_KEY"] = "YOUR TOGETHER API KEY"
api_key = os.environ.get("TOGETHER_API_KEY")
client = Together(api_key = api_key)

In [13]:
# Store individual page results
page_results = []

# --- Start timing for individual page processing ---
start_time_pages = time.time()

# Loop through each page and get Llama's output
for page in pages_to_pass:
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": query},
                    page,  # Pass one page at a time
                ],
            }
        ],
        max_tokens=300,  # Reduced to potentially speed up response
        temperature=0.0,  # Lower temperature for more deterministic and focused responses
    )
    page_results.append(response.choices[0].message.content)

# --- End timing for individual page processing ---
end_time_pages = time.time()
page_processing_time = end_time_pages - start_time_pages

# --- Start timing for final summary ---
start_time_summary = time.time()

# Combine individual page results for final summary with clear instructions
final_query = (
    "Based **only** on the information provided in the following excerpts, "
    "weave together the insights from each excerpt cohesively, "
    "without adding any external knowledge:\n\n"
    + "\n\n".join(page_results)
)

final_response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": final_query},
            ],
        }
    ],
    max_tokens=500,  # Reduced to potentially speed up response
    temperature=0.0,  # Lower temperature for more deterministic and focused responses
)

# --- End timing for final summary ---
end_time_summary = time.time()
summary_processing_time = end_time_summary - start_time_summary

print(final_response.choices[0].message.content)

print(f"\nTime taken to process individual pages: {page_processing_time:.2f} seconds")
print(f"Time taken to generate final summary: {summary_processing_time:.2f} seconds")
print(f"Total time taken: {page_processing_time + summary_processing_time:.2f} seconds")

Here's a cohesive summary of the insights from the excerpts:

India has consistently demonstrated strong innovation capabilities, both regionally and globally. In the Central and Southern Asia region, India maintained its position as the top-ranked country in 2022 and 2023, indicating a sustained performance in innovation. Globally, India ranked 40th out of 132 countries in the Global Innovation Index (GII) 2022 report, which was an improvement from the previous year. Although India's ranking slightly declined to 42nd out of 135 countries in the GII 2023 report, it still remained among the top 50 most innovative countries in the world. Overall, India's innovation capabilities have been steadily improving, and it is likely to continue playing a significant role in the global innovation landscape.

Time taken to process individual pages: 18.40 seconds
Time taken to generate final summary: 10.58 seconds
Total time taken: 28.98 seconds
