In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
from dotenv import load_dotenv, find_dotenv
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    InferenceClientModel,
    VisitWebpageTool,
    WikipediaSearchTool,
    FinalAnswerTool,
    PythonInterpreterTool,
    Tool,
)
import sys
from pyprojroot import find_root, has_file
# Locate a .env file with find_dotenv() and load it with load_dotenv(); the
# return value is bound to `_` because it is deliberately not used.
_ = load_dotenv(find_dotenv())

In [3]:
# Resolve the project root as the directory that contains README.md, so that
# project-local packages can be imported from this notebook.
project_root_dir = find_root(has_file("README.md")).as_posix()
# Guard the insertion: re-running this cell must not pile up duplicate entries
# on sys.path.
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)


In [4]:
from huggingface_hub import model_info, InferenceClient

In [5]:
from PIL import Image
from io import BytesIO
import base64
from pathlib import Path

In [None]:
# Hugging Face InferenceClient configured with provider "hyperbolic" and
# bill_to "VitalNest"; later cells use it for chat-completion requests.
client = InferenceClient(
    provider="hyperbolic",
    bill_to="VitalNest",
)


In [2]:
# IPython shell capture: fetch https://huggingface.co/api/models with curl and
# extract each entry's `id` with jq (both tools must be on PATH).
# NOTE(review): the URL carries no inference-related filter, so despite the
# variable name this is presumably a generic model listing; confirm.
all_inference_models = !curl -s https://huggingface.co/api/models | jq ".[].id"

In [29]:

# Fetch Hub metadata for the model, requesting the `inference` field via
# `expand`, then show that field as the cell's output.
info = model_info("google/gemma-3-27b-it", expand="inference")
info.inference


'warm'

In [22]:
# smolagents model wrapper around the Qwen2.5-VL vision-language model, used
# below for direct generation over page images.
vlm_model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-VL-72B-Instruct",
    bill_to="VitalNest",
    timeout=300,
    temperature=0.1,
    max_tokens=10_000,
)

In [2]:
def encode_image_to_base64(image_path):
    """
    Load an image file and return it as a base64-encoded PNG string.

    The image is re-encoded as PNG in memory regardless of its on-disk format,
    so consumers should label the resulting payload as ``image/png``.

    Args:
        image_path (str | os.PathLike): Path to the image file.

    Returns:
        str: Base64 encoding of the PNG bytes, decoded as UTF-8 text.
    """
    buffered = BytesIO()
    # Use the image as a context manager so the underlying file handle is
    # closed as soon as the pixels have been re-encoded.
    with Image.open(image_path) as image:
        image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


In [36]:
# A MAXIMUM of 4 images can be sent in a single request.
MAX_IMAGES_PER_REQUEST = 4

# Page renders of the policy document to send to the model. Other pages
# (e.g. 43 or 44) can be swapped in, as long as the cap above is respected.
img_paths = [
    Path(
        f"../src/pages/optima-restore-revision.pdf/optima-restore-revision.pdf_page_{page_number}.png"
    )
    for page_number in (42, 12, 1)
]
assert len(img_paths) <= MAX_IMAGES_PER_REQUEST, "too many images for one request"

In [37]:
# Base64-encode every selected page image, keeping the order of img_paths.
grouped_images = list(map(encode_image_to_base64, img_paths))

In [38]:
# Question put to the model, with an explicit instruction to admit ignorance.
text_query = """what critical illnesses are covered under optima restore?
If the answer isn't in these documents, say you don't know."""

# Chat payload: a system instruction, then a single user turn that carries every
# page image as a data URL followed by the text question.
chat_template = [
    {
        "role": "system",
        "content": """You find answers from the relevant documents. Answer only 
from these documents. If answer isn't available return 'Question cannot be answered based
on the documents provided.' """,
    },
    {
        "role": "user",
        "content": [
            {
                "type": "image_url",
                # encode_image_to_base64 always re-encodes to PNG, so the data
                # URL must advertise image/png rather than image/jpeg.
                "image_url": {"url": f"data:image/png;base64,{image}"},
            }
            for image in grouped_images
        ]
        + [{"type": "text", "text": text_query}],
    },
]


In [39]:
# Send the chat payload through the smolagents model wrapper and print only the
# `.content` of the returned message.
print(vlm_model.generate(messages=chat_template).content)

The critical illnesses covered under Optima Restore include Cancer, Open Chest CABG, First Heart Attack, Kidney Failure, Major Organ/Bone Marrow Transplant, Multiple Sclerosis, Permanent Paralysis of Limbs, and Stroke.


In [41]:
# Issue the chat payload directly through the InferenceClient chat-completions
# API, using the same model id and sampling settings as vlm_model.
# NOTE(review): image_url entries can presumably also be plain https URLs
# instead of base64 data URLs; confirm against the provider.
completion = client.chat.completions.create(
    messages=chat_template,
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    max_tokens=10_000,
    temperature=0.1,
)

# Print the message content of the first returned choice.
print(completion.choices[0].message.content)


The critical illnesses covered under Optima Restore include Cancer, Open Chest CABG, First Heart Attack, Kidney Failure, Major Organ/Bone Marrow Transplant, Multiple Sclerosis, Permanent Paralysis of Limbs, and Stroke.


In [24]:
class InsuranceInfoRetriever(Tool):
    """Tool that answers a query from insurance document page images.

    The page images are base64-encoded and sent, together with the query, to
    the Qwen2.5-VL model through a Hugging Face InferenceClient; the text of
    the model's reply is returned.
    """

    name = "InsuranceInfoRetriever"
    description = "Retrieves information from insurance documents."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to search for."
        },
        "search_res": {
            # forward() iterates this value as a list of image paths, so the
            # schema advertises an array rather than a string.
            "type": "array",
            "description": "Insurance document search results."
        },
        "answer": {
            "type": "string",
            "description": "Answer to the query."
        }
    }
    output_type = "string"

    # System instruction sent with every request (whitespace kept verbatim).
    _SYSTEM_PROMPT = """You find answers from the relevant documents. Answer only 
        from these documents. If answer isn't available return 'Question cannot be answered based
        on the documents provided.' """

    @staticmethod
    def _encode_image_to_base64(image_path):
        """
        Load an image file and return it as a base64-encoded PNG string.

        Args:
            image_path (str | os.PathLike): Path to the image file.

        Returns:
            str: Base64 encoding of the PNG bytes, decoded as UTF-8 text.
        """
        buffered = BytesIO()
        # Context manager closes the underlying file handle after re-encoding.
        with Image.open(image_path) as image:
            image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def forward(self, query: str, search_res: list[str], answer: str) -> str:
        """Answer ``query`` from the page images listed in ``search_res``.

        Args:
            query: The question to answer.
            search_res: Paths of the page images to consult. A single path
                passed as a bare string is treated as a one-element list.
            answer: Kept only so the declared tool signature stays unchanged;
                its value is never read.

        Returns:
            The model's reply text, or an empty string when the reply is empty.
        """
        # Iterating a bare string would yield one bogus "path" per character.
        if isinstance(search_res, (str, Path)):
            search_res = [search_res]
        encoded_pages = [
            self._encode_image_to_base64(Path(res)) for res in search_res
        ]

        client = InferenceClient(
            provider="hyperbolic",
            bill_to="VitalNest",
        )
        chat_template = [
            {"role": "system", "content": self._SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        # The payload is always PNG, so label it as such.
                        "image_url": {"url": f"data:image/png;base64,{page}"},
                    }
                    for page in encoded_pages
                ]
                + [{"type": "text", "text": query}],
            },
        ]
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=chat_template,
            temperature=0.1,
            max_tokens=10_000,
        )
        reply = completion.choices[0].message.content
        return reply if reply else ""

# class SaveContent(Tool):
#     name = "SaveContent"
#     description = "Saves content to a file."
#     inputs = {
#         "content": {
#             "type": "string",
#             "description": "The content to save."
#         },
#         "filename": {
#             "type": "string",
#             "description": "The filename."
#         }
#     }
#     output_type = "string"

#     def forward(self, content: str, filename: str ):
#         with open(filename, "w") as f:
#             f.write(content)
#         return f"Content saved to {filename}"



In [30]:
# Code-writing agent that answers insurance questions using the
# InsuranceInfoRetriever tool.
insurance_agent = CodeAgent(
    tools=[InsuranceInfoRetriever(), PythonInterpreterTool(), FinalAnswerTool()],
    model=InferenceClientModel(bill_to="VitalNest"),
    # Python module names are case-sensitive and Pillow's importable package is
    # `PIL`, so it must be listed with that exact spelling.
    additional_authorized_imports=["os", "requests", "bs4", "PIL", "base64", "io"],
    max_steps=4,
    planning_interval=2,
    verbosity_level=0,
    name="insurance_agent",
    description="You answer insurance questions based on the search results provided to you.",
)

In [None]:
# Question to ask, plus the page images (as plain path strings) that serve as
# the search results embedded in the agent task.
query = "what critical illnesses are covered under optima restore?"
search_res = [
    "../src/pages/optima-restore-revision.pdf/optima-restore-revision.pdf_page_42.png",
    "../src/pages/optima-restore-revision.pdf/optima-restore-revision.pdf_page_12.png",
    "../src/pages/optima-restore-revision.pdf/optima-restore-revision.pdf_page_1.png",
]
# Run the insurance agent. The task text embeds the query and the Python repr
# of the search_res list; the call's return value is the cell output.
insurance_agent.run(task=f"""
For the insurance search results below, answer the following query:{query}.
search_res: {search_res}
""")

['Cancer',
 'Open Chest CABG',
 'First Heart Attack',
 'Kidney Failure',
 'Major Organ/Bone Marrow Transplant',
 'Multiple Sclerosis',
 'Permanent Paralysis of Limbs',
 'Stroke']

In [2]:
from smolagents import ToolCallingAgent

In [9]:
# Tool-calling agent equipped with a DuckDuckGo search tool and a web-page
# visiting tool.
websearch_agent = ToolCallingAgent(
    name="web_search_agent",
    description="Searches the web with a particular query.",
    tools=[
        VisitWebpageTool(max_output_length=20_000),
        DuckDuckGoSearchTool(max_results=5),
    ],
    model=InferenceClientModel(
        model_id="Qwen/Qwen3-30B-A3B",
        bill_to="VitalNest",
        temperature=0.1,
    ),
    max_steps=4,
    planning_interval=2,
    verbosity_level=0,
)

In [None]:
# websearch_agent.run("what's multi head latent attention?")

In [11]:
# Tool-calling agent whose only tool is the Wikipedia search tool.
wikipedia_agent = ToolCallingAgent(
    name="wikipedia_agent",
    description="Searches Wikipedia for a topic.",
    tools=[WikipediaSearchTool(user_agent="WikiAssistant (merlin@example.com)")],
    model=InferenceClientModel(
        model_id="Qwen/Qwen3-30B-A3B",
        bill_to="VitalNest",
        temperature=0.1,
    ),
    max_steps=3,
    verbosity_level=0,
)

In [34]:
# Manager agent that has the web-search, Wikipedia and insurance agents
# registered as managed agents.
manager_agent = CodeAgent(
    model=InferenceClientModel(
        model_id="Qwen/Qwen3-30B-A3B",  # alternative: "Qwen/Qwen3-235B-A22B"
        bill_to="VitalNest",
        temperature=0.1,
    ),
    tools=[PythonInterpreterTool(), FinalAnswerTool()],
    managed_agents=[websearch_agent, wikipedia_agent, insurance_agent],
    additional_authorized_imports=["os", "requests", "bs4"],
    max_steps=10,
    planning_interval=3,
    verbosity_level=0,
)

In [None]:
# Run the manager agent on the insurance question. `query` and `search_res`
# must already be defined by an earlier cell. The commented-out text inside the
# call is an alternative Wikipedia/web-search task.
manager_agent.run(
    task=f"""
For the insurance search results below, answer the following query:{query}.
search_res: {search_res}
"""
    #     """Search for `topic`:multi head latent attention in wikipedia.
    #                   If there's no information about the same in Wikipedia, search the web
    #                   for it and return a summary of the findings along with the search result links.
    # """
)

In [6]:
from src.insurance_assistants.complex_rag import RAG

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
# Instantiate the project's RAG class and set its vectordb_id attribute to
# "policy_wordings".
rag_app = RAG()
rag_app.vectordb_id = "policy_wordings"

In [8]:
# Run a document search for the query through the RAG instance.
# NOTE(review): the recorded run of this cell logged an empty search result and
# a Milvus "Cannot find field with field_id: 0" error for the policy_wordings
# collection; the collection schema presumably needs checking.
query = "what critical illnesses are covered under optima restore?"
res = rag_app.search_documents(query)


INFO:src.insurance_assistants.complex_rag:Searching for 1 queries
INFO:src.insurance_assistants.complex_rag:Searching for query: what critical illnesses are covered under optima restore?
INFO:src.insurance_assistants.complex_rag:Processing 1 texts
INFO:src.insurance_assistants.complex_rag:Search result: [] for query: what critical illnesses are covered under optima restore?
2025-05-28 12:38:06,937 [ERROR][handler]: RPC error: [query], <MilvusException: (code=2000, message=Assert "fields_.find(field_id) != fields_.end()" at /workspace/milvus-lite/thirdparty/milvus/internal/core/src/common/Schema.h:129
 => Cannot find field with field_id: 0: segcore error)>, <Time:{'RPC start': '2025-05-28 12:38:06.927572', 'RPC error': '2025-05-28 12:38:06.937616'}> (decorators.py:140)
2025-05-28 12:38:06,938 [ERROR][query]: Failed to query collection: policy_wordings (milvus_client.py:475)


Assert "fields_.find(field_id) != fields_.end()" at /workspace/milvus-lite/thirdparty/milvus/internal/core/src/common/Schema.h:129
 => Cannot find field with field_id: 0
Assert "fields_.find(field_id) != fields_.end()" at /workspace/milvus-lite/thirdparty/milvus/internal/core/src/common/Schema.h:129
 => Cannot find field with field_id: 0


In [None]:
# from pymilvus import  MilvusClient


In [None]:
# MilvusClient((Path(project_root_dir) / f"src/insurance_assistants/milvus_policy_wordings.db").as_posix())

In [1]:
import torch
from PIL import Image
from transformers.utils.import_utils import is_flash_attn_2_available

from colpali_engine.models import ColQwen2_5, ColQwen2_5_Processor


In [None]:

# Pick the best available device instead of assuming a CUDA GPU is present:
# CUDA first, then Apple-Silicon MPS, then CPU as a last resort.
if torch.cuda.is_available():
    device = "cuda:0"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Load the ColQwen2.5 retrieval model in bfloat16 and switch it to eval mode.
model = ColQwen2_5.from_pretrained(
    pretrained_model_name_or_path="vidore/colqwen2.5-v0.2",
    torch_dtype=torch.bfloat16,
    device_map=device,
    # Use FlashAttention-2 only when it is installed; otherwise fall back to
    # the default attention implementation.
    attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
).eval()
# Matching processor for the same checkpoint.
processor = ColQwen2_5_Processor.from_pretrained("vidore/colqwen2.5-v0.2", use_fast=True)


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:

# Toy inputs: two solid-colour 128x128 images and two text queries.
images = [Image.new("RGB", (128, 128), color=fill) for fill in ("white", "black")]
queries = [
    "Is attention really all you need?",
    "What is the amount of bananas farmed in Salvador?",
]

# Preprocess both modalities and move the batches onto the model's device.
batch_images = processor.process_images(images).to(model.device)
batch_queries = processor.process_queries(queries).to(model.device)

# Embed images and queries with gradient tracking disabled.
with torch.no_grad():
    image_embeddings = model(**batch_images)
    query_embeddings = model(**batch_queries)

# Multi-vector scores between the query embeddings and the image embeddings.
scores = processor.score_multi_vector(query_embeddings, image_embeddings)
