# Setup LangSmith API
Retrievals can be traced here for easier debugging.

In [5]:
import os
import ollama
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGSMITH_API_KEY'] = '123-123-213-123-123-123'

![LangGraph Flow](../../langgraph%20designs/graph_design_v1.png)

# Graph States

In [17]:
from typing_extensions import TypedDict
from typing import Optional, List, Dict, Any

class GraphState(TypedDict):
    # Core user input
    text_query: str
    image_path: Optional[str]  # Path to uploaded image, if any

    # Routing/intent
    query_type : str # currently being divide into 'emergency'/'Q&A'/'irrelevant'.
    
    # Q&A path
    refined_query: Optional[str]
    sub_queries: Optional[List[str]]
    current_sub_query: Optional[str]
    retrieved_docs: Optional[List[Dict[str, Any]]]  # Results from retrieval
    reranked_docs: Optional[List[Dict[str, Any]]]   # After rerank step

    # Feedback loop
    followup_questions: Optional[List[str]]
    user_responses: Optional[List[str]]
    loop_count: int
    hypotheses: Optional[List[str]]  # Current working hypotheses
    next_action: Optional[str]       # What the agent plans to do next ("ask_user", "retrieve", "final_answer", etc.)
    pending_question: Optional[str]  # If the agent wants to ask the user something
    pending_action: Optional[str]    # If the agent wants to perform a tool/action
    user_actions: Optional[List[str]] # Actions the user has taken (e.g., "smelled ear", "provided photo")
    intermediate_thoughts: Optional[List[str]] # Chain-of-thought or reasoning steps

    # Answer generation
    generated_answer: Optional[str]
    hallucination_check: Optional[bool]
    answer_sufficient: Optional[bool]

    # Emergency path
    emergency_instructions: Optional[str]
    emergency_retrieved_docs: Optional[List[Dict[str, Any]]]

    # Web search
    web_search_results: Optional[List[Dict[str, Any]]]

    # Final output
    final_answer: Optional[str]

    # Misc/trace/debug
    path_taken: Optional[List[str]]
    error: Optional[str]

<h1> Graph Nodes

## Query Handler Node 

Before LLM analyze user query and image, it will be assessed with "Is this veterinary-related?". This will ensure our AI tool will not be used for other purpose.

In [33]:
def query_handler(state):
    text_query = state.get("text_query", "")
    image_path = state.get("image_path", None)

    prompt = (
        "You are a domain classifier for a veterinary assistant. "
        "If an image is provided, understand the image from veterinary point of view."
        "A user query is the combination of text query and image(if there is). "
        "Then, classify the user query into one of three categories:\n"
        "1. 'emergency' — If the user query is about a veterinary emergency (e.g., mass bleeding, serious bone fracture, unconsciousness, severe breathing difficulty, or other life-threatening situations).\n"
        "2. 'Q&A' — If the user query is about is about general veterinary questions, symptom checks, or non-emergency animal health issues.\n\n"
        "3. 'irrelevant' — If the user query is NOT about veterinary, animal health, pet care, etc.\n"
        "Your response must be exactly one of: 'irrelevant', 'emergency', or 'Q&A'. Do not explain your answer or add anything else.\n\n"
        f"User input: {text_query}\n"
    )

    messages = [{
        "role": "user",
        "content": prompt,
        "images": []
    }]

    if image_path and os.path.exists(image_path):
        messages[0]["images"].append(image_path)

    response = ollama.chat(
        model="minicpm-v:8b",
        messages=messages,
        options={"temperature": 0.2}
    )
    result = response['message']['content'].strip().lower()
    # Only allow the three valid outputs
    if result not in ['irrelevant', 'emergency', 'q&a']:
        result = 'irrelevant'
    
    return {"query_type": result}

### test

In [34]:
def test_query_handler_node(query_handler, test_query, image_path=None):
    # Build the initial state
    state = {
        "text_query": test_query,
        "image_path": image_path
    }
    # Call the query handler node
    new_state = query_handler(state)
    # Print the results
    print("Input Query:", test_query)
    if image_path:
        print("Image Path:", image_path)
    print("Updated State:", new_state)
    print("Query Type:", new_state.get("query_type", "N/A"))
    print("-" * 40)

# --- Example usage ---
test_query_handler_node(query_handler, "What vaccines does my cat need?")
test_query_handler_node(query_handler, "My cat is bleeding a lot after being hit by a car.")
test_query_handler_node(query_handler, "How do I fix my car engine?")
test_query_handler_node(query_handler, "What should I do?", image_path="../emergency_cat.jpg")
test_query_handler_node(query_handler, "What should I feed to this cat?", image_path="../skinny_cat.jpg")


Input Query: What vaccines does my cat need?
Updated State: {'text_query': 'What vaccines does my cat need?', 'image_path': None, 'query_type': 'q&a'}
Query Type: q&a
----------------------------------------
Input Query: My cat is bleeding a lot after being hit by a car.
Updated State: {'text_query': 'My cat is bleeding a lot after being hit by a car.', 'image_path': None, 'query_type': 'emergency'}
Query Type: emergency
----------------------------------------
Input Query: How do I fix my car engine?
Updated State: {'text_query': 'How do I fix my car engine?', 'image_path': None, 'query_type': 'irrelevant'}
Query Type: irrelevant
----------------------------------------
Input Query: What should I do?
Image Path: ../emergency_cat.jpg
Updated State: {'text_query': 'What should I do?', 'image_path': '../emergency_cat.jpg', 'query_type': 'emergency'}
Query Type: emergency
----------------------------------------
Input Query: What should I feed to this cat?
Image Path: ../skinny_cat.jpg
Up

# Q&A Path

## Query Refinement

In [14]:
def get_image_summary(image_path):
    prompt = """From a feline veterinary stand point, provide a highly detailed and objective 
                description of the image. Focus on all observable elements, actions, 
                objects, subjects, their attributes (e.g., color, size, texture), 
                their spatial relationships, and any discernible context or implied scene. 
                Also focus on all possible health issue.
                Describe any text present in the image. This description must be exhaustive 
                and purely factual, capturing every significant visual detail to serve as a 
                comprehensive textual representation for further analysis by another AI model. 
                If the image is entirely irrelevant or contains no discernible subject, 
                state "No relevant visual information."""
    messages = [{
        "role": "user",
        "content": prompt,
        "images": [image_path]
    }]
    response = ollama.chat(
        model="minicpm-v:8b",
        messages=messages,
        options={"temperature": 0.2}
    )
    return response['message']['content']

def query_refinement_node(state):
    text_query = state.get("text_query", "")
    image_path = state.get("image_path", None)
    image_summary = get_image_summary(image_path) if image_path else ""

    if image_summary:
        prompt = (
            "You are a veterinary assistant AI. Your task is to rewrite and expand the user's query for a veterinary knowledge base search. "
            "You are NOT being asked to provide medical advice, diagnosis, or treatment recommendations. "
            "Your job is to help formulate a search query that could retrieve relevant veterinary information for a veterinarian or pet owner. "
            "Use the image description to add context, but avoid making assumptions about the specific diagnosis or underlying causes unless explicitly stated. "
            "Frame the refined query in an open-ended, unbiased way, considering a broad range of possible causes, diagnostic steps, and management options. "
            "If the user describes symptoms, include them factually. "
            "Do not presume the animal's overall health status or limit the query to only the most common conditions. "
            "Output ONLY one single, context-rich, and unbiased query as a paragraph, and nothing else.\n\n"
            f"User query: {text_query}\n"
            f"Image description: {image_summary}\n"
            "Refined query:"
        )
    else:
        prompt = (
            f"You are a veterinary assistant AI. Your task is to rewrite and expand the user's query for a veterinary knowledge base search. "
            f"Consider add questions about possible causes, diagnostic considerations, anything that would be helpful in the situation, but combine everything into a single, comprehensive question or query. "
            f"Output ONLY one single, context-rich query as a paragraph, and nothing else.\n\n"
            f"User query: {text_query}\n"
            f"Refined query:"
        )

    messages = [{
        "role": "user",
        "content": prompt
    }]
    response = ollama.chat(
        model="llama3.2:3b",  # or another strong text model
        messages=messages,
        options={"temperature": 0.3}
    )
    return {"refined_query": response['message']['content']}

### test

In [15]:
def test_query_refinement_node(query_refinement_node, test_query, image_path=None):
    # Build the initial state
    state = {
        "text_query": test_query,
        "image_path": image_path
    }
    # Call the query refinement node
    new_state = query_refinement_node(state)
    # Print the results
    print("Input Query:", test_query)
    if image_path:
        print("Image Path:", image_path)
    print("Refined Query:", new_state.get("refined_query", "N/A"))
    print("-" * 40)

# --- Example usage ---
test_query_refinement_node(query_refinement_node, "What happened to my cat ear? It's being it for a long time. Sometimes I even see blood and wounds in its ear. ", image_path="../cat_ear_problem.jpeg")
test_query_refinement_node(query_refinement_node, "My cat has being scratching its ear too often. There are some dark greasy thing in it. It sratch its ear so often and so hard that I see wounds and blood in it. What should I do?")

Input Query: What happened to my cat ear? It's being it for a long time. Sometimes I even see blood and wounds in its ear. 
Image Path: ../cat_ear_problem.jpeg
Refined Query: What are possible causes and diagnostic steps for chronic ear infections or otitis in cats, characterized by visible brownish-orange debris, discharge, and wounds in the ear canal, potentially accompanied by signs of infection such as redness, swelling, and bleeding, and how can these conditions be distinguished from other potential health issues that may affect a cat's auditory system?
----------------------------------------
Input Query: My cat has being scratching its ear too often. There are some dark greasy thing in it. It sratch its ear so often and so hard that I see wounds and blood in it. What should I do?
Refined Query: My cat is exhibiting excessive ear scratching, resulting in visible wounds and bleeding due to the presence of dark, greasy debris, which may indicate a skin infection or allergies; what 

## Query Decomposition

In [17]:
import ollama
import json
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import JsonOutputParser


def query_decomposition(state):
    refined_query = state['refined_query']

    query_decomposition_prompt = ChatPromptTemplate.from_template(
    """You are an intelligent assistant. Your task is to break down the given complex query
    into a list of simpler, focused sub-queries. Each sub-query should be a standalone question
    that can be used to retrieve specific information from a veterinary knowledge base.

    At the end of your list, add 2-3 additional sub-queries that specifically require visual information or images.
    For example, you might add:
    - "Show me an image of how to pick up a cat."
    - "Show me an image of how to do CPR for a cat."
    - "Show me a diagram of feline anatomy."
    Be creative and make sure these visual sub-queries are relevant to the original complex query.

    Output ONLY a valid JSON array of strings, and nothing else. Do not include any explanations, markdown, or extra text.

    Complex query: {refined_query}
    """
    )

    # Create the query decomposition chain
    query_decomposition_chain = (
        query_decomposition_prompt  
        | ChatOllama(model="llama3.2:3b")  
        | JsonOutputParser() 
    )

    # --- Demonstration of query decomposition ---

    print(f"Original refined query: {refined_query[:300]} ....")

    decomposed_queries = query_decomposition_chain.invoke({"refined_query": refined_query})
    # Try to extract the JSON array from the response

    print("-" * 80)
    # print(f"Decomposed queries:\n{decomposed_queries}")

    print(f"There are {len(decomposed_queries)} queries after decomposition \n")
    print(f"Here's a example of the first one: {decomposed_queries[0]}")

    return {"sub_queries": decomposed_queries}

### Test

In [18]:
def test_query_decomposition(query_decomposition_func, refined_query):
    # Build the initial state
    state = {
        "refined_query": refined_query
    }
    # Call the query decomposition function
    new_state = query_decomposition_func(state)
    # Print the results
    print("Decomposed Sub-Queries:")
    print(new_state['sub_queries'])

# --- Example usage ---
test_query_decomposition(
    query_decomposition,
    "What are possible causes and diagnostic steps for chronic ear infections or otitis in cats, characterized by visible brownish-orange debris, discharge, and wounds in the ear canal, potentially accompanied by signs of infection such as redness, swelling, and bleeding, and how can these conditions be distinguished from other potential health issues that may affect a cat's auditory system?"
)

Original refined query: What are possible causes and diagnostic steps for chronic ear infections or otitis in cats, characterized by visible brownish-orange debris, discharge, and wounds in the ear canal, potentially accompanied by signs of infection such as redness, swelling, and bleeding, and how can these conditions be  ....


Failed to multipart ingest runs: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Forbidden"}\n')


--------------------------------------------------------------------------------
There are 8 queries after decomposition 

Here's a example of the first one: What are common causes of chronic ear infections or otitis in cats?
Decomposed Sub-Queries:
['What are common causes of chronic ear infections or otitis in cats?', 'Describe diagnostic steps for detecting chronic ear infections or otitis in cats', 'What is the significance of visible brownish-orange debris, discharge, and wounds in the ear canal in cats?', 'How can redness, swelling, and bleeding be distinguished from other feline health issues affecting the auditory system?', 'What are potential complications if left untreated?', 'Show me an image of a cat with signs of otitis externa', "Show me an image of how to properly clean a cat's ear canal", 'Describe the anatomy of the feline ear canal']


Failed to send compressed multipart ingest: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Forbidden"}\n')


## Contextual Retrievals

Based on decomposed sub queries, we are able to retrieve contexutally close aligned Documents from the vector database. 

### Setup Unified Retriever (Retrieve text, table, images)

In [28]:
from langchain_experimental.open_clip import OpenCLIPEmbeddings
from langchain_chroma import Chroma
from unified_retriever import UnifiedRetriever

def init_retriever():

    persist_directory = '../../chroma/Ears'
    id_key = "doc_id"

    open_clip_embeddings = OpenCLIPEmbeddings(model_name="ViT-g-14", checkpoint="laion2b_s34b_b88k")

    # Vectorstore for summaries (for similarity search)
    vectorstore = Chroma(
        collection_name="summaries_and_images",
        persist_directory=persist_directory,
        embedding_function=open_clip_embeddings
    )
    # Persistent docstore for originals (all modalities)
    docstore = Chroma(
        collection_name="originals",
        persist_directory=persist_directory,
        embedding_function=open_clip_embeddings
    )

    retriever = UnifiedRetriever(vectorstore, docstore, id_key=id_key)
    return retriever

### Retrieval

In [33]:
# Assume decomposed_queries is a list of query strings
# and retriever is already instantiated

seen_doc_ids = set()
all_results = []
retriever = init_retriever()

def contextual_retrieval_flat(state):
    seen_doc_ids = set()
    unique_docs = []
    retriever = init_retriever() 

    for query in state['sub_queries']:
        results = retriever.retrieve_multi_modal(query, k=5, )
        for res in results:
            doc_id = res.get('doc_id') or res.get('summary_metadata', {}).get('doc_id')
            if doc_id and doc_id not in seen_doc_ids:
                seen_doc_ids.add(doc_id)
                unique_docs.append(res)
    print(f"Total unique documents retrieved: {len(unique_docs)}")
    return {"retrieved_docs": unique_docs}

### test

In [38]:
import copy

def test_contextual_retrieval(sub_queries):
    global test_retrived_doc

    test_state = {
        "sub_queries": sub_queries
    }

    # Use the flat contextual retrieval function
    new_state = contextual_retrieval_flat(test_state)
    unique_docs = new_state["retrieved_docs"]
    print("\nSample of unique retrieved docs:")
    for i, doc in enumerate(unique_docs):
        doc_id = doc.get('doc_id') or doc.get('summary_metadata', {}).get('doc_id')
        print(f"Doc {i}:")
        print(f"  Doc ID: {doc_id}")
        # Check if this is an image context doc
        if doc_id and doc_id.endswith('_context'):
            image_path = doc.get('summary_metadata', {}).get('image_path')
            print(f"  [IMAGE CONTEXT] Points to image file: {image_path}")
        print(f"  Type: {(doc.get('original_metadata') or {}).get('type')}")
        print(f"  Score: {doc.get('score')}")
        print(f"  Summary: {doc.get('summary')[:100]}...")
        print(f"  Original: {str(doc.get('original'))[:100]}...")
        print("-" * 40)
    print(f"Total unique docs retrieved: {len(unique_docs)}")
    
    test_retrived_doc = copy.deepcopy(unique_docs)

# Example usage:
test_contextual_retrieval(
   ['What are common causes of chronic ear infections or otitis in cats?', 'Describe diagnostic steps for detecting chronic ear infections or otitis in cats', 'What is the significance of visible brownish-orange debris, discharge, and wounds in the ear canal in cats?', 'How can redness, swelling, and bleeding be distinguished from other feline health issues affecting the auditory system?', 'What are potential complications if left untreated?', 'Show me an image of a cat with signs of otitis externa', "Show me an image of how to properly clean a cat's ear canal", 'Describe the anatomy of the feline ear canal']
)

Total unique documents retrieved: 26

Sample of unique retrieved docs:
Doc 0:
  Doc ID: 19b09196-5c83-493e-b0c2-f2a932daec2f
  Type: image
  Score: 1.1225041151046753
  Summary: ./figures/Ears/figure-2-2.jpg...
  Original: None...
----------------------------------------
Doc 1:
  Doc ID: 5c1b30a0-1ee7-4cf6-9739-449565ffaebe_context
  [IMAGE CONTEXT] Points to image file: None
  Type: image_summary
  Score: 1.1118836402893066
  Summary: The provided local text indicates that this image is part of an educational or informative series fo...
  Original: None...
----------------------------------------
Doc 2:
  Doc ID: cc9ca569-c024-4987-8679-3a64d478b74a_context
  [IMAGE CONTEXT] Points to image file: None
  Type: image_summary
  Score: 1.0678699016571045
  Summary: The image shows the ear of a cat displaying signs of hematoma, which is characterized by swelling an...
  Original: None...
----------------------------------------
Doc 3:
  Doc ID: c97612b4-4a14-4333-b828-59ef2e6d20e8_context


## ReRank

Retrievals returns docs with high similarities based on cosine-similarity. However, we do need to re-rank their improtance on contexual level.

### Getting image, image_summary pair

In [None]:
# truly multimodel [monoqwen], use here If running on Nvidia GPU Machine
# pip install "rerankers[monovlm]" qwen-vl-utils transformers
from rerankers import MonoQwen2VLReranker

def rerank_node_monoqwen(state):
    query = state['refined_query']
    candidates = state['retrieved_docs']

    # Prepare candidates for reranker
    rerank_inputs = []
    for doc in candidates:
        if doc.get("modality") == "text":
            rerank_inputs.append(doc["summary"])
        elif doc.get("modality") in ("image", "image_summary"):
            # Use image path if available, else fallback to summary
            image_path = doc.get("original_metadata", {}).get("image_path")
            if image_path:
                rerank_inputs.append(image_path)
            else:
                rerank_inputs.append(doc["summary"])
        else:
            rerank_inputs.append(doc["summary"])

    # Rerank
    from rerankers import MonoQwen2VLReranker
    reranker = MonoQwen2VLReranker.from_pretrained("Qwen/MonoQwen2-VL-v0.1")
    results = reranker.rerank(query, rerank_inputs, top_k=len(rerank_inputs))

    # Attach scores and sort
    for (idx, score) in results:
        candidates[idx]['rerank_score'] = float(score)
    reranked = sorted(candidates, key=lambda x: x.get('rerank_score', 0), reverse=True)
    return {"reranked_docs": reranked}

In [68]:
#Jina Reranker m0. GPU/CPU, but extremly slow in CPU
import base64
import os
from transformers import AutoModel


def image_to_base64(image_path):
    with open(image_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode("utf-8")

def rerank_node_jina_vlm(state):
    query = state['refined_query']
    candidates = state['retrieved_docs']

    documents = []
    doc_types = []
    for doc in candidates:
        if doc.get("modality") in ("image", "image_summary"):
            image_path = doc.get("original_metadata", {}).get("image_path")
            if image_path and os.path.exists(image_path):
                documents.append(image_to_base64(image_path))
                doc_types.append("image")
            else:
                documents.append(doc["summary"])
                doc_types.append("text")
        else:
            documents.append(doc["summary"])
            doc_types.append("text")

    pairs = [[query, doc] for doc in documents]

    model = AutoModel.from_pretrained(
        'jinaai/jina-reranker-m0',
        torch_dtype="auto",
        trust_remote_code=True,
    )
    model.to('cpu')
    model.eval()

    # If most docs are images, use doc_type="image", else "text"
    n_images = doc_types.count("image")
    n_texts = doc_types.count("text")
    doc_type = "image" if n_images > n_texts else "text"

    # If mixed, filter and rerank separately, then merge (advanced)
    # For now, just use the dominant type
    scores = model.compute_score(pairs, max_length=2048, doc_type=doc_type)

    for doc, score in zip(candidates, scores):
        doc['rerank_score'] = float(score)
    reranked = sorted(candidates, key=lambda x: x.get('rerank_score', 0), reverse=True)
    return {"reranked_docs": reranked}

In [83]:
# Hybrid Method: CrossEncoder for text, VLM for image.
from sentence_transformers import CrossEncoder
import os
import ollama
def llm_image_relevance_score(query, image_path, image_summary=None):
    """
    Use Ollama (minicpm-v:8b) to rate the relevance of an image to the query.
    Passes the image file and, if available, the image summary.
    Returns a float score between 0 and 1.
    """
    prompt = f"""
    You are a veterinary assistant AI. Given the following user query and an image, rate how relevant the image is to answering the query.
    - User Query: "{query}"
    """
    if image_summary:
        prompt += f'- Image Summary: "{image_summary}"\n'
    prompt += "Respond with a single float between 0 (not relevant at all) and 1 (highly relevant). Only output the number, nothing else."

    try:
        messages = [{"role": "user", "content": prompt}]
        if image_path and os.path.exists(image_path):
            messages[0]["images"] = [image_path]
        response = ollama.chat(
            model="minicpm-v:8b",
            messages=messages,
            options={"temperature": 0.0}
        )
        content = response['message']['content'].strip()
        score = float(content.split()[0])
        score = max(0.0, min(1.0, score))
        return score
    except Exception as e:
        print(f"[llm_image_relevance_score] Error: {e}. Query: {query[:50]}... Image: {image_path}... Summary: {str(image_summary)[:50]}...")
        return 0.0

def rerank_node_hybrid_v2(state):
    query = state['refined_query']
    candidates = state['retrieved_docs']

    text_indices = []
    text_contents = []
    image_indices = []
    image_info = []

    for idx, doc in enumerate(candidates):
        modality = doc.get("modality") or (doc.get("original_metadata") or {}).get("type")
        if modality == "text":
            text_indices.append(idx)
            doc_id = (doc.get("original_metadata") or {}).get("doc_id") or doc.get("doc_id")
            # Fetch the original document from the docstore
            docstore = retriever.docstore
            original_text = None
            if doc_id and docstore:
                try:
                    original = docstore._collection.get(ids=[doc_id], include=["documents"])
                    original_text = original["documents"][0] if original["documents"] else None
                    print(f"for doc_id :{doc_id}, the original text: {original_text}")
                except Exception as e:
                    print(f"[rerank_node_hybrid_v2] Error fetching original text for doc_id {doc_id}: {e}")
            if not original_text:
                original_text = doc.get("summary", "")
            text_contents.append(original_text)
        elif modality == "image":
            # Use the image file for VLM
            image_path = (doc.get("original_metadata") or {}).get("image_path")
            image_summary = (doc.get("original_metadata") or {}).get("summary", "")
            image_indices.append(idx)
            image_info.append((image_path, image_summary if image_summary else None))
        elif modality == "image_summary":
            # Trace to the image file if possible
            image_path = (doc.get("original_metadata") or {}).get("image_path")
            image_summary = doc.get("summary", "")
            image_indices.append(idx)
            image_info.append((image_path, image_summary))
        else:
            # Fallback: treat as text
            text_indices.append(idx)
            text_contents.append(doc.get("summary", ""))

    # 1. Rerank text docs
    if text_contents:
        model = CrossEncoder("BAAI/bge-reranker-base")
        pairs = [(query, text) for text in text_contents]
        scores = model.predict(pairs)
        for idx, score in zip(text_indices, scores):
            candidates[idx]['rerank_score'] = float(score)

    # 2. Rerank images (and image summaries) with VLM
    for idx, (image_path, image_summary) in zip(image_indices, image_info):
        score = llm_image_relevance_score(query, image_path, image_summary)
        candidates[idx]['rerank_score'] = float(score)

    # 3. Sort all by rerank_score
    reranked = sorted(candidates, key=lambda x: x.get('rerank_score', 0), reverse=True)
    return {"reranked_docs": reranked}

## test

In [None]:
def test_rerank_node(rerank_node, refined_query, retrieved_docs, top_n=5):
    # Build the state as expected by rerank_node
    state = {
      "refined_query": refined_query,
      "retrieved_docs": retrieved_docs,
      "docstore": retriever.docstore  # or whatever your docstore object is
     }
    # Call the rerank node
    new_state = rerank_node(state)
    global reranked_docs
    reranked_docs = new_state.get("reranked_docs", [])
    print(f"Total docs after reranking: {len(reranked_docs)}")
    print(f"Top {top_n} reranked docs (by rerank_score):")
    for i, doc in enumerate(reranked_docs[:top_n]):
        doc_id = doc.get('doc_id') or (doc.get('original_metadata') or {}).get('doc_id')
        modality = (doc.get('original_metadata') or {}).get('type') or doc.get('modality')
        print(f"Doc {i}:")
        print(f"  Doc ID: {doc_id}")
        print(f"  Type: {modality}")
        print(f"  Rerank Score: {doc.get('rerank_score')}")
        print(f"  Summary: {doc.get('summary')[:100]}...")
        print("-" * 40)
    # Optionally, check that scores are sorted descending
    scores = [doc.get('rerank_score') for doc in reranked_docs if doc.get('rerank_score') is not None]
    if scores and scores == sorted(scores, reverse=True):
        print("PASS: Docs are sorted by rerank_score descending.")
    else:
        print("FAIL: Docs are not sorted correctly or scores are missing.")

# Example usage:
refined_query = "What are possible causes and diagnostic steps for chronic ear infections or otitis in cats, characterized by visible brownish-orange debris, discharge, and wounds in the ear canal, potentially accompanied by signs of infection such as redness, swelling, and bleeding, and how can these conditions be distinguished from other potential health issues that may affect a cat's auditory system"
test_rerank_node(rerank_node_hybrid_v2, refined_query, test_retrived_doc)
reranked_docs

In [85]:
def display_top_10_imgs_and_texts(reranked_docs, docstore):
    print("Top 10 Images and Original Texts:\n")
    count = 0
    for doc in reranked_docs:
        if count >= 10:
            break
        modality = (doc.get('original_metadata') or {}).get('type') or doc.get('modality')
        doc_id = doc.get('doc_id') or (doc.get('original_metadata') or {}).get('doc_id')
        score = doc.get('rerank_score')
        print(f"Doc {count}:")
        print(f"  Doc ID: {doc_id}")
        print(f"  Type: {modality}")
        print(f"  Rerank Score: {score}")
        if modality == "text":
            # Fetch original text from docstore
            original_text = None
            if doc_id and docstore:
                try:
                    original = docstore._collection.get(ids=[doc_id], include=["documents"])
                    original_text = original["documents"][0] if original["documents"] else None
                except Exception as e:
                    print(f"    [Error fetching original text for doc_id {doc_id}: {e}]")
            if not original_text:
                original_text = doc.get("summary", "")
            print("  Original Text:")
            print(f"    {original_text[:500]}{'...' if len(original_text) > 500 else ''}")
        elif modality in ("image", "image_summary"):
            image_path = (doc.get("original_metadata") or {}).get("image_path")
            print(f"  Image Path: {image_path}")
            print("  Image Summary:")
            print(f"    {doc.get('summary', '')[:500]}{'...' if len(doc.get('summary', '')) > 500 else ''}")
        else:
            print("  [Unknown modality]")
        print("-" * 60)
        count += 1

# Example usage:
display_top_10_imgs_and_texts(reranked_docs, retriever.docstore)

Top 10 Images and Original Texts:

Doc 0:
  Doc ID: 528a70c2-4030-4712-abf1-8dc979f61282
  Type: text
  Rerank Score: 0.9286244511604309
  Original Text:
    BACTERIAL OTITIS EXTERNA Bacterial infections in the ear canal are frequently caused by scratches to the skin or cat bites. Some begin in an ear canal that contains excessive amounts of wax, cellular debris, or foreign material. Ear mite infections are often the cause of bacterial otitis. Signs of an infected ear canal are shaking the head, scratching at the affected ear, and an unpleasant odor. The cat may tilt or carry her head down on the painful side and exhibit tenderness when the ear is to...
------------------------------------------------------------
Doc 1:
  Doc ID: 4f7562ff-0fb2-49b0-be9b-b7f4afe42ba2
  Type: text
  Rerank Score: 0.8561280369758606
  Original Text:
    EAR POLYPS Ear polyps are growths that are primarily seen in cats between 1 and 4 years of age, although cats of any age can develop them. They may be rel

# Thinking Node

This step is to take all on-hand info and reranked doc to make analysis. Think about user's intent, what they want to know, what they need to know, also what AI need to know.

In [91]:
def thinking_node(state):
    """
    Given the current state, use Qwen3 to reason step-by-step about how to answer the user's question.
    Updates state with intermediate thoughts, hypotheses, and next_action if more info or tools are needed.
    """
    user_query = state.get("text_query", "")
    image_summary = state.get("image_summary", "")
    retrieved_docs = state.get("reranked_docs") or state.get("retrieved_docs") or []

    # Build the prompt
    prompt = (
    "You a veterinary assistant AI, here to help you with user pet's health. "
    "User is a pet owner with little or no veterinary knowledge. "
    "You will explain everything in simple, easy-to-understand language, and only suggest actions user can do at home without professional equipment or skills. "
    "If you think the pet's situation might be serious or require professional care, you will remind user to make an appointment with a veterinarian. "
    "you will not suggest advanced tests like blood, stool, or urine analysis, or anything that requires professional tools. "
    "First, you will analyze the information and outline the steps you would take to answer user question comprehensively. "
    "If you need more information, you will specify what you need and which tool to use. "
    "At the end, you must always choose my next action from this list: [retrieve more info, interpret a new image, ask the user a question, ready to answer]. "
    "you will output it clearly as: Next action: <one of the options>. "
    "you will speak to user directly, as your veterinary assistant, using 'I' and 'you'.\n\n"
    f"User question: {user_query}\n"
    )

    if image_summary:
        prompt += f"Image summary: {image_summary}\n"
    prompt += "Relevant information from veterinary handbook:\n"
    for i, doc in enumerate(retrieved_docs):
        modality = doc.get('modality') or (doc.get('original_metadata') or {}).get('type')
        if modality == 'image':
            summary = (doc.get('original_metadata') or {}).get('summary', '')
        else:
            summary = doc.get('summary', '')
        prompt += f"{i+1}. {summary}\n"
    prompt += "\nStep-by-step reasoning and next actions:"

    # Call Qwen3 via Ollama
    messages = [{"role": "user", "content": prompt}]
    response = ollama.chat(
        model="qwen3:8b",  # or another Qwen3 variant
        messages=messages,
        options={"temperature": 0.2}
    )
    llm_output = response['message']['content']
    print("✨Thinking Node✨: ", llm_output, "\n")

    # Store the LLM output as an intermediate thought
    state["intermediate_thoughts"] = state.get("intermediate_thoughts", []) + [llm_output]

    # Try to extract next_action from the output (simple heuristic, can be improved)
    if "retrieve more info" in llm_output.lower():
        state["next_action"] = "retrieve_more_info"
    elif "interpret a new image" in llm_output.lower() or "interpret new image" in llm_output.lower():
        state["next_action"] = "interpret_image"
    elif "ask the user" in llm_output.lower() or "ask user" in llm_output.lower():
        state["next_action"] = "ask_user"
    else:
        state["next_action"] = None

    # Optionally, extract hypotheses if the LLM lists them
    if "hypothesis" in llm_output.lower():
        state["hypotheses"] = [llm_output]

    return state

## Tools

In [None]:
def book_retrieval_for_more(query, retriever, k=5):
    print("🔩Book Retriever Tool Called! 🔧")

def interpret_image(image_path, model = "minicpm-v:8b"):
    print("📷Image Interpreter Tool Called! ⛰️")


In [92]:
def test_thinking_node():
    """
    Test the thinking_node with a mock state including text query, optional image summary, and mixed retrieved_docs.
    Prints the updated state, intermediate thoughts, and next_action.
    """
    # Example mock state
    mock_state = {
        "text_query": "What are possible causes and diagnostic steps for chronic ear infections or otitis in cats, characterized by visible brownish-orange debris, discharge, and wounds in the ear canal, potentially accompanied by signs of infection such as redness, swelling, and bleeding, and how can these conditions be distinguished from other potential health issues that may affect a cat's auditory system?",
        # Uncomment the next line to test with an image summary
        # "image_summary": "The image shows a cat's ear with visible dark debris and some redness.",
        "reranked_docs": reranked_docs
    }
    print("--- Before thinking_node ---")
    print({k: v for k, v in mock_state.items() if k != 'reranked_docs'})
    print("retrieved_docs count:", len(mock_state["reranked_docs"]))
    print()
    updated_state = thinking_node(mock_state)
    print("--- After thinking_node ---")
    print({k: v for k, v in updated_state.items() if k != 'reranked_docs'})
    print("Intermediate thoughts:")
    for thought in updated_state.get("intermediate_thoughts", []):
        print(thought)
    print("Next action:", updated_state.get("next_action"))

# Example usage:
test_thinking_node()

--- Before thinking_node ---
{'text_query': "What are possible causes and diagnostic steps for chronic ear infections or otitis in cats, characterized by visible brownish-orange debris, discharge, and wounds in the ear canal, potentially accompanied by signs of infection such as redness, swelling, and bleeding, and how can these conditions be distinguished from other potential health issues that may affect a cat's auditory system?"}
retrieved_docs count: 26

✨Thinking Node✨:  <think>
Okay, let's tackle this user's question about chronic ear infections in cats. The user is a pet owner with little veterinary knowledge, so I need to explain things simply. They want to know possible causes, diagnostic steps, and how to distinguish these issues from other health problems affecting the auditory system.

First, I should recall the information from the veterinary handbook provided. The key points are about bacterial otitis, ear polyps, yeast infections, otitis media/interna, and other conditio