# Setup LangSmith API
Retrievals can be traced here for easier debugging.

In [16]:
import getpass
import os

import ollama
# LangSmith tracing configuration (read by LangChain/LangSmith at call time).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# SECURITY: never store the API key in the notebook. It is taken from the
# environment when already set, otherwise asked for interactively so that it
# never ends up in the saved file.
# NOTE(review): a key was previously committed in this cell - revoke it in LangSmith.
if not os.environ.get('LANGSMITH_API_KEY'):
    os.environ['LANGSMITH_API_KEY'] = getpass.getpass('LangSmith API key: ')

![LangGraph Flow](../../langgraph%20designs/graph_design_v1.png)

# Graph States

In [17]:
from typing_extensions import TypedDict
from typing import Optional, List, Dict, Any

# Shape shared by every "list of retrieved documents" field of the state.
_DocList = Optional[List[Dict[str, Any]]]


class GraphState(TypedDict):
    """State dictionary handed from node to node in the graph."""

    # --- User input ---
    text_query: str
    image_path: Optional[str]  # location of the uploaded image, None when absent

    # --- Routing / intent ---
    # One of 'emergency' / 'Q&A' / 'irrelevant'
    # (query_handler stores the label lower-cased, e.g. 'q&a').
    query_type: str

    # --- Q&A path ---
    refined_query: Optional[str]
    sub_queries: Optional[List[str]]
    current_sub_query: Optional[str]
    retrieved_docs: _DocList  # output of the retrieval step
    reranked_docs: _DocList   # output of the rerank step

    # --- Feedback loop ---
    followup_questions: Optional[List[str]]
    user_responses: Optional[List[str]]
    loop_count: int

    # --- Answer generation ---
    generated_answer: Optional[str]
    hallucination_check: Optional[bool]
    answer_sufficient: Optional[bool]

    # --- Emergency path ---
    emergency_instructions: Optional[str]
    emergency_retrieved_docs: _DocList

    # --- Web search ---
    web_search_results: _DocList

    # --- Final output ---
    final_answer: Optional[str]

    # --- Misc / trace / debug ---
    path_taken: Optional[List[str]]
    error: Optional[str]

# Graph Nodes

## Query Handler Node 

Before the LLM analyzes the user query and image, the input is first assessed with the question "Is this veterinary-related?". This ensures that our AI tool is not used for other purposes.

In [33]:
def query_handler(state):
    """Classify the user's query as 'emergency', 'q&a' or 'irrelevant'.

    The text query (plus the image at ``image_path`` when that file exists) is
    sent to the ``minicpm-v:8b`` model through ``ollama.chat``. The reply is
    normalised and stored in ``state["query_type"]``.

    Args:
        state: Graph state mapping. Reads ``text_query`` (defaults to "") and
            ``image_path`` (defaults to None).

    Returns:
        The same ``state`` object, with ``query_type`` set to one of
        ``'emergency'``, ``'q&a'`` or ``'irrelevant'`` (always lower-case).
        Any reply that cannot be mapped to a label falls back to ``'irrelevant'``.
    """
    valid_labels = ('irrelevant', 'emergency', 'q&a')

    text_query = state.get("text_query", "")
    image_path = state.get("image_path", None)

    prompt = (
        "You are a domain classifier for a veterinary assistant. "
        "If an image is provided, understand the image from veterinary point of view. "
        "A user query is the combination of text query and image(if there is). "
        "Then, classify the user query into one of three categories:\n"
        "1. 'emergency' — If the user query is about a veterinary emergency (e.g., mass bleeding, serious bone fracture, unconsciousness, severe breathing difficulty, or other life-threatening situations).\n"
        "2. 'Q&A' — If the user query is about general veterinary questions, symptom checks, or non-emergency animal health issues.\n\n"
        "3. 'irrelevant' — If the user query is NOT about veterinary, animal health, pet care, etc.\n"
        "Your response must be exactly one of: 'irrelevant', 'emergency', or 'Q&A'. Do not explain your answer or add anything else.\n\n"
        f"User input: {text_query}\n"
    )

    messages = [{
        "role": "user",
        "content": prompt,
        "images": []
    }]

    # Only attach the image when the file is really there; a stale path is ignored.
    if image_path and os.path.exists(image_path):
        messages[0]["images"].append(image_path)

    response = ollama.chat(
        model="minicpm-v:8b",
        messages=messages,
        options={"temperature": 0.2}
    )

    # The prompt itself shows the labels in quotes, so the model frequently answers
    # with 'Emergency'. or "Q&A" - strip that decoration before validating,
    # otherwise a correct answer would silently be downgraded to 'irrelevant'.
    result = response['message']['content'].strip().lower()
    result = result.strip(" \t\r\n'\"`*.,:;!")

    # Only allow the three valid outputs
    if result not in valid_labels:
        result = 'irrelevant'
    state["query_type"] = result
    return state

### test

In [34]:
def test_query_handler_node(query_handler, test_query, image_path=None):
    # Build the initial state
    state = {
        "text_query": test_query,
        "image_path": image_path
    }
    # Call the query handler node
    new_state = query_handler(state)
    # Print the results
    print("Input Query:", test_query)
    if image_path:
        print("Image Path:", image_path)
    print("Updated State:", new_state)
    print("Query Type:", new_state.get("query_type", "N/A"))
    print("-" * 40)

# --- Example usage ---
# (query text, image path or None) pairs: text-only Q&A, text-only emergency,
# an off-topic question, and two queries that come with an image.
handler_examples = [
    ("What vaccines does my cat need?", None),
    ("My cat is bleeding a lot after being hit by a car.", None),
    ("How do I fix my car engine?", None),
    ("What should I do?", "../emergency_cat.jpg"),
    ("What should I feed to this cat?", "../skinny_cat.jpg"),
]
for handler_query, handler_image in handler_examples:
    test_query_handler_node(query_handler, handler_query, image_path=handler_image)


Input Query: What vaccines does my cat need?
Updated State: {'text_query': 'What vaccines does my cat need?', 'image_path': None, 'query_type': 'q&a'}
Query Type: q&a
----------------------------------------
Input Query: My cat is bleeding a lot after being hit by a car.
Updated State: {'text_query': 'My cat is bleeding a lot after being hit by a car.', 'image_path': None, 'query_type': 'emergency'}
Query Type: emergency
----------------------------------------
Input Query: How do I fix my car engine?
Updated State: {'text_query': 'How do I fix my car engine?', 'image_path': None, 'query_type': 'irrelevant'}
Query Type: irrelevant
----------------------------------------
Input Query: What should I do?
Image Path: ../emergency_cat.jpg
Updated State: {'text_query': 'What should I do?', 'image_path': '../emergency_cat.jpg', 'query_type': 'emergency'}
Query Type: emergency
----------------------------------------
Input Query: What should I feed to this cat?
Image Path: ../skinny_cat.jpg
Up

# Q&A Path

## Query Refinement

In [27]:
def get_image_summary(image_path):
    """Ask the vision model for a factual, feline-veterinary description of an image.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        The text content of the ``minicpm-v:8b`` reply, or ``""`` when
        ``image_path`` is empty or does not point to an existing file
        (the same "ignore a missing image" rule that ``query_handler`` applies).
    """
    # Nothing to describe: do not hand a non-existent path to the model.
    if not image_path or not os.path.exists(image_path):
        return ""

    # The closing quote of the sentinel phrase is escaped so that it is part of
    # the prompt; a bare '"' directly before the closing '"""' would be parsed
    # as the end of the string literal and leave the phrase unterminated.
    prompt = """From a feline veterinary stand point, provide a highly detailed and objective 
                description of the image. Focus on all observable elements, actions, 
                objects, subjects, their attributes (e.g., color, size, texture), 
                their spatial relationships, and any discernible context or implied scene. 
                Also focus on all possible health issue.
                Describe any text present in the image. This description must be exhaustive 
                and purely factual, capturing every significant visual detail to serve as a 
                comprehensive textual representation for further analysis by another AI model. 
                If the image is entirely irrelevant or contains no discernible subject, 
                state "No relevant visual information.\""""
    messages = [{
        "role": "user",
        "content": prompt,
        "images": [image_path]
    }]
    response = ollama.chat(
        model="minicpm-v:8b",
        messages=messages,
        options={"temperature": 0.2}
    )
    return response['message']['content']

def query_refinement_node(state):
    """Rewrite the user's query into one context-rich knowledge-base query.

    When ``image_path`` points to an existing file, ``get_image_summary`` is
    called and its description is woven into the prompt; otherwise a text-only
    prompt is used. The prompt is sent to ``llama3.2:3b`` through ``ollama.chat``.

    Args:
        state: Graph state mapping. Reads ``text_query`` (defaults to "") and
            ``image_path`` (defaults to None).

    Returns:
        The same ``state`` object with ``refined_query`` set to the model's
        reply, stripped of surrounding whitespace.
    """
    text_query = state.get("text_query", "")
    image_path = state.get("image_path", None)

    # Same rule as query_handler: an image path that does not exist on disk is
    # ignored instead of being handed to the vision model.
    image_summary = ""
    if image_path and os.path.exists(image_path):
        image_summary = get_image_summary(image_path)

    if image_summary:
        prompt = (
            "You are a veterinary assistant AI. Your task is to rewrite and expand the user's query for a veterinary knowledge base search. "
            "Use the relevant image description to add context, clarify the situation, and incorporate any relevant details or possible causes. "
            # Trailing space added: the next sentence used to be glued on ("query.Combine").
            "Consider add questions about possible causes, diagnostic considerations, anything that would be helpful in the situation, but combine everything into a single, comprehensive question or query. "
            "Combine all information into a single, comprehensive question or query that anticipates what a veterinarian or pet owner would want to know. "
            "Output ONLY one single, context-rich query as a paragraph, and nothing else.\n\n"
            f"User query: {text_query}\n"
            f"Image description: {image_summary}\n"
            "Refined query:"
        )
    else:
        prompt = (
            "You are a veterinary assistant AI. Your task is to rewrite and expand the user's query for a veterinary knowledge base search. "
            "Consider add questions about possible causes, diagnostic considerations, anything that would be helpful in the situation, but combine everything into a single, comprehensive question or query. "
            "Output ONLY one single, context-rich query as a paragraph, and nothing else.\n\n"
            f"User query: {text_query}\n"
            "Refined query:"
        )

    messages = [{
        "role": "user",
        "content": prompt
    }]
    response = ollama.chat(
        model="llama3.2:3b",  # or another strong text model
        messages=messages,
        options={"temperature": 0.3}
    )
    # Strip so stray leading/trailing newlines do not end up in downstream prompts.
    state["refined_query"] = response['message']['content'].strip()
    return state

### test

In [39]:
def test_query_refinement_node(query_refinement_node, test_query, image_path=None):
    # Build the initial state
    state = {
        "text_query": test_query,
        "image_path": image_path
    }
    # Call the query refinement node
    new_state = query_refinement_node(state)
    # Print the results
    print("Input Query:", test_query)
    if image_path:
        print("Image Path:", image_path)
    print("Refined Query:", new_state.get("refined_query", "N/A"))
    print("-" * 40)

# --- Example usage ---
# One query that comes with an image, one text-only query.
refinement_examples = [
    ("What should I feed to this cat?", "../skinny_cat.jpg"),
    ("My cat just passed out, how do I do CPR to my cat?", None),
]
for refinement_query, refinement_image in refinement_examples:
    test_query_refinement_node(query_refinement_node, refinement_query, image_path=refinement_image)

Input Query: What should I feed to this cat?
Image Path: ../skinny_cat.jpg
Refined Query: What are the dietary recommendations for an adult cat with light cream fur, possibly prone to skin conditions due to allergies or parasites, that is currently standing on terracotta-colored tiles in a semi-outdoor environment, showing hesitation to step down from another plane, and has an empty green bowl beside it, suggesting potential changes in its eating habits or health status, considering factors such as grooming habits, environmental exposure, medical history, and the presence of fleas and ticks, with no clear information on its diet or feeding schedule provided?
----------------------------------------
Input Query: My cat just passed out, how do I do CPR to my cat?
Refined Query: My cat suddenly collapsed and lost consciousness, what are the possible underlying causes that could lead to this condition in cats, and more importantly, what are the steps I should take to provide emergency care

## Query Decomposition

In [34]:
import ollama
import json
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import JsonOutputParser


def query_decomposition(state):
    """Split the refined query into focused sub-queries for retrieval.

    Builds a prompt -> ``ChatOllama(model="llama3.2:3b")`` -> ``JsonOutputParser``
    chain, invokes it with ``state['refined_query']`` and stores the cleaned
    result in ``state['sub_queries']``. Progress is printed along the way.

    Args:
        state: Graph state mapping; ``state['refined_query']`` must be present.

    Returns:
        The same ``state`` object with ``sub_queries`` set to a non-empty list of
        non-blank strings. When the model output contains no usable sub-query,
        the list holds the refined query itself so retrieval still has input.

    Raises:
        KeyError: if ``refined_query`` is missing from ``state``.
    """
    refined_query = state['refined_query']

    query_decomposition_prompt = ChatPromptTemplate.from_template(
    """You are an intelligent assistant. Your task is to break down the given complex query
    into a list of simpler, focused sub-queries. Each sub-query should be a standalone question
    that can be used to retrieve specific information from a veterinary knowledge base.

    At the end of your list, add 2-3 additional sub-queries that specifically require visual information or images.
    For example, you might add:
    - "Show me an image of how to pick up a cat."
    - "Show me an image of how to do CPR for a cat."
    - "Show me a diagram of feline anatomy."
    Be creative and make sure these visual sub-queries are relevant to the original complex query.

    Output ONLY a valid JSON array of strings, and nothing else. Do not include any explanations, markdown, or extra text.

    Complex query: {refined_query}
    """
    )

    # Create the query decomposition chain
    query_decomposition_chain = (
        query_decomposition_prompt
        | ChatOllama(model="llama3.2:3b")
        | JsonOutputParser()
    )

    # --- Demonstration of query decomposition ---

    print(f"Original refined query: {refined_query[:300]} ....")

    raw_queries = query_decomposition_chain.invoke({"refined_query": refined_query})

    # The model is asked for a JSON array of strings, but the parser returns
    # whatever JSON the model produced (possibly a dict, nulls, numbers or blank
    # strings). Keep only usable, stripped strings.
    if isinstance(raw_queries, list):
        decomposed_queries = [
            item.strip() for item in raw_queries
            if isinstance(item, str) and item.strip()
        ]
    else:
        decomposed_queries = []

    # Nothing usable came back: fall back to the refined query so that indexing
    # [0] below cannot fail and the retrieval step still has something to search.
    if not decomposed_queries:
        decomposed_queries = [refined_query]

    print("-" * 80)

    print(f"There are {len(decomposed_queries)} queries after decomposition \n")
    print(f"Here's a example of the first one: {decomposed_queries[0]}")

    state['sub_queries'] = decomposed_queries
    return state

### Test

In [40]:
def test_query_decomposition(query_decomposition_func, refined_query):
    # Build the initial state
    state = {
        "refined_query": refined_query
    }
    # Call the query decomposition function
    new_state = query_decomposition_func(state)
    # Print the results
    print("Decomposed Sub-Queries:")
    print(new_state['sub_queries'])

# --- Example usage ---
# A long refined query of the kind the refinement step produces.
decomposition_example_query = "My cat suddenly collapsed and lost consciousness, what are the possible underlying causes that could lead to this condition in cats, and more importantly, what are the steps I should take to provide emergency care, including whether or not cardiopulmonary resuscitation (CPR) is even applicable for cats, how to perform it if necessary, and any diagnostic tests or procedures that should be performed immediately to determine the underlying cause of the collapse, such as blood work, imaging studies, or other diagnostic methods, and what are the signs of successful resuscitation in a cat?"
test_query_decomposition(query_decomposition, decomposition_example_query)

Original refined query: My cat suddenly collapsed and lost consciousness, what are the possible underlying causes that could lead to this condition in cats, and more importantly, what are the steps I should take to provide emergency care, including whether or not cardiopulmonary resuscitation (CPR) is even applicable for c ....
--------------------------------------------------------------------------------
There are 10 queries after decomposition 

Here's a example of the first one: What causes loss of consciousness in cats?
Decomposed Sub-Queries:
['What causes loss of consciousness in cats?', 'How to provide emergency care for a collapsed cat?', 'Can CPR be performed on cats?', 'How to perform CPR on a cat?', 'Diagnostic tests for a collapsed cat', 'Imaging studies for a feline collapse', 'Signs of successful resuscitation in cats', 'What is the typical blood work for a cat in cardiac arrest? ', 'Image of a normal heart ECG for comparison', 'Diagram of feline internal injuries causi

## Contextual Retrievals

Based on the decomposed sub-queries, we retrieve contextually closely aligned documents from the vector database.

### Setup Unified Retriever (Retrieve text, table, images)

In [23]:
from langchain_experimental.open_clip import OpenCLIPEmbeddings
from langchain_chroma import Chroma

def init_retriever():
    """Build a ``UnifiedRetriever`` over the persisted Chroma collections.

    Two Chroma collections are opened from
    ``../../chroma/Emergency_and_InfectiveDisease`` with OpenCLIP
    (``ViT-g-14`` / ``laion2b_s34b_b88k``) embeddings:

    * ``summaries_and_images`` - searched by similarity;
    * ``originals`` - looked up by id to fetch the original content.

    Returns:
        A ``UnifiedRetriever`` instance exposing ``retrieve(query, k=5)``.
    """

    class UnifiedRetriever:
        """Similarity search on summaries, joined with the matching originals."""

        def __init__(self, vectorstore, docstore, id_key="doc_id"):
            self.vectorstore = vectorstore
            self.docstore = docstore
            self.id_key = id_key
            # Direct handle on the docstore's underlying collection, used for id lookups.
            self._collection = docstore._collection

        def retrieve(self, query, k=5):
            """Return up to ``k`` hits for ``query``.

            Each hit is a dict with the keys ``summary``, ``original``,
            ``original_metadata``, ``summary_metadata`` and ``score``.
            ``original`` / ``original_metadata`` are None when the summary has
            no id or the original cannot be loaded; retrieval is best-effort
            and such failures are reported but do not abort the search.
            """
            results = self.vectorstore.similarity_search_with_score(query, k=k)
            output = []
            for doc, score in results:
                doc_id = doc.metadata.get(self.id_key)
                original_doc = None
                original_meta = None

                if doc_id is None:
                    # Without an id there is nothing to look up in the docstore.
                    print(f"[UnifiedRetriever] summary has no '{self.id_key}' metadata; original not loaded")
                else:
                    try:
                        original = self._collection.get(ids=[doc_id], include=["documents", "metadatas"])
                        original_doc = original["documents"][0] if original["documents"] else None
                        original_meta = original["metadatas"][0] if original["metadatas"] else None
                    except Exception as exc:
                        # Keep the summary hit, but make the failure visible
                        # instead of swallowing it silently.
                        original_doc = None
                        original_meta = None
                        print(f"[UnifiedRetriever] could not load original for doc_id={doc_id!r}: {exc!r}")

                output.append({
                    "summary": doc.page_content,
                    "original": original_doc,
                    "original_metadata": original_meta,
                    "summary_metadata": doc.metadata,
                    "score": score
                })
            return output

    persist_directory = '../../chroma/Emergency_and_InfectiveDisease'
    id_key = "doc_id"

    open_clip_embeddings = OpenCLIPEmbeddings(model_name="ViT-g-14", checkpoint="laion2b_s34b_b88k")

    # Vectorstore for summaries (for similarity search)
    vectorstore = Chroma(
        collection_name="summaries_and_images",
        persist_directory=persist_directory,
        embedding_function=open_clip_embeddings
    )
    # Persistent docstore for originals (all modalities)
    docstore = Chroma(
        collection_name="originals",
        persist_directory=persist_directory,
        embedding_function=open_clip_embeddings
    )

    retriever = UnifiedRetriever(vectorstore, docstore, id_key=id_key)
    return retriever

### Retrieval

In [24]:
# Notebook-level state for the retrieval experiments in this section:
# one shared retriever instance plus two (initially empty) scratch containers.
retriever = init_retriever()
seen_doc_ids, all_results = set(), []

def contextual_retrieval_flat(state):
    """Retrieve documents for every sub-query and return them de-duplicated.

    Uses the notebook-level ``retriever`` created in this cell rather than
    building a new one per call: ``init_retriever()`` instantiates the OpenCLIP
    embedding model and two Chroma clients each time it runs.

    Args:
        state: Graph state mapping; reads ``sub_queries``. A missing or None
            value is treated as "no sub-queries".

    Returns:
        A flat list of retrieval result dicts (up to 5 per sub-query), keeping
        only the first occurrence of each ``doc_id``. Results without a
        ``doc_id`` are dropped.
    """
    seen_doc_ids = set()
    unique_docs = []

    for query in state.get('sub_queries') or []:
        results = retriever.retrieve(query, k=5)
        for res in results:
            # The id is either on the result itself or in the summary's metadata.
            doc_id = res.get('doc_id') or (res.get('summary_metadata') or {}).get('doc_id')
            if doc_id and doc_id not in seen_doc_ids:
                seen_doc_ids.add(doc_id)
                unique_docs.append(res)
    print(f"Total unique documents retrieved: {len(unique_docs)}")
    return unique_docs

### test

In [46]:
def test_contextual_retrieval(sub_queries):
    test_state = {
        "sub_queries": sub_queries
    }

    # Use the flat contextual retrieval function
    unique_docs = contextual_retrieval_flat(test_state)

    print("\nSample of unique retrieved docs:")
    for i, doc in enumerate(unique_docs):
        doc_id = doc.get('doc_id') or doc.get('summary_metadata', {}).get('doc_id')
        print(f"Doc {i}:")
        print(f"  Doc ID: {doc_id}")
        # Check if this is an image context doc
        if doc_id and doc_id.endswith('_context'):
            image_path = doc.get('summary_metadata', {}).get('image_path')
            print(f"  [IMAGE CONTEXT] Points to image file: {image_path}")
        print(f"  Type: {(doc.get('original_metadata') or {}).get('type')}")
        print(f"  Score: {doc.get('score')}")
        print(f"  Summary: {doc.get('summary')[:100]}...")
        print(f"  Original: {str(doc.get('original'))[:100]}...")
        print("-" * 40)
    print(f"Total unique docs retrieved: {len(unique_docs)}")

# Example usage:
# Sub-queries of the kind produced by the decomposition step.
retrieval_example_queries = [
    'What causes loss of consciousness in cats?',
    'How to provide emergency care for a collapsed cat?',
    'Can ARTIFICIAL RESPIRATION be performed on cats?',
    'How to perform ARTIFICIAL RESPIRATION on a cat?',
    'Diagnostic tests for a collapsed cat',
    'Imaging studies for a feline collapse',
    'Signs of successful resuscitation in cats',
    'What is the typical blood work for a cat in cardiac arrest? ',
    'Image of a normal heart ECG for comparison',
    'Diagram of feline internal injuries causing collapse',
]
test_contextual_retrieval(retrieval_example_queries)

Total unique documents retrieved: 30

Sample of unique retrieved docs:
Doc 0:
  Doc ID: ce3cd668-89c5-4a70-81bc-88342a8f2191
  Type: text
  Score: 0.662866473197937
  Summary: Infectious diseases in cats are caused by pathogens (bacteria, viruses, protozoa, or fungi) that inv...
  Original: INFECTIOUS DISEASES Infectious diseases are caused by bacteria, viruses, protozoa, or fungi that inv...
----------------------------------------
Doc 1:
  Doc ID: 299fb24a-f763-4f6a-a7ac-de726c101830
  Type: text
  Score: 0.6837778687477112
  Summary: Here's a concise summary:

* Symptoms of viral respiratory infections in cats appear 2-17 days after...
  Original: WY tests. These tests are not always available quickly enough to be of use in plan- ning treatment. ...
----------------------------------------
Doc 2:
  Doc ID: 36bdb17a-a1cc-4a80-b09a-4ad5923ce788
  Type: text
  Score: 0.6842197179794312
  Summary: Cat showing signs of abdominal pain or distension requires immediate veterinary care for p