# Introduction

In this notebook, we will experiment with how user queries are handled in our veterinary information retrieval system. Several collections have already been set up in the Chroma database, allowing us to directly perform information retrieval without additional setup. This environment enables us to test and refine the process of transforming user input into actionable queries and retrieving relevant information from our knowledge base.

In [1]:
import logging

from langchain_chroma import Chroma
from langchain_experimental.open_clip import OpenCLIPEmbeddings

# Location of the persisted Chroma database and the metadata field that
# links a summary to its original document.
persist_directory = '../chroma/textbook_test_Nutrition'
id_key = "doc_id"

# One OpenCLIP embedding function is shared by both collections below.
open_clip_embeddings = OpenCLIPEmbeddings(
    model_name="ViT-g-14",
    checkpoint="laion2b_s34b_b88k",
)

# Settings common to both collections: same directory, same embeddings.
_shared_chroma_settings = {
    "persist_directory": persist_directory,
    "embedding_function": open_clip_embeddings,
}

# "summaries": the collection that similarity search runs against.
vectorstore = Chroma(collection_name="summaries", **_shared_chroma_settings)

# "originals": persistent store holding the original content (all modalities).
docstore = Chroma(collection_name="originals", **_shared_chroma_settings)

# Instantiate the retriever

class UnifiedRetriever:
    """Similarity-search the summary store and attach each hit's original document.

    Args:
        vectorstore: Store exposing ``similarity_search_with_score(query, k=...)``,
            returning ``(document, score)`` pairs whose documents carry
            ``page_content`` and ``metadata``.
        docstore: Store whose ``_collection`` attribute exposes
            ``get(ids=..., include=...)`` for lookups by id.
        id_key: Name of the summary-metadata field holding the id of the
            original document.
    """

    def __init__(self, vectorstore, docstore, id_key="doc_id"):
        self.vectorstore = vectorstore
        self.docstore = docstore
        self.id_key = id_key
        # Keep the underlying collection handle for direct lookups by id.
        self._collection = docstore._collection

    def _fetch_original(self, doc_id):
        """Return ``(document, metadata)`` for *doc_id*, or ``(None, None)``.

        The lookup is best-effort: a missing id, an empty result, or a failing
        lookup all yield ``(None, None)`` so that one bad record never aborts
        a whole retrieval. Failures are logged instead of silently dropped.
        """
        if doc_id is None:
            # The summary carries no link to an original; nothing to look up.
            return None, None
        try:
            original = self._collection.get(
                ids=[doc_id], include=["documents", "metadatas"]
            )
            documents = original["documents"]
            metadatas = original["metadatas"]
            return (
                documents[0] if documents else None,
                metadatas[0] if metadatas else None,
            )
        except Exception:
            # Deliberately broad: the store backend decides what it raises.
            logging.getLogger(__name__).warning(
                "Could not fetch original document for id %r", doc_id, exc_info=True
            )
            return None, None

    def retrieve(self, query, k=5):
        """Return up to *k* hits for *query*, each paired with its original.

        Args:
            query: Query passed to the summary store's similarity search.
            k: Maximum number of hits requested from the summary store.

        Returns:
            A list of dicts with the keys ``summary``, ``original``,
            ``original_metadata``, ``summary_metadata`` and ``score``, in the
            order returned by the similarity search. ``original`` and
            ``original_metadata`` are ``None`` when no original was found.
        """
        hits = self.vectorstore.similarity_search_with_score(query, k=k)
        output = []
        for doc, score in hits:
            original_doc, original_meta = self._fetch_original(
                doc.metadata.get(self.id_key)
            )
            output.append({
                "summary": doc.page_content,
                "original": original_doc,
                "original_metadata": original_meta,
                "summary_metadata": doc.metadata,
                "score": score,
            })
        return output

# Shared retriever instance used by the cells below. `id_key` names the
# summary-metadata field that holds the id of the matching original document.
retriever = UnifiedRetriever(vectorstore, docstore, id_key=id_key)

# Handling User Input: Analysis Image

Let's use an image of an underweight cat. This cat is considerably skinny, with bones showing.

  ![Skinny Cat](./skinny_cat.jpg)

In [2]:
# The user's question and the picture that accompanies it.
query = "What's going on with my cat? What should I do?"
image_path = "./skinny_cat.jpg"  # adjust if the image lives elsewhere

import base64
import ollama
import os

# --- Vision model selection ---
# Any vision-capable model installed through Ollama can be used here;
# "minicpm-v:8b" and "llava:7b" are alternatives.
# Instruct variant in a quantized weight format, chosen to fit a MacBook M1 Pro.
# NOTE(review): the earlier comment named q8_0, but the tag below is q4_K_M —
# confirm which quantization was intended.
image_model = "llama3.2-vision:11b-instruct-q4_K_M"

# --- 1. Turn the image into a textual summary with a vision LLM ---
print(f" ⏳ Processing image: {image_path}")

# Fallback text that stays in place when the file is missing or the model call fails.
image_summary = "Could not generate image summary."

if os.path.exists(image_path):
    try:
        # Ollama expects the image as a base64-encoded string.
        with open(image_path, 'rb') as image_file:
            raw_image_bytes = image_file.read()
        image_data = base64.b64encode(raw_image_bytes).decode('utf-8')

        # Instructions asking for an exhaustive, factual description.
        image_summarization_prompt = """From a feline veterinary stand point, provide a highly detailed and objective 
                description of the image. Focus on all observable elements, actions, 
                objects, subjects, their attributes (e.g., color, size, texture), 
                their spatial relationships, and any discernible context or implied scene. 
                Also focus on all possible health issue.
                Describe any text present in the image. This description must be exhaustive 
                and purely factual, capturing every significant visual detail to serve as a 
                comprehensive textual representation for further analysis by another AI model. 
                If the image is entirely irrelevant or contains no discernible subject, 
                state "No relevant visual information."."""

        # A single user message carrying both the instructions and the image.
        vision_message = {
            'role': 'user',
            'content': image_summarization_prompt,
            'images': [image_data],
        }
        response = ollama.chat(model=image_model, messages=[vision_message])

        image_summary = response['message']['content']
        print("--- Generated Image Summary ---")
        print(image_summary)

    except Exception as e:
        # Best effort: report the problem and keep the fallback summary.
        print(f"Error processing image with Ollama: {e}")
else:
    print(f"Error: Image file not found at {image_path}. Please check the path.")

# `image_summary` is combined with the user's text query in the following
# cells for retrieval and further processing in the RAG pipeline.

 ⏳ Processing image: ./skinny_cat.jpg
--- Generated Image Summary ---
The image depicts a thin, pale orange cat standing on a tiled floor, its head resting on the edge of a pale green bowl. The cat's fur appears sparse and unkempt, with visible signs of malnutrition. It is positioned in front of a black metal frame, with its tail extending to the left and its front paws placed on the floor.

The bowl, which is slightly larger than the cat's head, is positioned to the right of the cat. The cat's posture suggests that it is drinking from the bowl, although the image does not clearly show this.

In the background, a dark brown door is visible, although it is out of focus. The overall atmosphere of the image suggests that the cat is in a home environment, possibly in a kitchen or dining area. The presence of the bowl and the cat's posture imply that it is being fed or is in the process of eating.

From a feline veterinary standpoint, the cat's appearance raises concerns about its health. T

# Handling User Input: Refine Query

In [3]:
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# LLM used for query refinement.
# Alternative: "deepseek-r1:7b-qwen-distill-q8_0" (compressed, distilled Qwen);
# its responses often include chain-of-thought in <think></think> tags.
# query_refinement_model = ChatOllama(model="deepseek-r1:7b-qwen-distill-q8_0")

# Compact model that fits an M1 Pro and uses less RAM; no chain-of-thought output.
query_refinement_model = ChatOllama(model="llama3.2:3b")

# Prompt for query refinement; expects the variables `original_query` and `image_summary`.
query_refinement_prompt = ChatPromptTemplate.from_template(
    """You are an intelligent assistant. Your task is to rephrase and expand the given user query \
into a more detailed and context-rich query that can be used to retrieve relevant information \
from a veterinary knowledge base. Use the provided image description to add visual context \
and relevant keywords to the refined query. Focus on adding relevant keywords, clarifying intent, \
and anticipating related information that might be helpful. The output should be a single, refined query.

Original query: {original_query}
Image description: {image_summary}"""
)

# Create the query refinement chain.
# The chain is invoked with a dict that already has the two keys the prompt
# needs, so it is piped straight into the prompt. Mapping both keys to
# RunnablePassthrough() would hand the *entire* input dict to each template
# variable instead of the individual field.
query_refinement_chain = (
    query_refinement_prompt
    | query_refinement_model
    | StrOutputParser()
)

# --- Demonstration of query refinement ---

# print(f"Original user query: {query}")
# print(f"Image Summary: {image_summary}")

refined_query = query_refinement_chain.invoke(
    {"original_query": query, "image_summary": image_summary}
)

print("-"*80)
print(f"Refined query: {refined_query}")


--------------------------------------------------------------------------------
Refined query: Refined query:

"Given the image description of a thin, pale orange cat standing on a tiled floor with visible signs of malnutrition (sparse and unkempt fur) and struggling posture (head resting on the edge of a pale green bowl), I'm seeking guidance on how to address my cat's health concerns. Specifically, I'd like to know:

1. What are the possible causes of my cat's weight loss and poor appetite?
2. How can I identify potential nutritional deficiencies in my cat's diet and what dietary changes can be made to ensure proper nutrition?
3. Are there any signs of dehydration or kidney disease that I should look out for, given the cat's thinness and lackluster condition?
4. What are the necessary steps to take to address malnutrition and poor health in cats, including potential veterinary treatments, supplements, or medications?
5. How can I create a more conducive environment for my cat's reco

# Query Decomposition

In [4]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.exceptions import OutputParserException

# Prompt for query decomposition; expects the variable `refined_query`.
query_decomposition_prompt = ChatPromptTemplate.from_template(
    """You are an intelligent assistant. Your task is to break down the given complex query
into a list of simpler, focused sub-queries. Each sub-query should be a standalone question
that can be used to retrieve specific information from a veterinary knowledge base.

Output ONLY a valid JSON array of strings, and nothing else. Do not include any explanations, markdown, or extra text.

Complex query: {refined_query}
"""
)

# Create the query decomposition chain: prompt -> LLM -> parsed JSON.
query_decomposition_chain = (
    query_decomposition_prompt
    | query_refinement_model
    | JsonOutputParser()
)

# --- Demonstration of query decomposition ---

print(f"Original refined query: {refined_query[:300]} ....")

# The model is only *asked* for a JSON array of strings; it may still return
# invalid JSON, a JSON object, or an empty array. Validate before relying on it.
try:
    raw_decomposition = query_decomposition_chain.invoke({"refined_query": refined_query})
except OutputParserException as exc:
    print(f"Warning: could not parse the decomposition output as JSON: {exc}")
    raw_decomposition = None

if isinstance(raw_decomposition, list):
    # Keep only non-blank strings; anything else cannot be used as a query.
    decomposed_queries = [
        item for item in raw_decomposition
        if isinstance(item, str) and item.strip()
    ]
else:
    decomposed_queries = []

if not decomposed_queries:
    # Fall back to the undivided query so the retrieval step still has input.
    print("Warning: decomposition produced no usable sub-queries; using the refined query as-is.")
    decomposed_queries = [refined_query]

print("-" * 80)
# print(f"Decomposed queries:\n{decomposed_queries}")

print(f"There are {len(decomposed_queries)} queries after decomposition \n")
print(f"Here's a example of the first one: {decomposed_queries[0]}")


Original refined query: Refined query:

"Given the image description of a thin, pale orange cat standing on a tiled floor with visible signs of malnutrition (sparse and unkempt fur) and struggling posture (head resting on the edge of a pale green bowl), I'm seeking guidance on how to address my cat's health concerns. Speci ....
--------------------------------------------------------------------------------
There are 6 queries after decomposition 

Here's a example of the first one: What are the possible causes of a cat's weight loss and poor appetite?


# Contextual Retrievals 

In [5]:
# Assumes `decomposed_queries` is a list of query strings and that
# `retriever` is already instantiated.

seen_doc_ids = set()   # ids already attributed to an earlier sub-query
all_results = []       # one entry per sub-query that contributed new documents

# The loop variable is deliberately NOT called `query`: that name holds the
# user's original question, which later cells still read. Reusing it here
# would silently replace the question with the last sub-query.
for sub_query in decomposed_queries:
    unique_results = []
    for res in retriever.retrieve(sub_query, k=5):
        # The id linking a summary to its original lives in the summary metadata.
        doc_id = (res.get('summary_metadata') or {}).get(retriever.id_key)
        # Hits without an id cannot be de-duplicated and are skipped.
        if doc_id and doc_id not in seen_doc_ids:
            seen_doc_ids.add(doc_id)
            unique_results.append(res)
    if unique_results:
        all_results.append({
            "query": sub_query,
            "results": unique_results
        })

# Summarize all_results.
# Every kept result added exactly one id to `seen_doc_ids`, so that set is
# already the set of all retrieved document ids.
all_doc_ids = set(seen_doc_ids)
total_unique_docs = sum(len(entry['results']) for entry in all_results)
total_queries_with_results = len(all_results)

print(f"Total unique documents retrieved: {len(all_doc_ids)}")
print(f"Total queries with at least one unique result: {total_queries_with_results}")
print("Number of unique documents retrieved per query:")
for entry in all_results:
    print(f"  Query: {entry['query'][:60]}... -> {len(entry['results'])} unique docs")



Total unique documents retrieved: 15
Total queries with at least one unique result: 6
Number of unique documents retrieved per query:
  Query: What are the possible causes of a cat's weight loss and poor... -> 5 unique docs
  Query: How can I identify potential nutritional deficiencies in my ... -> 3 unique docs
  Query: Are there any signs of dehydration or kidney disease that I ... -> 4 unique docs
  Query: What are the necessary steps to take to address malnutrition... -> 1 unique docs
  Query: How can I create a more conducive environment for a cat's re... -> 1 unique docs
  Query: Are there any additional tests or examinations (e.g., blood ... -> 1 unique docs


In [6]:
# Display the first entry: a dict with the sub-query ("query") and the list of
# unique results ("results") kept for it.
all_results[0]

{'query': "What are the possible causes of a cat's weight loss and poor appetite?",
 'results': [{'summary': 'Switching diets may be necessary for cats with health issues. Accustoming your cat to a new food can help reduce stress during the transition.',
   'original': 'Switching Diets It may become necessary to adjust a cat’s diet and switch to a new food because of a health problem. This is yet another reason to get your cat accustomed to —o—',
   'original_metadata': {'doc_id': '4563452b-4eb0-492e-abc3-367afdd21674',
    'type': 'text'},
   'summary_metadata': {'type': 'text',
    'doc_id': '4563452b-4eb0-492e-abc3-367afdd21674'},
   'score': 0.4774450659751892},
  {'summary': "To introduce a new diet to your cat, mix original and new foods (80% original, 20% new) for a few weeks until the cat accepts it. Gradually increase new food while reducing original food. If the cat refuses, don't starve her, as some cats may refuse certain foods and won't eat at all.",
   'original': 'Switch

# Directly Answering Query with Retrieved Info

In [7]:
# Gather the summary text of every retrieved result, in retrieval order,
# and merge it into a single newline-separated context string.
context_summaries = [
    hit['summary']
    for entry in all_results
    for hit in entry['results']
]
all_context = "\n".join(context_summaries)

# Prompt for the final answer; expects `query`, `image_summary` and `context`.
final_answer_prompt = ChatPromptTemplate.from_template(
    """You are a helpful veterinary assistant. Use the provided context to answer the 
    user's question as thoroughly and concisely as possible. Follow the following steps
    when giving an prompt answer to the user.
    1. Describe what was the main issue that you observed from the image summary?
    2. What could be the cause of the main issue?
    3. How can this main issue be solved?

    User's question: {query}

    Image Summary: {image_summary}

    Context:
    {context}

    Answer:"""
)

# prompt -> LLM -> plain string
final_answer_model = ChatOllama(model="llama3.2:3b")
final_answer_chain = final_answer_prompt | final_answer_model | StrOutputParser()

# Values for the three template variables.
final_answer_inputs = {
    "query": query,
    "image_summary": image_summary,
    "context": all_context,
}
final_answer = final_answer_chain.invoke(final_answer_inputs)

print("Final Answer:")
print(final_answer)

Final Answer:
1. The main issue observed in the image summary is that the cat appears to be malnourished, with sparse and unkempt fur, visible signs of malnutrition, and an unusual posture suggesting it may be struggling to eat or drink.
2. The potential cause of this main issue could be a lack of proper care, inadequate nutrition, or poor quality food, which may lead to malnutrition and health problems in the cat.
3. To solve this issue, further assessments such as blood work, urinalysis, and other diagnostic tests may be necessary to determine the underlying causes of the cat's condition. Additionally, a veterinarian should be consulted to develop a customized diet plan that addresses the cat's specific nutritional needs, such as a prescription diet or a tailored feeding regimen. The veterinarian can also rule out any underlying health issues that may be contributing to the cat's malnutrition and provide guidance on how to improve the cat's overall health and well-being.


# Tool Calling Chain of Thought Way To Answer Query

Previously, we used the llama3.2:3b model to answer the query, giving it the image summary and the retrieved information. However, the answer can be incomplete and miss the target. Here we will try a reasoning model that outputs a chain of thought (CoT), which may address this issue. For example:
1. deepseek-r1 7b 8b 
2. qwen3 4b 8b

Have the model think of a plan for answering the query that helps the user understand the cause, possible underlying issues, recommended next steps, etc. If the context is not enough, fetch information from Wikipedia, or use Tavily. Let it pause and think: what information gap is there? Does it need more information from the owner? E.g. "Is the vaccine up to date?", "Is eating and drinking OK?", "Is pooping and peeing OK?", ...


# Tool Calling Agent Example

In [8]:
# --- New Cell for a MORE INQUISITIVE Conversational Agent ---

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_ollama import ChatOllama
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools import tool

# --- 1. Tools (same as before) ---
@tool
def search_vet_knowledge_base(query: str) -> str:
    """Searches the veterinary knowledge base for specific information."""
    # NOTE: the docstring above doubles as the tool description given to the
    # agent, so it is intentionally left unchanged.
    print(f"--- Calling Vet Knowledge Base with query: {query} ---")
    results = retriever.retrieve(query, k=3)
    if not results:
        return "No information found in the knowledge base for this query."
    # Put each summary on its own line. The separator must be a real newline;
    # "\\n" would insert a literal backslash followed by "n" between summaries.
    return "\n".join(f"Summary: {res['summary']}" for res in results)

@tool
def web_search(query: str) -> str:
    """Searches the web for general information."""
    # Placeholder tool: it only logs the call and returns a fixed notice so
    # the agent can tell the user that no external lookup is available.
    print(f"--- Calling Web Search with query: {query} ---")
    not_implemented_notice = (
        "Web search is not implemented. Tell the user you couldn't find external information."
    )
    return not_implemented_notice

tools = [search_vet_knowledge_base, web_search]

# --- 2. A More Forceful Agent Prompt ---

# Many models support neither tool calling nor reasoning, but qwen3:8b does,
# and so does llama3.2:3b; Qwen performs better than llama3.2 in this scenario.
# llama3-groq-tool-use:8b, a model fine-tuned specifically for tool calling,
# can be a good choice for tool-calling decision making.

# Model tag corrected from the misspelled "qwem3:8b" to match the Qwen3 model
# named above.
agent_llm = ChatOllama(model="qwen3:8b", temperature=0)

# This prompt is much more directive: it tells the agent to collect the
# critical facts from the user before using tools or giving advice.
agent_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert veterinary assistant. Your PRIMARY GOAL is to gather enough information to help the user. Do not provide a final answer until you are confident you have all necessary details.

**Your Action Plan:**
1.  **First, Identify Critical Missing Information:** Review the conversation. Before using any tool or giving advice, you MUST determine if you know the following:
    - The cat's approximate **age** and **sex**.
    - **How long** the cat has been in this condition.
    - If the cat is **eating and drinking**. If so, what and how much?
    - The cat's **litter box habits** (peeing/pooping). Are they normal?
    - Any known **medical history** (vaccines, past issues).

2.  **Ask Questions First:** If you are missing any of the critical information above, your immediate next step is to ASK THE USER for it. Do not use other tools yet.

3.  **Use Tools Later:** Only after you have the basic information from the user should you use `search_vet_knowledge_base` to investigate potential conditions.

4.  **Synthesize a Final Answer:** Once all questions are answered and you've gathered research, provide a final, comprehensive answer that explains potential issues and recommends next steps."""),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# --- 3. Create and Run the Agent ---
agent = create_tool_calling_agent(agent_llm, tools, agent_prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# --- 4. The Chat Loop ---
chat_history = []
initial_input = f"""My User Query: "{query}"

Here is a summary of the image I provided:
{image_summary}
"""

print("--- Starting conversation with Vet Assistant Agent ---")
print("Agent: Hello! I've reviewed your query and the image summary. I will do my best to help.")

# Run the first turn here to start the conversation.
response = agent_executor.invoke({
    "input": initial_input,
    "chat_history": chat_history
})
agent_response = response['output']
# A real newline before the reply; "\\n" printed a literal backslash-n.
print(f"\nAgent: {agent_response}")

# Add the first interaction to the history so follow-up turns have context.
chat_history.append(HumanMessage(content=initial_input))
chat_history.append(AIMessage(content=agent_response))

# To continue the chat, read the user's next message in a new cell and call
# agent_executor.invoke again with the updated chat_history, appending each
# exchange to chat_history as above.



--- Starting conversation with Vet Assistant Agent ---
Agent: Hello! I've reviewed your query and the image summary. I will do my best to help.


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mBased on the information provided, I recommend performing a complete physical examination, including blood work and urinalysis, to further assess your cat's health. These tests will help identify any underlying conditions contributing to its malnutrition and poor health.[0m

[1m> Finished chain.[0m
\nAgent: Based on the information provided, I recommend performing a complete physical examination, including blood work and urinalysis, to further assess your cat's health. These tests will help identify any underlying conditions contributing to its malnutrition and poor health.


: 

: 