## Introduction


This notebook explores how to wrap the chatbot in a gradio chat interface

###  Setting up paths and loading vector store

In [1]:
# === 0) Imports & manifest ===
from pathlib import Path
import yaml, json
from typing import Optional, List, Dict
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.tools import tool
from langchain.schema import Document

# If you want the prebuilt ReAct agent:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

# If you prefer to also expose tools directly on the LLM:
from langchain.chat_models import init_chat_model

# --- Set your pack root + manifest ---
ROOT = Path("/Users/ktejwani/Personal CS Projects/Summer 2025/Offline AI Kiosk/Offline-AI-Kiosk/first_aid_pack_demo_v2")
MANIFEST = ROOT / "manifest.yaml"

with open(MANIFEST, "r", encoding="utf-8") as f:
    manifest = yaml.safe_load(f)

# --- Resolve FAISS paths from manifest ---
faiss_dir_text = ROOT / manifest["precomputed_indices"]["text"]["faiss"]["dir"]
faiss_dir_image = ROOT / manifest["precomputed_indices"]["images"]["faiss"]["dir"]
# --- Create embeddings *matching the store* ---
embed_model_name = manifest["embedding_config"]["text"]["model"]     #same for text and image
emb = OllamaEmbeddings(model=embed_model_name)

# --- Load FAISS store + retriever ---
text_vs = FAISS.load_local(str(faiss_dir_text), emb, allow_dangerous_deserialization=True)
text_retriever = text_vs.as_retriever(search_kwargs={"k": 4})  # default k; tool will override if provided

image_vs = FAISS.load_local(str(faiss_dir_image), emb, allow_dangerous_deserialization=True)
# results = image_vs.similarity_search_with_score(query, k=4)






### Defining helper and context() Tool  

In [2]:
# === 1) Helpers ===
def format_chunk(doc: Document, max_chars: int = 400) -> Dict:
    """Return a dict with compact text + key metadata for prompting & audit."""
    txt = doc.page_content.strip()
    if len(txt) > max_chars:
        txt = txt[:max_chars].rstrip() + " …"
    m = doc.metadata
    return {
        "id": m.get("chunk_id"),
        "topic_id": m.get("topic_id"),
        "file_id": m.get("file_id"),
        "locale": m.get("locale"),
        "path": m.get("path"),
        "citations": [c.get("title", "") for c in m.get("citations", [])],
        "text": txt
    }

def format_context_block(chunks: List[Dict]) -> str:
    """Human/LLM-friendly context block the agent can drop into its reasoning."""
    lines = []
    lines.append("### Retrieved Context (use only what is relevant)")
    for i, c in enumerate(chunks, 1):
        cite_str = "; ".join([t for t in c["citations"] if t]) or "—"
        head = f"[{i}] {c['topic_id']} · {c['file_id']} · {c['locale']} · {c['path']}"
        lines.append(head)
        lines.append(c["text"])
        lines.append(f"Source(s): {cite_str}")
        lines.append("")  # blank line
    return "\n".join(lines).strip()


# === 2) The @tool: context() ===
@tool
def context(
    query: str,
    k: int = 4,
    topic_id: Optional[str] = None,
    locale: Optional[str] = None
) -> dict:
    """
    Retrieve up to k relevant knowledge-pack chunks for 'query' and return a formatted
    context block + structured per-chunk data for citations. You must use this tool for any prompt that is 
    important to wellbeing or safety of user. 

    Args:
        query: Natural language question or keywords.
        k: Top-k chunks to return (default 4).
        topic_id: Optional manifest topic filter (e.g., 'bleed-control').
        locale: Optional locale filter (e.g., 'hi_en' or 'en').

    Returns:
        {
          "query": str,
          "k": int,
          "filters": {"topic_id":..., "locale":...},
          "context_block": str,     # pasteable into prompts
          "chunks": [ {id, topic_id, file_id, path, locale, citations[], text}, ... ]
        }
    """
    # Build a metadata filter if provided
    _filter = {}
    # if topic_id:
    #     _filter["topic_id"] = topic_id
    # if locale:
    #     _filter["locale"] = locale

    # Run retrieval (override k)
    local_ret = text_vs.as_retriever(search_kwargs={"k": k})
    hits: List[Document] = local_ret.invoke(query) if not _filter else local_ret.invoke(query, filter=_filter)

    formatted = [format_chunk(d) for d in hits]
    ctx_block = format_context_block(formatted)
    return {
        "query": query,
        "k": k,
        "filters": _filter,
        "context_block": ctx_block,
        "chunks": formatted
    }


### Math Tools

In [3]:
@tool
def add(a: float, b: float) -> float:
    """Add two numbers.
    
    Args:
        a: First float
        b: Second float
    """
    return a + b

@tool
def subtract(a: float, b: float) -> float:
    """Subtract first number by second number.

    Args:
        a: First float
        b: Second float
    """
    return a - b

@tool
def multiply(a: float, b: float) -> float:
    """Multiply two numbers.

    Args:
        a: First float
        b: Second float
    """
    return a * b

@tool
def divide(a : float, b: float) -> float:
    """Divide first number by second number.
    
    Args:
        a: First float
        b: Second float
    """
    if b == 0:
        return 0
    else:
        return a/b


### Defining knowledgeMeta() tool

In [4]:
from langchain.tools import tool
from pathlib import Path
import yaml
from typing import Optional

@tool
def knowledgeMeta(pack_dir: Optional[str] = None) -> dict:
    """
    Read a knowledge pack manifest and return metadata for trust and recency. 

    Args:
      pack_dir: Absolute or relative path to the pack folder (containing manifest.yaml).
                If omitted, uses the default ROOT pack path.

    Returns:
      {
        "name": str,
        "version": str,
        "date": str,
        "locales": [..],
        "topics_count": int,
        "manifest_path": str
      }
    """
    # default to your earlier ROOT if not provided
    base = Path(pack_dir) if pack_dir else ROOT
    manifest_path = base / "manifest.yaml"
    if not manifest_path.exists():
        return {"error": f"manifest.yaml not found at {manifest_path}"}

    with open(manifest_path, "r", encoding="utf-8") as f:
        m = yaml.safe_load(f)

    name = m.get("name", str(base.name))
    version = m.get("version", "unknown")
    date = m.get("date", "unknown")
    locales = m.get("locales", [])
    topics = m.get("index_of_topics", []) or []
    return {
        "name": name,
        "version": version,
        "date": date,
        "locales": locales,
        "topics_count": len(topics),
        "manifest_path": str(manifest_path)
    }


### getImage tool

In [5]:
from langchain_core.tools import tool
from IPython.display import Image, display
from pathlib import Path
from typing import Dict, Any

HIGH_SCORE_THRESHOLD = 0.55  # tune as needed

@tool
def getImage(query: str) -> Dict[str, Any]:
    """
    Retrieve and display exactly one image for the user's query.
    Applies a high-confidence threshold; returns NO_IMAGE if nothing clears the bar.
    """
    q = (query or "").strip()
    pack_name    = manifest.get("name", "")
    pack_ver     = manifest.get("version", "")
    pack_date    = manifest.get("date", "")
    pack_locales = manifest.get("locales", [])

    if not q:
        return {
            "status": "NO_IMAGE",
            "version": pack_ver,
            "date": pack_date,
            "locales": pack_locales,
            "pack_name": pack_name,
            "image_path": "",
            "score": None,
            "citations": []
        }

    # Use similarity_search_with_score to get confidence scores
    foundImage = False
    minScore = 0.32
    finalDoc  = Document(page_content="")
    results = image_vs.similarity_search_with_score(query, k=4)
    finScore = 0
    from IPython.display import Image, display

    for i, (d, score) in enumerate(results, 1):
        
        if score >= minScore:
            finScore = score
            finalDoc = d
            foundImage = True
            break
    

    if not foundImage:
        print("NOT FOUND")
        return {
            "status": "NO_IMAGE",
            "version": pack_ver,
            "date": pack_date,
            "locales": pack_locales,
            "pack_name": pack_name,
            "image_path": "",
            "score": None,
            "citations": []
        }
    else:
        print("Found FOUND")
        img_path = ROOT / finalDoc.metadata['path']
        # try:
        #     display(Image(filename=img_path))
        # except Exception:
        #     pass
            
        return {
            "status": "OK",
            "version": pack_ver,
            "date": pack_date,
            "locales": pack_locales,
            "pack_name": pack_name,
            "image_path": str(img_path),
            "score": float(finScore),
            "citations": finalDoc.metadata.get("citations", [])
        }
        


### List of Available Tools

In [6]:
# Keep a Tools list for whichever orchestration you choose:
TOOLS = [context,add,multiply,subtract,divide,knowledgeMeta, getImage]

### Setting Up LLM

In [7]:
from langchain.chat_models import init_chat_model

llm = init_chat_model(
    model="ollama:gpt-oss:20b",       
    temperature=0.2  # lower = more deterministic
)

llm_with_tools = llm.bind_tools(TOOLS) #llm_with_tools is a new wrapped llm
model = llm_with_tools

### Testing Tool Requests

In [8]:
# from langchain_core.messages import HumanMessage
# #message classes on lang chain inlcude human massage ,ai message, system message, and tool message
# query = "call get image to show torniquet on leg"

# messages = [HumanMessage(query)]

# ai_msg = llm_with_tools.invoke(messages) #llm_with_tools looks at history(currently only 1 human message) and then builds prompt
# print("AI MESSAEGE CALLS")
# print(ai_msg)
# print("JUST AI TOOL CALLS")
# print(ai_msg.tool_calls) 
# #Now we add the bots message to the chat history
# messages.append(ai_msg)

In [9]:
# for tool_call in ai_msg.tool_calls: #Actually running all the tool calls ai requested in last cell
#     tools_dict = {"context": context, "knowledgeMeta":knowledgeMeta,"add":add,
#             "subtract":subtract,"divide":divide,"multiply":multiply, "getImage":getImage}
#     selected_tool = tools_dict[tool_call["name"]]
#     tool_msg = selected_tool.invoke(tool_call)
#     messages.append(tool_msg) #Add the tool message to chat history

# messages


### Reason - Act Agent Sim

In [10]:
from typing import (
    Annotated, #Extra metadata
    Sequence, #generic list container
    TypedDict, #Lets you define a dictionary type with fixed keys and types
) #a dictionary type has a specific format of what key and valye types can be 
from langchain_core.messages import BaseMessage #lang chain message format (Human,Systen,AI)
from langgraph.graph.message import add_messages


class AgentState(TypedDict):
    """The state of the agent."""

    # add_messages is a reducer
    # See https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers
    messages: Annotated[Sequence[BaseMessage], add_messages]
#define a state dictionary, 1 key names messages
#messages is a list of langchain base messages
#wrapping it in Annotated tells it to not replace entire list but use add_messages to merge it in
#The accepected value  for this dictionary will be Sequence[BaseMessage]

### Defining nodes and edges in "Lang graph"

Lang graph:
- Imagine you're in a big construction truck and you have a map of stops
- Each stop pick up something/do a 
- Folow the street signs to get to all the stops
- Take a photo to save state during important jobs

The big truck carries chat histroy. The road signs are edges between nodes where nodes are the stops. The photos are state changes you can reload to(checkpoint in video game)

In [11]:
import json
from langchain_core.messages import ToolMessage, SystemMessage
from langchain_core.runnables import RunnableConfig

tools_dict = {"context": context, "knowledgeMeta":knowledgeMeta,"add":add,
            "subtract":subtract,"divide":divide,"multiply":multiply, "getImage":getImage}

tools_by_name = tools_dict


# Define our tool node
def tool_node(state: AgentState): #Agent State is the conversation history
    outputs = []
    for tool_call in state["messages"][-1].tool_calls: #Look at the last message in history(should be AIMessage)
        tool_result = tools_by_name[tool_call["name"]].invoke(tool_call["args"]) #Actually call the tool
        outputs.append( #Add a json output to conco hsitory
            ToolMessage(
                content=json.dumps(tool_result),
                name=tool_call["name"],
                tool_call_id=tool_call["id"],
            )
        )
    return {"messages": outputs}


# Define the node that calls the model
def call_model(
    state: AgentState,
    config: RunnableConfig,
):
    temp_system_str = "For ANY health or safety query, you MUST call the `context` tool.  Do not answer from memory."
    # this is similar to customizing the create_react_agent with 'prompt' parameter, but is more flexible
    system_prompt = SystemMessage(
        "You are a helpful AI assistant, please respond to the users query to the best of your ability!" + temp_system_str
    )
    response = model.invoke([system_prompt] + state["messages"], config)
    # We return a list, because this will get added to the existing list
    return {"messages": [response]}


# Define the conditional edge that determines whether to continue or not
def should_continue(state: AgentState):
    messages = state["messages"]
    last_message = messages[-1]
    # If there is no function call, then we finish
    if not last_message.tool_calls:
        return "end"
    # Otherwise if there is, we continue
    else:
        return "continue"

### Defining new Graph

In [12]:
from langgraph.graph import StateGraph, END

# Define a new graph
workflow = StateGraph(AgentState)

# Define the two nodes we will cycle between
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# Set the entrypoint as `agent`
# This means that this node is the first one called
workflow.set_entry_point("agent")

# We now add a conditional edge
workflow.add_conditional_edges(
    # First, we define the start node. We use `agent`.
    # This means these are the edges taken after the `agent` node is called.
    "agent",
    # Next, we pass in the function that will determine which node is called next.
    should_continue,
    # Finally we pass in a mapping.
    # The keys are strings, and the values are other nodes.
    # END is a special node marking that the graph should finish.
    # What will happen is we will call `should_continue`, and then the output of that
    # will be matched against the keys in this mapping.
    # Based on which one it matches, that node will then be called.
    {
        # If `tools`, then we call the tool node.
        "continue": "tools",
        # Otherwise we finish.
        "end": END,
    },
)

# We now add a normal edge from `tools` to `agent`.
# This means that after `tools` is called, `agent` node is called next.
workflow.add_edge("tools", "agent")

# Now we can compile and visualize our graph
graph = workflow.compile()

from IPython.display import Image, display

try:
    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception:
    # This requires some extra dependencies and is 
    print(Exception)
    pass
# display(Image(graph.get_graph().draw_mermaid_png()))

<class 'Exception'>


### Print output stream function

In [13]:
# Helper function for formatting the stream nicely
def print_stream(stream):
    for s in stream:
        message = s["messages"][-1]
        prev_msg = ""
        if len(s['messages']) > 1:
            prev_msg = s["messages"][-2]

        if isinstance(message, tuple):
            print(message)
        else:
            message.pretty_print()
        # if prev_msg != "":
        #     if isinstance(prev_msg, tuple):
        #         print(prev_msg)
        #     else:
        #         prev_msg.pretty_print()


# inputs = {"messages": [("user", "What to do if bleed? When Knowledge updated? What 42*321 and 32/31?")]}
# print_stream(graph.stream(inputs, stream_mode="values"))

### Infinte Loop Chat Bot

After eahc exit please restart kernel and clear all outputs to ensure a fresh conversation.

In [14]:
# === Simple REPL loop for Jupyter ===
def chat_loop():
    history = []
    print("Type a message. Commands: /reset, /exit\n")
    while True:
        user = input("You: ").strip()
        if not user:
            continue
        if user.lower() in {"/exit", "/quit"}:
            print("Bye!")
            break
        if user.lower() == "/reset":
            history = []
            print("(conversation cleared)")
            continue

        # Append user turn
        inputs = {"messages": history + [("user", user)]}

        # Use YOUR print_stream exactly as written
        print_stream(graph.stream(inputs, stream_mode="values"))

        # Capture updated history so context persists
        final_state = graph.invoke(inputs)
        history = final_state["messages"]

# Run this cell in your notebook to start chatting
chat_loop()


Type a message. Commands: /reset, /exit

Bye!


### Using gradio with langchain



This is a simple general-purpose chatbot built on top of LangChain and Gradio.


In [21]:
from langchain.schema import AIMessage, HumanMessage  
import gradio as gr

lightModel = init_chat_model(
    model="ollama:llama3.1",       
    temperature=0.2  # lower = more deterministic
)

def predict(message, history):
    history_langchain_format = []
    for msg in history:
        if msg['role'] == "user":
            history_langchain_format.append(HumanMessage(content=msg['content']))
        elif msg['role'] == "assistant":
            history_langchain_format.append(AIMessage(content=msg['content']))
    history_langchain_format.append(HumanMessage(content=message))
    llm_response = lightModel.invoke(history_langchain_format)
    return llm_response.content

demo = gr.ChatInterface(
    predict,
    type="messages"
)

demo.launch()


* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




### Gradio and agents

In [16]:
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        type="messages",
        value=[
            gr.ChatMessage(
                role="user", 
                content="What is the weather in San Francisco?"
            ),
            gr.ChatMessage(
                role="assistant", 
                content="I need to use the weather API tool?",
                metadata={"title":  "🧠 Thinking"}
            )
        ]
    )

demo.launch()

* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.




### Working UI with tool calling and sources

Images is next 

In [22]:
from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent
# from langchain_openai import ChatOpenAI
from gradio import ChatMessage
import gradio as gr
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder


lightModel = init_chat_model(
    model="ollama:llama3.1",       
    temperature=0.2, # lower = more deterministic
    streaming = True  
)

heavyModel = init_chat_model(
    model="ollama:gpt-oss:20b",       
    temperature=0.2  # lower = more deterministic
)

tools = TOOLS


from langchain.tools.render import render_text_description

# 1) Render tool descriptions and names
tool_desc = render_text_description(TOOLS)         # Render the tool name and description in plain text.
tool_names = ", ".join([t.name for t in TOOLS])    # exact callable names

# 2) Build the prompt and PARTIAL the missing variables
prompt = ChatPromptTemplate.from_messages([
    ("system",
     """You are a helpful assistant. 
You can use the following tools if they are useful:
{tools}

Always call tools by their exact name from: {tool_names}.
If no tool is needed, just answer directly.
"""),
    MessagesPlaceholder("chat_history", optional=True),
    ("human", "{input}"),
    MessagesPlaceholder("agent_scratchpad"),
]).partial(tools=tool_desc, tool_names=tool_names)

agent = create_tool_calling_agent(
    llm=heavyModel,
    tools=tools,
    prompt=prompt,
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,        # helpful while wiring things up
).with_config({"run_name": "Agent"})


# OLD UI

In [18]:
def _format_sources_from_context_obs(obs: dict) -> str:
    # obs is `context` tool return;  it is a dict.
    chunks = obs.get("chunks", []) if isinstance(obs, dict) else []
    if not chunks:
        return ""
    lines = []
    for ch in chunks:
        file_id = ch.get("file_id", "unknown")
        path = ch.get("path", "")
        cites = ch.get("citations", [])
        cite_str = ", ".join(cites) if cites else "—"
        lines.append(f"- `{file_id}` · {path} — {cite_str}")
    return "**Sources**\n" + "\n".join(lines)


def _maybe_render_image_from_obs(obs) -> ChatMessage | None:
    """
    Accepts a tool observation dict and returns a ChatMessage with an image
    if it finds a usable image path/url. Otherwise returns None.
    """
    if not isinstance(obs, dict):
        return None

    # Common keys your chain/logs might produce
    path = (
        obs.get("image_path")
        or obs.get("path")
        or obs.get("file_path")
        or obs.get("local_path")
        or None
    )
    url = obs.get("image_url") or obs.get("url")

    # Prefer local path if available; Gradio will serve files inside the app folder.
    media = path or url
    if not media:
        return None

    # Option A: return a component (nice rendering)
    return ChatMessage(role="assistant", content=gr.Image(value=media))

    # Option B (alternative): return a file dict that Gradio renders as an image
    # return ChatMessage(role="assistant", content={"path": media})

# --- Async streaming handler for Gradio Chatbot(type="messages") ---
async def interact_with_langchain_agent(user_text, history):
    # history is a list[ChatMessage]
    history.append(ChatMessage(role="user", content=user_text))
    yield history

    # Stream agent steps & final output
    async for chunk in agent_executor.astream({"input": user_text}):
        if "steps" in chunk:
            for step in chunk["steps"]:
                history.append(ChatMessage(
                    role="assistant",
                    content=step.action.log,
                    metadata={"title": f"🛠️ Used tool {step.action.tool}"}
                ))


                # 2) If we have an observation/result for this step, show it.
                #    LangChain includes it on the same step object in many runners;
                #    otherwise it may appear on a subsequent step — this handles both.
                obs = getattr(step, "observation", None)
                if obs is not None:

                    # NEW: try rendering an image if present
                    img_msg = _maybe_render_image_from_obs(obs)
                    if img_msg is not None:
                        history.append(img_msg)
                        yield history
                    # If it's the `context` tool, render sources from its dict payload
                    if step.action.tool == "context":
                        sources_md = _format_sources_from_context_obs(obs)
                        if sources_md:
                            history.append(ChatMessage(role="assistant", content=sources_md))
                            yield history
                    else:
                        # For other tools you can also render their observations if you want
                        pass


                yield history
        if "output" in chunk:
            history.append(ChatMessage(role="assistant", content=chunk["output"]))
            yield history


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Beacon - Offline Knolwedge Agent(Village First Aid Demo)")

    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(None, "https://image.pngaaa.com/10/5148010-middle.png"),
        height=400,
    )
    textbox = gr.Textbox(lines=1, label="Chat Message", placeholder="Ask something…")
    # Clear the textbox after submit so it feels chatty
    def _clear():
        return gr.update(value="")

    # Submit: wire the *actual* components you created
    textbox.submit(
        interact_with_langchain_agent,
        inputs=[textbox, chatbot],     # (prompt, messages)
        outputs=[chatbot]              # updates the Chatbot
    ).then(
        _clear, None, [textbox]
    )

demo.queue().launch()   # queue() is recommended for async callbacks


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




# Curr UI

In [23]:
def _format_sources_md(obs: dict, tool_name: str) -> str:
    """
    Returns a single Markdown block with sources/citations for either:
      - getImage-style obs: { citations: [{title,id,url,license}] }
      - context-style obs: { chunks: [{file_id,path,url,license,citations:[...]}] }
    """
    if not isinstance(obs, dict):
        return ""

    lines = []

    # A) Direct citations (e.g., from getImage)
    cites = obs.get("citations", [])
    if isinstance(cites, (list, tuple)):
        for c in cites:
            if not isinstance(c, dict):
                continue
            title = c.get("title") or c.get("id") or "Source"
            url = c.get("url")
            lic = c.get("license") or "—"
            if url:
                lines.append(f"- [{title}]({url}) · {lic}")
            else:
                lines.append(f"- {title} · {lic}")

    # B) Context chunks (if this step came from the 'context' tool)
    if tool_name == "context":
        chunks = obs.get("chunks", [])
        if isinstance(chunks, (list, tuple)):
            for ch in chunks:
                if not isinstance(ch, dict):
                    continue
                file_id = ch.get("file_id", "")
                path = ch.get("path", "")
                url = ch.get("url", "")
                lic = ch.get("license", "") or "—"
                title = file_id or path or "Source"
                meta_tail = f" · {path}" if path and title != path else ""
                if url:
                    lines.append(f"- [{title}]({url}) · {lic}{meta_tail}")
                else:
                    lines.append(f"- {title} · {lic}{meta_tail}")

                # chunk-level citations (optional)
                ch_cites = ch.get("citations", [])
                if isinstance(ch_cites, (list, tuple)):
                    for c in ch_cites:
                        if not isinstance(c, dict):
                            continue
                        t2 = c.get("title") or c.get("id") or "Source"
                        u2 = c.get("url", "")
                        l2 = c.get("license", "") or "—"
                        if u2:
                            lines.append(f"- [{t2}]({u2}) · {l2}")
                        else:
                            lines.append(f"- {t2} · {l2}")

    if not lines:
        return ""
    return "**Sources**\n" + "\n".join(lines)




# --- Async streaming handler for Gradio Chatbot(type="messages") ---
async def interact_with_langchain_agent(user_text, history):
    # history is a list[ChatMessage]
    history.append(ChatMessage(role="user", content=user_text))
    yield history

    # Stream agent steps & final output
    async for chunk in agent_executor.astream({"input": user_text}):
        if "steps" in chunk:
            for step in chunk["steps"]:
                history.append(ChatMessage(
                    role="assistant",
                    content=step.action.log,
                    metadata={"title": f"🛠️ Used tool {step.action.tool}"}
                ))


                obs = getattr(step, "observation", None)
                if obs is not None:
                    # Try rendering an image if present (unchanged)
                    img_msg = _maybe_render_image_from_obs(obs)
                    if img_msg is not None:
                        history.append(img_msg)
                        yield history

                    # NEW: one merged sources renderer for both getImage + context
                    sources_md = _format_sources_md(obs, step.action.tool)
                    if sources_md:
                        history.append(ChatMessage(role="assistant", content=sources_md))
                        yield history

                yield history
                #     else:
                #         # For other tools you can also render their observations if you want
                #         pass


                # yield history
        if "output" in chunk:
            history.append(ChatMessage(role="assistant", content=chunk["output"]))
            yield history


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Beacon - Offline Knolwedge Agent(Village First Aid Demo)")

    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(None, "https://image.pngaaa.com/10/5148010-middle.png"),
        height=400,
    )
    textbox = gr.Textbox(lines=1, label="Chat Message", placeholder="Ask something…")
    # Clear the textbox after submit so it feels chatty
    def _clear():
        return gr.update(value="")

    # Submit: wire the *actual* components you created
    textbox.submit(
        interact_with_langchain_agent,
        inputs=[textbox, chatbot],     # (prompt, messages)
        outputs=[chatbot]              # updates the Chatbot
    ).then(
        _clear, None, [textbox]
    )

demo.queue().launch()   # queue() is recommended for async callbacks


* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.






[1m> Entering new Agent chain...[0m
[32;1m[1;3mIf you’re experiencing heavy bleeding, it’s important to seek medical help right away. Call emergency services (for example, 911 in the U.S.) or go to the nearest emergency department. If you’re unable to call, ask someone nearby to help you get urgent care. Stay calm, keep the area clean, and avoid any activities that could worsen the bleeding until you can be evaluated by a healthcare professional.[0m

[1m> Finished chain.[0m


[1m> Entering new Agent chain...[0m
[32;1m[1;3m
Invoking: `context` with `{'k': 4, 'locale': 'en', 'query': 'bleeding first aid', 'topic_id': None}`


[0m[36;1m[1;3m{'query': 'bleeding first aid', 'k': 4, 'filters': {}, 'context_block': '### Retrieved Context (use only what is relevant)\n[1] bleed-control · guide-bleed-overview · hi_en · core/bleed-control/hi_en/bleeding_control_overview.md\n# Severe Bleeding Control\nSevere bleeding can quickly become life-threatening if not controlled.  \nApply f