### Introduction


This notebook will create the context() tool for the llm agent. The context tool should return similar chunks based from querry using the faiss vector store avaible in the knowledge pack. It should also format the chunks and add them to chat history. 

In [None]:
# === 0) Imports & manifest ===
from pathlib import Path
import yaml, json
from typing import Optional, List, Dict
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.tools import tool
from langchain.schema import Document

# If you want the prebuilt ReAct agent:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

# If you prefer to also expose tools directly on the LLM:
from langchain.chat_models import init_chat_model

# --- Set your pack root + manifest ---
ROOT = Path("/Users/ktejwani/Personal CS Projects/Summer 2025/Offline AI Kiosk/Offline-AI-Kiosk/first_aid_pack_demo_v2")
MANIFEST = ROOT / "manifest.yaml"

with open(MANIFEST, "r", encoding="utf-8") as f:
    manifest = yaml.safe_load(f)

# --- Resolve FAISS paths from manifest ---
faiss_dir = ROOT / manifest["precomputed_indices"]["text"]["faiss"]["dir"]

# --- Create embeddings *matching the store* ---
embed_model_name = manifest["embedding_config"]["text"]["model"]     # e.g., "granite-embedding:30m"
emb = OllamaEmbeddings(model=embed_model_name)

# --- Load FAISS store + retriever ---
vs = FAISS.load_local(str(faiss_dir), emb, allow_dangerous_deserialization=True)
retriever = vs.as_retriever(search_kwargs={"k": 4})  # default k; tool will override if provided

# === 1) Helpers ===
def format_chunk(doc: Document, max_chars: int = 400) -> Dict:
    """Return a dict with compact text + key metadata for prompting & audit."""
    txt = doc.page_content.strip()
    if len(txt) > max_chars:
        txt = txt[:max_chars].rstrip() + " …"
    m = doc.metadata
    return {
        "id": m.get("chunk_id"),
        "topic_id": m.get("topic_id"),
        "file_id": m.get("file_id"),
        "locale": m.get("locale"),
        "path": m.get("path"),
        "citations": [c.get("title", "") for c in m.get("citations", [])],
        "text": txt
    }

def format_context_block(chunks: List[Dict]) -> str:
    """Human/LLM-friendly context block the agent can drop into its reasoning."""
    lines = []
    lines.append("### Retrieved Context (use only what is relevant)")
    for i, c in enumerate(chunks, 1):
        cite_str = "; ".join([t for t in c["citations"] if t]) or "—"
        head = f"[{i}] {c['topic_id']} · {c['file_id']} · {c['locale']} · {c['path']}"
        lines.append(head)
        lines.append(c["text"])
        lines.append(f"Source(s): {cite_str}")
        lines.append("")  # blank line
    return "\n".join(lines).strip()

# === 2) The @tool: context() ===
@tool
def context(
    query: str,
    k: int = 4,
    topic_id: Optional[str] = None,
    locale: Optional[str] = None
) -> dict:
    """
    Retrieve up to k relevant knowledge-pack chunks for 'query' and return a formatted
    context block + structured per-chunk data for citations. You must use this tool for any prompt that is 
    important to wellbeing or safety of user. If you call this tool and the chunks are relevent, 
    citations must go in the answer.

    Args:
        query: Natural language question or keywords.
        k: Top-k chunks to return (default 4).
        topic_id: Optional manifest topic filter (e.g., 'bleed-control').
        locale: Optional locale filter (e.g., 'hi_en' or 'en').

    Returns:
        {
          "query": str,
          "k": int,
          "filters": {"topic_id":..., "locale":...},
          "context_block": str,     # pasteable into prompts
          "chunks": [ {id, topic_id, file_id, path, locale, citations[], text}, ... ]
        }
    """
    # Build a metadata filter if provided
    _filter = {}
    # if topic_id:
    #     _filter["topic_id"] = topic_id
    # if locale:
    #     _filter["locale"] = locale

    # Run retrieval (override k)
    local_ret = vs.as_retriever(search_kwargs={"k": k})
    hits: List[Document] = local_ret.invoke(query) if not _filter else local_ret.invoke(query, filter=_filter)

    formatted = [format_chunk(d) for d in hits]
    ctx_block = format_context_block(formatted)
    return {
        "query": query,
        "k": k,
        "filters": _filter,
        "context_block": ctx_block,
        "chunks": formatted
    }

# Keep a Tools list for whichever orchestration you choose:
TOOLS = [context]


### Testing Tool Requests

In [11]:
from langchain.chat_models import init_chat_model

llm = init_chat_model(
    model="ollama:llama3.1",       
    temperature=0.2  # lower = more deterministic
)

llm_with_tools = llm.bind_tools(TOOLS) #llm_with_tools is a new wrapped llm
query = "What do if bleeding?"
llm_with_tools.invoke(query) 


AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-27T02:32:00.357152Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2763482792, 'load_duration': 49036042, 'prompt_eval_count': 376, 'prompt_eval_duration': 1929492833, 'eval_count': 32, 'eval_duration': 783962042, 'model_name': 'llama3.1'}, id='run--9797052b-16f8-412a-93cd-0a67bef6843d-0', tool_calls=[{'name': 'context', 'args': {'k': 4, 'locale': 'en', 'query': 'What to do if bleeding'}, 'id': '61c9642b-7b2c-4913-b830-ce36572722bf', 'type': 'tool_call'}], usage_metadata={'input_tokens': 376, 'output_tokens': 32, 'total_tokens': 408})

### Full Agent Sim

In [12]:
from langchain_core.messages import HumanMessage
#message classes on lang chain inlcude human massage ,ai message, system message, and tool message
query = "What do if bleeding? What to do if snakebite?"

messages = [HumanMessage(query)]

ai_msg = llm_with_tools.invoke(messages) #llm_with_tools looks at history(currently only 1 human message) and then builds prompt
print("AI MESSAEGE CALLS")
print(ai_msg)
print("JUST AI TOOL CALLS")
print(ai_msg.tool_calls) 
#Now we add the bots message to the chat history
messages.append(ai_msg)

AI MESSAEGE CALLS
content='' additional_kwargs={} response_metadata={'model': 'llama3.1', 'created_at': '2025-08-27T02:32:20.127773Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1158136666, 'load_duration': 48535041, 'prompt_eval_count': 383, 'prompt_eval_duration': 330862333, 'eval_count': 32, 'eval_duration': 777265875, 'model_name': 'llama3.1'} id='run--9e239c07-1c65-4209-8220-c82324adf4e4-0' tool_calls=[{'name': 'context', 'args': {'k': '4', 'locale': 'en', 'query': 'bleeding, snakebite'}, 'id': '97e4cf2c-0054-4925-b2f6-0af4eb412c0d', 'type': 'tool_call'}] usage_metadata={'input_tokens': 383, 'output_tokens': 32, 'total_tokens': 415}
JUST AI TOOL CALLS
[{'name': 'context', 'args': {'k': '4', 'locale': 'en', 'query': 'bleeding, snakebite'}, 'id': '97e4cf2c-0054-4925-b2f6-0af4eb412c0d', 'type': 'tool_call'}]


In [13]:
for tool_call in ai_msg.tool_calls: #Actually running all the tool calls ai requested in last cell
    tools_dict = {"context": context}
    selected_tool = tools_dict[tool_call["name"].lower()]
    tool_msg = selected_tool.invoke(tool_call)
    messages.append(tool_msg) #Add the tool message to chat history

messages




[HumanMessage(content='What do if bleeding? What to do if snakebite?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-27T02:32:20.127773Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1158136666, 'load_duration': 48535041, 'prompt_eval_count': 383, 'prompt_eval_duration': 330862333, 'eval_count': 32, 'eval_duration': 777265875, 'model_name': 'llama3.1'}, id='run--9e239c07-1c65-4209-8220-c82324adf4e4-0', tool_calls=[{'name': 'context', 'args': {'k': '4', 'locale': 'en', 'query': 'bleeding, snakebite'}, 'id': '97e4cf2c-0054-4925-b2f6-0af4eb412c0d', 'type': 'tool_call'}], usage_metadata={'input_tokens': 383, 'output_tokens': 32, 'total_tokens': 415}),
 ToolMessage(content='{"query": "bleeding, snakebite", "k": 4, "filters": {}, "context_block": "### Retrieved Context (use only what is relevant)\\n[1] snakebite · guide-snakebite-donts · hi_en · core/snakebite/hi_en/snakebite

In [14]:
ans = llm_with_tools.invoke(messages)
ans

AIMessage(content='If you are bleeding:\n\n1. Apply firm direct pressure with a clean cloth or sterile gauze to the injured area.\n2. If bleeding soaks through, add more cloths without removing the first.\n3. Elevate the injured limb if possible while maintaining pressure.\n4. Use a tourniquet if direct pressure fails and bleeding is from an arm or leg.\n\nIf you have been bitten by a snake:\n\n1. Keep the patient calm and lying still to slow the spread of venom.\n2. Remove rings, bangles, or tight clothing from the bitten limb.\n3. Immobilize the limb with a splint and keep it at heart level.\n4. Do not cut, suck, burn, or apply ice to the bite.\n5. Do not apply tight tourniquets; they can cause more harm.\n6. Transport the patient quickly and safely to the nearest hospital with antivenom.\n\nRemember to always seek medical attention if you are unsure of how to treat a bleeding wound or snakebite.', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-2

In [15]:
ans.content

'If you are bleeding:\n\n1. Apply firm direct pressure with a clean cloth or sterile gauze to the injured area.\n2. If bleeding soaks through, add more cloths without removing the first.\n3. Elevate the injured limb if possible while maintaining pressure.\n4. Use a tourniquet if direct pressure fails and bleeding is from an arm or leg.\n\nIf you have been bitten by a snake:\n\n1. Keep the patient calm and lying still to slow the spread of venom.\n2. Remove rings, bangles, or tight clothing from the bitten limb.\n3. Immobilize the limb with a splint and keep it at heart level.\n4. Do not cut, suck, burn, or apply ice to the bite.\n5. Do not apply tight tourniquets; they can cause more harm.\n6. Transport the patient quickly and safely to the nearest hospital with antivenom.\n\nRemember to always seek medical attention if you are unsure of how to treat a bleeding wound or snakebite.'

### Test

In [3]:
# === 3A) Agent via LangGraph ===
memory = MemorySaver()

# Pick a local or remote model; examples:
# model_name = "ollama/llama3.1"
# model_name = "ollama/gpt-oss-20b"
# or an OpenAI model if you have keys: "gpt-4o"
model_name = "ollama:llama3.1"
llm = init_chat_model(model_name, temperature=0.2)

# Create agent that knows about the context() tool
graph = create_react_agent(llm, tools=TOOLS, checkpointer=memory)

# Single-turn example (thread_id ties memory across turns)
q = "What to do for bleeding?"
result = graph.invoke(
    {"messages": [("user", q)]},
    config={"configurable": {"thread_id": "kp-demo-thread"}}
)
print(result["messages"][-1].content)


Based on the context and parameters provided, it seems like you're asking about how to stop bleeding. Here are some general steps that may help:

1.  **Apply pressure**: Use a clean cloth or bandage to apply firm pressure directly over the wound.
2.  **Elevate the injured area**: If possible, elevate the injured area above heart level to reduce blood flow to the area.
3.  **Use a tourniquet (if necessary)**: In severe cases of bleeding, a tourniquet may be used to stop the bleeding. However, this should only be done by trained medical professionals.
4.  **Apply a cold compress**: Applying a cold compress or an ice pack wrapped in a cloth can help constrict blood vessels and reduce bleeding.

Please note that these are general steps and not specific medical advice. If you're experiencing severe bleeding or if the bleeding doesn't stop after applying pressure, it's essential to seek immediate medical attention.

If you have any further questions or need more specific guidance, please let

In [4]:
msg = llm_with_tools.invoke("tourniquet steps")

print(msg.tool_calls)
# might be: [{'name': 'context', 'args': {'query': 'tourniquet steps', 'k': 4}, ...}]


NameError: name 'llm_with_tools' is not defined