In [None]:
# Cell 1 - init Groq client
from groq import Groq
import os, json, re
from dotenv import load_dotenv
load_dotenv()



# set your key here or make sure it's in your environment
os.environ["GROQ_API_KEY"] = "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"

client = Groq()


In [24]:
os.environ["GROQ_API_KEY"]


'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'

In [14]:
# Cell 2 - safe calculator using ast (no raw eval)
import ast
import operator as op

# allowed operators mapping
_ALLOWED_OPS = {
    ast.Add: op.add,
    ast.Sub: op.sub,
    ast.Mult: op.mul,
    ast.Div: op.truediv,
    ast.Pow: op.pow,
    ast.USub: op.neg,
    ast.UAdd: op.pos,
    ast.Mod: op.mod,
    ast.FloorDiv: op.floordiv,
}

def _eval_ast(node):
    if isinstance(node, ast.Num):           # <number>
        return node.n
    if isinstance(node, ast.Constant):      # Python 3.8+
        return node.value
    if isinstance(node, ast.BinOp):         # <left> <op> <right>
        left = _eval_ast(node.left)
        right = _eval_ast(node.right)
        op_type = type(node.op)
        if op_type in _ALLOWED_OPS:
            return _ALLOWED_OPS[op_type](left, right)
    if isinstance(node, ast.UnaryOp):       # - <operand> or + <operand>
        operand = _eval_ast(node.operand)
        op_type = type(node.op)
        if op_type in _ALLOWED_OPS:
            return _ALLOWED_OPS[op_type](operand)
    raise ValueError("Unsupported expression")

def safe_calculate(expr: str):
    """
    Safely evaluate a math expression containing + - * / ** % // and parentheses.
    Raises ValueError for invalid constructs.
    """
    try:
        parsed = ast.parse(expr, mode='eval')
        return _eval_ast(parsed.body)
    except Exception as e:
        raise ValueError(f"Invalid math expression: {e}")


In [15]:
# Cell 3 - search tool using retriever if available, else a simple keyword fallback
import numpy as np

# If you have a retriever from Chroma earlier, it will be used.
# If not, define a small docs list as fallback (replace with your data).
try:
    retriever  # noqa: F821
    _HAS_RETRIEVER = True
except NameError:
    _HAS_RETRIEVER = False
    docs = [
        "Retrieval Augmented Generation (RAG) combines external knowledge with LLM generation.",
        "Vector databases store embeddings that represent meaning instead of exact words.",
        "Chunking splits large documents into smaller pieces to improve retrieval accuracy.",
        "Embeddings are numerical representations of text that capture semantic meaning."
    ]

def search_tool(query: str, top_k: int = 3):
    """
    Return a short summary of top_k results.
    """
    if _HAS_RETRIEVER:
        results = retriever.get_relevant_documents(query)  # uses your Chroma retriever
        snippets = [r.page_content for r in results[:top_k]]
    else:
        # simple keyword score fallback
        query_lower = query.lower().split()
        def score(text):
            t = text.lower()
            return sum(t.count(w) for w in query_lower)
        ranked = sorted(docs, key=score, reverse=True)
        snippets = ranked[:top_k]
    # combine snippets into a useful context string
    return "\n---\n".join(snippets)


In [17]:
# Cell 4 - function to ask LLM which action to take
def ask_llm_for_action(question: str, tool_results: str = ""):
    """
    Ask Groq to pick either:
      - {"action": "calculate", "input": "<expression>"}
      - {"action": "search", "input": "<query>"}
      - {"action": "final_answer", "output": "<final text answer>"}
    The model must return JSON only. We provide available tools and a short example.
    tool_results can include previous tool outputs to give context.
    """
    system_prompt = (
        "You are an assistant that ONLY returns JSON indicating one of three choices:\n"
        "1) call the calculator: {\"action\": \"calculate\", \"input\": \"2+2\"}\n"
        "2) call the search tool: {\"action\": \"search\", \"input\": \"query text\"}\n"
        "3) return the final answer: {\"action\": \"final_answer\", \"output\": \"Final text\"}\n\n"
        "Rules:\n"
        "- Respond ONLY with valid JSON (no surrounding text).\n"
        "- Use calculator for arithmetic expressions (like 12*(5+3)).\n"
        "- Use search when you need factual context from the document store.\n"
        "- If you already have enough info, return final_answer.\n"
        "- current tool outputs (if any) are provided for reference.\n"
    )
    if tool_results:
        user_msg = f"Question: {question}\n\nTool results:\n{tool_results}\n\nReturn the JSON now."
    else:
        user_msg = f"Question: {question}\n\nReturn the JSON now."

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_msg}
        ],
        # you can tune temperature/response length if needed:
        max_tokens=300,
        temperature=0.0
    )
    text = response.choices[0].message.content.strip()
    # try to extract JSON substring (in case model wraps text)
    try:
        return json.loads(text)
    except Exception:
        # attempt to pull JSON-like substring
        m = re.search(r"(\{.*\})", text, re.S)
        if m:
            try:
                return json.loads(m.group(1))
            except Exception as e:
                raise ValueError(f"Failed to parse JSON from model output: {e}\nRaw: {text}")
        raise ValueError(f"Model did not return valid JSON. Raw output:\n{text}")


In [18]:
# Cell 5 - core loop that runs the agent until final_answer or max steps
def run_agent(question: str, max_steps: int = 5):
    tool_results = ""   # accumulate outputs from tools to give back to the LLM
    for step in range(max_steps):
        print(f"\n== Step {step+1} asking LLM ==")
        decision = ask_llm_for_action(question, tool_results)
        print("LLM decision:", decision)

        action = decision.get("action")
        if action == "calculate":
            expr = decision.get("input", "")
            try:
                calc_res = safe_calculate(expr)
                tool_out = f"Calculator result for `{expr}`: {calc_res}"
            except Exception as e:
                tool_out = f"Calculator error: {e}"
            print("Tool output:", tool_out)
            # append to tool_results and continue
            tool_results += f"\n[Calculator output]: {tool_out}"
            continue

        elif action == "search":
            q = decision.get("input", "")
            if not q:
                tool_out = "Search tool received empty query."
            else:
                tool_out = search_tool(q)
            print("Tool output (search snippets):\n", tool_out)
            tool_results += f"\n[Search output]: {tool_out}"
            continue

        elif action == "final_answer":
            out = decision.get("output", "")
            # if output empty, treat as error
            if not out:
                raise ValueError("final_answer action returned empty output.")
            return out

        else:
            raise ValueError(f"Unknown action returned by LLM: {action}")

    raise RuntimeError("Max steps reached without final answer.")


In [25]:
# Cell 6 - examples

print("Example 1: calculator")
res1 = run_agent("Compute 12 * (5 + 3) and give the numeric answer.")
print("\nFINAL ANSWER:\n", res1)

print("\n\nExample 2: search + summarize")
res2 = run_agent("Find information about chunking in the docs and summarize the key point in one sentence.")
print("\nFINAL ANSWER:\n", res2)


Example 1: calculator

== Step 1 asking LLM ==
LLM decision: {'action': 'calculate', 'input': '12 * (5 + 3)'}
Tool output: Calculator result for `12 * (5 + 3)`: 96

== Step 2 asking LLM ==
LLM decision: {'action': 'final_answer', 'output': '96'}

FINAL ANSWER:
 96


Example 2: search + summarize

== Step 1 asking LLM ==
LLM decision: {'action': 'search', 'input': 'chunking'}
Tool output (search snippets):
 Chunking splits large documents into smaller pieces to improve retrieval accuracy.
---
Retrieval Augmented Generation (RAG) combines external knowledge with LLM generation.
---
Vector databases store embeddings that represent meaning instead of exact words.

== Step 2 asking LLM ==
LLM decision: {'action': 'final_answer', 'output': 'Chunking splits large documents into smaller pieces to improve retrieval accuracy.'}

FINAL ANSWER:
 Chunking splits large documents into smaller pieces to improve retrieval accuracy.
