In [1]:
import sys
from pathlib import Path
import os

# Make the directory two levels above the current working directory importable,
# so that `core.*` resolves (assumes the notebook is started from a folder two
# levels below the directory containing the `core` package -- TODO confirm).
# The membership check keeps this cell idempotent: re-running it no longer
# appends the same entry to sys.path again and again.
project_root = str(Path.cwd().parent.parent.resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

from core.config import load_env_vars

# Project helper; presumably loads API keys / settings into the environment
# before any model is constructed -- verify against core.config.
load_env_vars()

## Making sample data

In [None]:
import csv

# Name of the CSV file, written relative to the current working directory.
filename = "sales_sample.csv"

# Column order shared by every record in `rows`.
headers = [
    "order_id", "customer", "region", "product",
    "category", "order_date", "quantity", "unit_price",
]

# Ten sample orders; each inner list follows the `headers` order.
rows = [
    [1, "Alice", "North", "Laptop", "Electronics", "2025-01-02", 1, 1200.00],
    [2, "Bob", "West", "Mouse", "Electronics", "2025-01-05", 2, 25.00],
    [3, "Charlie", "East", "Desk", "Furniture", "2025-01-06", 1, 300.00],
    [4, "Alice", "North", "Monitor", "Electronics", "2025-01-10", 2, 200.00],
    [5, "Diana", "South", "Chair", "Furniture", "2025-02-01", 4, 150.00],
    [6, "Evan", "West", "Laptop", "Electronics", "2025-02-11", 1, 1300.00],
    [7, "Frank", "East", "Mouse", "Electronics", "2025-03-02", 5, 20.00],
    [8, "Grace", "South", "Desk", "Furniture", "2025-03-15", 2, 280.00],
    [9, "Helen", "North", "Chair", "Furniture", "2025-03-20", 1, 160.00],
    [10, "Ian", "West", "Monitor", "Electronics", "2025-03-25", 3, 210.00],
]

# newline="" hands line-ending control to the csv module.
# The header and the data rows are emitted in a single writerows() call.
with open(filename, mode="w", newline="", encoding="utf-8") as csv_file:
    csv.writer(csv_file).writerows([headers, *rows])

print(f"Created {filename} with {len(rows)} rows.")


## Running the coding agent

In [5]:
import io
import contextlib

import pandas as pd
from langchain_core.tools import tool
from core.model import make_text_generation_model_open_router
from langgraph.prebuilt import create_react_agent


# -------- Tool: run arbitrary Python for CSV analysis --------
@tool
def python_runner(code: str) -> str:
    """
    Execute Python code that analyzes 'sales_sample.csv' and prints an answer.

    The LLM should:
    - load 'sales_sample.csv' with pandas (pd.read_csv)
    - compute whatever is needed
    - print ONLY the final answer (no extra labels or explanations)

    This function captures stdout and returns it as a string.
    NOTE: This is unsafe in production (it runs arbitrary code) and is only
    meant for local testing of code-writing agents.
    """
    # NOTE: the docstring above is left untouched on purpose -- `@tool` uses it
    # as the tool description that is shown to the model.

    # One dict serves as BOTH globals and locals of the executed script.
    # Passing a separate locals dict makes exec() run the code as if it were a
    # class body: top-level names (e.g. `df`, imports, helper functions) would
    # land in the locals dict and be invisible to any function or lambda the
    # generated script defines, producing spurious NameErrors.
    namespace = {
        "__name__": "__main__",  # lets `if __name__ == "__main__":` guards run
        "pd": pd,  # make pandas available as `pd`
    }

    # Echo the generated script to the notebook output for inspection. This
    # happens before stdout is redirected, so it is not part of the result.
    print(f"\ngenerated code:\n{code}")

    buf = io.StringIO()
    try:
        with contextlib.redirect_stdout(buf):
            # SECURITY: executes arbitrary model-written code with full
            # interpreter privileges; local experimentation only.
            exec(code, namespace)
    except Exception as e:
        # Report the failure as text so the agent can read it and retry.
        return f"ERROR: {e.__class__.__name__}: {e}"

    return buf.getvalue().strip()


# -------- System prompt for the agent --------
# Passed as `prompt=` when the agent is built below. It tells the model to
# answer questions about 'sales_sample.csv' by writing a pandas script, running
# it through the `python_runner` tool, and then explaining the printed result.
# The text is sent to the model verbatim (after .strip() removes the leading and
# trailing newlines), so wording changes here change agent behavior.
SYSTEM_PROMPT = """
You are a coding assistant that answers questions about a local CSV file
called 'sales_sample.csv'.

When needed, you MUST:
- write a short Python script that uses pandas (available as `pd`)
  to load 'sales_sample.csv' from the current directory
- compute the answer to the user's question
- call the `python_runner` tool, passing that code as a string

The Python code you send to `python_runner` must:
- be self-contained
- assume the file is named exactly 'sales_sample.csv'
- print ONLY the final answer (no extra text, labels, or explanation)

After the tool finishes, read its textual output and:
- explain in natural language what the answer is
- include the numeric result from the tool in your explanation.
""".strip()


# -------- Chat model served through OpenRouter --------
# Built by the project helper `make_text_generation_model_open_router`
# (presumably returns a ChatOpenAI-compatible model -- confirm in core.model).
# Free vision-language models are listed at:
#   https://openrouter.ai/models?fmt=cards&input_modalities=image&max_price=0
# Alternative model ids (currently not selected):
#   'z-ai/glm-4.5-air:free'
#   'google/gemini-2.0-flash-exp:free'
llm = make_text_generation_model_open_router(
    model_id='nvidia/nemotron-nano-12b-v2-vl:free',
)

# -------- Build the LangGraph ReAct-style agent --------
# Wires the model, the single `python_runner` tool and the system prompt
# together. NOTE: LangGraph 1.0 reports `create_react_agent` as deprecated in
# favour of `langchain.agents.create_agent` (see the warning in this cell's
# output); migrate before LangGraph 2.0 removes it.
agent = create_react_agent(llm, tools=[python_runner], prompt=SYSTEM_PROMPT)


# -------- Simple CLI demo --------
if __name__ == "__main__":
    # Natural-language question used to exercise the agent end to end.
    user_query = (
        "Using sales_sample.csv, which region has the highest total sales "
        "(where sales = quantity * unit_price), and what is that total?"
    )

    # The agent takes a conversation state with a single user turn.
    inputs = {"messages": [{"role": "user", "content": user_query}]}

    result_state = agent.invoke(inputs)
    messages = result_state["messages"]
    final_message = messages[-1]  # last message holds the agent's final reply

    print("\n=== Agent answer ===")
    # The final message may be a plain dict or a LangChain message object.
    if isinstance(final_message, dict):
        content = final_message["content"]
    else:
        content = final_message.content
    print(content)


C:\Users\skuma\AppData\Local\Temp\ipykernel_75368\1594787048.py:72: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent = create_react_agent(



generated code:
import pandas as pd

df = pd.read_csv('sales_sample.csv')

df['total_sales'] = df['quantity'] * df['unit_price']
region_sales = df.groupby('region')['total_sales'].sum().reset_index()
max_sales = region_sales.loc[region_sales['total_sales'].idxmax()]
print(f"{max_sales['region']} {max_sales['total_sales']}")

=== Agent answer ===
The region with the highest total sales is **West** with a total of **1980.0**.

