In [1]:
import pandas as pd

df = pd.read_csv("Bakery sales.csv")
print(df.shape)
print(df.columns.tolist())

(234005, 7)
['Unnamed: 0', 'date', 'time', 'ticket_number', 'article', 'Quantity', 'unit_price']


In [None]:
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine

engine = create_engine("sqlite:///bakery.db")
df.to_sql("Bakery sales", engine, index=False)

In [3]:
db = SQLDatabase(engine=engine)
print(db.dialect)
print(db.get_usable_table_names())

sqlite
['Bakery sales']


In [93]:
import getpass
import os

if not os.environ.get("GROQ_API_KEY"):
  os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

from langchain.chat_models import init_chat_model

llm = init_chat_model("openai/gpt-oss-120b", model_provider="groq")

In [48]:
from langchain_community.agent_toolkits import create_sql_agent

agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)

In [49]:
agent_executor.invoke({"input": "which article sold the most?"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{'tool_input': ''}`


[0m[38;5;200m[1;3mBakery sales[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Bakery sales'}`


[0m[33;1m[1;3m
CREATE TABLE "Bakery sales" (
	"Unnamed: 0" BIGINT, 
	date TEXT, 
	time TEXT, 
	ticket_number FLOAT, 
	article TEXT, 
	"Quantity" FLOAT, 
	unit_price TEXT
)

/*
3 rows from Bakery sales table:
Unnamed: 0	date	time	ticket_number	article	Quantity	unit_price
0	2021-01-02	08:38	150040.0	BAGUETTE	1.0	0,90 €
1	2021-01-02	08:38	150040.0	PAIN AU CHOCOLAT	3.0	1,20 €
4	2021-01-02	09:14	150041.0	PAIN AU CHOCOLAT	2.0	1,20 €
*/[0m[32;1m[1;3m
Invoking: `sql_db_query_checker` with `{'query': 'SELECT article, SUM(Quantity) AS total_quantity FROM "Bakery sales" GROUP BY article ORDER BY total_quantity DESC LIMIT 1;'}`


[0m[36;1m[1;3mSELECT article, SUM(Quantity) AS total_quantity FROM "Bakery sales" GROUP BY article ORDER BY total_quantit

{'input': 'which article sold the most?',
 'output': 'The article with the highest total quantity sold is **“TRADITIONAL BAGUETTE”**, with a total of **117,463 units** sold.'}

In [101]:
import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

df = pd.read_csv("Bakery sales.csv")
tool = PythonAstREPLTool(locals={"df": df})
tool.invoke("df['Quantity'].mean()")

np.float64(1.538377385098609)

In [105]:
print(tool.args)

{'query': {'description': 'code snippet to run', 'title': 'Query', 'type': 'string'}}


In [106]:
schema = tool.get_input_schema()
print(schema.schema_json(indent=2))

{
  "description": "Python inputs.",
  "properties": {
    "query": {
      "description": "code snippet to run",
      "title": "Query",
      "type": "string"
    }
  },
  "required": [
    "query"
  ],
  "title": "PythonInputs",
  "type": "object"
}


C:\Users\jayan\AppData\Local\Temp\ipykernel_15760\15192852.py:2: PydanticDeprecatedSince20: The `schema_json` method is deprecated; use `model_json_schema` and json.dumps instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(schema.schema_json(indent=2))


In [97]:
from langchain_groq import ChatGroq
from langchain_core.prompts.chat import ChatPromptTemplate
from langgraph.prebuilt import create_react_agent

In [98]:
system_prompt = ChatPromptTemplate.from_messages([
    ("system", """You may ONLY respond with either (a) a direct final answer OR (b) a tool call. Nothing else.
     You have access to PythonAstREPLTool,use it to answer the user's questions."""),
    ("user", "{messages}")
])

In [99]:
pandas_agent = create_react_agent(model=llm,tools=[tool],prompt=system_prompt)

In [100]:
pandas_agent.invoke({"messages": "Use the tool if needed.I have a dataframe 'df'. what is the mean of 'unit_price' column? "})

BadRequestError: Error code: 400 - {'error': {'message': "Tool call validation failed: tool call validation failed: parameters for tool python_repl_ast did not match schema: errors: [missing properties: 'query']", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '{"name": "python_repl_ast", "arguments": {\n  "code": "df.head()"\n}}'}}