In [None]:
# ✅ Environment setup
import os
import re
import sqlite3

import pandas as pd
from dotenv import load_dotenv
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_openai import ChatOpenAI
from sqlalchemy import create_engine

In [None]:
# 🔐 Load variables via python-dotenv, then read the OpenAI key from the
# process environment (the value is None when the variable is not set).
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [7]:
# The CSV location can be overridden through the INVENTORY_CSV_PATH environment
# variable so the notebook is not tied to a single machine; the original
# absolute path is kept as the default so existing runs behave the same.
csv_path = os.getenv(
    "INVENTORY_CSV_PATH",
    '/Users/keshavsaraogi/Desktop/indorama/eureka-data/clean-csv/cleaned_inventory_packaging.csv',
)

# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk.
# NOTE(review): the recorded cell output shows a warning raised at this
# read_csv call - presumably pandas' mixed-dtype DtypeWarning; confirm.
df = pd.read_csv(csv_path, low_memory=False)

  df = pd.read_csv(csv_path)


In [None]:
# Chat model handed to both the CSV agent and the SQL agent.
# NOTE(review): a non-zero temperature means repeated runs of the same question
# may differ - confirm this is intended for data questions.
llm = ChatOpenAI(model="gpt-4o", temperature=0.5)

In [None]:
# Agent that answers questions by running pandas code against the CSV.
# - allow_dangerous_code=True opts in to executing model-generated Python
#   (the recorded trace shows it using the `python_repl_ast` tool), so only run
#   this notebook in an environment you trust.
# - handle_parsing_errors is an AgentExecutor option; it must be supplied via
#   agent_executor_kwargs. Passed as a loose keyword it is swallowed by the
#   factory's unused **kwargs and never takes effect.
csv_agent = create_csv_agent(
    llm,
    csv_path,
    verbose=True,
    allow_dangerous_code=True,
    max_iterations=20,
    max_execution_time=60,
    agent_executor_kwargs={"handle_parsing_errors": True},
)

In [9]:
# Copy the DataFrame into an in-memory SQLite table and wrap the engine so the
# SQL agent can query it. The table is replaced if it already exists, which
# keeps this cell safe to re-run.
engine = create_engine("sqlite:///:memory:")
df.to_sql(
    name="finance_packaging_data",
    con=engine,
    if_exists="replace",
    index=False,
)
db = SQLDatabase(engine)

In [10]:
# 🧠 Create SQL Agent
# The toolkit exposes the wrapped database `db` to the agent as SQL tools.
# handle_parsing_errors is an AgentExecutor option, so it is forwarded through
# agent_executor_kwargs; as a loose keyword it does not reach the executor.
sql_agent = create_sql_agent(
    llm,
    toolkit=SQLDatabaseToolkit(db=db, llm=llm),
    verbose=True,
    max_iterations=20,
    max_execution_time=60,
    agent_executor_kwargs={"handle_parsing_errors": True},
)

In [11]:
# 🧭 Routing function
# Terms that send a question to the SQL agent instead of the CSV agent.
SQL_ROUTE_KEYWORDS = ("sum", "avg", "group", "filter", "division", "unit price")

# A keyword only counts when it is not glued to a preceding letter, so "sum"
# no longer fires inside words such as "consumption" or "assume", while
# inflected forms like "grouped" or "filtering" still match.
_SQL_ROUTE_PATTERN = re.compile(
    r"(?<![a-z])(?:" + "|".join(re.escape(kw) for kw in SQL_ROUTE_KEYWORDS) + r")"
)


def route_query(query: str) -> str:
    """Pick the agent that should handle `query`.

    The query is lower-cased and scanned for any of SQL_ROUTE_KEYWORDS
    appearing at the start of a word.

    Args:
        query: The user's natural-language question.

    Returns:
        "sql" if a routing keyword is found, otherwise "csv".
    """
    return "sql" if _SQL_ROUTE_PATTERN.search(query.lower()) else "csv"

In [12]:
# 🤖 Master agent logic
def master_agent(query: str):
    """Answer `query` with whichever agent `route_query` selects.

    Args:
        query: The user's natural-language question.

    Returns:
        For the "sql" route, the SQL agent's final answer as a string.
        For any other route, the full result returned by `csv_agent.invoke`
        (the recorded run shows a dict with 'input' and 'output' keys).
    """
    if route_query(query) == "sql":
        # `Chain.run` is deprecated in LangChain; `invoke(...)["output"]` is the
        # supported call and yields the same final-answer string.
        return sql_agent.invoke({"input": query})["output"]
    return csv_agent.invoke(query)

In [15]:
# Demo: send a ranking question through the router and display the result.
demo_question = "Which Company ID has the highest total stock? Give me top 3 companies with their answers."
master_agent(demo_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo determine the top 3 Company IDs with the highest total stock, we need to group the data by `Company ID` and sum the `Total Stock (KG)` for each company. Then, we will sort the results in descending order and select the top 3 entries.

Let's perform these operations using pandas.

Thought: I will group the dataframe by `Company ID`, sum the `Total Stock (KG)`, sort the results in descending order, and then select the top 3 companies.
Action: python_repl_ast
Action Input: 
```python
df.groupby('Company ID')['Total Stock (KG)'].sum().sort_values(ascending=False).head(3)
```[0m[36;1m[1;3mCompany ID
PH10    4.141457e+07
NG10    1.810819e+07
EG11    8.994206e+06
Name: Total Stock (KG), dtype: float64[0m[32;1m[1;3mI now know the final answer. 

Final Answer: The top 3 Company IDs with the highest total stock are:
1. PH10 with a total stock of 41,414,570 KG
2. NG10 with a total stock of 18,108,190 KG
3. EG11 with a total sto

{'input': 'Which Company ID has the highest total stock? Give me top 3 companies with their answers.',
 'output': 'The top 3 Company IDs with the highest total stock are:\n1. PH10 with a total stock of 41,414,570 KG\n2. NG10 with a total stock of 18,108,190 KG\n3. EG11 with a total stock of 8,994,206 KG'}