In [None]:
# --- 1. Load Environment ---
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment, then read the key back.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Fail fast: a missing or empty key would only surface later as an opaque
# error from the OpenAI client.
if OPENAI_API_KEY:
    print("✅ API key successfully loaded")
else:
    raise ValueError("❌ OPENAI_API_KEY is not loaded from .env")

: 

In [None]:
import os

import pandas as pd

# Location of the cleaned sales CSV. The default is the original author's
# local path; set the SALES_DATA_PATH environment variable to run the
# notebook on another machine without editing this cell.
DATA_PATH = os.getenv(
    "SALES_DATA_PATH",
    "/Users/keshavsaraogi/Desktop/indorama/eureka-data/clean-csv/cleaned_sales_packaging.csv",
)
df = pd.read_csv(DATA_PATH, parse_dates=["Sales Invoice Date"])

# Enrichment: calendar year and a monthly period, both derived from the
# invoice date, used for grouping in later cells.
df['Year'] = df['Sales Invoice Date'].dt.year
df['Month'] = df['Sales Invoice Date'].dt.to_period("M")

: 

In [None]:
# --- 3. Compute Analytical Summaries ---
def summarize_data(df, n_months=6, n_customers=3, n_correlations=3,
                   corr_min=0.6, corr_max=0.95):
    """Condense the sales frame into the small summaries used in the LLM prompt.

    Args:
        df: DataFrame with 'Month', 'Customer' and 'Invoice Net value'
            columns. Every numeric column takes part in the correlation scan.
        n_months: number of most recent 'Month' groups (by sorted order) to keep.
        n_customers: number of customers to keep, ranked by summed net value.
        n_correlations: maximum number of column pairs to report.
        corr_min: exclusive lower bound on the correlation coefficient.
        corr_max: exclusive upper bound on the correlation coefficient
            (presumably meant to skip near-duplicate columns; kept from the
            original thresholds).

    Returns:
        dict with three entries:
          "monthly_trend": {month: summed net value} for the last n_months,
          "top_customers": {customer: summed net value}, highest first,
          "correlations":  {(column_a, column_b): coefficient}, highest first,
                           each unordered pair listed once.
    """
    value_col = 'Invoice Net value'

    recent_months = (
        df.groupby('Month')[value_col].sum().sort_index().tail(n_months).to_dict()
    )
    top_customers = (
        df.groupby('Customer')[value_col]
        .sum()
        .sort_values(ascending=False)
        .head(n_customers)
        .to_dict()
    )

    # The correlation matrix is symmetric, so stacking it whole reports every
    # relationship twice -- (a, b) and (b, a) -- and lets mirrored duplicates
    # crowd out distinct pairs. Walk only the upper triangle instead, which
    # also skips the diagonal of self-correlations.
    corr_matrix = df.corr(numeric_only=True)
    columns = list(corr_matrix.columns)
    candidates = []
    for i, first in enumerate(columns):
        for j in range(i + 1, len(columns)):
            coefficient = corr_matrix.iat[i, j]
            # NaN coefficients fail both comparisons and are dropped here.
            if corr_min < coefficient < corr_max:
                candidates.append(((first, columns[j]), float(coefficient)))
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    correlation = dict(candidates[:n_correlations])

    return {
        "monthly_trend": recent_months,
        "top_customers": top_customers,
        "correlations": correlation
    }


: 

In [None]:
# --- 4. Define LangChain Tool ---
from langchain.chat_models import ChatOpenAI
from langchain.tools import Tool

# Computed once when this cell runs; the tool function reads it as a global
# on every call.
data_summary = summarize_data(df)

def deep_data_insight(question: str) -> str:
    """Ask GPT-4 to answer `question` using the precomputed sales summary.

    Builds a prompt that embeds the monthly net-value trend, the top
    customers and the detected correlations from `data_summary`, sends it to
    a fresh ChatOpenAI client, and returns the model's reply.
    """
    analyst_llm = ChatOpenAI(model="gpt-4", temperature=0.4)

    # Pull the three summary parts out first so the template below stays readable.
    monthly_trend = data_summary['monthly_trend']
    top_customers = data_summary['top_customers']
    correlations = data_summary['correlations']

    analyst_prompt = f"""
You are a senior financial analyst with access to sales data.

User question: "{question}"

Below is computed insight from the dataset:
- 🗓 Monthly Net Value (Last 6 Months): {monthly_trend}
- 👥 Top 3 Customers by Net Invoice Value: {top_customers}
- 📈 Strong Correlations Detected: {correlations}

Provide a deep, specific, and evidence-based answer. Avoid generic statements.
"""
    return analyst_llm.predict(analyst_prompt)

# Expose deep_data_insight to the agent; the description is the text the
# agent sees when deciding whether to call this tool.
insight_tool = Tool(
    func=deep_data_insight,
    name="DeepSalesInsightTool",
    description=(
        "Uses data trends, top customers, and correlations "
        "to uncover hidden patterns in the dataset."
    ),
)


: 

In [None]:
# --- 5. Create Agent ---
from langchain.agents import initialize_agent

# Reasoning model for the agent itself (temperature 0), separate from the
# client created inside the insight tool.
llm = ChatOpenAI(temperature=0, model="gpt-4")

# Zero-shot ReAct agent whose only tool is the sales-insight tool;
# verbose=True prints the intermediate reasoning steps.
agent = initialize_agent(
    llm=llm,
    tools=[insight_tool],
    verbose=True,
    agent="zero-shot-react-description",
)


: 

In [None]:
# --- 6. Run the Agent with Real Data Insight ---
# Ask the agent an open-ended trend question and show its final answer.
# The question text goes to the model verbatim, so its spelling matters
# ("quarter" was previously misspelled).
response = agent.run("What unexpected trend is occurring in the dataset for each quarter?")
print(response)


: 

In [None]:
# NOTE(review): the summary handed to the tool holds only net invoice values,
# top customers and correlations -- confirm the answer is not presented as
# true profitability.
agent.run(
    "Which company has been the most profitable in the last 6 months? "
    "What is the trend and explain the calculations?"
)

: 

In [None]:
# NOTE(review): no cost figures appear in the summary passed to the tool --
# verify the model's answer against the underlying data.
agent.run(
    "Which company has been able to minimise their costs effectively in the last 6 months? "
    "What is the trend and explain the calculations?"
)

: 

: 