In [10]:
import os

# Step up to the project root only when we are not already there, so that
# re-running this cell does not keep climbing the directory tree. `src` is the
# package imported below, so its presence in the working directory marks the root.
if not os.path.isdir("src"):
    os.chdir("..")
from langchain import hub
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_openai import ChatOpenAI
from openai import OpenAI
from dotenv import load_dotenv

from src.crud import CRUD
from src.eda import EDAFeatures
from utils import stderr_print

In [11]:

class ChatBot:
    """
    Single-prompt 'RetailMind' consultant backed by the OpenAI chat completions API.

    The bot holds two text summaries (historical sales and predicted sales) and
    embeds both into one prompt, together with the user's question, on every call.
    """

    def __init__(self, past_know: str, pred_know: str):
        """
        :param past_know: text summary of historical sales data
        :param pred_know: text summary of the predicted sales forecast
        """
        # Load variables from a .env file so OPENAI_API_KEY can be read below.
        load_dotenv()
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.past_know: str = past_know
        self.pred_know: str = pred_know

    def set_past_know(self, past_know: str):
        """Replace the historical sales summary used in later prompts."""
        self.past_know = past_know

    def set_pred_know(self, pred_know: str):
        """Replace the predicted sales summary used in later prompts."""
        self.pred_know = pred_know

    def _build_prompt(self, question: str) -> str:
        """Render the full consultant prompt from the stored summaries and the question."""
        past_know = self.past_know
        pred_know = self.pred_know
        return f"""
You are 'RetailMind,' an expert AI business consultant. Your goal is to provide concise, data-driven analysis and strategic advice to retail decision-makers.

You have been provided with two key data summaries:
- **Historical Sales Data:**
{past_know}
- **Predicted Sales Forecast:**
{pred_know}

Based ONLY on the information in these summaries, you must synthesize the data, identify key business insights, and answer the user's question. If the data is insufficient to answer, state that clearly.

Structure your response using the following professional format:

Executive Summary
[Directly and concisely answer the user's question here.]

---

Key Analytical Insights
- **Performance Trend:** Compare a key metric (e.g., average sales, peak/low points) from the historical data against the forecast. Is performance expected to improve, decline, or stay consistent?
- **Holiday & Anomaly Impact:** State the observed or predicted impact of holidays on sales. Mention any standout peaks or dips.
- **Correlation Insights:** Highlight the most significant relationship noted in the data (e.g., with unemployment) and identify any stores that are strong outliers.

---

Strategic Recommendations
- Based on your analysis, provide one or two concrete, actionable recommendations. This could be a potential business opportunity to explore or a risk to investigate further.

Maintain a professional and direct tone. Use the specific figures from the provided summaries to support your analysis.

**User Question:** {question}
"""

    def get_gpt35_response(self, question: str, temperature: float = 0.7, max_tokens: int = 1024, model: str = "gpt-3.5-turbo") -> str:
        """
        Send the question, wrapped in the consultant prompt, to the chat model.

        :param question: the user's question in natural language
        :param temperature: sampling temperature forwarded to the API
        :param max_tokens: completion token limit forwarded to the API
        :param model: chat model name; defaults to "gpt-3.5-turbo"
        :return: the first choice's message text with surrounding whitespace
                 removed, or an empty string when the message has no text content
        """
        stderr_print(f"Past know: {self.past_know}\nPred know: {self.pred_know}\nQuestion: {question}")
        prompt = self._build_prompt(question)

        response = self.client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        # `content` can be None; calling .strip() on it directly would raise
        # AttributeError, so fall back to an empty string.
        content = response.choices[0].message.content
        return (content or "").strip()


In [12]:

class RetailAgent:
    """
    Tool-calling agent that answers retail-dataset questions through the
    tools exposed by the CRUD and EDA objects.
    """
    def __init__(self, crud_obj: CRUD, eda_obj: EDAFeatures):
        """
        :param crud_obj: instantiated CRUD object supplying the sales lookup tools
        :param eda_obj: instantiated EDAFeatures object supplying the analysis tools
        """
        load_dotenv()
        openai_key = os.getenv("OPENAI_API_KEY")
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0.7, api_key=openai_key)

        # Lookup tools first, analysis tools second; the combined order matches
        # the order in which the tools are handed to the agent.
        lookup_tools = [
            crud_obj.sales_by_year_tool,
            crud_obj.sales_by_month_tool,
            crud_obj.sales_by_date_tool,
        ]
        analysis_tools = [
            eda_obj.sales_t_tool,
            eda_obj.holiday_impact_tool,
            eda_obj.top_performing_stores_tool,
            eda_obj.department_analysis_holiday_tool,
            eda_obj.department_analysis_no_holiday_tool,
            eda_obj.analyze_economic_headwinds_tool,
        ]
        self.tools = lookup_tools + analysis_tools

        # Prompt template is pulled from the LangChain hub on every construction.
        agent_prompt = hub.pull("hwchase17/openai-tools-agent")
        tool_agent = create_tool_calling_agent(self.llm, self.tools, agent_prompt)
        self.executor = AgentExecutor(
            agent=tool_agent,
            tools=self.tools,
            verbose=True,
        )

    def ask(self, question: str) -> str:
        """
        Run the agent on one question.
        :param question (str): The user's question in natural language.
        :return: the value stored under 'output' in the executor's result
        """
        result = self.executor.invoke({"input": question})
        return result['output']

# Basic Testing


In [13]:
# Show the working directory before constructing the data objects.
print(os.getcwd())
# CRUD is built with no arguments; its gen_df and spec_df attributes are then
# handed to the EDA helper.
crud_manager = CRUD()
eda_analyzer = EDAFeatures(gen_df=crud_manager.gen_df, spec_df=crud_manager.spec_df)

/Users/adityagandhi/Development/RetailMind


/Users/adityagandhi/Development/RetailMind
adding sales data
adding external factors
        Store  Dept        Date  Weekly_Sales  IsHoliday
0           1     1  2010-02-05      24924.50      False
1           1     1  2010-02-12      46039.49       True
2           1     1  2010-02-19      41595.55      False
3           1     1  2010-02-26      19403.54      False
4           1     1  2010-03-05      21827.90      False
...       ...   ...         ...           ...        ...
421565     45    98  2012-09-28        508.37      False
421566     45    98  2012-10-05        628.10      False
421567     45    98  2012-10-12       1061.02      False
421568     45    98  2012-10-19        760.01      False
421569     45    98  2012-10-26       1076.80      False

[421570 rows x 5 columns]


In [14]:
# Build the tool-calling agent from the CRUD and EDA objects.
# NOTE: despite the variable name, this is a RetailAgent, not a ChatBot.
retail_chatbot = RetailAgent(crud_obj=crud_manager, eda_obj=eda_analyzer)



In [15]:

# Narrow factual question: one store, one year.
question1 = "What were the total sales for store 2 in 2011?"
answer1 = retail_chatbot.ask(question1)
print(f"\nAnswer: {answer1}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_sales_for_years` with `{'storeIDs': [2], 'years': [2011]}`


[0m[36;1m[1;3m98607881[0m[32;1m[1;3mThe total sales for store 2 in 2011 were $98,607,881.[0m

[1m> Finished chain.[0m

Answer: The total sales for store 2 in 2011 were $98,607,881.


In [16]:
# Ranking question that asks for the top 3 stores.
# NOTE(review): in the recorded run the tool was invoked with top_n=3 but its
# text summary named only the best and the worst store, so the agent's answer
# did not actually list three top performers. Check the tool's summary output.
question2 = "Show me an analysis of the top 3 performing stores."
answer2 = retail_chatbot.ask(question2)
print(f"\nAnswer: {answer2}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `top_performing_stores_analyzer` with `{'top_n': 3}`


[0m[38;5;200m[1;3mStore 20.0 was the top performer with total sales of $301,397,792. In contrast, Store 33.0 had the lowest sales with a total of $37,160,222.
[0m

generating top performers bar graph


[32;1m[1;3mThe analysis of the top 3 performing stores reveals the following:

- **Store 20**: This store was the top performer with total sales amounting to $301,397,792.
- **Store 33**: In contrast, this store had the lowest sales, with a total of $37,160,222.

This analysis highlights the significant difference in sales performance between the top and bottom stores.[0m

[1m> Finished chain.[0m

Answer: The analysis of the top 3 performing stores reveals the following:

- **Store 20**: This store was the top performer with total sales amounting to $301,397,792.
- **Store 33**: In contrast, this store had the lowest sales, with a total of $37,160,222.

This analysis highlights the significant difference in sales performance between the top and bottom stores.


In [18]:
# Broad question intended to make the agent chain several tools in one run.
# NOTE(review): the recorded run ended in KeyboardInterrupt after several tool
# calls, before any answer was printed. Confirm whether a tool blocks
# (it may be the graph generation — TODO verify) before relying on this cell.
question3 = "Tell me everything you can about this data, including stores, holiday performance, top performers, retail headwinds, etc."
answer3 = retail_chatbot.ask(question3)
print(f"\nAnswer: {answer3}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `sales_over_time_chart` with `{'tool_input': ''}`


[0m
[36;1m[1;3mOverall sales peaked at $80,931,416 on the week of 2010-12-24 00:00:00. The lowest point was $39,599,853 on 2011-01-28 00:00:00. The average weekly sales across the entire period was $47,113,419.
[0m[32;1m[1;3m
Invoking: `holiday_impact_analyzer` with `{'tool_input': ''}`


[0m[33;1m[1;3mOn average, sales during holiday weeks ($17,036) were 7.1% higher than sales during non-holiday weeks ($15,901).
[0m[32;1m[1;3m
Invoking: `top_performing_stores_analyzer` with `{'tool_input': ''}`


[0m[38;5;200m[1;3mStore 20.0 was the top performer with total sales of $301,397,792. In contrast, Store 33.0 had the lowest sales with a total of $37,160,222.
[0m[32;1m[1;3m
Invoking: `economic_headwinds_analyzer` with `{'tool_input': ''}`


[0m   Store       Date  Weekly_Sales  Temperature  Fuel_Price         CPI  \
0      1 2010-02-05      24924.50  

generating sales graph
        Store  Dept       Date  Weekly_Sales  IsHoliday
0           1     1 2010-02-05      24924.50      False
1           1     1 2010-02-12      46039.49       True
2           1     1 2010-02-19      41595.55      False
3           1     1 2010-02-26      19403.54      False
4           1     1 2010-03-05      21827.90      False
...       ...   ...        ...           ...        ...
421565     45    98 2012-09-28        508.37      False
421566     45    98 2012-10-05        628.10      False
421567     45    98 2012-10-12       1061.02      False
421568     45    98 2012-10-19        760.01      False
421569     45    98 2012-10-26       1076.80      False

[421570 rows x 5 columns]
generating holiday impact box plot
generating top performers bar graph


KeyboardInterrupt: 