In [1]:
import json
import os
import sqlite3
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
from textwrap import dedent
from typing import Any, Dict, List, Tuple, Union

import pandas as pd
from crewai import Agent, Crew, Process, Task
from crewai_tools import tool
from langchain.schema import AgentFinish
from langchain.schema.output import LLMResult
from langchain_community.tools.sql_database.tool import (
    InfoSQLDatabaseTool,
    ListSQLDatabaseTool,
    QuerySQLCheckerTool,
    QuerySQLDataBaseTool,
)
import pickle
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [2]:
from crewai import LLM

In [None]:
user_api_key = ''
os.environ["OPENAI_API_KEY"] = user_api_key

In [4]:
db_host = "localhost"
db_name = "marico1"
db_user = "root"  # Replace with your actual username
db_password = "cronlabs"  # Replace with your actual password
db_port = "3306"

# MySQL connection URI
db_url = f'mysql+pymysql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}'
db = SQLDatabase.from_uri(db_url)

In [5]:
@dataclass
class Event:
    event: str
    timestamp: str
    text: str


def _current_time() -> str:
    return datetime.now(timezone.utc).isoformat()


class LLMCallbackHandler(BaseCallbackHandler):
    def __init__(self, log_path: Path):
        self.log_path = log_path

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        """Run when LLM starts running."""
        assert len(prompts) == 1
        event = Event(event="llm_start", timestamp=_current_time(), text=prompts[0])
        with self.log_path.open("a", encoding="utf-8") as file:
            file.write(json.dumps(asdict(event)) + "\n")

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:
        """Run when LLM ends running."""
        generation = response.generations[-1][-1].message.content
        event = Event(event="llm_end", timestamp=_current_time(), text=generation)
        with self.log_path.open("a", encoding="utf-8") as file:
            file.write(json.dumps(asdict(event)) + "\n")
     

In [None]:
# llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
# llm = 'ollama/qwen2.5-coder:14b'
# llm = 'ollama/qwen2.5-coder:14b'
llm = LLM(
    model="gemini/gemini-exp-1121",
    temperature=0.0,
    verbose=True,
    max_tokens=None,
    timeout=None,
    api_key='',
)

In [7]:
@tool("list_tables")
def list_tables() -> str:
    """List the available tables in the database"""
    return ListSQLDatabaseTool(db=db).invoke("")

In [8]:
list_tables.run()

Using Tool: list_tables


'poc_2_new'

In [9]:
@tool("tables_schema")
def tables_schema(tables: str) -> str:
    """
    Input is a comma-separated list of tables, output is the schema and sample rows
    for those tables. Be sure that the tables actually exist by calling `list_tables` first!
    Example Input: table1, table2, table3, table4
    """
    tool = InfoSQLDatabaseTool(db=db)
    return tool.invoke(tables)

In [10]:
@tool("execute_sql")
def execute_sql(sql_query: str) -> str:
    """Execute a SQL query against the database. Returns the result"""
    return QuerySQLDataBaseTool(db=db).invoke(sql_query)

In [11]:
@tool("check_sql")
def check_sql(sql_query: str) -> str:
    """
    Use this tool to double check if your query is correct before executing it. Always use this
    tool before executing a query with `execute_sql`.
    """
    return QuerySQLCheckerTool(db=db, llm=llm).invoke({"query": sql_query})

In [12]:
# sql_dev = Agent(
#     role="Precision SQL Analyst",
#     goal="Construct and execute highly accurate SQL queries for comprehensive analysis of product ordering, billing, and regional distribution data.",
#     backstory="""
#     You are an expert SQL Analyst specializing in order, billing, and distribution data across multiple regions, brands, and product lines. Your expertise is in constructing flexible and precise SQL queries that handle variations in product naming conventions, discrepancies in order fulfillment, and dynamic regional and retailer-level filtering.

#     Core Competencies:
#     1. Advanced Retailer Analysis: Utilize the RTRCODE and RTRNAME columns to track orders and billing data at a retailer level, enhancing insights into distribution and performance by individual retailers.
#     2. Expert Use of LIKE Operator: Skilled in using the LIKE operator with wildcards for SKU_NAME to accurately capture variations in SKU details and flexible model naming across packaging or regional conventions.
#     3. Dynamic Query Construction: Craft flexible SQL queries tailored to user needs, utilizing the new columns like RTRCODE and RTRNAME for granular tracking.
#     4. Enhanced Data Validation: Implement checks to manage any inconsistencies or missing data in SKU_NAME, RTRNAME, and ORDER_STATUS, ensuring complete and accurate reporting.
#     5. Query Optimization: Balance query complexity with LIKE flexibility for efficient SKU matching, optimizing performance across potentially large datasets.

#     Approach to Queries:
#     1. Requirement Analysis: Interpret requests with a focus on retailer (RTRCODE, RTRNAME), SKU, and regional details.
#     2. Schema Exploration: Use list_tables and tables_schema to identify columns such as SKU_CODE, SKU_NAME, RTRCODE, RTRNAME, and ORDER_STATUS.
#     3. Query Design: Construct SQL queries with LIKE statements for SKU variations, now with the ability to add retailer-level filters using RTRCODE and RTRNAME.
#     4. Validation: Confirm query accuracy with check_sql, ensuring all specified columns, including RTRCODE and RTRNAME, are accurately represented.
#     5. Execution and Review: Execute queries with execute_sql and validate results, particularly for completeness in capturing SKU variations, order statuses, and accurate regional or retailer-based data.

#     Important Columns in the Dataset:
#     - month_year: Combined month and year for temporal analysis.
#     - month and Year: Separate month and year fields for granular time-based analysis.
#     - DISTCODE: Distributor code for tracking orders by distributor.
#     - RTRCODE and RTRNAME: Retailer identifiers and names for retailer-level analysis.
#     - SKU_CODE and SKU_NAME: Product identifiers, with SKU_NAME needing LIKE patterns for flexible matching.
#     - BRAND_NAME: Brand associated with each SKU.
#     - ORDER_STATUS: Indicates order fulfillment status (e.g., "Fully Serviced").
#     - ORDER_QTY, BILLED_QTY, BALANCE_QTY: Metrics on ordered, billed, and remaining quantities.
#     - ORDER_VALUE, BILLED_VALUE, LOSS_VALUE: Financial metrics, with LOSS_VALUE indicating potential unbilled amounts.
#     - CUSTOMER_CODE: Identifier for customers linked to orders.
#     - REGION_NAME and STATE_NAME: Regional and state-level data for geolocation-based analysis.
#     - Area_Type: Classification of the customer’s location as either "Urban" or "Rural," providing insight into urban-rural trends in demand and order fulfillment patterns
#     - channel: Classifying transactions as either GT (General Trade) or MT (Modern Trade), which is essential for trade-type analysis and segmentation.
    
#     Key Reminders:
#     1. Flexible SKU and Retailer Matching: Use LIKE with wildcards for flexible SKU and retailer name filtering.
#     2. Order Status Variants: Include ORDER_STATUS for analyzing fulfillment status, such as "Fully Serviced."
#     3. Price and Quantity Metrics: Display financial data in Lakhs and Crores and quantities in thousands (K).
#     4. Retailer and Regional Filters: Use RTRCODE, RTRNAME, REGION_NAME, and STATE_NAME for targeted retailer and geographical analysis.
#     5. Data Validation: Include checks for missing or null data, returning a clear message if relevant records are absent (e.g., "No data found for specified SKU").

#     SKU_NAME Prefix Indicators:
#     - 'P' at the start of SKU_NAME indicates a Parachute product.
#     - 'S' at the start of SKU_NAME indicates a Saffola product.
#     - 'NHR' or 'NSA' at the start of SKU_NAME indicates a Nihar product.
#     - 'Livon' at the start of SKU_NAME indicates a Livon product.
#     - 'SW' at the start of SKU_NAME indicates a Set Wet product.
#     - 'SF' at the start of SKU_NAME indicates a Saffola product, commonly used for Saffola foods like soya chunks and oats.
#     - 'PCNO' at the start of SKU_NAME indicates a Parachute Coconut Oil product.
#     - 'REV' at the start of SKU_NAME indicates a Revive product.
#     - 'H&C' at the start of SKU_NAME indicates a Hair & Care product.
#     - 'MALT' at the start of SKU_NAME suggests a Maltova product.

#     Scope and Data Presentation:
#     - Response Handling: Answer queries specifically related to order, billing, SKU, and retailer data.
#     - Currency and Quantity Format: Display financials in Lakhs and Crores, with quantities in thousands (K).

#     Database Structure:
#     This dataset contains a single primary table with order, billing, SKU, and retailer details, ideal for granular SKU-level, retailer-level, and regional analysis.

#     Query Types:
#     1. Order and Billing Data: Focus on ORDER_QTY, BILLED_QTY, BALANCE_QTY, and related financial metrics.
#     2. Temporal Analysis: Use month_year, month, and Year for date-specific insights.
#     3. Retailer and Regional Analysis: Filter by RTRCODE, RTRNAME, REGION_NAME, and STATE_NAME.
#     4. SKU Variations: Utilize LIKE for SKU_NAME variations across products and packaging.
#     """,
    
#     tools=[list_tables, tables_schema, execute_sql, check_sql],
#     allow_delegation=True,
#     llm=llm,
# )


In [13]:
sql_dev = Agent(
    role="Precision SQL Analyst",
    goal="Construct and execute highly accurate SQL queries for comprehensive analysis of product ordering, billing, and regional distribution data.",
    backstory="""
You are an expert SQL Analyst specializing in order, billing, and distribution data across multiple regions, brands, and product lines. Your expertise is in constructing flexible and precise SQL queries that handle variations in product naming conventions, discrepancies in order fulfillment, and dynamic regional and retailer-level filtering.

Core Competencies:
1. Advanced Retailer Analysis: Utilize the RTRCODE and RTRNAME columns to track orders and billing data at a retailer level, enhancing insights into distribution and performance by individual retailers.
2. Expert Use of LIKE Operator: Skilled in using the LIKE operator with wildcards for SKU_NAME to accurately capture variations in SKU details and flexible model naming across packaging or regional conventions.
3. Dynamic Query Construction: Craft flexible SQL queries tailored to user needs, utilizing the new columns like RTRCODE and RTRNAME for granular tracking.
4. Enhanced Data Validation: Implement checks to manage any inconsistencies or missing data in SKU_NAME, RTRNAME, and ORDER_STATUS, ensuring complete and accurate reporting.
5. Query Optimization: Balance query complexity with LIKE flexibility for efficient SKU matching, optimizing performance across potentially large datasets.
6. Sales Volume Forecasting: Utilize past six months of order and billing data to project next month's sales volume accurately, leveraging trends, seasonal variations, and historical patterns in the data.

Approach to Queries:
1. Requirement Analysis: Interpret requests with a focus on retailer (RTRCODE, RTRNAME), SKU, and regional details.
2. Schema Exploration: Use list_tables and tables_schema to identify columns such as SKU_CODE, SKU_NAME, RTRCODE, RTRNAME, and ORDER_STATUS.
3. Query Design: Construct SQL queries with LIKE statements for SKU variations, now with the ability to add retailer-level filters using RTRCODE and RTRNAME.
4. Validation: Confirm query accuracy with check_sql, ensuring all specified columns, including RTRCODE and RTRNAME, are accurately represented.
5. Execution and Review: Execute queries with execute_sql and validate results, particularly for completeness in capturing SKU variations, order statuses, and accurate regional or retailer-based data.
6. Sales Forecasting: Analyze temporal data (month_year, month, and Year), SKU-level trends, and regional factors to predict sales volume for upcoming periods, particularly focusing on the next month.

Important Columns in the Dataset:
- month_year: Combined month and year for temporal analysis.
- month and Year: Separate month and year fields for granular time-based analysis.
- DISTCODE: Distributor code for tracking orders by distributor.
- RTRCODE and RTRNAME: Retailer identifiers and names for retailer-level analysis.
- SKU_CODE and SKU_NAME: Product identifiers, with SKU_NAME needing LIKE patterns for flexible matching.
- BRAND_NAME: Brand associated with each SKU.
- ORDER_STATUS: Indicates order fulfillment status (e.g., "Fully Serviced").
- ORDER_QTY, BILLED_QTY, BALANCE_QTY: Metrics on ordered, billed, and remaining quantities.
- ORDER_VALUE, BILLED_VALUE, LOSS_VALUE: Financial metrics, with LOSS_VALUE indicating potential unbilled amounts.
- CUSTOMER_CODE: Identifier for customers linked to orders.
- REGION_NAME and STATE_NAME: Regional and state-level data for geolocation-based analysis.
- Area_Type: Classification of the customer’s location as either "Urban" or "Rural," providing insight into urban-rural trends in demand and order fulfillment patterns.
- channel: Classifying transactions as either GT (General Trade) or MT (Modern Trade), which is essential for trade-type analysis and segmentation.

Key Reminders:
1. Flexible SKU and Retailer Matching: Use LIKE with wildcards for flexible SKU and retailer name filtering.
2. Order Status Variants: Include ORDER_STATUS for analyzing fulfillment status, such as "Fully Serviced."
3. Price and Quantity Metrics: Display financial data in Lakhs and Crores and quantities in thousands (K).
4. Retailer and Regional Filters: Use RTRCODE, RTRNAME, REGION_NAME, and STATE_NAME for targeted retailer and geographical analysis.
5. Data Validation: Include checks for missing or null data, returning a clear message if relevant records are absent (e.g., "No data found for specified SKU").
6. Forecast Accuracy: Ensure that sales projections are data-driven, leveraging recent order and billing trends to provide actionable forecasts.

SKU_NAME Prefix Indicators:
- 'P' at the start of SKU_NAME indicates a Parachute product.
- 'S' at the start of SKU_NAME indicates a Saffola product.
- 'NHR' or 'NSA' at the start of SKU_NAME indicates a Nihar product.
- 'Livon' at the start of SKU_NAME indicates a Livon product.
- 'SW' at the start of SKU_NAME indicates a Set Wet product.
- 'SF' at the start of SKU_NAME indicates a Saffola product, commonly used for Saffola foods like soya chunks and oats.
- 'PCNO' at the start of SKU_NAME indicates a Parachute Coconut Oil product.
- 'REV' at the start of SKU_NAME indicates a Revive product.
- 'H&C' at the start of SKU_NAME indicates a Hair & Care product.
- 'MALT' at the start of SKU_NAME suggests a Maltova product.

Scope and Data Presentation:
- Response Handling: Answer queries specifically related to order, billing, SKU, and retailer data.
- Currency and Quantity Format: Display financials in Lakhs and Crores, with quantities in thousands (K).

Database Structure:
This dataset contains a single primary table with order, billing, SKU, and retailer details, ideal for granular SKU-level, retailer-level, and regional analysis.

Query Types:
1. Order and Billing Data: Focus on ORDER_QTY, BILLED_QTY, BALANCE_QTY, and related financial metrics.
2. Temporal Analysis: Use month_year, month, and Year for date-specific insights.
3. Retailer and Regional Analysis: Filter by RTRCODE, RTRNAME, REGION_NAME, and STATE_NAME.
4. SKU Variations: Utilize LIKE for SKU_NAME variations across products and packaging.
5. Sales Volume Forecasting: Analyze trends in the past six months of data to predict next month's sales volume, identifying patterns across SKUs, regions, and order channels.

    """,
    
    tools=[list_tables, tables_schema, execute_sql, check_sql],
    allow_delegation=True,
    llm=llm,
)


In [14]:
extract_data = Task(
    description="Extract data that is required for the query {query}.",
    expected_output="Database result for the query",
    agent=sql_dev,
)

In [15]:
crew = Crew(
    agents=[sql_dev],
    tasks=[extract_data],
    process=Process.sequential,
    verbose=1,
    memory=False,
    max_rpm=None,
    max_iter=25,
    output_log_file="crew.log",
)

In [16]:
result = crew.kickoff(inputs={"query": "Which sales channels has the highest revenue growth in 2024?"})

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json


[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Task:[00m [92mExtract data that is required for the query Which sales channels has the highest revenue growth in 2024?.[00m


Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Thought:[00m [92mThought: To determine which sales channel has the highest revenue growth in 2024, I need to calculate the total billed value for each channel (GT and MT) for each month in 2024, then calculate the month-over-month growth rate, and finally identify the channel with the highest growth. I'll start by listing the tables to understand the database structure and then inspect the schema of the relevant table to identify the necessary columns.[00m
[95m## Using tool:[00m [92mlist_tables[00m
[95m## Tool Input:[00m [92m
"{}"[00m
[95m## Tool Output:[00m [92m
poc_2_new[00m


Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Thought:[00m [92mThought: There's one table named 'poc_2_new'. I will inspect its schema to identify the relevant columns for my analysis.[00m
[95m## Using tool:[00m [92mtables_schema[00m
[95m## Tool Input:[00m [92m
"{\"tables\": \"poc_2_new\"}"[00m
[95m## Tool Output:[00m [92m

CREATE TABLE poc_2_new (
	month_year TEXT, 
	month INTEGER, 
	`Year` INTEGER, 
	`DISTCODE` INTEGER, 
	`RTRCODE` TEXT, 
	`RTRNAME` TEXT, 
	`SKU_CODE` TEXT, 
	`SKU_NAME` TEXT, 
	`BRAND_NAME` TEXT, 
	`ORDER_STATUS` TEXT, 
	`ORDER_QTY` INTEGER, 
	`BILLED_QTY` INTEGER, 
	`BALANCE_QTY` INTEGER, 
	`ORDER_VALUE` DOUBLE, 
	`BILLED_VALUE` DOUBLE, 
	`LOSS_VALUE` DOUBLE, 
	`CUSTOMER_CODE` TEXT, 
	`REGION_NAME` TEXT, 
	`STATE_NAME` TEXT, 
	`Area_Type` TEXT, 
	channel TEXT
)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4

/*
3 rows from poc_2_new table:
month_year	month	Year	DISTCODE	RTRCODE	RTRNAME	SKU_CODE	SKU_NAME	BRAND_NAM

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mcheck_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m

I encountered an error while trying to use the tool. This was the error: 2 validation errors for QuerySQLCheckerTool
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=<crewai.llm.LLM object at 0x00000254A5A3AB50>, input_type=LLM]
    For further information visit https://errors.pydantic.dev/2.9/v/is_instance_of
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=<crewai.llm.LLM object at 0x00000254A5A3AB50>, input_type=LLM]
    For further information visit https://errors.pydantic.dev/2.9/v/is_instance_of.
 Tool check_sql accepts th

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mexecute_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m
[('GT', 2024, 1, 2964948.7786610085), ('GT', 2024, 2, 2070609.0032189908), ('GT', 2024, 3, 1870390.0538899982), ('GT', 2024, 4, 2746034.8266460015), ('GT', 2024, 5, 3623225.055726016), ('GT', 2024, 6, 2938368.0509640053), ('GT', 2024, 7, 2551444.2464789925), ('GT', 2024, 8, 1646295.9502859954), ('GT', 2024, 9, 4802812.220453982), ('GT', 2024, 10, 3174947.671124993), ('GT', 2024, 11, 68235.87670799997), ('MT', 2024, 1, 1500949.0937129972), ('MT', 2024, 2, 2256389.852535998), ('MT', 2024, 3, 1852199.6210399969), ('MT', 2024, 4, 3090471.7882939987), ('MT', 2024, 5, 3332634.262219027), ('MT', 2024, 6, 4125675.1573510137), ('MT', 2024, 

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mcheck_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month)SELECT channel, Year, month, total_billed_value, (total_billed_value - LAG(total_billed_value, 1, 0) OVER (PARTITION BY channel ORDER BY Year, month)) / LAG(total_billed_value, 1, 0) AS growth_rate FROM MonthlySales ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m

I encountered an error while trying to use the tool. This was the error: 2 validation errors for QuerySQLCheckerTool
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=<crewai.llm.LLM object at 0x00000254A5A3AB50>, input_type=LLM]
    For further information visit https://errors.pydantic.dev/2.9/v/is_instance_of
llm.is-instance[Runnable]
  Input sho

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mexecute_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month)SELECT channel, Year, month, total_billed_value, (total_billed_value - LAG(total_billed_value, 1, 0) OVER (PARTITION BY channel ORDER BY Year, month)) / LAG(total_billed_value, 1, 0) AS growth_rate FROM MonthlySales ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m
Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'AS growth_rate FROM MonthlySales ORDER BY channel, Year, month' at line 1")
[SQL: WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY chann

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mexecute_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month) SELECT channel, Year, month, total_billed_value, (total_billed_value - LAG(total_billed_value, 1, 0) OVER (PARTITION BY channel ORDER BY Year, month)) / LAG(total_billed_value, 1, 0) FROM MonthlySales ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m
Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'FROM MonthlySales ORDER BY channel, Year, month' at line 1")
[SQL: WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month) SELECT chann

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mexecute_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month) SELECT channel, Year, month, total_billed_value, LAG(total_billed_value, 1, 0) OVER (PARTITION BY channel ORDER BY Year, month) AS previous_month_sales FROM MonthlySales ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m
[('GT', 2024, 1, 2964948.7786610085, 0.0), ('GT', 2024, 2, 2070609.0032189908, 2964948.7786610085), ('GT', 2024, 3, 1870390.0538899982, 2070609.0032189908), ('GT', 2024, 4, 2746034.8266460015, 1870390.0538899982), ('GT', 2024, 5, 3623225.055726016, 2746034.8266460015), ('GT', 2024, 6, 2938368.0509640053, 3623225.055726016), ('GT', 2024, 7, 2551444.2464789925, 2938368.0509640053), ('GT', 2024, 8, 1646295.9502859954, 2551444.2464789925), ('GT',

Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json




[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Using tool:[00m [92mexecute_sql[00m
[95m## Tool Input:[00m [92m
"{\"sql_query\": \"WITH MonthlySales AS (SELECT channel, Year, month, SUM(BILLED_VALUE) AS total_billed_value FROM poc_2_new WHERE Year = 2024 GROUP BY channel, Year, month) SELECT channel, Year, month, total_billed_value, LAG(total_billed_value, 1, 0) OVER (PARTITION BY channel ORDER BY Year, month) AS previous_month_sales FROM MonthlySales ORDER BY channel, Year, month\"}"[00m
[95m## Tool Output:[00m [92m
I tried reusing the same input, I must stop using this action input. I'll try something else instead.

[00m


[1m[95m# Agent:[00m [1m[92mPrecision SQL Analyst[00m
[95m## Final Answer:[00m [92m
```tool_code
import pandas as pd

data = [('GT', 2024, 1, 2964948.7786610085, 0.0), ('GT', 2024, 2, 2070609.0032189908, 2964948.7786610085), ('GT', 2024, 3, 1870390.0538899982, 2070609.0032189908), ('GT', 2024, 4, 2746034.8266460015, 1870390.0

In [17]:
print(result)

```tool_code
import pandas as pd

data = [('GT', 2024, 1, 2964948.7786610085, 0.0), ('GT', 2024, 2, 2070609.0032189908, 2964948.7786610085), ('GT', 2024, 3, 1870390.0538899982, 2070609.0032189908), ('GT', 2024, 4, 2746034.8266460015, 1870390.0538899982), ('GT', 2024, 5, 3623225.055726016, 2746034.8266460015), ('GT', 2024, 6, 2938368.0509640053, 3623225.055726016), ('GT', 2024, 7, 2551444.2464789925, 2938368.0509640053), ('GT', 2024, 8, 1646295.9502859954, 2551444.2464789925), ('GT', 2024, 9, 4802812.220453982, 1646295.9502859954), ('GT', 2024, 10, 3174947.671124993, 4802812.220453982), ('GT', 2024, 11, 68235.87670799997, 3174947.671124993), ('MT', 2024, 1, 1500949.0937129972, 0.0), ('MT', 2024, 2, 2256389.852535998, 1500949.0937129972), ('MT', 2024, 3, 1852199.6210399969, 2256389.852535998), ('MT', 2024, 4, 3090471.7882939987, 1852199.6210399969), ('MT', 2024, 5, 3332634.262219027, 3090471.7882939987), ('MT', 2024, 6, 4125675.1573510137, 3332634.262219027), ('MT', 2024, 7, 2133426.9905

In [18]:
import os
import io
import pandas as pd
from openai import OpenAI
client = OpenAI()
def extract_business_insights(data):
    prompt = f"""
    Extract meaningful business insights. 
    Highlight trends, potential opportunities, and any notable patterns.
    {data}
    Provide your insights in bullet points format.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an expert data analyst who provides business insights."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.0
    )
    return response.choices[0].message.content.strip()
insights = extract_business_insights(result)

In [19]:
print(insights)

- The provided data includes total billed values for two channels (GT and MT) for each month in the year 2024.
- A growth rate column was calculated based on the difference between the total billed value and the previous month's sales.
- The channel with the highest revenue growth in 2024 is identified as 'MT' based on the maximum growth rate calculated for each channel.
- 'MT' channel had a significant growth rate of 50.3% in February, indicating a potential opportunity for further analysis and investment.
- 'GT' channel experienced fluctuating growth rates throughout the year, with a notable peak in September (191.5%) and a sharp decline in November (-97.9%).
- There is a pattern of alternating positive and negative growth rates for the 'GT' channel, suggesting potential seasonality or specific market conditions affecting sales.
- Further investigation into the factors influencing the growth rates of each channel could provide valuable insights for strategic decision-making and optim