## Best Practices

In [7]:
import pandas as pd
import duckdb as db
import os
import sys

# Add the parent directory to the import path so the import below can resolve
# the local sql_ai_agent package (presumably located one level up — verify).
sys.path.append("../")
from sql_ai_agent.api_handler import SqlAgent

In [8]:
# Connection settings for the OpenAI API endpoint; the key is read from the
# environment so it never appears in the notebook itself.
openai_base_url = "https://api.openai.com/v1/"
openai_api_key = os.getenv("OPENAI_API_KEY")

# Load the sales data and register the DataFrame with DuckDB under the same
# name that is passed to the agent as its table name.
retail_sales = pd.read_csv("../data/Red30 Tech US online retail sales.csv")
db.register("retail_sales", retail_sales)

# Build the SQL agent against the registered table.
my_agent = SqlAgent(
    tbl_name="retail_sales",
    model="gpt-5",
    max_token=5000,
    base_url=openai_base_url,
    api_key=openai_api_key,
)

### Context

- Table name
  - Bad: TABLE_A
  - Good: RETAIL_SALES
- Field names
  - Bad: VAR_1, VAR_2, VAR_3
  - Good: STATE, DATE, TOTAL_SALES...

In [4]:
# Preview the first five rows to see the column names the agent works with.
retail_sales.head(n=5)

Unnamed: 0,OrderNum,OrderDate,OrderType,CustomerType,CustName,CustState,ProdCategory,ProdNumber,ProdName,Quantity,Price,Discount,OrderTotal
0,1100934,9/1/2017,Wholesale,Business,Gusikowski Group,North Carolina,Blueprints,BP102,Bsquare Robot Blueprint,10,8.99,1.8,88.1
1,1100935,9/1/2017,Retail,Individual,Spencer Educators,Delaware,Drone Kits,DK204,BYOD-300,2,89.0,0.0,178.0
2,1100936,9/1/2017,Wholesale,Business,Schinner Inc.,Florida,Training Videos,TV801,Aerial Security,10,36.99,7.4,362.5
3,1100937,9/1/2017,Retail,Individual,Saxon Laviss,Virginia,Robot Kits,RK602,BYOR-1000,1,189.0,0.0,189.0
4,1100938,9/1/2017,Retail,Business,Wilderman Technologies,Texas,eBooks,EB502,Building Your First Robot,4,24.95,0.0,99.8


### Leverage the Power of Prompts and LLMs

In [5]:
# Baseline prompt: names the state but gives no hint about how the state is
# spelled in the data.
baseline_question = "Could you please summarize the total sales by product category in California? "
my_agent.ask_question(question=baseline_question)


SELECT ProdCategory, SUM(OrderTotal) AS total_sales
FROM retail_sales
WHERE CustState = 'CA'
GROUP BY ProdCategory
ORDER BY total_sales DESC;
┌──────────────┬─────────────┐
│ ProdCategory │ total_sales │
│   varchar    │   double    │
├──────────────┴─────────────┤
│           0 rows           │
└────────────────────────────┘



In [6]:
# Same question, now with an explicit instruction to use the full state name.
refined_question = "Could you please summarize the total sales by product category in California? Please use state full name"
my_agent.ask_question(question=refined_question)


SELECT
  ProdCategory,
  'California' AS State,
  SUM(COALESCE(OrderTotal, Quantity * Price - COALESCE(Discount, 0))) AS TotalSales
FROM retail_sales
WHERE UPPER(CustState) IN ('CALIFORNIA', 'CA')
GROUP BY ProdCategory
ORDER BY TotalSales DESC;
┌─────────────────┬────────────┬────────────────────┐
│  ProdCategory   │   State    │     TotalSales     │
│     varchar     │  varchar   │       double       │
├─────────────────┼────────────┼────────────────────┤
│ Robots          │ California │  271672.4000000001 │
│ Robot Kits      │ California │ 102867.59999999996 │
│ Drones          │ California │            61494.0 │
│ Drone Kits      │ California │  48553.21999999999 │
│ Training Videos │ California │  31583.04000000002 │
│ eBooks          │ California │           16709.36 │
│ Blueprints      │ California │  7405.899999999994 │
└─────────────────┴────────────┴────────────────────┘



### User Onboarding and Feedback

- Consider onboarding and educating your end users on what the SQL AI Agent can do and what its limitations are
- Collect user feedback

### Model and API Documentation

<br>
<figure>
 <img src="assets/documentation.png" width="100%" align="center"/>
<figcaption> API Documentation </figcaption>
</figure>

<br>