**Import libraries**

In [1]:
from openai import AzureOpenAI
import pyodbc
import pandas as pd

import os
from dotenv import load_dotenv
from pathlib import Path

import re

**Locate env path**

In [2]:
# The .env file is expected in the kernel's current working directory.
script_dir = Path(os.getcwd())
env_path = script_dir / '.env'

# Load the file exactly once and keep the return value for the report below,
# instead of calling load_dotenv a second time only to print its result.
env_loaded = load_dotenv(env_path)

print(f'Script Path:\t{script_dir}\nenv Path:\t{env_path}\nLoad env:\t{env_loaded}')

Script Path:	c:\Users\Admins\Desktop\Tae\work\azure-sql-2025-tester
env Path:	c:\Users\Admins\Desktop\Tae\work\azure-sql-2025-tester\.env
Load env:	True


In [3]:
# Database connection settings, read from the environment in one pass.
# Each name is None when its variable is not set.
server, database, username, password = map(
    os.getenv,
    ('azure_server', 'azure_database', 'azure_username', 'azure_password'),
)

**Database Connection**

In [4]:
def _odbc_quote(value):
    """Brace-quote a value for use inside an ODBC connection string.

    Wrapping the value in {...} lets it contain ';' (the attribute separator)
    or start with '{'; a literal '}' inside the value is escaped by doubling it.

    Args:
        value: the raw attribute value (converted with str()).

    Returns:
        The value wrapped in braces with every '}' doubled.
    """
    return "{" + str(value).replace("}", "}}") + "}"


# Same attributes, order and separators as before; only the password is now
# brace-quoted so special characters in it cannot break or truncate the string.
conn_str = (
    "Driver={ODBC Driver 17 for SQL Server}; "
    f"Server={server}; "
    f"Database={database}; "
    f"Uid={username}; "
    f"Pwd={_odbc_quote(password)}; "
    "Encrypt=yes; TrustServerCertificate=no; Connection Timeout=30;"
)

In [5]:
# Open one connection from the connection string; the schema query and the
# generated query further down both reuse this object.
conn = pyodbc.connect(conn_str)
# Bare expression so the notebook echoes the connection object as the cell output.
conn

<pyodbc.Connection at 0x2c7c6502e00>

**AI Prompt**

In [6]:
# One row per column listed in INFORMATION_SCHEMA.COLUMNS, ordered by schema,
# table and ordinal position.
schema_query = """
SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION;
"""

schema_df = pd.read_sql_query(schema_query, conn)

# Render one "Table <schema>.<table>: col (type), ..." line per table; the text
# is embedded in the system prompt later on.
# zip() over the two columns avoids iterrows(), and the lines are joined once
# instead of growing a string with += inside the loop.
schema_lines = []
for (schema, table), group in schema_df.groupby(['TABLE_SCHEMA', 'TABLE_NAME']):
    columns = ", ".join(
        f"{column_name} ({data_type})"
        for column_name, data_type in zip(group['COLUMN_NAME'], group['DATA_TYPE'])
    )
    schema_lines.append(f"Table {schema}.{table}: {columns}\n")

schema_text = "".join(schema_lines)


  schema_df = pd.read_sql_query(schema_query, conn)


In [7]:
# Natural-language question; it is appended to the user message sent to the model.
user_prompt = "I want to know the top 10 most spend customer in the database."

In [8]:
# Model name read from the environment; passed as `model` on the chat request.
model_name = os.environ.get("openai_model_name")

# Collect the Azure OpenAI settings from the environment, then build the client.
azure_openai_settings = {
    "api_key": os.environ.get("openai_model_api_key"),
    "api_version": os.environ.get("openai_api_version"),
    "azure_endpoint": os.environ.get("openai_api_endpoint"),
}
client = AzureOpenAI(**azure_openai_settings)

In [9]:
# System prompt: a fixed role description followed by the rendered schema text.
system_message = (
    "\n"
    "You are a helpful assistant that writes SQL Server queries.\n"
    "Here is the database schema:\n"
    "\n"
    f"{schema_text}\n"
)

In [10]:
# Conversation sent to the model: the schema-bearing system prompt first,
# then the user's request.
chat_messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": f"Write a SQL Server query for this: {user_prompt}"},
]

response = client.chat.completions.create(model=model_name, messages=chat_messages)

**Generate Output**

In [11]:
# Raw reply text of the first completion choice, printed as-is for inspection.
first_choice = response.choices[0]
generated_sql = first_choice.message.content
print("Generated SQL:\n", generated_sql)

Generated SQL:
 Sure! You can use the following SQL query to find the top 10 highest spending customers in the database:

```sql
SELECT TOP 10
    c.CustomerID,
    c.FirstName,
    c.LastName,
    SUM(od.LineTotal) AS TotalSpent
FROM SalesLT.Customer AS c
JOIN SalesLT.SalesOrderHeader AS o ON c.CustomerID = o.CustomerID
JOIN SalesLT.SalesOrderDetail AS od ON o.SalesOrderID = od.SalesOrderID
GROUP BY c.CustomerID, c.FirstName, c.LastName
ORDER BY TotalSpent DESC;
```

This query calculates the total amount spent by each customer by joining the `Customer`, `SalesOrderHeader`, and `SalesOrderDetail` tables. Then, it groups the results by customer and orders them in descending order of total amount spent. The `TOP 10` keyword ensures that only the top 10 highest spending customers are returned in the result set.


In [12]:
def extract_sql(reply_text):
    """Pull the SQL statement out of a chat reply.

    Preference order:
      1. the first fenced block tagged ``sql`` (tag matched case-insensitively),
      2. the first fenced block with any other tag or no tag (the opening
         fence line is dropped),
      3. the whole reply, stripped, when it contains no fenced block.

    Args:
        reply_text: message content returned by the model; may be None.

    Returns:
        The extracted SQL with surrounding whitespace removed.

    Raises:
        ValueError: if the reply is None or blank.
    """
    if reply_text is None or not reply_text.strip():
        raise ValueError("The model returned an empty reply; there is no SQL to extract.")

    # ```sql ... ``` -- IGNORECASE so ```SQL is accepted as well.
    tagged = re.search(r"```sql\s*(.*?)```", reply_text, re.DOTALL | re.IGNORECASE)
    if tagged:
        return tagged.group(1).strip()

    # Any other fenced block: skip the rest of the opening fence line.
    untagged = re.search(r"```[^\n`]*\n(.*?)```", reply_text, re.DOTALL)
    if untagged:
        return untagged.group(1).strip()

    # No fence at all: fall back to the raw reply.
    return reply_text.strip()


# Read from the response rather than from generated_sql so that re-running
# this cell always starts from the untouched model reply.
generated_sql = extract_sql(response.choices[0].message.content)

print(generated_sql)

SELECT TOP 10
    c.CustomerID,
    c.FirstName,
    c.LastName,
    SUM(od.LineTotal) AS TotalSpent
FROM SalesLT.Customer AS c
JOIN SalesLT.SalesOrderHeader AS o ON c.CustomerID = o.CustomerID
JOIN SalesLT.SalesOrderDetail AS od ON o.SalesOrderID = od.SalesOrderID
GROUP BY c.CustomerID, c.FirstName, c.LastName
ORDER BY TotalSpent DESC;


In [13]:
# Text of the statement to execute, taken from the extracted SQL.
query = str(generated_sql)

**Final DataFrame**

In [14]:
# The SQL text was written by the model, so treat it as untrusted input: run it
# only when it starts with SELECT or WITH. This is a coarse first-line check,
# not a sandbox (it also rejects statements that begin with a SQL comment);
# the database login should additionally be limited to read-only permissions.
if not re.match(r"\s*(?:SELECT|WITH)\b", query, re.IGNORECASE):
    print("Refusing to run the generated SQL because it does not start with SELECT or WITH:\n", query)
else:
    try:
        df = pd.read_sql_query(query, conn)
    except Exception as e:
        # Keep the notebook running and show the error text from pandas/the driver.
        print("Error running the query:", e)
    else:
        # Render only after the query succeeded, so a display problem is not
        # reported as a query failure.
        print("Query Results:")
        display(df)

Query Results:


  df = pd.read_sql_query(query, conn)


Unnamed: 0,CustomerID,FirstName,LastName,TotalSpent
0,29736,Terry,Eminhizer,89869.276314
1,30050,Krishna,Sunkammurali,79589.616024
2,29546,Christopher,Beck,74160.228
3,29957,Kevin,Liu,65683.367986
4,29796,Jon,Grande,65123.463418
5,29929,Jeffrey,Kurtz,59894.2092
6,29932,Rebecca,Laszlo,53248.692
7,29660,Anthony,Chor,47848.026
8,29938,Frank,Campbell,34118.5356
9,29485,Catherine,Abel,33319.986
