**Import libraries**

In [107]:
from openai import AzureOpenAI
import pyodbc
import pandas as pd

import os
from dotenv import load_dotenv
from pathlib import Path

import re
from datetime import datetime

**Locate env path**

In [108]:
# Resolve the notebook's working directory and the .env file expected beside it.
script_dir = Path.cwd()
env_path = script_dir / '.env'

# Load the file once and keep the return value for the report below,
# instead of calling load_dotenv a second time just to print its result.
env_loaded = load_dotenv(env_path)

print(f'Script Path:\t{script_dir}\nenv Path:\t{env_path}\nLoad env:\t{env_loaded}')

Script Path:	c:\Users\Admins\Desktop\Tae\work\azure-sql-2025-tester
env Path:	c:\Users\Admins\Desktop\Tae\work\azure-sql-2025-tester\.env
Load env:	True


In [109]:
# Database connection settings, read from the environment.
# Each value is None when the corresponding variable is not set.
server, database, username, password = map(
    os.getenv,
    ('azure_server', 'azure_database', 'azure_username', 'azure_password'),
)

**Database Connection**

In [110]:
# ODBC connection string assembled from "key=value" segments.
# NOTE(review): the values are interpolated as-is; a password containing ';'
# would presumably need ODBC brace-quoting — confirm against the driver docs.
conn_segments = [
    "Driver={ODBC Driver 17 for SQL Server}",
    f"Server={server}",
    f"Database={database}",
    f"Uid={username}",
    f"Pwd={password}",
    "Encrypt=yes",
    "TrustServerCertificate=no",
    "Connection Timeout=30",
]
# Segments are separated by "; " and the string ends with a trailing ";".
conn_str = "; ".join(conn_segments) + ";"

In [111]:
# Open the database connection reused by the query cells below.
# NOTE(review): no cell shown in this notebook closes `conn`.
conn = pyodbc.connect(conn_str)
conn

<pyodbc.Connection at 0x1d3a4661440>

**AI Prompt**

In [112]:
# Location of the schema-listing query stored next to the notebook
sql_file_path = script_dir.joinpath("sql_script", "Schema.sql")

# Load the whole script as UTF-8 text
schema_query = sql_file_path.read_text(encoding='utf-8')

print(schema_query)

-- Author:		TANAKRIT-THONGPENG
-- Create date: 27/06/2025
-- Description:	SCHEMA QUERY

SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION;


In [113]:
# One row per column, as returned by the schema query.
schema_df = pd.read_sql_query(schema_query, conn)

# Build one "Table <schema>.<table>: col (type), ..." line per table,
# each terminated by a newline, for use in the system prompt.
table_lines = []
for (schema_name, table_name), table_cols in schema_df.groupby(['TABLE_SCHEMA', 'TABLE_NAME']):
    column_specs = [
        f"{col_name} ({col_type})"
        for col_name, col_type in zip(table_cols['COLUMN_NAME'], table_cols['DATA_TYPE'])
    ]
    table_lines.append(f"Table {schema_name}.{table_name}: {', '.join(column_specs)}\n")

schema_text = "".join(table_lines)


  schema_df = pd.read_sql_query(schema_query, conn)


In [114]:
# Natural-language question that the model is asked to turn into a SQL Server query.
user_prompt = "I want to know which products is selling the most"

In [115]:
# Model name passed as `model=` on the chat completion request.
model_name = os.getenv("openai_model_name")

# Azure OpenAI client settings, all taken from the environment.
azure_openai_settings = {
    "api_key": os.getenv("openai_model_api_key"),
    "api_version": os.getenv("openai_api_version"),
    "azure_endpoint": os.getenv("openai_api_endpoint"),
}
client = AzureOpenAI(**azure_openai_settings)

In [116]:
# System prompt: assistant role followed by the table/column listing.
# The leading and trailing newlines are part of the message text.
system_message = (
    "\n"
    "You are a helpful assistant that writes SQL Server queries.\n"
    "Here is the database schema:\n"
    "\n"
    f"{schema_text}\n"
)

In [117]:
# Conversation sent to the model: schema context first, then the user's request.
chat_messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": f"Write a SQL Server query for this: {user_prompt}"},
]

response = client.chat.completions.create(model=model_name, messages=chat_messages)

**Generate Output**

In [118]:
# Raw text of the first choice; at this point it may still contain prose and
# Markdown code fences around the SQL.
generated_sql = response.choices[0].message.content
print("Generated SQL:\n", generated_sql)

Generated SQL:
 To find out which products are selling the most, we can use the following SQL query:

```sql
SELECT TOP 1 
    P.Name AS ProductName, 
    SUM(OD.OrderQty) AS TotalQuantitySold
FROM 
    SalesLT.Product AS P
JOIN 
    SalesLT.SalesOrderDetail AS OD ON P.ProductID = OD.ProductID
GROUP BY 
    P.ProductID, P.Name
ORDER BY 
    SUM(OD.OrderQty) DESC;
```

This query selects the product with the highest total quantity sold by joining the `Product` and `SalesOrderDetail` tables, grouping by product, summing the order quantities, and then ordering the results by total quantity sold in descending order. The `TOP 1` keyword limits the results to only the product that sold the most.


In [119]:
# Split the assistant reply into the runnable SQL and its prose explanation.
# The message content can be None, so normalise it to an empty string before
# running any regex over it.
reply_text = response.choices[0].message.content or ""

# Prefer a fence tagged `sql` in any letter case. Otherwise accept the first
# fenced block regardless of its tag, because the model does not always label
# the fence and the unfenced fallback would execute the backticks and prose.
fence_match = (
    re.search(r"```sql\s*(.*?)```", reply_text, re.DOTALL | re.IGNORECASE)
    or re.search(r"```[^\n`]*\n(.*?)```", reply_text, re.DOTALL)
)

if fence_match:
    generated_sql = fence_match.group(1).strip()
    # Explanation = prose right after the chosen block, up to the next fence if any.
    trailing_text = reply_text[fence_match.end():].split("```", 1)[0].strip()
    # Prose right before the chosen block, after any earlier fence.
    leading_text = reply_text[:fence_match.start()].rsplit("```", 1)[-1].strip()
    # Use the introduction when nothing follows the code block.
    generated_description = trailing_text or leading_text
else:
    generated_sql = reply_text.strip()  # fallback: whole reply is treated as SQL
    generated_description = reply_text.strip()  # fallback

print(f"{generated_sql}\n\n{generated_description}")

SELECT TOP 1 
    P.Name AS ProductName, 
    SUM(OD.OrderQty) AS TotalQuantitySold
FROM 
    SalesLT.Product AS P
JOIN 
    SalesLT.SalesOrderDetail AS OD ON P.ProductID = OD.ProductID
GROUP BY 
    P.ProductID, P.Name
ORDER BY 
    SUM(OD.OrderQty) DESC;

This query selects the product with the highest total quantity sold by joining the `Product` and `SalesOrderDetail` tables, grouping by product, summing the order quantities, and then ordering the results by total quantity sold in descending order. The `TOP 1` keyword limits the results to only the product that sold the most.


In [120]:
# Statement to execute: the extracted SQL, formatted into a new string unchanged.
query = f"""{generated_sql}"""

**Final Dataframe**

In [121]:
# Run the generated statement and show the result set.
# NOTE(review): `query` is model output executed verbatim on `conn`; nothing in
# this notebook restricts it to read-only statements. Consider a read-only
# database login or a SELECT-only check before running it.
try:
    df = pd.read_sql_query(query, conn)
    print("Query Results:")
    display(df)
except Exception as e:
    # Best-effort: report any failure instead of raising, so the notebook keeps running.
    print("Error running the query:", e)

Query Results:


  df = pd.read_sql_query(query, conn)


Unnamed: 0,ProductName,TotalQuantitySold
0,"Classic Vest, S",87


**Create SQL Script from Output**

In [122]:
# Author label written into the generated script's header.
Author = "AI Generated SQL Query"

# Take a single clock reading so day, month and year always describe the same
# instant; separate datetime.now() calls could straddle midnight.
current_time = datetime.now()
day, month, year = current_time.day, current_time.month, current_time.year
date_obj = datetime(year, month, day)  # today's date at 00:00
create_date = date_obj.strftime("%d/%m/%Y")  # day/month/year, e.g. 27/06/2025

# Header description: the explanation text extracted from the model's reply.
description = generated_description


In [123]:
# Prefix every continuation line of the description with a SQL line comment,
# so a multi-line explanation cannot spill uncommented prose into the script.
# A single-line description produces exactly the same header as before.
description_comment = "\n--\t\t".join(str(description).splitlines() or [""])

# Comment banner placed at the top of the saved .sql file.
header = f"""
-- =============================================
-- Author:\t\t{Author}
-- Create date: {create_date}
-- Description:\t{description_comment}
-- =============================================
"""

In [124]:
# Show the raw header string as the cell output.
header



In [125]:
# Body of the saved script: the extracted SQL, formatted into a new string unchanged.
query_string = f"""{generated_sql}"""

In [126]:
# Show the raw SQL string (repr form, with escaped newlines) as the cell output.
query_string

'SELECT TOP 1 \n    P.Name AS ProductName, \n    SUM(OD.OrderQty) AS TotalQuantitySold\nFROM \n    SalesLT.Product AS P\nJOIN \n    SalesLT.SalesOrderDetail AS OD ON P.ProductID = OD.ProductID\nGROUP BY \n    P.ProductID, P.Name\nORDER BY \n    SUM(OD.OrderQty) DESC;'

In [127]:
def find_latest_script_number(folder_path):
    """Return the highest N among files named ``Script-<N>.sql`` in a folder.

    Args:
        folder_path: Directory to scan (``str`` or ``Path``). Only its direct
            children are inspected.

    Returns:
        int: The largest script number found, or 0 when the folder does not
        exist or contains no matching file.
    """
    folder = Path(folder_path)
    if not folder.is_dir():
        return 0

    # Pattern for filenames like Script-<number>.sql. It is applied with
    # fullmatch so that names with extra trailing text (e.g. "Script-7.sql.sql",
    # which the glob below also yields) are not counted.
    pattern = re.compile(r"Script-(\d+)\.sql", re.IGNORECASE)

    matches = (pattern.fullmatch(file.name) for file in folder.glob("Script-*.sql"))
    return max((int(m.group(1)) for m in matches if m), default=0)

In [128]:
# Folder holding the numbered Script-<N>.sql files.
folder_path = script_dir / "sql_script"
# Highest existing script number; 0 when there is none yet.
latest_number = find_latest_script_number(folder_path)
print("Latest script number:", latest_number)

Latest script number: 2


In [129]:
# The new script takes the next number after the highest one found.
output_path = script_dir / "sql_script" / f"Script-{latest_number+1}.sql"

In [130]:
# Create the output folder (and any missing parents) if it does not exist;
# exist_ok=True makes this a no-op when it is already there.
output_path.parent.mkdir(parents=True, exist_ok=True)

In [131]:
# Final script text: comment header, a separating newline, then the query.
full_sql = "\n".join([header, query_string])

# Persist the script as UTF-8, replacing any existing file at that path
output_path.write_text(full_sql, encoding="utf-8")

print(f"SQL file saved to: {output_path}")

SQL file saved to: c:\Users\Admins\Desktop\Tae\work\azure-sql-2025-tester\sql_script\Script-3.sql
