In [23]:
import os
from dotenv import load_dotenv
from openai import OpenAI



In [24]:
# Read variables from a .env file into the environment, then look up the key.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

# Module-level client; generate_sql() sends its chat completion requests through it.
client = OpenAI(api_key=api_key)


In [25]:
# 1️⃣ Define Generic Schema (Mocked Metadata)
def load_generic_schema():
    """Return the mocked table metadata used as context for SQL generation.

    Returns:
        A list with one dict per table. Each dict has the keys ``source``,
        ``table_name``, ``description`` and ``columns``; ``columns`` is a list
        of ``{"name": ..., "type": ...}`` dicts. A new structure is built on
        every call.
    """
    # One row per table: (source, table_name, description, ((column, type), ...))
    table_specs = (
        (
            "GCP_BigQuery",
            "customers",
            "Customer details including region and demographics.",
            (
                ("customer_id", "STRING"),
                ("name", "STRING"),
                ("region", "STRING"),
                ("signup_date", "DATE"),
            ),
        ),
        (
            "AWS_Redshift",
            "orders",
            "Order transactions placed by customers.",
            (
                ("order_id", "STRING"),
                ("customer_id", "STRING"),
                ("product_id", "STRING"),
                ("order_date", "DATE"),
                ("amount", "FLOAT"),
            ),
        ),
        (
            "AWS_RDS",
            "products",
            "Product catalog.",
            (
                ("product_id", "STRING"),
                ("product_name", "STRING"),
                ("category", "STRING"),
                ("price", "FLOAT"),
            ),
        ),
    )

    return [
        {
            "source": source,
            "table_name": table_name,
            "description": description,
            "columns": [
                {"name": column_name, "type": column_type}
                for column_name, column_type in column_specs
            ],
        }
        for source, table_name, description, column_specs in table_specs
    ]

#

In [26]:
# 2️⃣ Format Schema for LLM Context
def format_schema(schema):
    """Render table metadata as an indented text listing for the LLM prompt.

    Args:
        schema: Iterable of table dicts with the keys ``table_name``,
            ``source``, ``description`` and ``columns``; each column is a dict
            with ``name`` and ``type``.

    Returns:
        One string containing, per table, a blank line, a header line, a
        description line and one bullet line per column. Empty input yields
        an empty string.
    """
    pieces = []
    for entry in schema:
        pieces.append(f"\n📦 Table: `{entry['table_name']}` ({entry['source']})\n")
        pieces.append(f"  📝 Description: {entry['description']}\n")
        pieces.extend(
            f"    - {column['name']} ({column['type']})\n"
            for column in entry['columns']
        )
    return "".join(pieces)



In [30]:

# 3️⃣ Generate SQL Using OpenAI GPT
def generate_sql(user_input, schema_context, model="gpt-4", temperature=0.2):
    """Ask the chat model for a SQL query that answers ``user_input``.

    Args:
        user_input: The user's natural-language question.
        schema_context: Text description of the available tables; inserted
            verbatim into the prompt.
        model: Chat model name sent with the request. Defaults to ``"gpt-4"``.
        temperature: Sampling temperature sent with the request. Defaults to
            ``0.2``.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""`` when
        the reply has no text content. The reply is not post-processed, so it
        may contain Markdown code fences or commentary alongside the SQL.

    Raises:
        Any exception raised by the module-level ``client`` for a failed
        request is propagated unchanged.
    """
    prompt = f"""
You are a helpful data assistant. Based on the schema below, write a SQL query to answer the user's question.

Schema:
{schema_context}

User Question:
{user_input}

SQL Query:
"""
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )

    # The message content may be None; guard so the caller gets an empty
    # string instead of an AttributeError from calling .strip() on None.
    content = response.choices[0].message.content
    return content.strip() if content else ""



In [31]:


# 4️⃣ Main CLI Loop
def main():
    """Run the interactive natural-language-to-SQL prompt loop.

    Prints the formatted schema once, then repeatedly reads a question from
    standard input, sends it to ``generate_sql`` together with the schema
    text, and prints the reply. The loop ends when the user enters ``exit``
    or ``quit`` (case-insensitive), when input is exhausted, or when the
    prompt is interrupted.

    Blank input is ignored. A failure while generating SQL for one question
    is reported and the loop continues with the next question.
    """
    schema = load_generic_schema()
    schema_context = format_schema(schema)
    print(schema_context)

    print("📊 Enterprise Data Assistant (NL ➜ SQL)\n")
    while True:
        try:
            user_input = input("🔍 Ask a question about your data (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt: end the session cleanly
            # instead of surfacing a traceback.
            print()
            break
        if user_input.lower() in ('exit', 'quit'):
            break
        if not user_input:
            # Nothing was asked; do not spend an API call on an empty question.
            continue
        try:
            sql = generate_sql(user_input, schema_context)
        except Exception as exc:
            # One failed request should not end the whole session; report it
            # and let the user try again.
            print(f"\n⚠️ Could not generate SQL: {exc}\n")
            continue
        print("\n🧾 Generated SQL:\n", sql, "\n" + "-"*50 + "\n")


In [32]:
# Start the interactive session; it keeps prompting for questions until the user enters 'exit' or 'quit'.
main()


📦 Table: `customers` (GCP_BigQuery)
  📝 Description: Customer details including region and demographics.
    - customer_id (STRING)
    - name (STRING)
    - region (STRING)
    - signup_date (DATE)

📦 Table: `orders` (AWS_Redshift)
  📝 Description: Order transactions placed by customers.
    - order_id (STRING)
    - customer_id (STRING)
    - product_id (STRING)
    - order_date (DATE)
    - amount (FLOAT)

📦 Table: `products` (AWS_RDS)
  📝 Description: Product catalog.
    - product_id (STRING)
    - product_name (STRING)
    - category (STRING)
    - price (FLOAT)

📊 Enterprise Data Assistant (NL ➜ SQL)


🧾 Generated SQL:
 ```sql
SELECT c.customer_id, c.name, COUNT(o.order_id) as number_of_orders, p.product_name
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN products p ON o.product_id = p.product_id
GROUP BY c.customer_id, c.name, p.product_name;
```

Please note that this query will return the number of orders for each product by each customer. If you want t