start from here

In [1]:
# Import all the required packages
import os
import json
import textwrap
import pandas as pd
import requests
import logging
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine
from dotenv import load_dotenv

In [2]:
# =========================
# 0) Configure Logging
# =========================
# Root logger: INFO and above, timestamped, sent to both pipeline.log and the
# console so a notebook run leaves a persistent trace.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[logging.FileHandler("pipeline.log"), logging.StreamHandler()],
)


In [3]:
# =========================
# 1) Config / Environment
# =========================
# Read the pipeline settings from the environment (after loading any .env
# file) and report every setting that ended up empty. Missing settings are
# only logged here; nothing is raised for them at this point.
try:
    load_dotenv()  # Load environment variables from .env file

    # Optional settings: None when the variable is absent.
    DB_SCHEMA = os.environ.get("DB_SCHEMA")
    EURI_API_KEY = os.environ.get("EURI_API_KEY")

    # Settings with a fallback value used when the variable is absent.
    EURI_API_URL = os.environ.get(
        "EURI_API_URL", "https://api.euron.one/api/v1/euri/chat/completions"
    )
    DATABASE_URI = os.environ.get(
        "DATABASE_URI", "postgresql+psycopg2://user:pass@localhost:5432/yourdb"
    )

    logging.info("Environment variables loaded.")

    # A variable that is set but empty still counts as "not set".
    if not DATABASE_URI:
        logging.error("DATABASE_URI is not set.")
    if not EURI_API_URL:
        logging.error("EURI_API_URL is not set.")
    if not EURI_API_KEY:
        logging.error("EURI_API_KEY is not set.")
except Exception as e:
    logging.error(f"Failed to load environment variables: {e}")
    raise

2025-09-07 14:41:57,210 - INFO - Environment variables loaded.


In [4]:
# Natural-language question: hard-code it on the next line, or leave it blank
# to be asked for it interactively.
nl_query = ""
nl_query = nl_query or input("Enter your question (NL to SQL): ").strip()

print(nl_query)

give me details on cart_items


In [5]:
# =========================
# 2) Helpers
# =========================
def get_db_schema(engine: Engine, target_schema: str | None = None) -> str:
    """
    Describe the tables and columns of one schema as plain text.

    Queries information_schema.columns for ``target_schema`` (or, when it is
    None, for whatever ``SELECT current_schema()`` returns on this connection)
    and renders a header line ``SCHEMA: <name>`` followed by one
    ``TABLE <name>: <col> <type>, ...`` line per table, tables sorted by name
    and columns in ordinal position.

    Returns a "(No tables found in schema '<name>')" placeholder when the
    query yields no rows. Any exception is logged and re-raised.
    """
    logging.info("Introspecting database schema...")
    try:
        with engine.connect() as conn:
            current_schema = (
                target_schema
                if target_schema is not None
                else conn.execute(text("SELECT current_schema()")).scalar()
            )

            column_rows = conn.execute(text("""
                SELECT table_name, column_name, data_type
                FROM information_schema.columns
                WHERE table_schema = :sch
                ORDER BY table_name, ordinal_position
            """), {"sch": current_schema}).fetchall()

        if not column_rows:
            logging.warning(f"No tables found in schema '{current_schema}'.")
            return f"(No tables found in schema '{current_schema}')"

        # Group "<column> <type>" fragments per table, keeping the row order.
        columns_by_table = {}
        for table_name, column_name, data_type in column_rows:
            columns_by_table.setdefault(table_name, []).append(
                f"{column_name} {data_type}"
            )

        description = [f"SCHEMA: {current_schema}"]
        description.extend(
            f"TABLE {table_name}: {', '.join(columns_by_table[table_name])}"
            for table_name in sorted(columns_by_table)
        )
        logging.info("Schema introspection completed.")
        return "\n".join(description)
    except Exception as e:
        logging.error(f"Failed to introspect schema: {e}")
        raise

def load_prompt_template(path: str = "prompt_template.txt") -> str:
    """
    Read a prompt template file and return its contents.

    Args:
        path: Location of the UTF-8 encoded template file.

    Returns:
        The full text of the file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        Exception: Any other error raised while opening or reading the file
            is logged and re-raised unchanged.
    """
    logging.info(f"Loading prompt template from {path}...")
    try:
        # Open directly instead of checking os.path.exists first: the check
        # can go stale before the open, and the open reports absence anyway.
        with open(path, "r", encoding="utf-8") as f:
            template = f.read()
    except FileNotFoundError as e:
        # Log the missing file exactly once, then raise with the same
        # message callers have always seen.
        logging.error(f"Prompt template file '{path}' not found.")
        raise FileNotFoundError(f"Prompt template file '{path}' not found.") from e
    except Exception as e:
        logging.error(f"Failed to load prompt template: {e}")
        raise

    # Only report success once the contents have actually been read.
    logging.info("Prompt template loaded successfully.")
    return template

def _strip_code_fences(content: str) -> str:
    """Return ``content`` without surrounding whitespace or a wrapping Markdown code fence."""
    cleaned = content.strip()
    if len(cleaned) >= 6 and cleaned.startswith("```") and cleaned.endswith("```"):
        body = cleaned[3:-3]
        first_line, newline, remainder = body.partition("\n")
        # Drop the opening fence line only when it is empty or a SQL language
        # tag, so SQL that starts right after the backticks is kept intact.
        if newline and first_line.strip().lower() in {"", "sql", "postgresql", "postgres", "pgsql", "psql"}:
            body = remainder
        cleaned = body.strip()
    return cleaned


def call_euri_llm(prompt: str) -> str:
    """
    Call EURI chat completions to transform prompt -> SQL.

    Reads the module-level EURI_API_KEY and EURI_API_URL settings, posts a
    two-message chat request (fixed system instruction plus ``prompt``) and
    returns the content of the first choice. Because the caller executes the
    returned text directly, surrounding whitespace and a wrapping Markdown
    code fence (for example a block tagged ``sql``) are removed.

    Args:
        prompt: The fully rendered user prompt.

    Returns:
        The SQL text produced by the model.

    Raises:
        RuntimeError: If EURI_API_KEY is not set.
        Exception: Any error from the HTTP request, the status check, JSON
            decoding or indexing into the response is logged and re-raised.
    """
    logging.info("Calling EURI API...")
    if not EURI_API_KEY:
        logging.error("EURI_API_KEY is not set.")
        raise RuntimeError("EURI_API_KEY is not set.")

    headers = {
        "Authorization": f"Bearer {EURI_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "gpt-4.1-nano",
        "messages": [
            {"role": "system", "content": "You convert natural language to strict, runnable SQL for PostgreSQL."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.0
    }
    try:
        # json= lets requests serialize the body itself.
        resp = requests.post(EURI_API_URL, headers=headers, json=payload, timeout=90)
        resp.raise_for_status()
        data = resp.json()
        content = data["choices"][0]["message"]["content"]
        logging.info("EURI API call successful.")
        return _strip_code_fences(content)
    except Exception as e:
        logging.error(f"EURI API call failed: {e}")
        raise

def execute_sql(engine: Engine, sql: str) -> pd.DataFrame:
    """
    Run one SQL statement and hand back its result as a DataFrame.

    The statement runs inside an ``engine.begin()`` block. Statements that
    return rows produce a DataFrame with the result's column names;
    statements that do not (DDL/DML such as CREATE TABLE / INSERT / UPDATE)
    produce an empty DataFrame. Any exception is logged and re-raised.
    """
    logging.info("Executing SQL query...")
    try:
        with engine.begin() as connection:
            outcome = connection.execute(text(sql))
            frame = (
                pd.DataFrame(outcome.fetchall(), columns=outcome.keys())
                if outcome.returns_rows
                else pd.DataFrame()
            )
        logging.info("SQL query executed successfully.")
        return frame
    except Exception as e:
        logging.error(f"SQL execution failed: {e}")
        raise

In [6]:
# =========================
# 3) Run the pipeline
# =========================
# End-to-end run: connect, describe the schema, render the prompt, ask the LLM
# for SQL, execute it and show the result. Failures are logged, not re-raised,
# so the notebook cell itself does not error out.
try:
    # Create engine
    logging.info("Creating database engine...")
    engine = create_engine(DATABASE_URI)

    # Introspect schema
    schema_txt = get_db_schema(engine, DB_SCHEMA)

    # Build prompt: the template is expected to contain {schema} and {question}
    template = load_prompt_template("prompt_template.txt")
    prompt = template.format(schema=schema_txt, question=nl_query)

    logging.info("\n==== Prompt sent to EURI ====\n")
    logging.info(prompt[:2000] + ("\n...\n" if len(prompt) > 2000 else "\n"))  # Log first ~2k chars

    # Get SQL from LLM
    logging.info("Calling EURI to generate SQL...")
    sql_query = call_euri_llm(prompt)
    logging.info("\n==== Generated SQL ====\n")
    logging.info(sql_query)

    # Execute and display
    logging.info("Running SQL...")
    df = execute_sql(engine, sql_query)
    if df.empty:
        logging.info("Query executed successfully. No data returned.")
    else:
        logging.info(f"Returned {len(df)} rows × {len(df.columns)} columns.")
        display(df)  # Jupyter display
except Exception as e:
    # logging.exception records the traceback along with the message, so the
    # failing step can be located from pipeline.log; logging.error kept only
    # the exception text.
    logging.exception(f"Pipeline failed: {e}")

2025-09-07 14:42:02,704 - INFO - Creating database engine...
2025-09-07 14:42:02,792 - INFO - Introspecting database schema...
2025-09-07 14:42:13,516 - INFO - Schema introspection completed.
2025-09-07 14:42:13,518 - INFO - Loading prompt template from prompt_template.txt...
2025-09-07 14:42:13,519 - INFO - Prompt template loaded successfully.
2025-09-07 14:42:13,520 - INFO - 
==== Prompt sent to EURI ====

2025-09-07 14:42:13,521 - INFO - You are an advanced SQL assistant with expertise in PostgreSQL. 
Your task is to translate the following natural language query into a valid and optimized SQL query.

Schema:
SCHEMA: public
TABLE abh_test: order_id bigint, order_date date, customer_id bigint, full_name text, paid_amount numeric, paid_at timestamp with time zone, warehouse_id bigint, shipped_at timestamp with time zone, tracking_no text
TABLE abhra: t1 text, t2 text, t3 text
TABLE addresses: address_id bigint, customer_id bigint, line1 text, city text, state text, country text, posta

Unnamed: 0,cart_item_id,cart_id,product_id,quantity
0,1,1,2,3
1,2,2,3,4
2,3,3,4,1
3,4,4,5,2
4,5,5,6,3
...,...,...,...,...
5995,5996,2996,2998,3
5996,5997,2997,2999,4
5997,5998,2998,3000,1
5998,5999,2999,3001,2
