In [2]:
from llama_cpp import Llama
import pandas as pd
import duckdb, os

In [4]:
# Path to your GGUF model
MODEL_PATH = "models/llama-3-sqlcoder-8b.Q6_K.gguf"

# llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=6)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=1024,  # Lower context size if RAM is an issue
    n_threads=6,
    n_gpu_layers=20,  # Safer for 8GB GPU
    verbose=True
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [5]:
# Load CSV into DuckDB
df = pd.read_csv("sales.csv")
con = duckdb.connect()
con.register("sales_data", df)

<duckdb.duckdb.DuckDBPyConnection at 0x1d480c8aaf0>

In [6]:
# Prompt template
def build_prompt(nlq):
    schema = "sales_data(region TEXT, quarter TEXT, sales INT)"
    prompt = f"""### You are an expert Postgres SQL generator.
### Given the following table schema:
# {schema}

### Write a SQL query to answer the question:
# {nlq}

### SQL:
"""
    return prompt

In [10]:
# Query model
def generate_sql(prompt):
    output = llm(prompt, temperature=0, max_tokens=256)
    text = output["choices"][0]["text"]

    if "SELECT" not in text.upper():
        print("❌ 'SELECT' not found in model output. Raw output:")
        print(text)
        return None

    # Try to extract SQL statement cleanly
    try:
        sql = "SELECT " + text.upper().split("SELECT", 1)[1].split(";")[0].strip() + ";"
        return sql
    except Exception as e:
        print("❌ Error while parsing SQL:", e)
        print("Raw model output:")
        print(text)
        return None

In [8]:
# Run query
def run_nlq(nlq):
    prompt = build_prompt(nlq)
    sql = generate_sql(prompt)

    if not sql:
        print("\n⚠️ Could not generate valid SQL.")
        return

    print("\n📜 Generated SQL:")
    print(sql)

    try:
        result = con.execute(sql).fetchdf()
        print("\n📊 Query Result:")
        print(result)
    except Exception as e:
        print("\n❌ SQL Execution Error:")
        print(e)

In [11]:
run_nlq("What were the total sales in Q3 for the Northeast?")

Llama.generate: prefix-match hit



📜 Generated SQL:
SELECT SUM(SALES) AS TOTAL_SALES FROM SALES_DATA WHERE REGION = 'NORTHEAST' AND QUARTER = 'Q3';

📊 Query Result:
   TOTAL_SALES
0          NaN


In [15]:
print(con.execute("SELECT * FROM sales_data LIMIT 5").fetchdf())

      region quarter  sales
0  Northeast      Q1  10000
1  Northeast      Q2  15000
2  Northeast      Q3  18000
3      South      Q1   8000
4      South      Q2   9500


In [19]:
sql = "SELECT SUM(SALES) AS TOTAL_SALES FROM SALES_DATA WHERE REGION = 'Northeast' AND QUARTER = 'Q3';"
print("\n📜 Executing SQL:")
con.execute(sql).fetchdf()


📜 Executing SQL:


Unnamed: 0,TOTAL_SALES
0,18000.0
