# Read Data

* Spider Dataset

* https://yale-lily.github.io/spider

In [1]:
from datasets import load_dataset

# Load Spider dataset
ds = load_dataset("spider")
db_ids = ["department_management", "farm", "aircraft", "architecture", "cinema"]

data = []

for db_id in db_ids:
    subset = ds["train"].filter(lambda x: x["db_id"] == db_id)
    questions = [entry["question"] for entry in subset]
    queries = [entry["query"] for entry in subset]

    # Combine into (db_id, question, query) triplets
    entries = [{"db_id": db_id, "question": q, "query": sql} for q, sql in zip(questions, queries)]

    # Add to overall list
    data.extend(entries)

# deepseek-coder-1.3b-base

In [2]:
import sqlparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TRANSFORMERS_CACHE

print(torch.cuda.is_available())
torch.cuda.empty_cache()

available_memory = torch.cuda.get_device_properties(0).total_memory
print(available_memory)

print(TRANSFORMERS_CACHE)

model_name = "deepseek-ai/deepseek-coder-1.3b-base"
cache_dir = "E:/Data File/transformers.cache"
tokenizer = AutoTokenizer.from_pretrained(model_name,cache_dir=cache_dir)

if available_memory > 6e9:
    # if you have atleast 6GB of GPU memory, run load the model in float16
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="auto",
        use_cache=True,
        cache_dir=cache_dir
    )
else:
    # else, load in 8 bits – this is a bit slower
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        # torch_dtype=torch.float16,
        load_in_8bit=True,
        device_map="auto",
        use_cache=True,
        cache_dir=cache_dir
    )


True
17170956288
C:\Users\zly20\.cache\huggingface\hub


tokenizer_config.json:   0%|          | 0.00/793 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [3]:
department_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE IF NOT EXISTS "department" (
"Department_ID" int,
"Name" text,
"Creation" text,
"Ranking" int,
"Budget_in_Billions" real,
"Num_Employees" real,
PRIMARY KEY ("Department_ID")
);

CREATE TABLE IF NOT EXISTS "head" (
"head_ID" int,
"name" text,
"born_state" text,
"age" real,
PRIMARY KEY ("head_ID")
);

CREATE TABLE IF NOT EXISTS "management" (
"department_ID" int,
"head_ID" int,
"temporary_acting" text,
PRIMARY KEY ("Department_ID","head_ID"),
FOREIGN KEY ("Department_ID") REFERENCES `department`("Department_ID"),
FOREIGN KEY ("head_ID") REFERENCES `head`("head_ID")
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
""" 


farm_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "city" (
"City_ID" int,
"Official_Name" text,
"Status" text,
"Area_km_2" real,
"Population" real,
"Census_Ranking" text,
PRIMARY KEY ("City_ID")
);

CREATE TABLE "farm" (
"Farm_ID" int,
"Year" int,
"Total_Horses" real,
"Working_Horses" real,
"Total_Cattle" real,
"Oxen" real,
"Bulls" real,
"Cows" real,
"Pigs" real,
"Sheep_and_Goats" real,
PRIMARY KEY ("Farm_ID")
);

CREATE TABLE "farm_competition" (
"Competition_ID" int,
"Year" int,
"Theme" text,
"Host_city_ID" int,
"Hosts" text,
PRIMARY KEY ("Competition_ID"),
FOREIGN KEY (`Host_city_ID`) REFERENCES `city`(`City_ID`)
);


CREATE TABLE "competition_record" (
"Competition_ID" int,
"Farm_ID" int,
"Rank" int,
PRIMARY KEY ("Competition_ID","Farm_ID"),
FOREIGN KEY (`Competition_ID`) REFERENCES `farm_competition`(`Competition_ID`),
FOREIGN KEY (`Farm_ID`) REFERENCES `farm`(`Farm_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


aircraft_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE `pilot` (
  `Pilot_Id` int(11) NOT NULL,
  `Name` varchar(50) NOT NULL,
  `Age` int(11) NOT NULL,
  PRIMARY KEY (`Pilot_Id`)
);

CREATE TABLE `aircraft` (
  "Aircraft_ID" int(11) NOT NULL,
  "Aircraft" varchar(50) NOT NULL,
  "Description" varchar(50) NOT NULL,
  "Max_Gross_Weight" varchar(50) NOT NULL,
  "Total_disk_area" varchar(50) NOT NULL,
  "Max_disk_Loading" varchar(50) NOT NULL,
  PRIMARY KEY (`Aircraft_ID`)
);

CREATE TABLE `match` (
"Round" real,
"Location" text,
"Country" text,
"Date" text,
"Fastest_Qualifying" text,
"Winning_Pilot" text,
"Winning_Aircraft" text,
PRIMARY KEY ("Round"),
FOREIGN KEY (`Winning_Aircraft`) REFERENCES `aircraft`(`Aircraft_ID`),
FOREIGN KEY (`Winning_Pilot`) REFERENCES `pilot`(`Pilot_Id`)
);

CREATE TABLE `airport` (
"Airport_ID" int,
"Airport_Name" text,
"Total_Passengers" real,
"%_Change_2007" text,
"International_Passengers" real,
"Domestic_Passengers" real,
"Transit_Passengers" real,
"Aircraft_Movements" real,
"Freight_Metric_Tonnes" real,
PRIMARY KEY ("Airport_ID")
);

CREATE TABLE `airport_aircraft` (
"ID" int,
"Airport_ID" int,
"Aircraft_ID" int,
PRIMARY KEY ("Airport_ID","Aircraft_ID"),
FOREIGN KEY ("Airport_ID") REFERENCES `airport`(`Airport_ID`),
FOREIGN KEY ("Aircraft_ID") REFERENCES `aircraft`(`Aircraft_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


architecture_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "architect" (
"id" text,
"name" text,
"nationality" text,
"gender" text,
primary key("id")
);

CREATE TABLE "bridge" (
"architect_id" int,
"id" int,
"name" text,
"location" text,
"length_meters" real,
"length_feet" real,
primary key("id"),
foreign key ("architect_id" ) references `architect`("id")
);

CREATE TABLE "mill" (
"architect_id" int,
"id" int,
"location" text,
"name" text,
"type" text,
"built_year" int,
"notes" text,
primary key ("id"),
foreign key ("architect_id" ) references `architect`("id")
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


cinema_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "film" (
"Film_ID" int,
"Rank_in_series" int,
"Number_in_season" int,
"Title" text,
"Directed_by" text,
"Original_air_date" text,
"Production_code" text,
PRIMARY KEY ("Film_ID")
);

CREATE TABLE "cinema" (
"Cinema_ID" int,
"Name" text,
"Openning_year" int,
"Capacity" int,
"Location" text,
PRIMARY KEY ("Cinema_ID"));

CREATE TABLE "schedule" (
"Cinema_ID" int,
"Film_ID" int,
"Date" text,
"Show_times_per_day" int,
"Price" float,
PRIMARY KEY ("Cinema_ID","Film_ID"),
FOREIGN KEY (`Film_ID`) REFERENCES `film`(`Film_ID`),
FOREIGN KEY (`Cinema_ID`) REFERENCES `cinema`(`Cinema_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""

# Store each prompt and its corresponding DB name
dbs = [
    ("department_management", department_prompt),
    ("farm", farm_prompt),
    ("aircraft", aircraft_prompt),
    ("architecture", architecture_prompt),
    ("cinema", cinema_prompt),
]

In [None]:
import sqlite3
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import sqlparse

# Path config
def get_db_path(db_id):
    base_dir = Path(r"C:\Users\zly20\OneDrive - The University of Western Ontario\1B\CS 9860 Advanced Machine Learning\Final Project\CS_9860_Final_Project\data")
    return str(base_dir / f"{db_id}.sqlite")

# Run SQL and return DataFrame
def run_query_on_db(db_path, query):
    try:
        with sqlite3.connect(db_path) as conn:
            result = pd.read_sql_query(query, conn)
        return result
    except Exception as e:
        print("Query failed:", e)
        return None

# Accumulators
correct = 0
total = 0
bad_cases = []

# Per-dataset tracking
correct_by_db = {}
total_by_db = {}

# Loop through each database and use corresponding prompt
for db_id, prompt_template in dbs:
    subset = [item for item in data if item["db_id"] == db_id]
    db_path = get_db_path(db_id)

    correct_local = 0
    total_local = 0

    def generate_query(question):
        prompt = prompt_template.format(question=question)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        generated_ids = model.generate(
            **inputs,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=168,
            do_sample=False,
            num_beams=1,
        )
        outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        return sqlparse.format(outputs[0].split("[SQL]")[1].split("[/SQL]")[0], reindent=True)

    print(f"🔍 Evaluating {db_id} ({len(subset)} questions)")
    for item in tqdm(subset, desc=f"{db_id}"):
        question = item["question"]
        gold_query = item["query"]

        try:
            pred_query = generate_query(question)

            gold_result = run_query_on_db(db_path, gold_query)
            pred_result = run_query_on_db(db_path, pred_query)

            if gold_result is not None and pred_result is not None:
                if gold_result.equals(pred_result):
                    correct += 1
                    correct_local += 1
                else:
                    bad_cases.append({
                        "db_id": db_id,
                        "question": question,
                        "gold_query": gold_query,
                        "pred_query": pred_query,
                        "error_type": "Mismatch",
                        "gold_result": gold_result.to_string(index=False),
                        "pred_result": pred_result.to_string(index=False)
                    })
            else:
                bad_cases.append({
                    "db_id": db_id,
                    "question": question,
                    "gold_query": gold_query,
                    "pred_query": pred_query,
                    "error_type": "ExecutionError",
                    "gold_result": str(gold_result),
                    "pred_result": str(pred_result)
                })

            total += 1
            total_local += 1

        except Exception as e:
            bad_cases.append({
                "db_id": db_id,
                "question": question,
                "gold_query": gold_query,
                "pred_query": "N/A",
                "error_type": f"Exception: {str(e)}",
                "gold_result": "N/A",
                "pred_result": "N/A"
            })
            total += 1
            total_local += 1
            continue

    correct_by_db[db_id] = correct_local
    total_by_db[db_id] = total_local
    print(f"✅ Accuracy for {db_id}: {correct_local}/{total_local} = {correct_local / total_local:.2%}")

safe_model_name = model_name.replace("/", "_")
output_filename = f"bad_cases_{safe_model_name}.json"

with open(output_filename, "w", encoding="utf-8") as f:
    json.dump(bad_cases, f, ensure_ascii=False, indent=2)

print("\n📊 Summary of Accuracy per Dataset:")
for db_id in correct_by_db:
    print(f" - {db_id}: {correct_by_db[db_id]}/{total_by_db[db_id]} = {correct_by_db[db_id] / total_by_db[db_id]:.2%}")

print(f"\n🎯 Final Accuracy: {correct}/{total} = {correct / total:.2%}")
print(f"Saved bad cases to {output_filename}")


🔍 Evaluating department_management (16 questions)


department_management: 100%|██████████| 16/16 [00:46<00:00,  2.90s/it]


✅ Accuracy for department_management: 1/16 = 6.25%
🔍 Evaluating farm (40 questions)


farm:   5%|▌         | 2/40 [00:05<01:36,  2.55s/it]

Query failed: Execution failed on sql '
SELECT COUNT(*)
FROM farm_competition
JOIN farm ON farm_competition.Farm_ID = farm.Farm_ID
JOIN city ON farm_competition.Host_city_ID = city.City_ID
WHERE city.Status = 'Capital'
GROUP BY farm_competition.Host_city_ID': no such column: farm_competition.Farm_ID


farm:  45%|████▌     | 18/40 [00:46<00:56,  2.55s/it]

Query failed: Execution failed on sql '
SELECT c.Official_Name,
       c.Status
FROM city c
JOIN farm f ON c.City_ID = f.Host_city_ID
GROUP BY c.City_ID
ORDER BY SUM(f.Total_Horses) DESC
LIMIT 1;': no such column: f.Host_city_ID


farm:  70%|███████   | 28/40 [01:12<00:31,  2.61s/it]

Query failed: Execution failed on sql '
SELECT city.Status,
       city.Population,
       city.City_ID,
       city.Official_Name,
       city.Area_km_2,
       city.Census_Ranking,
       farm.Total_Horses,
       farm.Working_Horses,
       farm.Total_Cattle,
       farm.Oxen,
       farm.Bulls,
       farm.Cows,
       farm.Pigs,
       farm.Sheep_and_Goats
FROM city
JOIN farm ON city.City_ID = farm.Host_city_ID
GROUP BY city.Status,
         city.Population,
         city.City_ID,
         city.Official_Name,
         city.Area_km_2,
         city.Census_Ranking,
         farm.Total_Horses,
         farm.Work': no such column: farm.Host_city_ID


farm:  90%|█████████ | 36/40 [01:33<00:11,  2.75s/it]

Query failed: Execution failed on sql '
SELECT c.Status
FROM city c
JOIN farm f ON c.City_ID = f.Host_city_ID
WHERE c.Population > 1500
  AND c.Population < 500
GROUP BY c.Status
HAVING COUNT(*) > 1': no such column: f.Host_city_ID


farm:  92%|█████████▎| 37/40 [01:36<00:07,  2.56s/it]

Query failed: Execution failed on sql '
SELECT c.Official_Name
FROM city c
JOIN farm f ON c.City_ID = f.Host_city_ID
WHERE c.Population > 1500
  OR c.Population < 500
GROUP BY c.Official_Name
ORDER BY c.Official_Name;': no such column: f.Host_city_ID


farm:  95%|█████████▌| 38/40 [01:38<00:04,  2.39s/it]

Query failed: Execution failed on sql '
SELECT c.Official_Name
FROM city c
JOIN farm f ON c.City_ID = f.Host_city_ID
WHERE c.Population > 1500
  OR c.Population < 500
GROUP BY c.Official_Name
ORDER BY c.Official_Name;': no such column: f.Host_city_ID


farm: 100%|██████████| 40/40 [01:42<00:00,  2.56s/it]


✅ Accuracy for farm: 7/40 = 17.50%
🔍 Evaluating aircraft (46 questions)


aircraft:  15%|█▌        | 7/46 [00:17<01:26,  2.21s/it]

Query failed: Execution failed on sql '
SELECT airport.Airport_Name,
       airport.Total_Passengers,
       airport.%_Change_2007,
       airport.International_Passengers,
       airport.Domestic_Passengers,
       airport.Transit_Passengers,
       airport.Aircraft_Movements,
       airport.Freight_Metric_Tonnes
FROM airport
JOIN airport_aircraft ON airport.Airport_ID = airport_aircraft.Airport_ID
JOIN aircraft ON aircraft.Aircraft_ID = airport_aircraft.Aircraft_ID
WHERE airport.Airport_Name = 'London Heathrow'
GROUP BY airport.Airport_Name;': near "%": syntax error


aircraft:  17%|█▋        | 8/46 [00:20<01:32,  2.42s/it]

Query failed: Execution failed on sql '
SELECT airport.Airport_Name,
       airport.Total_Passengers,
       airport.%_Change_2007,
       airport.International_Passengers,
       airport.Domestic_Passengers,
       airport.Transit_Passengers,
       airport.Aircraft_Movements,
       airport.Freight_Metric_Tonnes
FROM airport
JOIN airport_aircraft ON airport.Airport_ID = airport_aircraft.Airport_ID
JOIN aircraft ON aircraft.Aircraft_ID = airport_aircraft.Aircraft_ID
WHERE airport.Airport_Name = 'London Heathrow'
GROUP BY airport.Airport_Name;': near "%": syntax error


aircraft: 100%|██████████| 46/46 [01:58<00:00,  2.57s/it]


✅ Accuracy for aircraft: 12/46 = 26.09%
🔍 Evaluating architecture (17 questions)


architecture:  29%|██▉       | 5/17 [00:12<00:26,  2.21s/it]

Query failed: Execution failed on sql '
SELECT m.name,
       m.built_year
FROM mill m
JOIN bridge b ON m.id = b.mill_id
JOIN architect a ON a.id = b.architect_id
WHERE m.type = 'Grondzeiler'
GROUP BY m.name,
         m.built_year
ORDER BY m.built_year DESC;': no such column: b.mill_id


architecture:  41%|████      | 7/17 [00:16<00:21,  2.16s/it]

Query failed: Execution failed on sql '
SELECT m.name
FROM mill m
LEFT JOIN bridge b ON m.id = b.mill_id
WHERE b.location != 'Donceel'
GROUP BY m.name
HAVING COUNT(b.id) = 0': no such column: b.mill_id


architecture:  76%|███████▋  | 13/17 [00:30<00:08,  2.22s/it]

Query failed: Execution failed on sql '
SELECT `mill`.`name`
FROM `mill`
INNER JOIN `bridge` ON `mill`.`id` = `bridge`.`mill_id`
WHERE `bridge`.`name` LIKE '%Moulin%'
GROUP BY `mill`.`name`
LIMIT 1;': no such column: bridge.mill_id


architecture:  82%|████████▏ | 14/17 [00:32<00:06,  2.24s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT mill.name
FROM mill
JOIN bridge ON mill.id = bridge.mill_id
WHERE bridge.length_meters > 80
GROUP BY mill.name
HAVING COUNT(mill.name) > 1
ORDER BY mill.name;': no such column: bridge.mill_id


architecture: 100%|██████████| 17/17 [00:39<00:00,  2.33s/it]


✅ Accuracy for architecture: 5/17 = 29.41%
🔍 Evaluating cinema (30 questions)


cinema:  53%|█████▎    | 16/30 [00:36<00:34,  2.50s/it]

Query failed: Execution failed on sql '
SELECT film.Title,
       cinema.Directed_by
FROM film
INNER JOIN cinema ON film.Directed_by = cinema.Name': no such column: cinema.Directed_by


cinema:  57%|█████▋    | 17/30 [00:39<00:34,  2.64s/it]

Query failed: Execution failed on sql '
SELECT film.Title,
       cinema.Directed_by
FROM film
INNER JOIN schedule ON film.Film_ID = schedule.Film_ID
INNER JOIN cinema ON cinema.Cinema_ID = schedule.Cinema_ID': no such column: cinema.Directed_by


cinema:  87%|████████▋ | 26/30 [01:00<00:09,  2.38s/it]

Query failed: Execution failed on sql '
SELECT director,
       COUNT(*) AS total_show_times
FROM
  (SELECT director,
          film_id
   FROM film
   INNER JOIN schedule ON film.film_id = schedule.film_id
   GROUP BY director)
GROUP BY director
ORDER BY total_show_times DESC
LIMIT 1;': no such column: director


cinema: 100%|██████████| 30/30 [01:09<00:00,  2.31s/it]

✅ Accuracy for cinema: 6/30 = 20.00%

📊 Summary of Accuracy per Dataset:
 - department_management: 1/16 = 6.25%
 - farm: 7/40 = 17.50%
 - aircraft: 12/46 = 26.09%
 - architecture: 5/17 = 29.41%
 - cinema: 6/30 = 20.00%

🎯 Final Accuracy: 31/149 = 20.81%
Saved bad cases to bad_cases_deepseek-ai_deepseek-coder-1.3b-base.json



