# Read Data

* Spider Dataset

* https://yale-lily.github.io/spider

In [1]:
from datasets import load_dataset

# Load Spider dataset
ds = load_dataset("spider")
db_ids = ["department_management", "farm", "aircraft", "architecture", "cinema"]

data = []

for db_id in db_ids:
    subset = ds["train"].filter(lambda x: x["db_id"] == db_id)
    questions = [entry["question"] for entry in subset]
    queries = [entry["query"] for entry in subset]

    # Combine into (db_id, question, query) triplets
    entries = [{"db_id": db_id, "question": q, "query": sql} for q, sql in zip(questions, queries)]

    # Add to overall list
    data.extend(entries)

# DeepSeek-R1-Distill-Qwen-7B

In [None]:
import sqlparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TRANSFORMERS_CACHE

print(torch.cuda.is_available())
torch.cuda.empty_cache()

available_memory = torch.cuda.get_device_properties(0).total_memory
print(available_memory)

print(TRANSFORMERS_CACHE)

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
cache_dir = "E:/Data File/transformers.cache"
tokenizer = AutoTokenizer.from_pretrained(model_name,cache_dir=cache_dir)

if available_memory > 17e9:
    # if you have atleast 17GB of GPU memory, run load the model in float16
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="auto",
        use_cache=True,
        cache_dir=cache_dir
    )
else:
    # else, load in 8 bits – this is a bit slower
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        # torch_dtype=torch.float16,
        load_in_8bit=True,
        device_map="auto",
        use_cache=True,
        cache_dir=cache_dir
    )


True
17170956288
C:\Users\zly20\.cache\huggingface\hub


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
department_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE IF NOT EXISTS "department" (
"Department_ID" int,
"Name" text,
"Creation" text,
"Ranking" int,
"Budget_in_Billions" real,
"Num_Employees" real,
PRIMARY KEY ("Department_ID")
);

CREATE TABLE IF NOT EXISTS "head" (
"head_ID" int,
"name" text,
"born_state" text,
"age" real,
PRIMARY KEY ("head_ID")
);

CREATE TABLE IF NOT EXISTS "management" (
"department_ID" int,
"head_ID" int,
"temporary_acting" text,
PRIMARY KEY ("Department_ID","head_ID"),
FOREIGN KEY ("Department_ID") REFERENCES `department`("Department_ID"),
FOREIGN KEY ("head_ID") REFERENCES `head`("head_ID")
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
""" 


farm_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "city" (
"City_ID" int,
"Official_Name" text,
"Status" text,
"Area_km_2" real,
"Population" real,
"Census_Ranking" text,
PRIMARY KEY ("City_ID")
);

CREATE TABLE "farm" (
"Farm_ID" int,
"Year" int,
"Total_Horses" real,
"Working_Horses" real,
"Total_Cattle" real,
"Oxen" real,
"Bulls" real,
"Cows" real,
"Pigs" real,
"Sheep_and_Goats" real,
PRIMARY KEY ("Farm_ID")
);

CREATE TABLE "farm_competition" (
"Competition_ID" int,
"Year" int,
"Theme" text,
"Host_city_ID" int,
"Hosts" text,
PRIMARY KEY ("Competition_ID"),
FOREIGN KEY (`Host_city_ID`) REFERENCES `city`(`City_ID`)
);


CREATE TABLE "competition_record" (
"Competition_ID" int,
"Farm_ID" int,
"Rank" int,
PRIMARY KEY ("Competition_ID","Farm_ID"),
FOREIGN KEY (`Competition_ID`) REFERENCES `farm_competition`(`Competition_ID`),
FOREIGN KEY (`Farm_ID`) REFERENCES `farm`(`Farm_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


aircraft_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE `pilot` (
  `Pilot_Id` int(11) NOT NULL,
  `Name` varchar(50) NOT NULL,
  `Age` int(11) NOT NULL,
  PRIMARY KEY (`Pilot_Id`)
);

CREATE TABLE `aircraft` (
  "Aircraft_ID" int(11) NOT NULL,
  "Aircraft" varchar(50) NOT NULL,
  "Description" varchar(50) NOT NULL,
  "Max_Gross_Weight" varchar(50) NOT NULL,
  "Total_disk_area" varchar(50) NOT NULL,
  "Max_disk_Loading" varchar(50) NOT NULL,
  PRIMARY KEY (`Aircraft_ID`)
);

CREATE TABLE `match` (
"Round" real,
"Location" text,
"Country" text,
"Date" text,
"Fastest_Qualifying" text,
"Winning_Pilot" text,
"Winning_Aircraft" text,
PRIMARY KEY ("Round"),
FOREIGN KEY (`Winning_Aircraft`) REFERENCES `aircraft`(`Aircraft_ID`),
FOREIGN KEY (`Winning_Pilot`) REFERENCES `pilot`(`Pilot_Id`)
);

CREATE TABLE `airport` (
"Airport_ID" int,
"Airport_Name" text,
"Total_Passengers" real,
"%_Change_2007" text,
"International_Passengers" real,
"Domestic_Passengers" real,
"Transit_Passengers" real,
"Aircraft_Movements" real,
"Freight_Metric_Tonnes" real,
PRIMARY KEY ("Airport_ID")
);

CREATE TABLE `airport_aircraft` (
"ID" int,
"Airport_ID" int,
"Aircraft_ID" int,
PRIMARY KEY ("Airport_ID","Aircraft_ID"),
FOREIGN KEY ("Airport_ID") REFERENCES `airport`(`Airport_ID`),
FOREIGN KEY ("Aircraft_ID") REFERENCES `aircraft`(`Aircraft_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


architecture_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "architect" (
"id" text,
"name" text,
"nationality" text,
"gender" text,
primary key("id")
);

CREATE TABLE "bridge" (
"architect_id" int,
"id" int,
"name" text,
"location" text,
"length_meters" real,
"length_feet" real,
primary key("id"),
foreign key ("architect_id" ) references `architect`("id")
);

CREATE TABLE "mill" (
"architect_id" int,
"id" int,
"location" text,
"name" text,
"type" text,
"built_year" int,
"notes" text,
primary key ("id"),
foreign key ("architect_id" ) references `architect`("id")
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


cinema_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "film" (
"Film_ID" int,
"Rank_in_series" int,
"Number_in_season" int,
"Title" text,
"Directed_by" text,
"Original_air_date" text,
"Production_code" text,
PRIMARY KEY ("Film_ID")
);

CREATE TABLE "cinema" (
"Cinema_ID" int,
"Name" text,
"Openning_year" int,
"Capacity" int,
"Location" text,
PRIMARY KEY ("Cinema_ID"));

CREATE TABLE "schedule" (
"Cinema_ID" int,
"Film_ID" int,
"Date" text,
"Show_times_per_day" int,
"Price" float,
PRIMARY KEY ("Cinema_ID","Film_ID"),
FOREIGN KEY (`Film_ID`) REFERENCES `film`(`Film_ID`),
FOREIGN KEY (`Cinema_ID`) REFERENCES `cinema`(`Cinema_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""

# Store each prompt and its corresponding DB name
dbs = [
    ("department_management", department_prompt),
    ("farm", farm_prompt),
    ("aircraft", aircraft_prompt),
    ("architecture", architecture_prompt),
    ("cinema", cinema_prompt),
]

In [4]:
import sqlite3
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import sqlparse

# Path config
def get_db_path(db_id):
    base_dir = Path(r"C:\Users\zly20\OneDrive - The University of Western Ontario\1B\CS 9860 Advanced Machine Learning\Final Project\CS_9860_Final_Project\data")
    return str(base_dir / f"{db_id}.sqlite")

# Run SQL and return DataFrame
def run_query_on_db(db_path, query):
    try:
        with sqlite3.connect(db_path) as conn:
            result = pd.read_sql_query(query, conn)
        return result
    except Exception as e:
        print("Query failed:", e)
        return None

# Accumulators
correct = 0
total = 0
bad_cases = []

# Per-dataset tracking
correct_by_db = {}
total_by_db = {}

# Loop through each database and use corresponding prompt
for db_id, prompt_template in dbs:
    subset = [item for item in data if item["db_id"] == db_id]
    db_path = get_db_path(db_id)

    correct_local = 0
    total_local = 0

    def generate_query(question):
        prompt = prompt_template.format(question=question)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        generated_ids = model.generate(
            **inputs,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=168,
            do_sample=False,
            num_beams=1,
        )
        outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        return sqlparse.format(outputs[0].split("[SQL]")[1].split("[/SQL]")[0], reindent=True)

    print(f"🔍 Evaluating {db_id} ({len(subset)} questions)")
    for item in tqdm(subset, desc=f"{db_id}"):
        question = item["question"]
        gold_query = item["query"]

        try:
            pred_query = generate_query(question)

            gold_result = run_query_on_db(db_path, gold_query)
            pred_result = run_query_on_db(db_path, pred_query)

            if gold_result is not None and pred_result is not None:
                if gold_result.equals(pred_result):
                    correct += 1
                    correct_local += 1
                else:
                    bad_cases.append({
                        "db_id": db_id,
                        "question": question,
                        "gold_query": gold_query,
                        "pred_query": pred_query,
                        "error_type": "Mismatch",
                        "gold_result": gold_result.to_string(index=False),
                        "pred_result": pred_result.to_string(index=False)
                    })
            else:
                bad_cases.append({
                    "db_id": db_id,
                    "question": question,
                    "gold_query": gold_query,
                    "pred_query": pred_query,
                    "error_type": "ExecutionError",
                    "gold_result": str(gold_result),
                    "pred_result": str(pred_result)
                })

            total += 1
            total_local += 1

        except Exception as e:
            bad_cases.append({
                "db_id": db_id,
                "question": question,
                "gold_query": gold_query,
                "pred_query": "N/A",
                "error_type": f"Exception: {str(e)}",
                "gold_result": "N/A",
                "pred_result": "N/A"
            })
            total += 1
            total_local += 1
            continue

    correct_by_db[db_id] = correct_local
    total_by_db[db_id] = total_local
    print(f"✅ Accuracy for {db_id}: {correct_local}/{total_local} = {correct_local / total_local:.2%}")

safe_model_name = model_name.replace("/", "_")
output_filename = f"bad_cases_{safe_model_name}.json"

with open(output_filename, "w", encoding="utf-8") as f:
    json.dump(bad_cases, f, ensure_ascii=False, indent=2)

print("\n📊 Summary of Accuracy per Dataset:")
for db_id in correct_by_db:
    print(f" - {db_id}: {correct_by_db[db_id]}/{total_by_db[db_id]} = {correct_by_db[db_id] / total_by_db[db_id]:.2%}")

print(f"\n🎯 Final Accuracy: {correct}/{total} = {correct / total:.2%}")
print(f"Saved bad cases to {output_filename}")


🔍 Evaluating department_management (16 questions)


department_management:  19%|█▉        | 3/16 [00:17<01:08,  5.24s/it]

Query failed: Execution failed on sql '
SELECT dCreation,
       Name,
       Budget_in_Billions
FROM department;': no such column: dCreation


department_management:  50%|█████     | 8/16 [01:08<01:20, 10.01s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT head.born_state AS state_name
FROM head
JOIN management ON head.head_ID = management.head_ID
JOIN department ON management.department_ID = department.Department_ID
WHERE managementtemporary_acting = 'at least 3 heads'
GROUP BY state_name
ORDER BY state_name;': no such column: managementtemporary_acting


department_management:  56%|█████▋    | 9/16 [01:20<01:14, 10.67s/it]

Query failed: Execution failed on sql '
SELECT MINCreation
FROM department
WHERE MINCreation =
    (SELECT MINCreation
     FROM department
     GROUP BY Department_ID MIN)': near "MIN": syntax error


department_management:  88%|████████▊ | 14/16 [02:05<00:18,  9.35s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT m2.born_state
FROM management m1
JOIN management m2 ON m1.department_ID = m2.department_ID
AND m1.head_ID != m2.head_ID
WHERE m1.Name = 'Treasury'
  AND m2.Name = 'Homeland Security';': no such column: m2.born_state


department_management:  94%|█████████▍| 15/16 [02:17<00:10, 10.09s/it]

Query failed: Execution failed on sql '
SELECT d.D部ID,
       d.Name,
       COUNT(m.head_ID) as Num_Heads
FROM department d
JOIN management m ON d.D部ID = m.department_ID
GROUP BY d.D部ID,
         d.Name
HAVING Num_Heads > 1;': no such column: d.D部ID


department_management: 100%|██████████| 16/16 [02:24<00:00,  9.03s/it]


✅ Accuracy for department_management: 5/16 = 31.25%
🔍 Evaluating farm (40 questions)


farm:   8%|▊         | 3/40 [00:21<04:57,  8.04s/it]

Query failed: Execution failed on sql '
SELECT distinct Total_Horses
FROM farm,
     farm_competition,
     competition_record
WHERE farm.City_ID = farm_competition.Host_city_ID
  AND farm_competition.Host_city_ID = competition_record.Host_city_ID
  AND competition_record.Farm_ID = farm.Farm_ID
ORDER BY Total_Horses ASC;': no such column: farm.City_ID


farm:  12%|█▎        | 5/40 [00:40<05:20,  9.16s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT city.Official_Name AS Host_City
FROM city
JOIN farm_competition ON city.City_ID = farm_competition.Host_city_ID
JOIN competition_record ON farm_competition.Competition_ID = competition_record.Competition_ID
AND competition_record.Farm_ID = farm_competition.Farm_ID
WHERE farm_competition.Theme NOT LIKE '%Aliens%';

[Wait, but the table 'farm' doesn't have a 'Theme' column. So the 'farm_competition' table has the 'Theme' column. So the query is correct as it references farm_competition.Theme.] </think> To find the hosts of competitions not themed around "Aliens",
                                                                                                                                                                                                                                                  we
join the relevant tables
and filter by the theme condition.': no such column: farm_competition.Farm_ID


farm:  15%|█▌        | 6/40 [00:52<05:36,  9.89s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT city.Official_Name AS Host_City
FROM city
JOIN farm_competition ON city.City_ID = farm_competition.Host_city_ID
JOIN competition_record ON farm_competition.Competition_ID = competition_record.Competition_ID
AND competition_record.Farm_ID = farm_competition.Farm_ID
WHERE farm_competition.Theme != 'Aliens' [;/SQL] </think> To find the hosts of competitions where the theme is not Aliens,
                                                                                                                   we need to
  join the relevant tables. The `farm_competition` table links to `city`
  and `competition_record`. We'll filter by `Theme != 'Aliens'`.': near "[;/SQL]": syntax error


farm:  18%|█▊        | 7/40 [01:03<05:43, 10.42s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT fThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeTheme
FROM farm_competition
WHERE fThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeThemeTheme
  FROM competition_record
GROUP BY Year
ORDER BY Year ASC;': near "FROM": syntax error


farm:  20%|██        | 8/40 [01:15<05:48, 10.89s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT Theme
FROM farm_competition,
     competition_record
WHERE farm_competition.Host_city_ID = city.City_ID
  AND competition_record.Farm_ID = farm.Farm_ID
  AND competition_record.Competition_ID = farm_competition.Competition_ID
ORDER BY Year ASC;': no such column: city.City_ID


farm:  45%|████▌     | 18/40 [02:36<03:12,  8.75s/it]

Query failed: Execution failed on sql '
SELECT c.Official_Name,
       c.Status
FROM city c
JOIN farm_competition fc ON c.City_ID = fc.Host_city_ID
JOIN competition_record cr ON fc.Competition_ID = cr.Competition_ID
AND c.City_ID = cr.Host_city_ID
GROUP BY c.Official_Name,
         c.Status
ORDER BY c.Population DESC
LIMIT 1;': no such column: cr.Host_city_ID


farm:  48%|████▊     | 19/40 [02:47<03:17,  9.42s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT year,
                city.Official_Name
FROM city,
     competition_record,
     farm_competition
WHERE city.City_ID = competition_record.Host_city_ID
  AND competition_record.Year = farm_competition.Year
  AND competition_record.Farm_ID = farm.farm_ID;

Wait,
no,
that's not correct. Let me think again.

I need to get the years and the host city's official names. So,
                                                     I should
join the competition_record with farm_competition to get the year
and the farm details,
             then
join with city to get the official name. So,
                                   the correct SQL should be:
SELECT year,
       city.Official_Name
FROM competition_record,
     farm_competition,
     city
WHERE competition_record.Host_city_ID = farm_competition.Host_city_ID
  AND competition_record.Year = farm_competition.Year
  AND farm_competition.Host_city_ID = city.City_ID;

Wait,
but': no such colu

farm:  50%|█████     | 20/40 [02:59<03:20, 10.04s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT c.City_ID,
                f.Year,
                c.Official_Name
FROM city c
JOIN farm_competition fc ON c.City_ID = fc.Host_city_ID
JOIN competition_record cr ON fc.Competition_ID = cr.Competition_ID
WHERE cr.Rank = 1;': no such column: f.Year


farm:  52%|█████▎    | 21/40 [03:10<03:14, 10.24s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT city.Official_Name
FROM city,
     competition_record,
     farm_competition
WHERE city.City_ID = competition_record.Host_city_ID
  AND competition_record.farm_id = farm_competition.Farm_ID
  AND competition_record.Theme LIKE '%more than one%';

Wait,
no,
that might not be correct. Let me think again. I need to find cities that have hosted
more than one competition. So,
          I should
group by the city
and count the number of competitions they've hosted. If the count is
more than one,
          include them. So,
                  the query should
join city with competition_record via Host_city_ID,
                                      then
group by city
and count the number of competitions. Then,
select cities
where the count is greater than one. The correct SQL would be:
  SELECT city.Official_Name
  FROM city,
       competition_record,
       farm_competition WHERE city.City': no such column: competition_record.Host_city_ID

farm:  55%|█████▌    | 22/40 [03:21<03:08, 10.46s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT city.Official_Name
FROM city,
     competition_record,
     farm_competition,
     competition_record c2
WHERE city.City_ID = competition_record.Host_city_ID
  AND competition_record.Official_Name = c2.Official_Name
  AND competition_record.Year = c2.Year
  AND city.Official_Name != c2.Official_Name;

Wait,
no,
that's not correct. Let me think again. I need to find cities
where they have hosted
  more than one competition. So,
            I should look
  for cities that have multiple entries in the competition_record table. Each entry in competition_record has a Host_city_ID,
                                                                                                              which references city. So,
  for each city,
           I need to count how many times it appears in competition_record. If the count is
  more than one,
            include its official name. So,
                                 the query should
  sel

farm:  62%|██████▎   | 25/40 [03:53<02:38, 10.55s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT fTheme
FROM farm_competition f
JOIN city c ON f.Host_city_ID = c.City_ID
WHERE c.Population > 1000;': no such column: fTheme


farm:  65%|██████▌   | 26/40 [04:03<02:25, 10.42s/it]

Query failed: Execution failed on sql '
SELECT fTheme
FROM farm_competition f
JOIN city c ON f.Host_city_ID = c.City_ID
WHERE c.Population > 1000;': no such column: fTheme


farm:  68%|██████▊   | 27/40 [04:15<02:21, 10.88s/it]

Query failed: Execution failed on sql '
SELECT city.Status,
       AVG(farm.cityPopulation) as AveragePopulation
FROM city
JOIN farm_competition ON city.City_ID = farm_competition.Host_city_ID
JOIN competition_record ON farm_competition.Competition_ID = competition_record.Competition_ID
JOIN farm ON competition_record.Farm_ID = farm.Farm_ID
GROUP BY city.Status': no such column: farm.cityPopulation


farm:  70%|███████   | 28/40 [04:29<02:19, 11.64s/it]

Query failed: Execution failed on sql '
SELECT city."City_ID",
       city."Official_Name",
       city."Status",
       AVG(farm."Population") AS "Average Population"
FROM city
JOIN farm_competition ON city."City_ID" = farm_competition."Host_city_ID"
JOIN farm ON farm."Farm_ID" = farm_competition."Farm_ID"
GROUP BY city."City_ID",
         city."Official_Name",
         city."Status"': no such column: farm.Population


farm:  72%|███████▎  | 29/40 [04:37<01:57, 10.71s/it]

Query failed: Execution failed on sql '
SELECT distinct Status
FROM city
ORDER BY COUNT(Status) DESC;': misuse of aggregate: COUNT()


farm:  90%|█████████ | 36/40 [05:39<00:38,  9.58s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT s.Status
FROM city c
JOIN farm_competition fc ON c.City_ID = fc.Host_city_ID
JOIN competition_record cr ON fc.Competition_ID = cr.Competition_ID
WHERE cr.Rank IN
    (SELECT COUNT(DISTINCT cr.Rank)
     FROM competition_record cr
     WHERE cr.Population > 1500)
  OR cr.Rank IN
    (SELECT COUNT(DISTINCT cr.Rank)
     FROM competition_record cr
     WHERE cr.Population < 500);

[Wait,
 no,
 that's not correct. Let me think again. The question is about cities, not farms or competitions. So I need to filter the cities based on their population. But the way I'm joining might not be correct. Maybe I should first get the cities with population over 1500
and those with less': no such column: s.Status




Query failed: Execution failed on sql '
SELECT c.City_ID,
       c.Official_Name,
       c.Census_Ranking
FROM city c
WHERE c.Status != 'Village'
ORDER BY c.Census_Ranking;

Wait,
but the question is about the ranking,
                              so perhaps the ranking is a text field?
Or maybe it's a numeric field. If it's a text, ordering by it might not make sense. Maybe the ranking is an integer. Let me check the tables again. The city table has a column named "Census_Ranking" which is a text. Hmm,
                                                                                                                                                                                                                that's odd. Maybe it's supposed to be a number. Alternatively,
                                                                                                                                                                                                                           

farm: 100%|██████████| 40/40 [06:19<00:00,  9.49s/it]


✅ Accuracy for farm: 7/40 = 17.50%
🔍 Evaluating aircraft (46 questions)


aircraft:  20%|█▉        | 9/46 [01:08<05:29,  8.92s/it]

Query failed: Execution failed on sql '
SELECT SUM(airport.Domestic_Passengers) as Total_Domestic_Passengers
FROM airport
JOIN match ON airport.Airport_ID = match.Location
JOIN aircraft ON match.Aircraft_ID = aircraft.Aircraft_ID
LEFT JOIN pilot ON match.Winning_Pilot = pilot.Name
WHERE airport.Airport_Name LIKE '%London%';': no such column: match.Aircraft_ID


aircraft:  22%|██▏       | 10/46 [01:21<06:00, 10.00s/it]

Query failed: Execution failed on sql '
SELECT SUM(airport.Domestic_Passengers) as Total_Domestic_Passengers
FROM airport
WHERE airport.Airport_ID IN
    (SELECT a.airport_id
     FROM airport_aircraft a
     WHERE a.airport_id =
         (SELECT airport_id
          FROM match m
          WHERE m.Location = 'London')
       OR a.airport_id =
         (SELECT airport_id
          FROM match m
          WHERE m.Location = 'London')) Wait,
                            no,
                            that's not correct. Let me think again.

I need to get all the domestic passengers from all London airports. So first, I need to find all the airports in London. Then, for each of those airports, get their Domestic_Passengers. Then sum them all.

So, step 1: Find all airports where the airport is in London. How? By matching the 'Location' in the 'match': near "Wait": syntax error


aircraft:  41%|████▏     | 19/46 [02:39<04:20,  9.65s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT a.aircraft
FROM airport a,
     aircraft ac
WHERE a.airport_id = ac.airport_id
  AND a.airport_name = 'London Gatwick';': no such column: a.aircraft


aircraft:  43%|████▎     | 20/46 [02:51<04:27, 10.27s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT a.aircraft
FROM airport_aircraft a,
     aircraft b
WHERE a.airport_id = b.airport_id
  AND a.airport_id =
    (SELECT airport_id
     FROM airport
     WHERE airport_name = 'London Gatwick');': no such column: a.aircraft


aircraft:  46%|████▌     | 21/46 [03:01<04:14, 10.18s/it]

Query failed: Execution failed on sql '
SELECT a.aircraft
FROM airport a
JOIN airport_aircraft aa ON a.airport_id = aa.airport_id
JOIN aircraft ac ON aa.aircraft_id = ac.aircraft_id
WHERE a.Total_Passengers > 10000000;': no such column: a.aircraft


aircraft:  52%|█████▏    | 24/46 [03:37<04:12, 11.48s/it]

Query failed: Execution failed on sql '
SELECT ROUND(AVG("airport_aircraft"."Total_Passengers")) AS "Average Total Passengers"
FROM `airport_aircraft`
JOIN `aircraft` ON `airport_aircraft"."Aircraft_ID` = `aircraft"."Aircraft_ID`
WHERE `airport_aircraft"."Aircraft_ID` = 'Robinson R-22'
GROUP BY `airport_aircraft"."Airport_ID`': no such column: airport_aircraft.Total_Passengers


aircraft:  54%|█████▍    | 25/46 [03:44<03:32, 10.13s/it]

Query failed: Execution failed on sql '
SELECT a.Location,
       ac.Aircraft
FROM airport a
JOIN aircraft ac ON a.Airport_ID = ac.Aircraft_ID': no such column: a.Location


aircraft:  57%|█████▋    | 26/46 [03:55<03:23, 10.17s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT a.Location,
                a.Airport_Name
FROM `airport` a
JOIN `airport_aircraft` b ON a.Airport_ID = b.Airport_ID
AND b.Aircraft_ID =
  (SELECT aircraft_id
   FROM `match`
   WHERE `Winning_Aircraft` = a.`Aircraft`)
WHERE a.`Airport_ID` IN
    (SELECT b.Airport_ID
     FROM `match`
     WHERE `Winning_Aircraft` = a.`Aircraft`);': no such column: a.Location


aircraft:  65%|██████▌   | 30/46 [04:32<02:36,  9.77s/it]

Query failed: Execution failed on sql '
SELECT a.Aircraft,
       COUNT(m.Winning_Aircraft) as NumberOfWins
FROM aircraft a
JOIN match m ON a.Aircraft_ID = m.Winnin Wait,
                that's not correct. Let me think again. I need to
join the match table with the aircraft table. The match table has a
foreign key to aircraft,
               so I should
join on m.Aircraft_ID = a.Aircraft_ID. So the correct SQL should be:
SELECT a.Aircraft,
       COUNT(m.Winning_Aircraft) as NumberOfWins
FROM aircraft a
JOIN match m ON a.Aircraft_ID = m.Aircraft_ID
GROUP BY a.Aircraft_ID': near "Wait": syntax error


aircraft:  76%|███████▌  | 35/46 [05:11<01:36,  8.76s/it]

Query failed: Execution failed on sql '
SELECT a.Aircraft
FROM aircraft a
WHERE a.Aircraft_ID NOT IN
    (SELECT m.Aircraft_ID
     FROM match m)': no such column: m.Aircraft_ID


aircraft:  78%|███████▊  | 36/46 [05:21<01:32,  9.24s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT a.aircraft
FROM aircraft a,
     match m
WHERE a.aircraft_id = m.aircraft_id
  AND m.winning_aircraft IS NULL;': no such column: m.aircraft_id


aircraft:  89%|████████▉ | 41/46 [06:07<00:44,  8.94s/it]

Query failed: Execution failed on sql '
SELECT Name,
       Age
FROM pilot
WHERE Age < 30
  AND
    (SELECT COUNT(*)
     FROM match
     WHERE Winning_Pilot = pilot.Name) =
    (SELECT MAX(COUNT(*))
     FROM
       (SELECT COUNT(*) AS Total_Wins
        FROM match
        WHERE Winning_Pilot = pilot.Name) AS Total_Wins_Pilot);

Wait,
that's a subquery within a subquery. Maybe I should use a Common Table Expression (CTE) to make it cleaner. Let me rewrite it.': misuse of aggregate function COUNT()


aircraft:  93%|█████████▎| 43/46 [06:28<00:29,  9.95s/it]

Query failed: Execution failed on sql '
SELECT `Name`,
       `Age`
FROM `pilot`
WHERE `Pilot_Id` IN
    (SELECT `Winning_Pilot`
     FROM `match`
     ORDER BY `Age` ASC
     LIMIT 1);': no such column: Age


aircraft:  96%|█████████▌| 44/46 [06:39<00:20, 10.22s/it]

Query failed: Execution failed on sql '
SELECT MIN(pilot.Age) AS "Youngest Pilot Age",
       pilot.Name
FROM pilot
JOIN match ON pilot.Pilot_Id = match.Winning_Pilot
WHERE match.Winning_Aircraft IN
    (SELECT MIN(aircraft.Max_Gross_Weight) AS "Min Max Weight",
     FROM aircraft) Wait,
                                no,
                                that's not right. Let me think again.

I need to find the youngest pilot, so I should select the pilot with the minimum age. The pilot's ID is linked to the match table via the winning pilot. The winning pilot's aircraft is linked to the aircraft table. So I need to get the pilot's age
  and ensure that their aircraft has the minimum max gross weight,
                                                     but wait,
                                                     does the question require the pilot's aircraft to be the one with the youngest pilot?
  Or is it': near "FROM": syntax error


aircraft:  98%|█████████▊| 45/46 [06:50<00:10, 10.41s/it]

Query failed: Execution failed on sql '
SELECT p.Name
FROM pilot p
JOIN match m ON p.Pilot_Id = m.Winning_Pilot
WHERE m.Country = 'Australia'
  AND p.Pilot_Id NOT IN
    (SELECT p2.Pilot_Id
     FROM pilot p2
     WHERE p2.Winning_Pilot = m.Winning_Pilot) [;/SQL] </think> To find the name of pilots who did not win the matches held in Australia,
                                                                                                                               we need to: 1. Identify pilots who won matches in Australia using a
  join on `pilot`
  and `match` tables. 2. Exclude pilots who won matches in Australia
  from the list of all pilots. The SQL query is:': near "[;/SQL]": syntax error


aircraft: 100%|██████████| 46/46 [07:02<00:00,  9.19s/it]


Query failed: Execution failed on sql '
SELECT DISTINCT m.Winning_Pilot
FROM match m
WHERE m.Location = 'Australia'
  AND m.Winning_Pilot IS NOT NULL;

Wait,
no,
that would give the pilots who have won. I need to find pilots who have never won. So,
                                                                        perhaps I need to get all pilots,
                                                                                                  subtract those who have won. But without a pilot table,
                                                                                                                                                   it's tricky. Wait, the pilot table has all pilots. So, to find pilots who have never won, I can get all pilots, then subtract those who have won in any match in Australia.

So, the query would be:

SELECT p.Name FROM pilot p WHERE p.Pilot_Id NOT IN (SELECT m.Winning_Pilot FROM match m WHERE m.Location = 'Australia');

But wait,
the match tabl

architecture:  35%|███▌      | 6/17 [00:51<01:45,  9.56s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT architect.name,
                architect.nationality
FROM architect,
     bridge,
     mill
WHERE bridge.architect_id = architect.id
  AND mill.architect_id = architect.id
  AND mill.id =
    (SELECT id
     FROM mill
     WHERE name = ?)
  AND bridge.id =
    (SELECT id
     FROM bridge
     WHERE architect_id =
         (SELECT id
          FROM architect
          WHERE name = ?))[Wait,
                           that's not right. Maybe I should use a different approach.] Wait,
                                                                                       perhaps I should first get all the mill entries,
                                                                                                                               then find their architect IDs,
  and then get the
  corresponding names
  and nationalities
  from the architect table. Alternatively,
                     I can
  join the tables directly. Let 

architecture:  65%|██████▍   | 11/17 [01:48<01:09, 11.56s/it]

Query failed: Execution failed on sql '
SELECT a.id,
       a.name,
       a.gender
FROM architect a
JOIN bridge b ON a.id = b.architect_id
JOIN mill m ON a.id = m.architect_id
WHERE (b.id IS NOT NULL)
  AND (m.id IS NOT NULL)
  AND (b.id IS NOT NULL
       OR m.id IS NOT NULL)
  AND (COUNT(b.id) + COUNT(m.id)) >= 2
  OR (COUNT(b.id) + COUNT(m.id)) == 1': misuse of aggregate function COUNT()


architecture: 100%|██████████| 17/17 [02:48<00:00,  9.89s/it]


Query failed: Execution failed on sql '
SELECT DISTINCT b.name AS bridge_name
FROM bridge b
JOIN architect a ON b.architect_id = a.id
WHERE a.name = 'American';

[Wait, no, that's not correct. The question is about the length in feet, so I need to include the length_feet field and sort by that. Also, the question specifies to show the name of the bridges designed by American architects, so I should filter where the architect's nationality is American, not just the name. Additionally, the length_feet should be in ascending order for sorting.]': You can only execute one statement at a time.
✅ Accuracy for architecture: 7/17 = 41.18%
🔍 Evaluating cinema (30 questions)


cinema:   3%|▎         | 1/30 [00:13<06:41, 13.84s/it]

Query failed: Execution failed on sql '
SELECT distinct c.Location
FROM cinema c
WHERE NOT EXISTS
    (SELECT *
     FROM schedule s
     WHERE s.Cinema_ID = c.Cinema_ID
       AND s.Capacity > 800);': no such column: s.Capacity


cinema:   7%|▋         | 2/30 [00:25<05:50, 12.53s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT c.Location
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
GROUP BY c.Location
HAVING COUNT(DISTINCT s.Openning_year) >= 2
AND YEAR(DATE) IN (2010,
                   2011);': no such column: s.Openning_year


cinema:  30%|███       | 9/30 [01:17<03:08,  8.95s/it]

Query failed: Execution failed on sql '
SELECT c.Name,
       f.Openning_year
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
JOIN film f ON s.Film_ID = f.Film_ID
ORDER BY f.Openning_year DESC;': no such column: f.Openning_year


cinema:  33%|███▎      | 10/30 [01:27<03:07,  9.37s/it]

Query failed: Execution failed on sql '
SELECT c.Name,
       c.Location
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
JOIN film f ON s.Film_ID = f.Film_ID
WHERE s.Capacity =
    (SELECT MAX(sCapacity)
     FROM schedule)
GROUP BY c.Cinema_ID': no such column: s.Capacity


cinema:  37%|███▋      | 11/30 [01:38<03:05,  9.75s/it]

Query failed: Execution failed on sql '
SELECT MIN(ccinema.Capacity) as Min_Capacity,
       MAX(ccinema.Capacity) as Max_Capacity,
       AVG(ccinema.Capacity) as Avg_Capacity
FROM cinema
WHERE cinema.Openning_year >= 2011': no such column: ccinema.Capacity


cinema:  43%|████▎     | 13/30 [02:01<03:06, 10.95s/it]

Query failed: Execution failed on sql '
SELECT distinct c.Location
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
WHERE s.Openning_year >= 2010
ORDER BY COUNT(*) DESC
LIMIT 1;': no such column: s.Openning_year


cinema:  50%|█████     | 15/30 [02:24<02:48, 11.26s/it]

Query failed: Execution failed on sql '
SELECT distinct c.Location
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
JOIN film f ON s.Film_ID = f.Film_ID
WHERE s.Capacity > 300
GROUP BY c.Location
HAVING COUNT(s.Film_ID) >= 2': no such column: s.Capacity


cinema:  53%|█████▎    | 16/30 [02:30<02:14,  9.59s/it]

Query failed: Execution failed on sql '
SELECT Title,
       Director
FROM film;': no such column: Director


cinema:  57%|█████▋    | 17/30 [02:43<02:18, 10.62s/it]

Query failed: Execution failed on sql '
SELECT s.Title,
       f.Directed_by
FROM film f
JOIN schedule s ON f.Film_ID = s.Film_ID;

Wait,
no,
that's not correct. The schedule table has the Film_ID, but the director is in the film table. So the correct query should be:

SELECT s.Title, f.Directed_by FROM film f JOIN schedule s ON f.Film_ID = s.Film_ID;
Wait, no, that's not correct. The schedule table has the Film_ID,
                                                         but the director is in the film table. So the correct query should be:
SELECT s.Title,
       f.Directed_by
FROM film f
JOIN schedule s ON f.Film_ID = s.Film_ID;

Wait,
no,
that's not correct. The schedule table has the Film_ID,
                                               but the director is in the film table. So': no such column: s.Title


cinema:  60%|██████    | 18/30 [02:49<01:48,  9.07s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT Director
FROM film;': no such column: Director


cinema:  63%|██████▎   | 19/30 [03:01<01:50, 10.01s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT s.Directed_by
FROM film,
     schedule
WHERE s.Film_ID = f.Film_ID
  AND s.Cinema_ID = c.Cinema_ID;

Wait,
no,
that's not correct. Let me think again. I need to get all directors,
                                                          so I should
select
from the film table
where the director is not null. But how to
  join the tables? The schedule table connects film
  and cinema,
      so to get directors,
                I need to
  join film with schedule,
  and then perhaps with cinema. But the question is only about directors,
                                                         so maybe I can just
  select
  from the film table where the director is not null. But the problem is that the schedule table is needed to connect to the cinema,
                                                                                                                         but the director is in the film table. So perhaps the correct qu

cinema:  67%|██████▋   | 20/30 [03:09<01:33,  9.34s/it]

Query failed: Execution failed on sql '
SELECT Director
FROM film
GROUP BY Director;': no such column: Director


cinema:  73%|███████▎  | 22/30 [03:34<01:28, 11.01s/it]

Query failed: Execution failed on sql '
SELECT f.Title,
       MAX(s.Price) AS MaxPrice
FROM film f
JOIN schedule s ON f.Film_ID = s.Film_ID
GROUP BY f.Title;

[Wait, no, that's not correct. The group by should be on the film ID, not the title. Because each film can have multiple entries in the schedule, each with a different price. So to get the maximum price per film, we need to group by the film ID and then find the maximum price for each film. Then, we can select the title and the maximum price. So the correct query should group by f.Film_ID, not f.Title.]': You can only execute one statement at a time.




Query failed: Execution failed on sql '
SELECT d.Director
FROM film f,
     schedule s,
     cinema c
WHERE f.Film_ID = s.Film_ID
  AND s.Cinema_ID = c.Cinema_ID
GROUP BY d.Director
ORDER BY COUNT(s.Show_times_per_day) DESC
LIMIT 1;

Wait,
no,
that's not correct. Let me think again. I need to get the director,
and count the total show times across all cinemas
for their films. So,
    I should first get the director,
                       then
for each film they have,
                   sum the show times across all their screenings. So,
                                                 the correct approach is to
join film with schedule on Film_ID,
                           then
group by Director
and sum the Show_times_per_day. Then
order by that sum in descending
order
and pick the top one. So the correct SQL should be:
SELECT d.Director': no such column: d.Director


cinema:  90%|█████████ | 27/30 [04:35<00:35, 11.96s/it]

Query failed: Execution failed on sql '
SELECT distinct c.Location
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
WHERE s.Capacity > 300
GROUP BY c.Location
HAVING COUNT(s.Film_ID) > 1': no such column: s.Capacity


cinema:  93%|█████████▎| 28/30 [04:47<00:24, 12.16s/it]

Query failed: Execution failed on sql '
SELECT distinct c.Location
FROM cinema c
JOIN schedule s ON c.Cinema_ID = s.Cinema_ID
JOIN film f ON s.Film_ID = f.Film_ID
WHERE s.Capacity > 300
GROUP BY c.Location
HAVING COUNT(s.Film_ID) > 1': no such column: s.Capacity


cinema:  97%|█████████▋| 29/30 [04:54<00:10, 10.54s/it]

Query failed: Execution failed on sql '
SELECT COUNT(D distinct
             FROM film
             WHERE Title LIKE '%Dummy%')': near "distinct": syntax error


cinema: 100%|██████████| 30/30 [05:06<00:00, 10.22s/it]

✅ Accuracy for cinema: 4/30 = 13.33%

📊 Summary of Accuracy per Dataset:
 - department_management: 5/16 = 31.25%
 - farm: 7/40 = 17.50%
 - aircraft: 16/46 = 34.78%
 - architecture: 7/17 = 41.18%
 - cinema: 4/30 = 13.33%

🎯 Final Accuracy: 39/149 = 26.17%
Saved bad cases to bad_cases_deepseek-ai_DeepSeek-R1-Distill-Qwen-7B.json



