# Read Data

* Spider Dataset

* https://yale-lily.github.io/spider

In [1]:
from datasets import load_dataset

# Load Spider dataset
ds = load_dataset("spider")
db_ids = ["department_management", "farm", "aircraft", "architecture", "cinema"]

data = []

for db_id in db_ids:
    subset = ds["train"].filter(lambda x: x["db_id"] == db_id)
    questions = [entry["question"] for entry in subset]
    queries = [entry["query"] for entry in subset]

    # Combine into (db_id, question, query) triplets
    entries = [{"db_id": db_id, "question": q, "query": sql} for q, sql in zip(questions, queries)]

    # Add to overall list
    data.extend(entries)

# DeepSeek-R1-Distill-Qwen-1.5B

In [None]:
import sqlparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TRANSFORMERS_CACHE

print(torch.cuda.is_available())
torch.cuda.empty_cache()

available_memory = torch.cuda.get_device_properties(0).total_memory
print(available_memory)

print(TRANSFORMERS_CACHE)

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
cache_dir = "E:/Data File/transformers.cache"
tokenizer = AutoTokenizer.from_pretrained(model_name,cache_dir=cache_dir)

if available_memory > 6e9:
    # if you have atleast 6GB of GPU memory, run load the model in float16
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="auto",
        use_cache=True,
        cache_dir=cache_dir
    )
else:
    # else, load in 8 bits – this is a bit slower
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        # torch_dtype=torch.float16,
        load_in_8bit=True,
        device_map="auto",
        use_cache=True,
        cache_dir=cache_dir
    )

True
17170956288
C:\Users\zly20\.cache\huggingface\hub


In [3]:
department_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE IF NOT EXISTS "department" (
"Department_ID" int,
"Name" text,
"Creation" text,
"Ranking" int,
"Budget_in_Billions" real,
"Num_Employees" real,
PRIMARY KEY ("Department_ID")
);

CREATE TABLE IF NOT EXISTS "head" (
"head_ID" int,
"name" text,
"born_state" text,
"age" real,
PRIMARY KEY ("head_ID")
);

CREATE TABLE IF NOT EXISTS "management" (
"department_ID" int,
"head_ID" int,
"temporary_acting" text,
PRIMARY KEY ("Department_ID","head_ID"),
FOREIGN KEY ("Department_ID") REFERENCES `department`("Department_ID"),
FOREIGN KEY ("head_ID") REFERENCES `head`("head_ID")
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
""" 


farm_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "city" (
"City_ID" int,
"Official_Name" text,
"Status" text,
"Area_km_2" real,
"Population" real,
"Census_Ranking" text,
PRIMARY KEY ("City_ID")
);

CREATE TABLE "farm" (
"Farm_ID" int,
"Year" int,
"Total_Horses" real,
"Working_Horses" real,
"Total_Cattle" real,
"Oxen" real,
"Bulls" real,
"Cows" real,
"Pigs" real,
"Sheep_and_Goats" real,
PRIMARY KEY ("Farm_ID")
);

CREATE TABLE "farm_competition" (
"Competition_ID" int,
"Year" int,
"Theme" text,
"Host_city_ID" int,
"Hosts" text,
PRIMARY KEY ("Competition_ID"),
FOREIGN KEY (`Host_city_ID`) REFERENCES `city`(`City_ID`)
);


CREATE TABLE "competition_record" (
"Competition_ID" int,
"Farm_ID" int,
"Rank" int,
PRIMARY KEY ("Competition_ID","Farm_ID"),
FOREIGN KEY (`Competition_ID`) REFERENCES `farm_competition`(`Competition_ID`),
FOREIGN KEY (`Farm_ID`) REFERENCES `farm`(`Farm_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


aircraft_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE `pilot` (
  `Pilot_Id` int(11) NOT NULL,
  `Name` varchar(50) NOT NULL,
  `Age` int(11) NOT NULL,
  PRIMARY KEY (`Pilot_Id`)
);

CREATE TABLE `aircraft` (
  "Aircraft_ID" int(11) NOT NULL,
  "Aircraft" varchar(50) NOT NULL,
  "Description" varchar(50) NOT NULL,
  "Max_Gross_Weight" varchar(50) NOT NULL,
  "Total_disk_area" varchar(50) NOT NULL,
  "Max_disk_Loading" varchar(50) NOT NULL,
  PRIMARY KEY (`Aircraft_ID`)
);

CREATE TABLE `match` (
"Round" real,
"Location" text,
"Country" text,
"Date" text,
"Fastest_Qualifying" text,
"Winning_Pilot" text,
"Winning_Aircraft" text,
PRIMARY KEY ("Round"),
FOREIGN KEY (`Winning_Aircraft`) REFERENCES `aircraft`(`Aircraft_ID`),
FOREIGN KEY (`Winning_Pilot`) REFERENCES `pilot`(`Pilot_Id`)
);

CREATE TABLE `airport` (
"Airport_ID" int,
"Airport_Name" text,
"Total_Passengers" real,
"%_Change_2007" text,
"International_Passengers" real,
"Domestic_Passengers" real,
"Transit_Passengers" real,
"Aircraft_Movements" real,
"Freight_Metric_Tonnes" real,
PRIMARY KEY ("Airport_ID")
);

CREATE TABLE `airport_aircraft` (
"ID" int,
"Airport_ID" int,
"Aircraft_ID" int,
PRIMARY KEY ("Airport_ID","Aircraft_ID"),
FOREIGN KEY ("Airport_ID") REFERENCES `airport`(`Airport_ID`),
FOREIGN KEY ("Aircraft_ID") REFERENCES `aircraft`(`Aircraft_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


architecture_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "architect" (
"id" text,
"name" text,
"nationality" text,
"gender" text,
primary key("id")
);

CREATE TABLE "bridge" (
"architect_id" int,
"id" int,
"name" text,
"location" text,
"length_meters" real,
"length_feet" real,
primary key("id"),
foreign key ("architect_id" ) references `architect`("id")
);

CREATE TABLE "mill" (
"architect_id" int,
"id" int,
"location" text,
"name" text,
"type" text,
"built_year" int,
"notes" text,
primary key ("id"),
foreign key ("architect_id" ) references `architect`("id")
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""


cinema_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

CREATE TABLE "film" (
"Film_ID" int,
"Rank_in_series" int,
"Number_in_season" int,
"Title" text,
"Directed_by" text,
"Original_air_date" text,
"Production_code" text,
PRIMARY KEY ("Film_ID")
);

CREATE TABLE "cinema" (
"Cinema_ID" int,
"Name" text,
"Openning_year" int,
"Capacity" int,
"Location" text,
PRIMARY KEY ("Cinema_ID"));

CREATE TABLE "schedule" (
"Cinema_ID" int,
"Film_ID" int,
"Date" text,
"Show_times_per_day" int,
"Price" float,
PRIMARY KEY ("Cinema_ID","Film_ID"),
FOREIGN KEY (`Film_ID`) REFERENCES `film`(`Film_ID`),
FOREIGN KEY (`Cinema_ID`) REFERENCES `cinema`(`Cinema_ID`)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
"""

# Store each prompt and its corresponding DB name
dbs = [
    ("department_management", department_prompt),
    ("farm", farm_prompt),
    ("aircraft", aircraft_prompt),
    ("architecture", architecture_prompt),
    ("cinema", cinema_prompt),
]

In [4]:
import sqlite3
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import sqlparse

# Path config
def get_db_path(db_id):
    base_dir = Path(r"C:\Users\zly20\OneDrive - The University of Western Ontario\1B\CS 9860 Advanced Machine Learning\Final Project\CS_9860_Final_Project\data")
    return str(base_dir / f"{db_id}.sqlite")

# Run SQL and return DataFrame
def run_query_on_db(db_path, query):
    try:
        with sqlite3.connect(db_path) as conn:
            result = pd.read_sql_query(query, conn)
        return result
    except Exception as e:
        print("Query failed:", e)
        return None

# Accumulators
correct = 0
total = 0
bad_cases = []

# Per-dataset tracking
correct_by_db = {}
total_by_db = {}

# Loop through each database and use corresponding prompt
for db_id, prompt_template in dbs:
    subset = [item for item in data if item["db_id"] == db_id]
    db_path = get_db_path(db_id)

    correct_local = 0
    total_local = 0

    def generate_query(question):
        prompt = prompt_template.format(question=question)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        generated_ids = model.generate(
            **inputs,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=168,
            do_sample=False,
            num_beams=1,
        )
        outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        return sqlparse.format(outputs[0].split("[SQL]")[1].split("[/SQL]")[0], reindent=True)

    print(f"🔍 Evaluating {db_id} ({len(subset)} questions)")
    for item in tqdm(subset, desc=f"{db_id}"):
        question = item["question"]
        gold_query = item["query"]

        try:
            pred_query = generate_query(question)

            gold_result = run_query_on_db(db_path, gold_query)
            pred_result = run_query_on_db(db_path, pred_query)

            if gold_result is not None and pred_result is not None:
                if gold_result.equals(pred_result):
                    correct += 1
                    correct_local += 1
                else:
                    bad_cases.append({
                        "db_id": db_id,
                        "question": question,
                        "gold_query": gold_query,
                        "pred_query": pred_query,
                        "error_type": "Mismatch",
                        "gold_result": gold_result.to_string(index=False),
                        "pred_result": pred_result.to_string(index=False)
                    })
            else:
                bad_cases.append({
                    "db_id": db_id,
                    "question": question,
                    "gold_query": gold_query,
                    "pred_query": pred_query,
                    "error_type": "ExecutionError",
                    "gold_result": str(gold_result),
                    "pred_result": str(pred_result)
                })

            total += 1
            total_local += 1

        except Exception as e:
            bad_cases.append({
                "db_id": db_id,
                "question": question,
                "gold_query": gold_query,
                "pred_query": "N/A",
                "error_type": f"Exception: {str(e)}",
                "gold_result": "N/A",
                "pred_result": "N/A"
            })
            total += 1
            total_local += 1
            continue

    correct_by_db[db_id] = correct_local
    total_by_db[db_id] = total_local
    print(f"✅ Accuracy for {db_id}: {correct_local}/{total_local} = {correct_local / total_local:.2%}")

safe_model_name = model_name.replace("/", "_")
output_filename = f"bad_cases_{safe_model_name}.json"

with open(output_filename, "w", encoding="utf-8") as f:
    json.dump(bad_cases, f, ensure_ascii=False, indent=2)

print("\n📊 Summary of Accuracy per Dataset:")
for db_id in correct_by_db:
    print(f" - {db_id}: {correct_by_db[db_id]}/{total_by_db[db_id]} = {correct_by_db[db_id] / total_by_db[db_id]:.2%}")

print(f"\n🎯 Final Accuracy: {correct}/{total} = {correct / total:.2%}")
print(f"Saved bad cases to {output_filename}")


🔍 Evaluating department_management (16 questions)


department_management:  19%|█▉        | 3/16 [00:09<00:39,  3.05s/it]

Query failed: Execution failed on sql '
SELECT dDepartment_ID,
       dName,
       dBudget_in_Billions,
       dCreation,
       dNum_Employees
FROM department': no such column: dDepartment_ID


department_management:  31%|███▏      | 5/16 [00:14<00:32,  2.94s/it]

Query failed: Execution failed on sql '
SELECT Avg(Num_Employees) AS Average_Employees
FROM management
WHERE Department_ID IN
    (SELECT Department_ID
     FROM department
     WHERE Ranking >= 10
       AND Ranking <= 15);': no such column: Num_Employees


department_management:  38%|███▊      | 6/16 [00:18<00:30,  3.05s/it]

Query failed: Execution failed on sql '
SELECT h.name
FROM head
WHERE h.born_state != 'California';': no such column: h.name


department_management:  44%|████▍     | 7/16 [00:21<00:27,  3.08s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT c.Creation,
                m.Budget_in_Billions
FROM "department" AS d,
     "C management" AS m
WHERE m(head_ID) = dDepartment_ID
  AND d.Name = 'Alabama';': no such table: C management


department_management:  50%|█████     | 8/16 [00:24<00:26,  3.26s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT s.name
FROM
  (SELECT h.name
   FROM head h
   WHERE h.age >= 3) AS h1
FROM department d
WHERE d.department_ID IN
    (SELECT d.department_ID
     FROM management m
     WHERE m.head_ID = h1.head_ID)': near "FROM": syntax error


department_management:  75%|███████▌  | 12/16 [00:35<00:10,  2.75s/it]

Query failed: Execution failed on sql '
SELECT COUNT(*)
FROM management
WHERE NOT EXISTS ("head"
                  WHERE "head_ID" IN ("management"["head_ID"]));': near ""head"": syntax error


department_management:  81%|████████▏ | 13/16 [00:38<00:08,  2.81s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT age
FROM head
WHERE management
  AND head have a relationship where management's department ID is not null and head's ID is not null.': near "have": syntax error


department_management:  88%|████████▊ | 14/16 [00:42<00:06,  3.10s/it]

Query failed: Execution failed on sql '
SELECT s.born_state
FROM head h
JOIN department d ON h.department_ID = d.department_ID
JOIN management m ON m.department_ID = d.department_ID
WHERE h.name = 'Treasury'
  AND h.name = 'Homeland Security'': no such column: s.born_state


department_management:  94%|█████████▍| 15/16 [00:45<00:03,  3.15s/it]

Query failed: Execution failed on sql '
SELECT d.Department_ID,
       d.Name,
       hNum
FROM department d
LEFT JOIN head h ON d.Department_ID = h.department_ID
WHERE h.head_ID > 0': no such column: hNum


department_management: 100%|██████████| 16/16 [00:48<00:00,  3.01s/it]


Query failed: Execution failed on sql '
SELECT h
FROM head
WHERE h.name LIKE '%Ha%'
  OR h.name LIKE 'Ha%';': no such column: h
✅ Accuracy for department_management: 0/16 = 0.00%
🔍 Evaluating farm (40 questions)


farm:   8%|▊         | 3/40 [00:05<01:06,  1.79s/it]

Query failed: Execution failed on sql '
SELECT SUM(Horses) AS Total_Horses
FROM farm;': no such column: Horses


farm:  10%|█         | 4/40 [00:08<01:27,  2.44s/it]

Query failed: Execution failed on sql '
SELECT FARM_ID,
       Total_Horses FROMFarm;': no such column: FARM_ID


farm:  12%|█▎        | 5/40 [00:12<01:37,  2.79s/it]

Query failed: Execution failed on sql '
SELECT c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,
       c.Farm_ID,
       c.Year,
       c.Rank,': incomplete input


farm:  15%|█▌        | 6/40 [00:15<01:42,  3.01s/it]

Query failed: Execution failed on sql '
SELECT Hosts
FROM competition_record
WHERE Rank = 0
  OR Theme != 'Aliens';': no such column: Hosts


farm:  20%|██        | 8/40 [00:21<01:32,  2.88s/it]

Query failed: Execution failed on sql '
SELECT t.Theme,
       t.Host_city_ID,
       t Host_city_ID,
       t Host,
         t Host,
           t Host,
             t Host,
               t Host,
                 t Host,
                   t Host,
                     t Host,
                       t Host,
                         t Host,
                           t Host,
                             t Host,
                               t Host,
                                 t Host,
                                   t Host,
                                     t Host,
                                       t Host,
                                         t Host,
                                           t Host,
                                             t Host,
                                               t Host,
                                                 t Host,
                                                   t Host,
                                               

farm:  22%|██▎       | 9/40 [00:24<01:27,  2.82s/it]

Query failed: Execution failed on sql 'SELECT ...
FROM ... WHERE ...;': near ".": syntax error


farm:  38%|███▊      | 15/40 [00:39<00:56,  2.28s/it]

Query failed: Execution failed on sql '
SELECT City Official_Name,
       Population DESC,
       State,
       Area_km_2,
       Census_Ranking,
       Rank,
       State,
       Area_km_2,
       Census_Ranking,
       Rank
FROM city
ORDER BY Population DESC': no such column: City


farm:  40%|████      | 16/40 [00:42<00:58,  2.44s/it]

Query failed: Execution failed on sql '
SELECT CityOfficialName,
       Population,
       OrderBy(Population) AS OrderBy_Population,
       CityID,
       CityOfficialName
FROM city
ORDER BY Population DESC': no such column: CityOfficialName


farm:  45%|████▌     | 18/40 [00:49<01:06,  3.04s/it]

Query failed: Execution failed on sql '
SELECT CityOfficialName,
       Status,
       Population,
       Rank,
       Area_km_2,
       Census_Ranking, ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ..., ...': near ".": syntax error


farm:  48%|████▊     | 19/40 [00:52<01:05,  3.13s/it]

Query failed: Execution failed on sql '
SELECT C.Farm_ID,
       C.Year,
       C.Farm_ID,
       C.Year,
       C.Farm_ID,
       C.Year,
       C.Farm_ID,
       C.Year
FROM city,
     farm_competition,
     competition_record,
     farm': no such column: C.Farm_ID


farm:  50%|█████     | 20/40 [00:55<00:59,  2.99s/it]

Query failed: Execution failed on sql '
SELECT c.Farm_ID,
       c.Farm_ID as Host_city_ID,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       cHosts,
       c': no such column: c.Farm_ID




Query failed: Execution failed on sql '
SELECT c.City_ID,
       c.Status
FROM competition_record c
JOIN ON farm_competition fc
JOIN ON city city
WHERE c.Rank =
    (SELECT MAX(Rank)
     From competition_record c)': near "ON": syntax error


farm:  60%|██████    | 24/40 [01:07<00:50,  3.13s/it]

Query failed: Execution failed on sql '
SELECT c.Farm_ID,
       cHost_city_ID,
       cHosts,
       cHosts_status
FROM competition_record c
JOIN city c_city ON cHosts = c_cityofficial_name SELECT ...
FROM ... WHERE ...
ORDER BY ... LIMIT ...;': near "SELECT": syntax error


farm:  62%|██████▎   | 25/40 [01:11<00:50,  3.38s/it]

Query failed: Execution failed on sql '
SELECT distinct f1.Football
from farm_competition c,
     farm f
where c.Football = f.Football
  and cHost_city_ID = fHost_city_ID
  and fPopulation > 1000;': no such column: f1.Football


farm:  65%|██████▌   | 26/40 [01:14<00:44,  3.19s/it]

Query failed: Execution failed on sql '
SELECT distinct "Theme"
from "competition"
where "Host_city" has a population greater than 1000.': near "has": syntax error


farm:  68%|██████▊   | 27/40 [01:17<00:40,  3.14s/it]

Query failed: Execution failed on sql '
SELECT c.City_ID,
       cofficial_name,
       c.status,
       avg(c POPULATION) as average_pop
FROM city c
GROUP BY c.City_ID,
         c.status': near "POPULATION": syntax error


farm:  70%|███████   | 28/40 [01:19<00:36,  3.02s/it]

Query failed: Execution failed on sql '
SELECT c.City_ID,
       cOfficial_Name,
       c.Status,
       c.Population,
       c.Census_Ranking
FROM city c
LEFT JOIN competition_record cr ON c.City_ID = cr.Host_city_ID
LEFT JOIN farm_f on cr.Farm_ID
LEFT JOIN competition_record cr ON cr.Host_city_ID = c.City_ID': no such table: farm_f


farm:  78%|███████▊  | 31/40 [01:28<00:27,  3.06s/it]

Query failed: Execution failed on sql '
SELECT COUNT(*) AS Status_count
FROM city
GROUP BY Official_Name [HSQL]': near "[HSQL]": syntax error


farm:  98%|█████████▊| 39/40 [01:52<00:03,  3.07s/it]

Query failed: Execution failed on sql '
SELECT C.City_ID,
       C Official_Name,
       C.Status,
       C.Census_Ranking
FROM city C
WHERE C.Status != 'Village';': no such column: C


farm: 100%|██████████| 40/40 [01:55<00:00,  2.89s/it]


Query failed: Execution failed on sql '
SELECT C.City_ID,
       C Official_Name,
       C.Census_Ranking
FROM city C
WHERE C.Status != 'Village';': no such column: C
✅ Accuracy for farm: 3/40 = 7.50%
🔍 Evaluating aircraft (46 questions)


aircraft:  15%|█▌        | 7/46 [00:18<01:44,  2.68s/it]

Query failed: Execution failed on sql '
SELECT IInternational_Passengers,
       IDomestic_Passengers
FROM airport': no such column: IInternational_Passengers


aircraft:  17%|█▋        | 8/46 [00:21<01:44,  2.75s/it]

Query failed: Execution failed on sql '
SELECT airportInternationalPassengers,
       airportDomesticPassengers,
from airport
where airportName = 'Heathrow' -- add the following columns
 airportInternationalPassengers,
 airportDomesticPassengers': near "from": syntax error


aircraft:  20%|█▉        | 9/46 [00:25<01:58,  3.22s/it]

Query failed: Execution failed on sql '
SELECT SUM(Domestic_Passengers)
FROM airport_aircraft
WHERE airport_aircraft.Airport_Name LIKE '%London%'': no such column: Domestic_Passengers


aircraft:  22%|██▏       | 10/46 [00:29<02:00,  3.35s/it]

Query failed: Execution failed on sql '
SELECT SUM(Domestic_Passengers)
FROM airport_aircraft
WHERE Airport_ID = 'London';': no such column: Domestic_Passengers


aircraft:  26%|██▌       | 12/46 [00:34<01:41,  2.98s/it]

Query failed: Execution failed on sql '
SELECT MAX(`Transit_Passengers`),
       MIN(`Transit_Passengers`)
FROM airport_aircraft;': no such column: Transit_Passengers




Query failed: Execution failed on sql '
SELECT TOP 1000
FROM pilot
WHERE Age <= 30
ORDER BY Name DESC': near "1000": syntax error


aircraft:  39%|███▉      | 18/46 [00:51<01:18,  2.81s/it]

Query failed: Execution failed on sql '
SELECT Name,
Order By
FROM pilot
WHERE (Pilot_Id LIKE '%3D%20%23%23')': near "Order": syntax error


aircraft:  41%|████▏     | 19/46 [00:54<01:20,  2.99s/it]

Query failed: Execution failed on sql '
SELECT A.Aircraft
FROM `aircraft` A
WHERE A.Aircraft_ID = `airport_aircraft`.Airport_ID
  AND A.Aircraft_ID = `pilot`(`Pilot_Id`)': no such column: airport_aircraft.Airport_ID


aircraft:  43%|████▎     | 20/46 [00:57<01:16,  2.94s/it]

Query failed: Execution failed on sql '
SELECT `Aircraft`
FROM `aircraft`
WHERE `Airport_ID` =
    (SELECT `Airport_ID`
     FROM `airport`
     WHERE `Airport_ID` =
         (SELECT `Airport_ID`
          FROM `airport_aircraft`
          WHERE `Airport_ID` =
              (SELECT `Airport_ID`
               FROM `airport`)));': no such column: Airport_ID


aircraft:  50%|█████     | 23/46 [01:07<01:13,  3.21s/it]

Query failed: Execution failed on sql '
SELECT AVG(`Total_Passengers`) AS AvgTotalPassengers
FROM `airport_aircraft`
WHERE `Aircraft_ID` = 'Robinson R-22';': no such column: Total_Passengers


aircraft:  52%|█████▏    | 24/46 [01:10<01:08,  3.10s/it]

Query failed: Execution failed on sql '
SELECT Avg(Total_Passengers)
FROM airport_aircraft
WHERE "Aircraft_ID" = 'Robinson R-22';': no such column: Total_Passengers


aircraft:  54%|█████▍    | 25/46 [01:12<01:00,  2.86s/it]

Query failed: Execution failed on sql '
SELECT `Location`
FROM `airport`
WHERE `Airport_ID` IN
    (SELECT `Aircraft_ID`
     FROM `aircraft`
     WHERE `Aircraft_ID` = `Pilot_Id`);': no such column: Location


aircraft:  57%|█████▋    | 26/46 [01:14<00:49,  2.47s/it]

Query failed: Execution failed on sql '
SELECT `Winning_Aircraft`
FROM `aircraft`;': no such column: Winning_Aircraft


aircraft:  59%|█████▊    | 27/46 [01:17<00:47,  2.52s/it]

Query failed: 'NoneType' object is not iterable


aircraft:  63%|██████▎   | 29/46 [01:24<00:52,  3.06s/it]

Query failed: 'NoneType' object is not iterable


aircraft:  65%|██████▌   | 30/46 [01:26<00:47,  2.95s/it]

Query failed: Execution failed on sql '
SELECT `Aircraft`
from `aircraft`
where `Winning_Pilot` is not null
  and `Winning_Aircraft` is not null': no such column: Winning_Aircraft


aircraft:  70%|██████▉   | 32/46 [01:31<00:37,  2.66s/it]

Query failed: Execution failed on sql '
SELECT Pilot.Name
FROM pilot
WHERE Pilot.Age >
    (SELECT MAX(Age)
     FROM aircraft)': misuse of aggregate: MAX()


aircraft:  72%|███████▏  | 33/46 [01:34<00:35,  2.71s/it]

Query failed: Execution failed on sql '
SELECT a.Aircraft,
       m.Winning_Pilot,
       m.Winning_Aircraft
FROM `aircraft` a
Cross Join `match` m
WHERE m.Winning_Pilot IS NOT NULL
  AND m.Winning_Aircraft IS NOT NULL
  AND m.Winning_Pilot = a.Aircraft_ID
  AND m.Winning_Aircraft = a.Aircraft_ID
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m.Winning_Pilot = m.Winning_Pilot
  AND m': no such column: m


aircraft:  74%|███████▍  | 34/46 [01:37<00:35,  2.97s/it]

Query failed: Execution failed on sql '
SELECT A.Aircraft
FROM A
JOIN M ON A.Aircraft_ID = M.Winning_Pilot
JOIN M ON A.Aircraft_ID = M.Winning_Pilot': no such table: A


aircraft:  76%|███████▌  | 35/46 [01:41<00:33,  3.02s/it]

Query failed: Execution failed on sql '
SELECT `Aircraft_ID`
FROM `aircraft`
WHERE `Winning_Pilot` IS NULL;': no such column: Winning_Pilot


aircraft:  80%|████████  | 37/46 [01:48<00:31,  3.48s/it]

Query failed: Execution failed on sql '
SELECT A.Aircraft
FROM `aircraft` A
JOIN `match` M ON A.Aircraft_ID = M.Winning_Pilot
JOIN `airport` I ON I.Airport_ID = A.Airport_ID
JOIN `airport_aircraft` IA ON IA.ID = I.Airport_ID': no such column: A.Airport_ID


aircraft:  83%|████████▎ | 38/46 [01:52<00:28,  3.51s/it]

Query failed: Execution failed on sql '
SELECT A.Aircraft
FROM `aircraft`
WHERE A.Aircraft_ID IN
    (SELECT A.Aircraft_ID
     FROM `airport_aircraft`
     WHERE airport_aircraft.AIRPORT_ID =
         (SELECT A.Aircraft_ID
          FROM `airport`
          WHERE airport_ID = 6)
       AND airport_aircraft.AIRPORT_ID =
         (SELECT A.Aircraft_ID
          FROM `airport`
          WHERE airport_ID = 7))': no such column: A.Aircraft


aircraft:  87%|████████▋ | 40/46 [02:00<00:23,  3.85s/it]

Query failed: Execution failed on sql '
SELECT airportInternationalName,
       Total_Passengers, %_Change_2007,
                          International_Passengers,
                          Domestic_Passengers,
                          Transit_Passengers,
                          AircraftMovements,
                          Freight_Metric_Tonnes
FROM airport
WHERE airportInternationalName LIKE '%_Large_Passengers%'': near "%": syntax error


aircraft:  89%|████████▉ | 41/46 [02:04<00:19,  3.92s/it]

Query failed: Execution failed on sql '
SELECT P.Name,
       P.Age
FROM pilot P
WHERE P.Age < 30
  AND
    (SELECT COUNT(*)
     FROM match M
     WHERE M.Winning_Pilot = P.Name
       AND MDate < 30) >
    (SELECT COUNT(*)
     FROM match M
     WHERE M.Winning_Pilot = P.Name
       AND MDate < 30)': no such column: MDate


aircraft:  91%|█████████▏| 42/46 [02:09<00:16,  4.06s/it]

Query failed: Execution failed on sql 'SELECT ...
FROM ... WHERE ...;': near ".": syntax error


aircraft:  93%|█████████▎| 43/46 [02:12<00:11,  3.95s/it]

Query failed: Execution failed on sql '
SELECT winner.pilot_name,
       winner.age
FROM match
WHERE match.winning_pilot = winner.pilot_id
GROUP BY winner.pilot_id
ORDER BY winner.age ASC
LIMIT 1;': no such column: winner.pilot_name


aircraft:  98%|█████████▊| 45/46 [02:21<00:03,  3.98s/it]

Query failed: Execution failed on sql '
SELECT `Name`
FROM pilot
WHERE `Pilot_Id` NOT NULL
  AND NOT EXISTS(`Winning_Pilot`) IN
    (SELECT `Winning_Pilot`
     FROM match
     WHERE `Country` LIKE '% Australia%')': near "`Winning_Pilot`": syntax error


aircraft: 100%|██████████| 46/46 [02:25<00:00,  3.16s/it]


Query failed: Execution failed on sql '
SELECT P.Name
FROM pilot
WHERE P.Pilot_Id NOT IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  OR P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia%'
  AND P.Pilot_Id IN (
  SELECT Pilot_Id
  FROM pilot WHERE Name LIKE '%Australia': unrecognized token: "'%Australia"
✅ Accuracy for aircraft: 2/46 = 4.35%
🔍 Evaluating architecture (17 questions)


architecture:  12%|█▏        | 2/17 [00:06<00:48,  3.26s/it]

Query failed: Execution failed on sql '
SELECT m.name,
       m.nationality,
       m.id
FROM mill m
WHERE m.architect_id IN
    (SELECT a.id
     FROM a
     WHERE a.gender = 'male')
ORDER BY m.name ASC;': no such table: a


architecture:  29%|██▉       | 5/17 [00:17<00:42,  3.56s/it]

Query failed: Execution failed on sql '
SELECT m.id,
       m.name,
       m.type,
       mbuilt_year,
       m.notes
FROM mill
WHERE m.type = 'Grondzeiler';': no such column: m.id


architecture:  35%|███▌      | 6/17 [00:21<00:41,  3.81s/it]

Query failed: Execution failed on sql '
SELECT a.name,
       a.nationality
FROM architect a
WHERE a.built_year >= 0;': no such column: a.built_year


architecture:  47%|████▋     | 8/17 [00:29<00:35,  3.93s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT "mill" type
FROM "mill"
WHERE "architect_id" IN ("bridge"
                         WHERE "architect_id" IN ("architect"
                                                  WHERE "architect_id" = 'American'
                                                    OR "architect_id" = 'Canadian'));': near "WHERE": syntax error


architecture:  53%|█████▎    | 9/17 [00:33<00:31,  3.94s/it]

Query failed: Execution failed on sql '
SELECT a.id,
       a.name
FROM architect a
JOIN bridge b ON a.id = b.architect_id
JOIN bridge b ON a.id = b.architect_id ... (
                                            more queries)': near ".": syntax error


architecture:  59%|█████▉    | 10/17 [00:37<00:28,  4.04s/it]

Query failed: Execution failed on sql '
SELECT a.id,
       a.name,
       a.nationality,
       b.mill_id,
       b.mill_name,
       b.mill_nationality
FROM architect a,
     bridge b
WHERE a.id = b.mill_id
  AND a.id =
    (SELECT MIN(b.mill_id)
     FROM bridge b);': no such column: b.mill_id


architecture:  65%|██████▍   | 11/17 [00:42<00:24,  4.16s/it]

Query failed: Execution failed on sql '
SELECT a.id,
       a.name,
       a.gender,
       b.id,
       b.name,
       b.gender,
       m.id,
       m.name
FROM arch
JOIN bridge ON arch.id = bridge.architect_id
JOIN mill ON arch.id = mill.architect_id
GROUP BY a.id,
         a.name,
         a.gender,
         b.id,
         b.name,
         b.gender,
         m.id,
         m.name
HAVING COUNT(bid) = 2
OR COUNT(m.id) = 1': no such table: arch


architecture:  76%|███████▋  | 13/17 [00:50<00:17,  4.29s/it]

Query failed: Execution failed on sql '
SELECT m.id,
       m.name
FROM mill m
WHERE m.name LIKE '%Moulin%'
  AND m.français == 'Oui'': no such column: m.français


architecture:  82%|████████▏ | 14/17 [00:54<00:12,  4.08s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT m.name
FROM mill m
WHERE m.architect_id IN
    (SELECT a.id
     FROM architect a
     WHERE a.name LIKE '%Bridge%';)': near ";": syntax error


architecture:  94%|█████████▍| 16/17 [01:02<00:04,  4.06s/it]

Query failed: Execution failed on sql '
SELECT a.id,
       a.name,
       a.nationality,
       a.gender,
       b.id,
       b.name,
       b.length_meters,
       b.length_feet,
       bbuilt_year,
       b.notes,
       m.name,
       m.type,
       m.location,
       mbuilt_year,
       m.notes,
       abuilt_year > 1850
FROM architect a
LEFT JOIN bridge b ON a.id = b.architect_id
LEFT JOIN mill m ON a.id = b.architect_id
WHERE a.id NOT IN
    (SELECT id
     FROM arch 'a'
     WHERE abuilt_year > 1850)': no such table: arch


architecture: 100%|██████████| 17/17 [01:06<00:00,  3.91s/it]


Query failed: Execution failed on sql '
SELECT b.name
FROM bridge b
WHERE b.architect_id IN
    (SELECT a.id
     FROM architect a
     WHERE a.nationality = 'American'
       AND a.name = 'architect');


SELECT b.name
FROM bridge b
WHERE b.architect_id IN
    (SELECT a.id
     FROM architect a
     WHERE a.nationality = 'American'
       AND a.name = 'architect');': You can only execute one statement at a time.
✅ Accuracy for architecture: 1/17 = 5.88%
🔍 Evaluating cinema (30 questions)


cinema:   3%|▎         | 1/30 [00:04<02:19,  4.81s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT
FROM cinema
WHERE Capacity > 800': near "FROM": syntax error


cinema:   7%|▋         | 2/30 [00:08<01:55,  4.12s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT t.Location
FROM cinema t
WHERE t.Cinema_ID IN
    (SELECT Cinema_ID
     FROM cinema
     WHERE Opening_year = 2010
       OR Opening_year = 2011);': no such column: Opening_year


cinema:  17%|█▋        | 5/30 [00:15<01:10,  2.81s/it]

Query failed: Execution failed on sql '-- Get all films

SELECT Film_ID,
       Number_in_season,
       Title
FROM film LEFT JOINSchedule ON film.Film_ID = Schedule.Film_ID;

-- Get all cinemas

SELECT CINema.Name,
       Opening_year,
       Capacity
FROM cinema LEFT JOINSchedule ON cinema.Cinema_ID = Schedule.Cinema_ID;

-- Get all films per cinema

SELECT Film_ID,
       Number_in_season,
       Title
FROM film LEFT JOINSchedule ON film.Film_ID = Schedule.Film_ID;': near "ON": syntax error


cinema:  20%|██        | 6/30 [00:18<01:09,  2.91s/it]

Query failed: Execution failed on sql '
SELECT C.Name,
       C.Location,
       F.Title,
       F.Directed_by
FROM cinema C
JOIN film F ON C.Film_ID = F.Film_ID
FROM film F
WHERE F.Capacity >
    (SELECT MAX(Capacity)
     FROM cinema C)': near "FROM": syntax error


cinema:  23%|██▎       | 7/30 [00:21<01:08,  2.97s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT
From (Location) AS [Location]
FROM cinema;': near "From": syntax error


cinema:  30%|███       | 9/30 [00:26<00:56,  2.69s/it]

Query failed: Execution failed on sql '-- Join cinema with film to get all films

SELECT c.Name,
       c.Opening_year
FROM cinema c
JOIN film f ON c.Film_ID = f.Film_ID -- Order the films by opening year descending
ORDER BY f.Opening_year DESC -- Get all films

SELECT c.Name,
       c.Opening_year
FROM cinema c
JOIN film f ON c.Film_ID = f.Film_ID': near "SELECT": syntax error


cinema:  33%|███▎      | 10/30 [00:29<00:55,  2.79s/it]

Query failed: Execution failed on sql '
SELECT Cinema.Name,
       C Cinema.Location,
       C Cinema.Capacity
FROM Cinema
WHERE Capacity =
    (SELECT MAX(Cinema.Capacity)
     FROM Cinema);': near ".": syntax error


cinema:  37%|███▋      | 11/30 [00:32<00:55,  2.91s/it]

Query failed: Execution failed on sql '
SELECT C.Film_ID,
       C.Name,
       C open_year,
       C Capacities,
       C Location,
         C open_year,
         C Capacities,
         C open_year,
         C Capacities,
         C open_year,
         C Capacities,
FROM cinema
LEFT JOIN schedule ON cinema.Cinema_ID = schedule.Cinema_ID
GROUP BY cinema.Cinema_ID
ORDER BY cinema open_year ASC': near "FROM": syntax error


cinema:  43%|████▎     | 13/30 [00:38<00:48,  2.86s/it]

Query failed: Execution failed on sql '
SELECT Location,
       COUNT(Cinema_ID) AS Rank
FROM cinema
LEFT JOIN schedule ON cinema.Cinema_ID = schedule.Cinema_ID
GROUP BY Location
ORDER BY Rank DESC
LIMIT 1': ambiguous column name: Cinema_ID


cinema:  47%|████▋     | 14/30 [00:40<00:45,  2.84s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT c.Location
FROM cinema c
JOIN schedule s ON s.Cinema_ID = c.Cinema_ID
JOIN schedule s ON s.Cinema_ID = c2.Cinema_ID ... (repeat
                                                   for all cinemas with capacity >300)': near ".": syntax error


cinema:  50%|█████     | 15/30 [00:43<00:42,  2.82s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT FromLocation
FROM film
WHERE film.Title LIKE '%Cinema%'
  AND film.Title LIKE '%Film%'
  AND film.Title LIKE '%Schedule%'
  AND film.Title LIKE '%Cinema%'
  AND film.Title LIKE '%Film%'
  AND film.Title LIKE '%Schedule%'
  AND film.Title LIKE '%Cinema%'
  AND film.Title LIKE '%Film%'
  AND film.Title LIKE '%Schedule%' Wait,
      that's not right. I think I'm confusing the table names. Let me try again.': near "Wait": syntax error


cinema:  60%|██████    | 18/30 [00:50<00:30,  2.56s/it]

Query failed: 'NoneType' object is not iterable


cinema:  63%|██████▎   | 19/30 [00:53<00:27,  2.48s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT
FROM film "Directed_by" WHERE ... [Need to fill the WHERE clause]': near "FROM": syntax error


cinema:  67%|██████▋   | 20/30 [00:54<00:22,  2.27s/it]

Query failed: Execution failed on sql '
SELECT D directed_by,
       COUNT(f Films directed by each director.': near "Films": syntax error


cinema:  73%|███████▎  | 22/30 [01:00<00:20,  2.57s/it]

Query failed: Execution failed on sql '
SELECT f.Title,
       f.Price,
       MAX(s.Price) as Max_Price
FROM film f
CROSS JOIN schedule s
WHERE f Films are in the schedule': near "Films": syntax error


cinema:  77%|███████▋  | 23/30 [01:03<00:18,  2.66s/it]

Query failed: Execution failed on sql '
SELECT f Title,
       s Price
FROM film f
Cross Join Schedule s
WHERE f Film_ID = s Cinema_ID
ORDER by f Rank_in_series': near "Film_ID": syntax error


cinema:  80%|████████  | 24/30 [01:07<00:17,  2.95s/it]

Query failed: 'NoneType' object is not iterable


cinema:  87%|████████▋ | 26/30 [01:14<00:12,  3.21s/it]

Query failed: Execution failed on sql '
SELECT "Director"
FROM "film"
WHERE "Rank_in_series" = (
  SELECT MAX("Number_in_season")
  FROM "schedule" WHERE "Film_ID" = "Film_ID"
  FROM "film" WHERE "Rank_in_series" = "Rank_in_series"
  AND "Film_ID" = "Film_ID"
  AND "Director" = "Director"
  AND "Film_ID" = "Film_ID"
  AND "Schedule_ID" = "Schedule_ID"
  AND "Film_ID" = "Film_ID"
  AND "Director" = "Director"
  AND "Film_ID" = "Film_ID"
  AND "Schedule_ID" = "Schedule_ID"
  AND "Film_ID" = "Film_ID"
  AND "Schedule_ID" = "Schedule_ID"
  AND "Film_ID" = "Film_ID"
  AND "Schedule_ID" = "Schedule_ID': near "FROM": syntax error


cinema:  93%|█████████▎| 28/30 [01:20<00:06,  3.20s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT c.Location
FROM cinema c
JOIN schedule s ON s.Cinema_ID = c.Cinema_ID
WHERE s.Film_ID IN (SELECTFilm_ID
                    From film
                    Where Number_in_season > 300)': near "From": syntax error


cinema: 100%|██████████| 30/30 [01:24<00:00,  2.81s/it]

✅ Accuracy for cinema: 1/30 = 3.33%

📊 Summary of Accuracy per Dataset:
 - department_management: 0/16 = 0.00%
 - farm: 3/40 = 7.50%
 - aircraft: 2/46 = 4.35%
 - architecture: 1/17 = 5.88%
 - cinema: 1/30 = 3.33%

🎯 Final Accuracy: 7/149 = 4.70%
Saved bad cases to bad_cases_deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B.json





# Performance on Never Seen Data

In [None]:
import torch
from datasets import load_dataset
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

movie_1_prompt = """
### Task
Generate a SQLite query to answer [QUESTION]{question}[/QUESTION]

### Instructions
- If you cannot answer the question with the available database schema, return 'I do not know'
- Pay close attention to the given database schema, note on how they can join together
- You do not need to generate your thought process but just the answer
- Your answer should end with '[/SQL]'

create table Movie(
	mID int primary key, 
	title text, 
	year int, 
	director text
);
create table Reviewer(
	rID int primary key, 
	name text);

create table Rating(
	rID int, 
	mID int, 
	stars int, 
	ratingDate date,
	FOREIGN KEY (mID) references Movie(mID),
	FOREIGN KEY (rID) references Reviewer(rID)
);

### Answer
Given the database schema, here is the SQLite query that answers [QUESTION]{question}[/QUESTION]
[SQL]
""" 

# Store each prompt and its corresponding DB name
dbs = [
    ("movie_1", movie_1_prompt)
]

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
cache_dir = "E:/Data File/transformers.cache"
tokenizer = AutoTokenizer.from_pretrained(model_name,cache_dir=cache_dir)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
    use_cache=True,
    cache_dir=cache_dir
)

# Load Spider dataset
ds = load_dataset("spider")
db_ids = ["movie_1"]

data = []

for db_id in db_ids:
    subset = ds["train"].filter(lambda x: x["db_id"] == db_id)
    questions = [entry["question"] for entry in subset]
    queries = [entry["query"] for entry in subset]

    # Combine into (db_id, question, query) triplets
    entries = [{"db_id": db_id, "question": q, "query": sql} for q, sql in zip(questions, queries)]

    # Add to overall list
    data.extend(entries)

import sqlite3
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import sqlparse

# Path config
def get_db_path(db_id):
    base_dir = Path(r"C:\Users\zly20\OneDrive - The University of Western Ontario\1B\CS 9860 Advanced Machine Learning\Final Project\CS_9860_Final_Project\data")
    return str(base_dir / f"{db_id}.sqlite")

# Run SQL and return DataFrame
def run_query_on_db(db_path, query):
    try:
        with sqlite3.connect(db_path) as conn:
            result = pd.read_sql_query(query, conn)
        return result
    except Exception as e:
        print("Query failed:", e)
        return None

# Accumulators
correct = 0
total = 0
bad_cases = []

# Per-dataset tracking
correct_by_db = {}
total_by_db = {}

# Loop through each database and use corresponding prompt
for db_id, prompt_template in dbs:
    subset = [item for item in data if item["db_id"] == db_id]
    db_path = get_db_path(db_id)

    correct_local = 0
    total_local = 0

    def generate_query(question):
        prompt = prompt_template.format(question=question)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        generated_ids = model.generate(
            **inputs,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=168,
            do_sample=False,
            num_beams=1,
        )
        outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        return sqlparse.format(outputs[0].split("[SQL]")[1].split("[/SQL]")[0], reindent=True)

    print(f"🔍 Evaluating {db_id} ({len(subset)} questions)")
    for item in tqdm(subset, desc=f"{db_id}"):
        question = item["question"]
        gold_query = item["query"]

        try:
            pred_query = generate_query(question)

            gold_result = run_query_on_db(db_path, gold_query)
            pred_result = run_query_on_db(db_path, pred_query)

            if gold_result is not None and pred_result is not None:
                if gold_result.equals(pred_result):
                    correct += 1
                    correct_local += 1
                else:
                    bad_cases.append({
                        "db_id": db_id,
                        "question": question,
                        "gold_query": gold_query,
                        "pred_query": pred_query,
                        "error_type": "Mismatch",
                        "gold_result": gold_result.to_string(index=False),
                        "pred_result": pred_result.to_string(index=False)
                    })
            else:
                bad_cases.append({
                    "db_id": db_id,
                    "question": question,
                    "gold_query": gold_query,
                    "pred_query": pred_query,
                    "error_type": "ExecutionError",
                    "gold_result": str(gold_result),
                    "pred_result": str(pred_result)
                })

            total += 1
            total_local += 1

        except Exception as e:
            bad_cases.append({
                "db_id": db_id,
                "question": question,
                "gold_query": gold_query,
                "pred_query": "N/A",
                "error_type": f"Exception: {str(e)}",
                "gold_result": "N/A",
                "pred_result": "N/A"
            })
            total += 1
            total_local += 1
            continue

    correct_by_db[db_id] = correct_local
    total_by_db[db_id] = total_local
    print(f"✅ Accuracy for {db_id}: {correct_local}/{total_local} = {correct_local / total_local:.2%}")

safe_model_name = model_name.replace("/", "_")
output_filename = f"bad_cases_{safe_model_name}_movie_1.json"

with open(output_filename, "w", encoding="utf-8") as f:
    json.dump({
        "model_name": "lora-finetuned_deepseek-1.5b",
        "final_accuracy": correct / total,
        "per_dataset_accuracy": {
            db_id: correct_by_db[db_id] / total_by_db[db_id] for db_id in correct_by_db
        },
        "bad_cases": bad_cases
    }, f, ensure_ascii=False, indent=2)

print("\n📊 Summary of Accuracy per Dataset:")
for db_id in correct_by_db:
    print(f" - {db_id}: {correct_by_db[db_id]}/{total_by_db[db_id]} = {correct_by_db[db_id] / total_by_db[db_id]:.2%}")

print(f"\n🎯 Final Accuracy: {correct}/{total} = {correct / total:.2%}")
print(f"Saved bad cases to {output_filename}")

🔍 Evaluating movie_1 (98 questions)


movie_1:   8%|▊         | 8/98 [00:25<05:05,  3.39s/it]

Query failed: Execution failed on sql '
SELECT d.title,
       d.year,
       d.director
FROM Movie m
JOIN Reviewer r ON m.rID = r.rID
JOIN Rating g ON g.rID = m.rID
WHERE m.mID = 2': no such column: d.title


movie_1:  19%|█▉        | 19/98 [00:59<04:07,  3.14s/it]

Query failed: Execution failed on sql '-- Your answer

SELECT year
FROM
  (SELECT mID,
          year
   FROM Movie) AS t1 JOINRating ON Rating.rID = t1.mID
SELECT year
FROM t1': near "JOINRating": syntax error


movie_1:  20%|██        | 20/98 [01:02<04:01,  3.10s/it]

Query failed: Execution failed on sql '
SELECT m.year AS movie_year,
       r.year AS reviewer_year
FROM Movie m
Join Reviewer r ON m.mID = r.rID
WHERE m.stars = 4
  OR m.stars = 5': no such column: r.year


movie_1:  21%|██▏       | 21/98 [01:06<04:22,  3.41s/it]

Query failed: Execution failed on sql '
SELECT d.title,
       m.title
FROM Movie m,
     Director d
WHERE m.mID = d.mID
  AND d.stars = 5': no such table: Director


movie_1:  22%|██▏       | 22/98 [01:10<04:23,  3.47s/it]

Query failed: Execution failed on sql '
SELECT d.name,
       m.title
FROM Movie m
JOIN Reviewer r ON m.mID = r.rID
JOIN Rating rating ON rating.mID = m.mID
WHERE rating.stars = 5;': no such column: d.name


movie_1:  23%|██▎       | 23/98 [01:13<04:07,  3.30s/it]

Query failed: Execution failed on sql '
SELECT r.name,
       AVG(r.stars) AS avg_stars
FROM Reviewer r
JOIN Rating rating ON r.rID = rating.rID
GROUP BY r.name;': no such column: r.stars


movie_1:  24%|██▍       | 24/98 [01:16<03:56,  3.19s/it]

Query failed: Execution failed on sql '
SELECT AVG(stars)
FROM Reviewer': no such column: stars


movie_1:  26%|██▌       | 25/98 [01:18<03:47,  3.12s/it]

Query failed: 'NoneType' object is not iterable


movie_1:  27%|██▋       | 26/98 [01:22<03:52,  3.24s/it]

Query failed: Execution failed on sql '
SELECT title
FROM Movie
WHERE
    (SELECT mID
     FROM Rating
     WHERE rating stars = 0) = mID;': near "stars": syntax error


movie_1:  28%|██▊       | 27/98 [01:25<03:40,  3.11s/it]

Query failed: Execution failed on sql '
SELECT name
FROM Reviewer
WHERE ratingDate IS NULL;': no such column: ratingDate


movie_1:  30%|██▉       | 29/98 [01:31<03:30,  3.06s/it]

Query failed: Execution failed on sql '
SELECT avg(stars) as average_rating,
       title as average_title
FROM Movie
WHERE year =
    (SELECT MAX(year)
     FROM Movie);': no such column: stars


movie_1:  31%|███       | 30/98 [01:34<03:37,  3.20s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       AVG(rating.rating) AS average_rating
FROM Movie m
JOIN Reviewer r ON m.mID = r.rID
JOIN Rating rating ON r.rID = rating.rID': no such column: rating.rating


movie_1:  32%|███▏      | 31/98 [01:38<03:33,  3.19s/it]

Query failed: Execution failed on sql '
SELECT m.title
FROM Movie
WHERE year =
    (SELECT MAX(year)
     FROM Movie);': no such column: m.title


movie_1:  34%|███▎      | 33/98 [01:45<03:50,  3.55s/it]

Query failed: Execution failed on sql '
SELECT MAX(stars),
       MAX(year)
FROM Movie
WHERE (Rating.stars >Rating.year
       OR Rating.year IS NULL);': no such column: stars


movie_1:  35%|███▍      | 34/98 [01:49<03:50,  3.60s/it]

Query failed: Execution failed on sql '
SELECT mID,
       year,
       ratingDate
FROM Movie
WHERE ratingDate = (MAX(year) OVER (
                                    ORDER BY mID ASC))
  AND year = (MAX(year) OVER (
                              ORDER BY mID ASC))': no such column: ratingDate


movie_1:  36%|███▌      | 35/98 [01:52<03:33,  3.38s/it]

Query failed: Execution failed on sql '
SELECT m.title
FROM Movie m
WHERE m.year >
    (SELECT MAX(year)
     FROM Rating r
     WHERE r.director = 'Sergei Spielberg'
       AND r.year > m.year);': no such column: r.director


movie_1:  38%|███▊      | 37/98 [01:59<03:30,  3.45s/it]

Query failed: Execution failed on sql '
SELECT title,
       director
FROM Movie
WHERE
    (SELECT COUNT(*)
     FROM Rating) >
    (SELECT AVG(stars)
     FROM
       (SELECT mID
        FROM Movie
        WHERE director = 'James Cameron'))': no such column: stars


movie_1:  40%|███▉      | 39/98 [02:07<03:50,  3.91s/it]

Query failed: 'NoneType' object is not iterable




Query failed: Execution failed on sql '-- The query joins the Reviewer table with the Rating table to get all ratings by each reviewer.
-- It then groups the ratings by the Reviewer's name and counts how many ratings each has.
-- Finally, it selects the names of Reviewer where the count is three or more.

SELECT r.name
FROM Reviewer r
JOIN Rating rating ON r.rID = rating.rID
GROUP BY r.name
HAVING COUNT(r.stars) >= 3': no such column: r.stars


movie_1:  44%|████▍     | 43/98 [02:19<03:00,  3.29s/it]

Query failed: Execution failed on sql '
SELECT name
FROM Reviewer
WHERE ratingDate >= 'Gone with the Wind';': no such column: ratingDate


movie_1:  45%|████▍     | 44/98 [02:23<02:59,  3.32s/it]

Query failed: Execution failed on sql '
SELECT DISTINCT name
FROM Reviewer
WHERE mID =
    (SELECT mID
     FROM Movie
     WHERE title = 'Gone with the Wind'`);': unrecognized token: "`);"


movie_1:  46%|████▌     | 45/98 [02:26<03:01,  3.43s/it]

Query failed: Execution failed on sql '
SELECT d.name
FROM Director d
JOIN Reviewer r ON r.name = d.name
JOIN Rating rating ON rating.rID = rID
WHERE rating.stars = 2': no such table: Director


movie_1:  47%|████▋     | 46/98 [02:30<02:53,  3.33s/it]

Query failed: Execution failed on sql '
SELECT d.name
FROM Director d
JOIN Reviewer r ON r.name = d.name
JOIN Rating g ON g.rID = r.rID
WHERE g.stars = 2': no such table: Director


movie_1:  49%|████▉     | 48/98 [02:35<02:38,  3.17s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       r.name,
       stars,
       r.director
FROM Movie m
JOIN Reviewer r ON r.name = r.rID
JOIN Rating rating ON rating.mID = mID
WHERE r.name = r.director
  AND rating.stars =
    (SELECT MAX(stars)
     FROM Rating
     WHERE mID = mID
       AND rID = rID);': no such column: r.director


movie_1:  50%|█████     | 49/98 [02:39<02:35,  3.18s/it]

Query failed: Execution failed on sql '-- Get all movies and join with all reviewers

SELECT m.title,
       r.name
FROM Movie m
Cross Join Reviewer r -- Get all movies and join with all reviewers

SELECT m.title,
       r.name
FROM Movie m
Join Reviewer r': near "SELECT": syntax error




Query failed: Execution failed on sql '
SELECT r.name,
       m.title
FROM Reviewer r
LEFT JOIN Movie m ON r.mID = m.mID': no such column: r.mID


movie_1:  52%|█████▏    | 51/98 [02:47<02:50,  3.63s/it]

Query failed: 'NoneType' object is not iterable


movie_1:  53%|█████▎    | 52/98 [02:50<02:45,  3.60s/it]

Query failed: Execution failed on sql '
SELECT title
FROM Movie
WHERE
    (SELECT mID
     FROM Reviewer
     WHERE name = 'Chris Jackson') NULL
    (SELECT mID
     FROM Rating
     WHERE rID IN
         (SELECT mID
          FROM Reviewer
          WHERE name = 'Chris Jackson')
       AND rating = 0)': near "NULL": syntax error


movie_1:  55%|█████▌    | 54/98 [02:57<02:26,  3.34s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       m.year,
       m.name
FROM Movie m
WHERE m.director IN
    (SELECT mID
     FROM Rating
     WHERE mID > 1);': no such column: m.name


movie_1:  57%|█████▋    | 56/98 [03:05<02:37,  3.75s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       m.year,
       m.date,
       r.name as director,
       r.year as dir_year,
       r.stars as dir_stars
FROM Movie m
JOIN Reviewer r ON m.rID = r.rID
WHERE m.rID > 1': no such column: m.date


movie_1:  58%|█████▊    | 57/98 [03:08<02:23,  3.49s/it]

Query failed: Execution failed on sql '
SELECT d.name
FROM Director d
JOIN Reviewer r ON d.mID = r.rID
JOIN Rating g ON g.rID = r.rID
WHERE g.mID = d.mID
  AND COUNT(g.rID) = 1;': no such table: Director


movie_1:  59%|█████▉    | 58/98 [03:11<02:12,  3.31s/it]

Query failed: Execution failed on sql '
SELECT d.name
FROM Director d
JOIN Reviewer r ON d.mID = r.rID
JOIN Rating g ON g.rID = r.rID
WHERE d.mID =
    (SELECT m.MID
     FROM Movie m);': no such table: Director


movie_1:  60%|██████    | 59/98 [03:14<02:09,  3.31s/it]

Query failed: Execution failed on sql '
SELECT d.name
FROM Director d
JOIN Reviewer r ON d.rID = r.rID
JOIN Rating g ON g.rID = d.rID
WHERE d.mID = 1
  AND d.rID = NULL
  AND d.name LIKE '% NULL' '%'': near "'%'": syntax error


movie_1:  61%|██████    | 60/98 [03:17<02:03,  3.24s/it]

Query failed: Execution failed on sql '
SELECT d.name
FROM Director d
JOIN Reviewer r ON d.rID = r.rID
JOIN Rating g ON g.rID = r.rID
WHERE d.name IS NULL
  OR d.direcName IS NULL': no such table: Director


movie_1:  63%|██████▎   | 62/98 [03:26<02:20,  3.90s/it]

Query failed: Execution failed on sql '
SELECT d.name,
       COUNT(r.ratingDate) as review_count
FROM Director d
JOIN Reviewer r ON d.name = r.name
GROUP BY d.name': no such table: Director


movie_1:  64%|██████▍   | 63/98 [03:29<02:06,  3.62s/it]

Query failed: 'NoneType' object is not iterable


movie_1:  65%|██████▌   | 64/98 [03:32<01:59,  3.52s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       AVG(ratingRating) AS avgRating
FROM Movie m
LEFT JOIN Reviewer r ON m.mID = r.mID
LEFT JOIN Rating ratingRating ON r.rID = m.rID
GROUP BY m.title
ORDER BY avgRating DESC': no such column: ratingRating


movie_1:  66%|██████▋   | 65/98 [03:35<01:49,  3.32s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       avg(rating) as avg_rating
FROM Movie m
LEFT JOIN Reviewer r ON m.mID = r.mID
LEFT JOIN Rating rating ON r.rID = m.mID
GROUP BY m.mID
ORDER BY avg_rating ASC': no such column: rating


movie_1:  67%|██████▋   | 66/98 [03:39<01:50,  3.46s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       avg(rating) as avg_rating
FROM Movie m
LEFT JOIN Reviewer r ON m.mID = r.mID
LEFT JOIN Rating rating ON r.mID = rating.mID
GROUP BY m.mID
ORDER BY (avg_rating) ASC': no such column: rating


movie_1:  68%|██████▊   | 67/98 [03:43<01:55,  3.74s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       m.year,
       R.name,
       R.year
FROM Movie m,
     Reviewer R': no such column: R.year


movie_1:  69%|██████▉   | 68/98 [03:46<01:43,  3.44s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       m.year,
       R.ratingDate
FROM Movie m
JOIN Reviewer R ON R.name = m.name
JOIN RatingRating R2 ON R2.rID = R.rID
AND R2.mID = m.mID
SELECT TOP 3
FROM RatingRating': near "SELECT": syntax error


movie_1:  70%|███████   | 69/98 [03:49<01:37,  3.36s/it]

Query failed: Execution failed on sql '
SELECT d.name,
       d.title,

  (SELECT MAX(stars)
   FROM Rating
   WHERE mID = d.mID) AS max_rating
FROM Director d
LEFT JOIN Rating r ON d.mID = r.mID
GROUP BY d.name,
         d.mID
ORDER BY max_rating DESC': no such table: Director


movie_1:  71%|███████▏  | 70/98 [03:52<01:29,  3.19s/it]

Query failed: Execution failed on sql '
SELECT d.name,
       d.title,
       r.stars,
       r.ratingDate,
       COUNT(rID) as num_reviews
FROM Reviewer d
JOIN Reviewer r ON r.rID = d.rID
JOIN Rating ON r.rID = r.mID
GROUP BY d.name,
         d.title
ORDER BY d.name,
         d.title;': no such column: d.title


movie_1:  72%|███████▏  | 71/98 [03:55<01:24,  3.11s/it]

Query failed: 'NoneType' object is not iterable


movie_1:  73%|███████▎  | 72/98 [03:59<01:27,  3.36s/it]

Query failed: Execution failed on sql '
SELECT R.name,
       R.rating,
  (SELECT MIN(R.stars) as min_rating
   FROM Rating R
   WHERE R.stars =
       (SELECT MIN(R.stars) as min_rating
        FROM Rating R
        WHERE R.stars = R.stars))
FROM Reviewer R': no such column: R.rating




Query failed: Execution failed on sql '
SELECT title,
       rating
FROM Movie
WHERE
    (SELECT mID
     FROM Reviewer
     WHERE name =
         (SELECT name
          FROM Reviewer m
          WHERE mID =
              (SELECT mID
               FROM Movie m
               WHERE director = '...')))
  AND rating =
    (SELECT stars
     FROM Rating
     WHERE mID =
         (SELECT mID
          FROM Movie m
          WHERE director = '...'))': no such column: rating


movie_1:  76%|███████▌  | 74/98 [04:06<01:20,  3.35s/it]

Query failed: Execution failed on sql '
SELECT d.title,
       d.stars,
       R.stars AS rating
FROM Director d
LEFT JOIN Reviewer r ON d.director = r.name
LEFT JOIN Rating rating ON rating.mID = d.mID
WHERE d.stars =
    (SELECT MAX(stars)
     FROM Rating rating)': no such table: Director


movie_1:  77%|███████▋  | 75/98 [04:09<01:16,  3.33s/it]

Query failed: Execution failed on sql '
SELECT m.title
FROM Movie
WHERE m.stars =
    (SELECT MAX(m.stars)
     FROM Movie)': no such column: m.title


movie_1:  79%|███████▊  | 77/98 [04:16<01:08,  3.25s/it]

Query failed: Execution failed on sql '
SELECT title
FROM Movie
WHERE rating.stars > 3
  AND rating.stars < 5;': no such column: rating.stars


movie_1:  80%|███████▉  | 78/98 [04:19<01:08,  3.42s/it]

Query failed: Execution failed on sql '
SELECT title
FROM Movie
WHERE rating.stars >= 3
  AND rating.stars <= 5;': no such column: rating.stars


movie_1:  81%|████████  | 79/98 [04:24<01:10,  3.69s/it]

Query failed: 'NoneType' object is not iterable


movie_1:  82%|████████▏ | 80/98 [04:27<01:03,  3.54s/it]

Query failed: Execution failed on sql '
SELECT r.name
FROM Reviewer r
WHERE r.rID IN
    (SELECT mID
     FROM Movie m
     WHERE m.stars > 3);': no such column: m.stars


movie_1:  83%|████████▎ | 81/98 [04:30<00:55,  3.28s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       AVG(r.stars) AS avg_rating
FROM Movie m
LEFT JOIN Reviewer r ON r.mID = m.mID
WHERE m.title LIKE '%not reviewed by Brittany Harris%' -- Note: The above query may not be accurate due to potential issues with the database schema.
': no such column: r.stars


movie_1:  84%|████████▎ | 82/98 [04:32<00:50,  3.15s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       AVG(ratingRate) AS avgRating
FROM Movie m
LEFT JOIN Reviewer r ON m.mID = r.mID
WHERE r.name = 'Brittany Harris'
GROUP BY m.title': no such column: ratingRate


movie_1:  85%|████████▍ | 83/98 [04:36<00:46,  3.12s/it]

Query failed: Execution failed on sql '-- The query first selects the movie IDs where the reviewer is Brittany Harris and the movie is not reviewed by her.

SELECT mID
FROM Movie
WHERE rID =
    (SELECT rID
     FROM Reviewer
     WHERE name = 'Brittany Harris'
       AND mID = mID) -- The query then joins the Movie and Reviewer tables to get the movie IDs that are not reviewed by Brittany Harris.
': no such column: rID


movie_1:  87%|████████▋ | 85/98 [04:44<00:47,  3.63s/it]

Query failed: 'NoneType' object is not iterable


movie_1:  88%|████████▊ | 86/98 [04:47<00:40,  3.39s/it]

Query failed: Execution failed on sql '
SELECT m.title,
       COUNT(rID) as review_count
FROM Movie m
JOIN Reviewer r ON m.mID = r.rID
JOIN Rating rating ON rating.mID = rID
WHERE m.mID IN
    (SELECT mID
     FROM Movie m
     WHERE dID > 3);': ambiguous column name: rID


movie_1:  89%|████████▉ | 87/98 [04:49<00:33,  3.05s/it]

Query failed: Execution failed on sql '-- The query selects the RID from the Reviewer table where the stars are not equal to 4.

SELECT r.rID
FROM Reviewer r
WHERE r.stars != 4;': no such column: r.stars


movie_1:  90%|████████▉ | 88/98 [04:52<00:29,  2.94s/it]

Query failed: Execution failed on sql '
SELECT R.rID
FROM Reviewer R
WHERE R.stars != 4;': no such column: R.stars


movie_1:  91%|█████████ | 89/98 [04:55<00:27,  3.04s/it]

Query failed: Execution failed on sql '-- The query selects the RID from the Reviewer table where the stars are not equal to 4.

SELECT rID
FROM Reviewer
WHERE stars != 4;': no such column: stars


movie_1:  93%|█████████▎| 91/98 [05:03<00:23,  3.41s/it]

Query failed: Execution failed on sql '
SELECT title
FROM Movie
WHERE year > 2000
  OR
    (SELECT name
     FROM Reviewer
     WHERE name = 'Brittany Harris'
       AND EXISTS(Rating,
                    (SELECT mID
                     FROM Movie
                     WHERE year > 2000), 1));': near "Rating": syntax error


movie_1:  94%|█████████▍| 92/98 [05:05<00:19,  3.24s/it]

Query failed: Execution failed on sql '
SELECT title
FROM Movie
WHERE year > 2000
  AND name LIKE '% Brittany Harris %';': no such column: name


movie_1:  97%|█████████▋| 95/98 [05:16<00:10,  3.47s/it]

Query failed: Execution failed on sql '
SELECT r.name
FROM Reviewer r
WHERE r.rID IN
    (SELECT mID
     FROM Movie
     WHERE stars = 3);


SELECT r.name
FROM Reviewer r
WHERE r.rID IN
    (SELECT mID
     FROM Movie
     WHERE stars = 4);': no such column: stars


movie_1:  99%|█████████▉| 97/98 [05:24<00:03,  3.59s/it]

Query failed: Execution failed on sql '
SELECT m.title
FROM Movie
WHERE m.stars = 3
  OR m.stars = 4;': no such column: m.title


movie_1: 100%|██████████| 98/98 [05:27<00:00,  3.34s/it]

Query failed: Execution failed on sql '
SELECT m.title
FROM Movie
WHERE m.year >= 2000
  AND m.year <= 2023
  AND m.stars = 3
  OR m.stars = 4;': no such column: m.title
✅ Accuracy for movie_1: 17/98 = 17.35%

📊 Summary of Accuracy per Dataset:
 - movie_1: 17/98 = 17.35%

🎯 Final Accuracy: 17/98 = 17.35%
Saved bad cases to bad_cases_deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B_movie_1.json



