Installing the required packages

In [None]:
import torch

# 1. Manually install the missing dependency from Git first
print("--- Fixing Dependency Mismatch (Installing unsloth-zoo) ---")
!pip install --upgrade "git+https://github.com/unslothai/unsloth-zoo.git"

# 2. Now install Unsloth (It should find the dependency now)
print("\n--- Installing Unsloth ---")
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# 3. Re-verify dependencies (Just to be safe)
print("\n--- Verifying Dependencies ---")
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

print("\n‚úÖ DONE. Please RESTART your session now (Runtime > Restart Session).")

--- Fixing Dependency Mismatch (Installing unsloth-zoo) ---
Collecting git+https://github.com/unslothai/unsloth-zoo.git
  Cloning https://github.com/unslothai/unsloth-zoo.git to /tmp/pip-req-build-iy1t3yir
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth-zoo.git /tmp/pip-req-build-iy1t3yir
  Resolved https://github.com/unslothai/unsloth-zoo.git to commit ba585aff2b3f80594497171c8d6d216f921cdb8d
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting torchao>=0.13.0 (from unsloth_zoo==2026.2.1)
  Downloading torchao-0.15.0-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)
Collecting tyro (from unsloth_zoo==2026.2.1)
  Downloading tyro-1.0.6-py3-none-any.whl.metadata (12 kB)
Collecting transformers!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.4,!=4.57.5,<=4.57.6,>

1.   Loading the fine-tuned model from the drive
2.   Loading the test data (dev.json) and schema (collections.json)
3.   Generating the predictions for test instances

In [None]:
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from google.colab import drive
import torch
import json
import os
from tqdm import tqdm

# 1. Mount Google Drive so the checkpoint and the predictions file are reachable
drive.mount("/content/drive")

# ======================= CONFIGURATION =======================
# Everything a user may need to adjust before running lives here.

# Fine-tuned Qwen checkpoint directory (on Drive)
model_path = "/content/drive/MyDrive/QwenTraining_Run1/checkpoint-253"

# Predictions are written to Drive so they outlive a crashed/reset runtime
output_file = "/content/drive/MyDrive/Qwen_Predictions.json"

# Input files -- upload these through the Colab "Files" panel.
# Both are relative paths, so they resolve against the current working directory.
dev_file_path = "dev.json"                 # evaluation questions
tables_file_path = "collections.json"      # optional: schema lookup by db_id

# Checkpoint interval: results are flushed to `output_file` every N questions
save_every = 10
# =============================================================

# 2. Load Schemas (Robustness Step)
# Builds `schema_map` (db_id -> schema) so a schema is still available for
# entries of dev.json that do not carry one themselves.
schema_map = {}
if os.path.exists(tables_file_path):
    print(f"‚úÖ Loading schemas from: {tables_file_path}")
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # text encoding, and close the file before post-processing its content.
    with open(tables_file_path, 'r', encoding='utf-8') as f:
        tables_data = json.load(f)
    for db in tables_data:
        # Use the entry's 'schema' field when present; otherwise keep the
        # whole entry as the schema description.
        schema_map[db['db_id']] = db.get('schema', db)
else:
    # The file is optional: without it, only schemas embedded in dev.json are used.
    print(f"‚ö†Ô∏è WARNING: {tables_file_path} not found. Relying solely on dev.json schemas.")

# 3. Load Qwen Model
# The checkpoint at `model_path` is loaded together with its tokenizer, then
# handed to Unsloth's `for_inference` before any generation takes place.
print(f"üîÑ Loading model from: {model_path}")

_load_kwargs = dict(
    model_name=model_path,
    max_seq_length=2048,
    dtype=None,          # no explicit dtype is requested
    load_in_4bit=True,   # request 4-bit loading
)
model, tokenizer = FastLanguageModel.from_pretrained(**_load_kwargs)

FastLanguageModel.for_inference(model)
print("‚úÖ Model loaded & optimized!")

# 4. Load Test Questions
# Fail fast with an actionable message when the evaluation file is missing.
if not os.path.exists(dev_file_path):
    raise FileNotFoundError(f"‚ùå Please upload '{dev_file_path}' to Colab Files!")

# Read as UTF-8 explicitly instead of relying on the platform's default
# text encoding.
with open(dev_file_path, 'r', encoding='utf-8') as f:
    test_data = json.load(f)

# --- AUTO-RESUME LOGIC ---
# If a predictions file already exists, continue after the last saved result
# instead of regenerating everything. `start_index` is the position in
# `test_data` of the first question that still needs a prediction.
start_index = 0
generated_results = []

if os.path.exists(output_file):
    try:
        with open(output_file, 'r', encoding='utf-8') as f:
            previous_results = json.load(f)
        # The main loop appends to this object, so anything other than a list
        # (e.g. a JSON object or string) cannot be resumed from.
        if not isinstance(previous_results, list):
            raise ValueError("saved predictions are not a JSON list")
        generated_results = previous_results
        start_index = len(generated_results)
        print(f"üîÑ Found progress in Drive! Resuming from question #{start_index}...")
    except (OSError, ValueError):
        # OSError: the file could not be read.
        # ValueError: covers json.JSONDecodeError, UnicodeDecodeError and the
        # explicit non-list check above.
        # A bare `except:` is avoided on purpose so that KeyboardInterrupt and
        # unrelated programming errors are not silently swallowed.
        print("‚ö†Ô∏è Output file exists but unreadable. Starting fresh.")
else:
    print("üÜï No previous progress found. Starting from scratch.")

print(f"üöÄ Starting generation for {len(test_data) - start_index} remaining examples...")

# 5. Main Loop
# For every question not yet answered: build the prompt, generate a response,
# split it into reasoning and MQL query, and checkpoint results to Drive.

# Use Qwen Chat Template
tokenizer = get_chat_template(tokenizer, chat_template = "qwen-2.5")

for entry in tqdm(test_data[start_index:]):
    question = entry.get('question', '')
    db_id = entry.get('db_id', '')
    q_id = entry.get('question_id', 'N/A')

    # Get Schema (Try entry first, then fallback to schema_map)
    schema_info = entry.get('schema', entry.get('struct_in', ''))
    if not schema_info and db_id in schema_map:
        schema_info = schema_map[db_id]

    # Dict schemas are rendered as indented JSON; anything else is stringified as-is.
    schema_str = json.dumps(schema_info, indent=2) if isinstance(schema_info, dict) else str(schema_info)

    # --- Construct Prompt (Qwen Style) ---
    # Note: We do NOT add "Task:..." here to match the training data format exactly
    messages = [
        {"role": "user", "content": f"Schema: {schema_str}\nQuestion: {question}"}
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Adds <|im_start|>assistant
        return_tensors = "pt",
    ).to("cuda")

    # One prompt is encoded at a time, so there is no padding and every
    # position is a real token. Passing the mask explicitly avoids the
    # "attention mask is not set and cannot be inferred" warning that
    # generate() emits when the pad token equals the eos token.
    attention_mask = torch.ones_like(inputs)

    # --- Generate ---
    # NOTE(review): temperature / min_p are sampling parameters; whether they
    # take effect depends on sampling being enabled in the model's generation
    # config -- confirm.
    outputs = model.generate(
        input_ids = inputs,
        attention_mask = attention_mask,
        max_new_tokens = 512,
        use_cache = True,
        temperature = 0.1,
        min_p = 0.1,
    )

    # --- Parse Output ---
    # The decoded text contains the prompt followed by the generated tokens.
    response_text = tokenizer.batch_decode(outputs)[0]

    # Default to the full decoded text. It is assigned BEFORE the try block so
    # that the record appended below always refers to this question, even if
    # parsing raises (otherwise the name would be undefined on the first
    # iteration or stale from the previous one).
    generated_part = response_text

    # Extract only the new assistant content
    # Qwen format: ... <|im_start|>assistant\n [CONTENT] <|im_end|>
    try:
        if "<|im_start|>assistant" in response_text:
            # Keep what follows the last assistant marker and drop the end tag.
            generated_part = response_text.split("<|im_start|>assistant")[-1]
            generated_part = generated_part.replace("<|im_end|>", "").strip()

        # Separate Reasoning from MQL
        # Expected format: [REASONING] ... [/REASONING] [MQL] ...
        if "[MQL]" in generated_part:
            parts = generated_part.split("[MQL]")
            reasoning = parts[0].replace("[REASONING]", "").replace("[/REASONING]", "").strip()
            # Text between the first "[MQL]" marker and the next one (if any).
            mql_query = parts[1].strip()
        else:
            # No MQL tag: keep the whole text as reasoning and flag the query.
            reasoning = generated_part
            mql_query = "ERROR_PARSING_TAGS"

    except Exception as e:
        # Best effort: record the failure for this question and keep going.
        reasoning = f"ERROR: {str(e)}"
        mql_query = "ERROR_PARSING"

    # Append Result
    generated_results.append({
        "question_id": q_id,
        "question": question,
        "gold_mql": entry.get('query', ''),
        "generated_mql": mql_query,
        "reasoning": reasoning,
        "db_id": db_id,
        "full_response": generated_part # Kept for debugging
    })

    # --- Save to Drive (Checkpoint) ---
    # The whole list is rewritten each time; the auto-resume step relies on its
    # length to know where to continue.
    if len(generated_results) % save_every == 0:
        with open(output_file, "w") as f:
            json.dump(generated_results, f, indent=2)

# Final Save (covers the tail that did not land on a `save_every` boundary)
with open(output_file, "w") as f:
    json.dump(generated_results, f, indent=2)

print(f"\n‚úÖ SUCCESS! All results saved to: {output_file}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚úÖ Loading schemas from: /content/collections.json
üîÑ Loading model from: /content/drive/MyDrive/QwenTraining_Run1/checkpoint-253
==((====))==  Unsloth 2026.2.1: Fast Qwen2 patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
‚úÖ Model loaded & optimized!
üÜï No previous progress found. Starting from scratch.
üöÄ Starting generation for 620 remaining examples...


  0%|          | 0/620 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 620/620 [3:56:22<00:00, 22.87s/it]


‚úÖ SUCCESS! All results saved to: /content/drive/MyDrive/Qwen_Predictions.json



