# Fine-tune Recommender LLM with LoRA/PEFT

This notebook fine-tunes a small chat model to produce grounded, JSON-formatted recommendations using synthetic SFT data derived from our reviews + retrieval. Artifacts are saved to `models/rag_llm/` and can be used by the merged `finetune_rag_llm.ipynb` notebook.

## 🚀 **Clean Workflow - Run Steps in Order:**

1. **Step 0** - Setup & retrieval artifacts
2. **Step 1** - Setup & configuration
3. **Step 2** - Build synthetic SFT dataset
4. **Step 3** - Convert training data to chat format
5. **Step 4** - Tokenizer, model, and LoRA config
6. **Step 5** - **Fine-tune model** ⚠️ (6-8 hours)
7. **Step 6** - Save final model
8. **Step 7** - **Test the model** ✅
9. **Step 8** - COMET evaluation

## ⚠️ **Important Notes:**
- **Step 5** fine-tunes the model to produce working JSON responses
- **Step 7** tests the fine-tuned model
- All training cells have been cleaned up for clarity
- Model saves to `models/rag_llm/` directory


### Step 0 — Ensure retrieval artifacts exist

- Builds FAISS index and metadata if missing (uses the same pipeline as the merged notebook)
- Required for synthesizing SFT data from retrieval context


In [60]:
from pathlib import Path
import json
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer # type: ignore

try:
    import faiss  # type: ignore
except ImportError:
    raise SystemExit("faiss is required. Install with `pip install faiss-cpu` on Windows.")

import pyarrow.parquet as pq
from tqdm import tqdm

from huggingface_hub import snapshot_download
# Sentence-embedding model used to index the reviews (and, later, to embed queries).
EMBED_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
# Resolve the model to a local snapshot directory and load the embedder from it.
# The model is loaded exactly once; a second load of the same weights only costs
# start-up time and memory.
EMBED_MODEL_DIR = snapshot_download(repo_id=EMBED_MODEL)
embedder = SentenceTransformer(EMBED_MODEL_DIR)
embedding_dim = embedder.get_sentence_embedding_dimension()

# NOTE(review): DATA_PATH is not read by build_index below, which locates the
# labeled CSV on its own; kept so that any other cell referencing it still works.
DATA_PATH = Path('data/processed/reviews_with_stars.csv')
# Project root = the working directory if it contains `data/`, otherwise its parent
# (covers running the notebook from a sub-folder one level down).
PROJECT_ROOT = Path.cwd() if (Path.cwd() / 'data').exists() else Path.cwd().parent
INDEX_DIR = PROJECT_ROOT / 'models' / 'rag_llm' / 'step_0'
INDEX_DIR.mkdir(parents=True, exist_ok=True)
INDEX_PATH = INDEX_DIR / 'reviews_all-MiniLM-L6-v2.index'
METADATA_PATH = PROJECT_ROOT / 'data' / 'rag_llm' / 'processed' / 'review_metadata.parquet'
METADATA_PATH.parent.mkdir(parents=True, exist_ok=True)
MANIFEST_PATH = INDEX_DIR / 'manifest.json'
# Name recorded in the manifest; single source of truth is EMBED_MODEL.
MODEL_NAME = EMBED_MODEL
# Number of comments handed to the embedder per outer loop iteration in build_index.
BATCH_SIZE = 256


def build_index(force_rebuild: bool = False):
    """Build the FAISS review index, the metadata parquet and the manifest.

    The build is skipped when all three artifacts (``INDEX_PATH``,
    ``METADATA_PATH``, ``MANIFEST_PATH``) already exist, unless
    ``force_rebuild`` is true.

    Args:
        force_rebuild: Rebuild even if every artifact is already on disk.

    Raises:
        FileNotFoundError: No labeled CSV was found under
            ``data/generate_stars/processed``.
        ValueError: The labeled data lacks a required column, or contains no
            non-empty comments to index.
    """
    artifacts_ready = INDEX_PATH.exists() and METADATA_PATH.exists() and MANIFEST_PATH.exists()
    if artifacts_ready and not force_rebuild:
        print('Index and metadata already exist. Skipping build.')
        return

    # Locate the project base: the nearest directory (cwd or an ancestor) that
    # contains data/generate_stars/processed. Falls back to cwd if none does.
    def _find_base_dir(start: Path) -> Path:
        for folder in (start, *start.parents):
            if (folder / 'data' / 'generate_stars' / 'processed').exists():
                return folder
        return start

    BASE_DIR = _find_base_dir(Path.cwd())
    processed_dir = BASE_DIR / 'data' / 'generate_stars' / 'processed'
    # First existing file wins.
    candidates = [
        processed_dir / 'reviews_with_stars.csv',
        processed_dir / 'reviews_with_stars_trained.csv',
    ]
    data_path = next((p for p in candidates if p.exists()), None)
    if data_path is None:
        # Explicit raise instead of `assert`, which is stripped under `python -O`.
        raise FileNotFoundError(
            f"Missing labeled data under {processed_dir}. Run generate_stars.ipynb first."
        )
    print('Using labeled data at:', data_path)

    df = pd.read_csv(data_path)

    # Ensure stars_float exists (prefer float ratings). Derive from 'stars' if needed;
    # values that cannot be parsed become NaN.
    if 'stars_float' not in df.columns:
        if 'stars' in df.columns:
            df['stars_float'] = pd.to_numeric(df['stars'], errors='coerce').astype(float)
        else:
            raise ValueError("No 'stars_float' or 'stars' column in labeled data.")

    # Minimal schema check for remaining fields
    req = ['source', 'place', 'comment']
    missing = [c for c in req if c not in df.columns]
    if missing:
        raise ValueError(f"Labeled data missing columns: {missing}")

    # Drop rows without usable comment text; reset the index so that row position
    # in the dataframe equals the vector position in the FAISS index.
    df = df.dropna(subset=['comment']).copy()
    df['comment'] = df['comment'].astype(str).str.strip()
    df = df[df['comment'].str.len() > 0].reset_index(drop=True)

    texts = df['comment'].tolist()
    n = len(texts)
    if n == 0:
        # An empty index would make every later retrieval come back empty.
        raise ValueError(f"No non-empty comments found in {data_path}; nothing to index.")

    # Embed in chunks of BATCH_SIZE into a preallocated float32 matrix.
    embeddings = np.empty((n, embedding_dim), dtype='float32')
    for start in tqdm(range(0, n, BATCH_SIZE), total=(n + BATCH_SIZE - 1)//BATCH_SIZE, desc="Embedding"):
        end = min(start + BATCH_SIZE, n)
        batch = texts[start:end]
        emb = embedder.encode(batch, batch_size=64, show_progress_bar=False, convert_to_numpy=True, normalize_embeddings=True)
        embeddings[start:end] = emb

    # Inner-product index over normalized embeddings.
    index = faiss.IndexFlatIP(embedding_dim)
    index.add(embeddings)
    faiss.write_index(index, str(INDEX_PATH))

    # row_id mirrors the vector position so search results can be mapped back to rows.
    metadata = df[['source', 'place', 'comment', 'stars_float']].copy()
    metadata.insert(0, 'row_id', np.arange(len(metadata), dtype=np.int64))
    METADATA_PATH.parent.mkdir(parents=True, exist_ok=True)
    metadata.to_parquet(METADATA_PATH, index=False)

    manifest = {
        'model': MODEL_NAME,
        'embedding_dim': int(embedding_dim),
        'index_type': 'IndexFlatIP',
        'index_path': str(INDEX_PATH),
        'metadata_path': str(METADATA_PATH),
        'num_vectors': int(index.ntotal)
    }
    with open(MANIFEST_PATH, 'w', encoding='utf-8') as f:
        json.dump(manifest, f, ensure_ascii=False, indent=2)
    print('Built index and metadata.')


# Ensure the retrieval artifacts are ready: with force_rebuild=False the build only
# runs when the index, the metadata parquet or the manifest is missing.
build_index(force_rebuild=False)



Using labeled data at: c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\data\generate_stars\processed\reviews_with_stars.csv


Embedding: 100%|██████████| 38/38 [03:59<00:00,  6.30s/it]

Built index and metadata.





### Step 1 — Setup & configuration

- Choose a small instruction model (CPU-friendly)
- Define output directory `models/rag_llm/`
- Reuse allowed places + retrieval to synthesize SFT data


In [61]:
import os
import json
from pathlib import Path
import random

import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer
try:
    import faiss  # type: ignore
except ImportError:
    raise SystemExit("faiss is required. Install with `pip install faiss-cpu` on Windows.")
import pyarrow.parquet as pq

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
)

# PEFT / LoRA
try:
    from peft import LoraConfig, get_peft_model, PeftModel
except ImportError:
    raise SystemExit("peft is required. Install with `pip install peft`.")

# Base chat model to fine-tune.
BASE_MODEL = 'Qwen/Qwen3-0.6B'  # Qwen 0.6B for 8GB VRAM

# Project root = the working directory if it contains `data/`, otherwise its parent.
PROJECT_ROOT = Path.cwd() if (Path.cwd() / 'data').exists() else Path.cwd().parent

# All fine-tuning artifacts live under models/rag_llm/.
OUTPUT_DIR = PROJECT_ROOT / 'models' / 'rag_llm'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Retrieval artifacts written by Step 0; the manifest stores the index and metadata paths.
INDEX_DIR = OUTPUT_DIR / 'step_0'
manifest = json.loads((INDEX_DIR / 'manifest.json').read_text(encoding='utf-8'))
faiss_index = faiss.read_index(manifest['index_path'])
md_df = pq.read_table(manifest['metadata_path']).to_pandas()

# The closed set of places the assistant is allowed to recommend.
ALLOWED_PLACES = sorted(md_df['place'].dropna().unique().tolist())

# Ensure stars_float is present in metadata
aassert_col = 'stars_float'
if aassert_col not in md_df.columns:
    raise SystemExit("Expected 'stars_float' in metadata; re-run Step 0 with force_rebuild=True.")

# Embeddings for retrieval context
EMBED_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
embedder = SentenceTransformer(EMBED_MODEL)

def retrieve(query: str, k: int = 8) -> pd.DataFrame:
    """Return the metadata rows of the ``k`` nearest reviews for ``query``.

    The query is embedded with the module-level ``embedder`` (normalized,
    float32) and searched against ``faiss_index``. Each returned row is the
    matching ``md_df`` record plus two extra fields: ``score`` (the search
    score as a float) and ``stars`` (a float copy of ``stars_float``, NaN when
    that field is absent).

    Args:
        query: Free-text search query.
        k: Number of neighbours requested from the index.

    Returns:
        One row per hit, in the order the index returned them; result slots
        whose index is -1 are skipped. An empty DataFrame when nothing matched.
    """
    query_vec = embedder.encode(
        [query], convert_to_numpy=True, normalize_embeddings=True
    ).astype('float32')
    scores, idx = faiss_index.search(query_vec, k)

    records = []
    for row_pos, similarity in zip(idx[0], scores[0]):
        if row_pos == -1:
            continue
        record = md_df.iloc[int(row_pos)].to_dict()
        record['score'] = float(similarity)
        # For convenience, expose a float alias for downstream
        record['stars'] = float(record.get('stars_float', float('nan')))
        records.append(record)
    return pd.DataFrame(records)



### Step 2 — Build synthetic SFT dataset

- Create input/output pairs using retrieval context
- Inputs: system prompt + allowed places + user query + review context
- Targets: JSON with `recommended_places`, `reasoning`, and `citations`
- Saves `data/rag_llm/rag_sft.jsonl`


In [62]:
# Instruction shared by every synthetic example (and reused at inference time).
SYSTEM_PROMPT = (
    "You are a travel recommendation assistant for Australian destinations. "
    "Recommend only places from Allowed Places. Ground answers in the context. "
    "Respond with JSON: {recommended_places: [..], reasoning: str, citations: [{place, source, stars, snippet}]}"
)

# Target size (adjustable)
NUM_EXAMPLES_TARGET = 1000
# Fixed seed so the generated queries are reproducible.
RNG_SEED = 42
random.seed(RNG_SEED)

# Query templates and facets
activities = [
    "short hikes", "lookouts", "waterfalls", "swimming spots", "wildlife",
    "cultural experiences", "sunset views", "sunrise views", "family-friendly walks",
    "night sky views", "quiet camping", "scenic drives"
]
modifiers = [
    "easy", "moderate", "kid-friendly", "photogenic", "less crowded", "near facilities"
]


def _random_intent() -> str:
    """Draw one "<modifier> <activity>" query; activity is drawn first, then modifier."""
    activity = random.choice(activities)
    modifier = random.choice(modifiers)
    return f"{modifier} {activity}"


# 300 draws; duplicates are expected and removed when USER_QUERIES is built.
intents = [_random_intent() for _ in range(300)]

# Ensure each allowed place appears by creating place-focused intents
place_anchored = [f"best things to do at {p}" for p in ALLOWED_PLACES]
# dict.fromkeys de-duplicates while keeping first-seen order.
USER_QUERIES = list(dict.fromkeys(intents + place_anchored))

# Destination of the instruction/output JSONL file.
OUT_PATH = PROJECT_ROOT / 'data' / 'rag_llm' / 'processed' / 'rag_sft.jsonl'
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

def build_context(hits: pd.DataFrame, max_chars: int = 1200, max_rows: int = 8) -> str:
    """Render retrieval hits as a bullet list of review lines for the prompt.

    Each line has the form
    ``- Place: <place> | Source: <source> | Stars: <x.x> | Review: <snippet>``,
    where the snippet is the comment cut to 240 characters (with ``...``
    appended when it was cut).

    Args:
        hits: Rows with ``place``, ``source`` and ``comment``; ``stars`` is
            optional and rendered as ``nan`` when missing.
        max_chars: Character budget for the sum of line lengths (the joining
            newlines are not counted).
        max_rows: Maximum number of leading rows considered.

    Returns:
        The accepted lines joined by newlines. Rendering stops at the first
        line that would exceed the budget; an empty string if none fits.
    """
    accepted = []
    remaining = max_chars
    for _, row in hits.head(max_rows).iterrows():
        review = str(row['comment'])
        review = review if len(review) <= 240 else review[:240] + '...'
        entry = f"- Place: {row['place']} | Source: {row['source']} | Stars: {float(row.get('stars', float('nan'))):.1f} | Review: {review}"
        if len(entry) > remaining:
            break
        accepted.append(entry)
        remaining -= len(entry)
    return "\n".join(accepted)

from tqdm import tqdm

# --- Synthesize the instruction/output SFT file -------------------------------
# For every query we retrieve once, then write several variants that differ only
# in how the retrieved hits are shuffled.
VARIANTS_PER_QUERY = 4  # shuffled variants written per query per round
RETRIEVE_K = 12         # hits fetched per query before shuffling

num_written = 0
max_rounds = 1000  # hard cap to avoid infinite loop if retrieval becomes empty
allowed_lookup = set(ALLOWED_PLACES)             # O(1) membership when filtering targets
allowed_places_text = ', '.join(ALLOWED_PLACES)  # identical in every prompt

with open(OUT_PATH, 'w', encoding='utf-8') as f, \
        tqdm(total=NUM_EXAMPLES_TARGET, desc='SFT synthesis') as pbar:
    rounds = 0
    while num_written < NUM_EXAMPLES_TARGET and rounds < max_rounds:
        # Shuffle the query order each round for diversity
        for q in random.sample(USER_QUERIES, len(USER_QUERIES)):
            if num_written >= NUM_EXAMPLES_TARGET:
                break
            # retrieve() does not depend on the variant, so it is called once per
            # query instead of once per variant.
            base_hits = retrieve(q, k=RETRIEVE_K)
            if base_hits.empty:
                continue
            for _ in range(VARIANTS_PER_QUERY):
                if num_written >= NUM_EXAMPLES_TARGET:
                    break
                # The only source of variation: a seeded shuffle of the same hits.
                hits = base_hits.sample(frac=1.0, random_state=random.randint(0, 10_000)).reset_index(drop=True)
                context = build_context(hits, max_rows=8)
                if not context:
                    continue
                # Up to three places with the highest mean stars across all retrieved hits
                # ('stars' is the float alias added by retrieve()).
                top_places = (
                    hits.groupby('place')['stars']
                        .mean()
                        .sort_values(ascending=False)
                        .head(3)
                        .index.tolist()
                )
                # Citations are the first five hits in the shuffled order.
                # NOTE(review): `context` can hold fewer rows than this (build_context
                # stops at its max_chars budget) and top_places is computed over all
                # RETRIEVE_K hits, so a target may cite or recommend reviews that are
                # not in the prompt. Confirm whether that is intended for a model that
                # is told to ground its answers in the context.
                cits = [
                    {
                        'place': r['place'], 'source': r['source'],
                        'stars': round(float(r.get('stars', float('nan'))), 1),
                        'snippet': str(r['comment'])[:220],
                    }
                    for _, r in hits.head(5).iterrows()
                ]
                prompt = (
                    f"[SYSTEM]\n{SYSTEM_PROMPT}\n\n"
                    f"[ALLOWED_PLACES]\n{allowed_places_text}\n\n"
                    f"[USER_QUERY]\n{q}\n\n"
                    f"[REVIEW_CONTEXT]\n{context}\n\n"
                )
                target = {
                    'recommended_places': [p for p in top_places if p in allowed_lookup],
                    'reasoning': 'Based on reviews and stars for relevance to the query.',
                    'citations': cits,
                }
                f.write(json.dumps({'instruction': prompt, 'output': target}, ensure_ascii=False) + "\n")
                num_written += 1
                pbar.update(1)
        rounds += 1

print('Wrote SFT dataset:', OUT_PATH, '| examples:', num_written)



SFT synthesis: 100%|██████████| 1000/1000 [00:07<00:00, 129.63it/s]

Wrote SFT dataset: c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\data\rag_llm\processed\rag_sft.jsonl | examples: 1000





### Step 3 — Convert training data to proper chat format

In [63]:
import json
import os

def convert_to_chat_format(input_file, output_file, system_prompt=None):
    """Convert an instruction/output JSONL file into chat ``messages`` JSONL.

    Every input line must be a JSON object with ``instruction`` and ``output``
    keys. It is rewritten as ``{"messages": [system, user, assistant]}`` where
    the user turn is the instruction and the assistant turn is ``output``
    serialised as JSON text. Non-ASCII characters are kept verbatim
    (``ensure_ascii=False``) so the assistant target contains the same
    characters as the review text in the prompt rather than ``\\uXXXX`` escapes.

    Args:
        input_file: Source JSONL path. If relative, both paths are resolved
            against the nearest directory (cwd or an ancestor) containing
            ``data/``.
        output_file: Destination JSONL path.
        system_prompt: Text of the system turn. Defaults to the travel
            recommendation prompt used throughout this notebook.

    Returns:
        int: Number of examples written.

    Raises:
        FileNotFoundError: ``input_file`` does not exist.
    """
    if system_prompt is None:
        system_prompt = "You are a travel recommendation assistant for Australian destinations. Recommend only places from Allowed Places. Ground answers in the context. Respond with JSON: {recommended_places: [..], reasoning: str, citations: [{place, source, stars, snippet}]}"

    print(f"Converting {input_file} to chat format...")

    # Use absolute paths to avoid working directory issues
    if not os.path.isabs(input_file):
        # Walk up from the cwd until a directory containing `data` is found
        # (stops at the filesystem root if there is none).
        project_root = os.path.abspath('')
        while project_root != os.path.dirname(project_root):
            if os.path.exists(os.path.join(project_root, 'data')):
                break
            project_root = os.path.dirname(project_root)

        input_file = os.path.join(project_root, input_file)
        output_file = os.path.join(project_root, output_file)

    print(f"Using absolute paths:")
    print(f"  Input: {input_file}")
    print(f"  Output: {output_file}")

    # Verify input file exists
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    converted = 0
    skipped = 0
    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
        for line_num, line in enumerate(f_in, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not errors.
                continue
            try:
                data = json.loads(line)
                instruction = data['instruction']
                output = data['output']
            except (json.JSONDecodeError, KeyError, TypeError) as e:
                # Malformed record: report it and keep going. I/O errors are
                # deliberately not caught here so they are not mistaken for bad data.
                print(f"Error processing line {line_num}: {e}")
                skipped += 1
                continue

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": instruction},
                {"role": "assistant", "content": json.dumps(output, ensure_ascii=False)},
            ]
            f_out.write(json.dumps({"messages": messages}, ensure_ascii=False) + '\n')
            converted += 1

            if converted % 100 == 0:
                print(f"Converted {converted} examples...")

    if skipped:
        print(f"Skipped {skipped} malformed line(s).")
    print(f"✅ Conversion complete! Saved to {output_file}")
    return converted

# Convert the Step 2 instruction/output file into the chat-format file that the
# fine-tuning cell loads. Both paths are relative, so the function resolves them
# against the directory that contains `data/`.
convert_to_chat_format('data/rag_llm/processed/rag_sft.jsonl', 'data/rag_llm/processed/rag_sft_chat.jsonl')

Converting data/rag_llm/processed/rag_sft.jsonl to chat format...
Using absolute paths:
  Input: c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\data/rag_llm/processed/rag_sft.jsonl
  Output: c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\data/rag_llm/processed/rag_sft_chat.jsonl
Converted 100 examples...
Converted 200 examples...
Converted 300 examples...
Converted 400 examples...
Converted 500 examples...
Converted 600 examples...
Converted 700 examples...
Converted 800 examples...
Converted 900 examples...
Converted 1000 examples...
✅ Conversion complete! Saved to c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\data/rag_llm/processed/rag_sft_chat.jsonl


### Step 4 — Tokenizer, model, and LoRA config

- Load base chat model + tokenizer
- Attach LoRA adapters (low‑rank update on attention/projection layers)
- Keep it CPU-friendly (no 8‑bit quantization required)


In [64]:
# Tokenizer for the base model; fall back to EOS as the padding token when the
# tokenizer does not define one.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Pick LoRA targets by probing the model: keep every candidate projection name
# that actually occurs as the last component of a module path.
all_names = [n for n, _ in base_model.named_modules()]
# common candidates across LLaMA-like + MobileLLM variants
candidates = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
leaf_names = {name.rsplit(".", 1)[-1] for name in all_names}
selected = sorted(leaf_names.intersection(candidates))
if not selected:
    # Nothing matched: fall back to the attention projections.
    selected = ["q_proj", "k_proj", "v_proj", "o_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=selected,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapters and report how many parameters will train.
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()



trainable params: 5,046,272 || all params: 601,096,192 || trainable%: 0.8395


### Step 5 — Fine-tune model

⚠️ **IMPORTANT**: This fine-tunes the model to produce working JSON responses.

**What this does**:
- Uses proper chat format data (rag_sft_chat.jsonl)
- Proper training parameters for Qwen model
- Saves to models/rag_llm/ directory
- Will produce working JSON responses instead of garbled text

**Estimated time**: 6-8 hours (estimated for 6 epochs; the training cell below sets `num_train_epochs=3`)

In [65]:
from datasets import load_dataset
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType
import os

# Load the chat-format training data produced by the conversion step.
print("Loading training data...")

# Resolve the relative path against the nearest directory (cwd or an ancestor)
# that contains `data`, so the cell works regardless of the working directory.
data_file = 'data/rag_llm/processed/rag_sft_chat.jsonl'
if not os.path.isabs(data_file):
    current_dir = os.path.abspath('')
    project_root = current_dir
    while True:
        parent_dir = os.path.dirname(project_root)
        reached_fs_root = parent_dir == project_root
        if reached_fs_root or os.path.exists(os.path.join(project_root, 'data')):
            break
        project_root = parent_dir

    data_file = os.path.join(project_root, data_file)

print(f"Using data file: {data_file}")
dataset = load_dataset('json', data_files=data_file)['train']

# Positional 90/10 split (no shuffling): the first 90% of rows train, the rest evaluate.
n_total = len(dataset)
train_size = int(0.9 * n_total)
eval_size = n_total - train_size
train_dataset = dataset.select(range(train_size))
eval_dataset = dataset.select(range(train_size, n_total))

print(f"Training samples: {len(train_dataset)}")
print(f"Evaluation samples: {len(eval_dataset)}")


# Preprocess: tokenize chat messages into input_ids
from transformers import default_data_collator
# Maximum number of tokens kept per example; longer sequences are truncated.
MAX_LEN = 1024


def preprocess(example):
    """Turn one chat example into causal-LM training features.

    The ``messages`` list is rendered to text with the tokenizer's chat
    template (no generation prompt, thinking disabled), tokenized with
    truncation at ``MAX_LEN``, and ``labels`` is set to a copy of
    ``input_ids`` so the loss covers every token of the rendered conversation.

    Args:
        example: A dataset row with a ``messages`` field.

    Returns:
        The tokenizer output with an added ``labels`` entry.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False,
        enable_thinking=False,
    )
    features = tokenizer(rendered, truncation=True, max_length=MAX_LEN)
    features["labels"] = features["input_ids"].copy()
    return features

from transformers import DataCollatorForSeq2Seq

# Tokenize every example once, then apply the same positional split as above.
dataset_tok = dataset.map(preprocess, remove_columns=dataset.column_names)
train_tok = dataset_tok.select(range(train_size))
eval_tok = dataset_tok.select(range(train_size, train_size + eval_size))

# Examples have different lengths and are not padded by preprocess(), so the
# collator must pad each batch. DataCollatorForSeq2Seq pads input_ids and
# attention_mask via the tokenizer and pads labels with -100 so padding is
# ignored by the loss. With a batch size of 1 this is a no-op, but it keeps the
# cell working if the batch size is raised.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    padding=True,
    label_pad_token_id=-100,
)

# Training arguments with better settings
training_args = TrainingArguments(
    output_dir=str(OUTPUT_DIR),
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    warmup_steps=100,
    learning_rate=1e-4,
    logging_steps=10,
    # Evaluate and checkpoint on the same schedule so the best checkpoint
    # (lowest eval_loss) can be restored at the end.
    save_steps=50,
    eval_steps=50,
    eval_strategy="steps",
    save_strategy="steps",
    # Keep disk usage bounded: a checkpoint is written every 50 steps and would
    # otherwise accumulate for the whole run.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="tensorboard",
    logging_dir=str(OUTPUT_DIR / "tb"),
    remove_unused_columns=False,
)

# Create trainer
# NOTE(review): the recorded cell output shows a warning pointing at this call —
# presumably the `tokenizer=` argument being deprecated in favour of
# `processing_class=`; confirm against the installed transformers version
# before switching.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=eval_tok,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

# Start training
print("Starting fine-tuning...")
trainer.train()

# Save the final model (for a PEFT-wrapped model this writes the adapter) and
# the tokenizer next to it.
final_dir = OUTPUT_DIR / "final"
trainer.save_model(str(final_dir))
tokenizer.save_pretrained(str(final_dir))

print("✅ Fine-tuning complete!")



Loading training data...
Using data file: c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\data/rag_llm/processed/rag_sft_chat.jsonl


Generating train split: 0 examples [00:00, ? examples/s]

Training samples: 900
Evaluation samples: 100


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Starting fine-tuning...


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

### Step 6 — Save final model


In [None]:
# Step 6 — Save final model bundle (run this AFTER training)
import os, torch
# Destination of the final bundle (model weights + tokenizer).
final_dir = OUTPUT_DIR / 'final'
final_dir.mkdir(parents=True, exist_ok=True)

# Preferred path: load the adapters from disk, merge them into the base weights
# and save a standalone model. If anything in that path fails, fall back to
# saving the in-memory PEFT model (adapters only) and say why.
# NOTE(review): no cell visible in this notebook writes OUTPUT_DIR / 'adapters'
# (the training cell saves into OUTPUT_DIR / 'final'), and `base_model` was
# already wrapped by get_peft_model() earlier — confirm that this directory
# exists and that re-wrapping the same base model is intended.
adapters_dir = OUTPUT_DIR / 'adapters'
try:
    from peft import PeftModel
    merged = PeftModel.from_pretrained(base_model, str(adapters_dir))
    merged = merged.merge_and_unload()
    merged.save_pretrained(final_dir)
    torch.save(merged.state_dict(), final_dir / 'model_state.pth')
    print('Saved merged model to', final_dir)
except Exception as e:
    # Best-effort fallback, but never silent: report what went wrong.
    print(f'Merging adapters from {adapters_dir} failed ({type(e).__name__}: {e}); saving adapters only.')
    model.save_pretrained(final_dir)
    torch.save(model.state_dict(), final_dir / 'adapters_state.pth')
    print('Saved adapters to', final_dir)

# Ensure tokenizer and configs are persisted with final
try:
    tokenizer.save_pretrained(final_dir)
    print('Saved tokenizer to', final_dir)
except Exception as e:
    print('Tokenizer save failed:', e)



Saved merged model to c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\models\rag_llm\final
Saved tokenizer to c:\Users\TARIK\Desktop\Charles Darwin University\4 - Year 1 - Semester 2\IT CODE FAIR\Data Science Challenge\models\rag_llm\final


### Step 7 — Chat with model

- Load the saved model from `models/rag_llm/`
- Run a sample query through retrieval + model and print JSON


In [None]:
### Step 7 — Test the model
import torch
import re

def test_corrected_model(model_dir=None):
    """Load the fine-tuned model, run a few sample queries and return a query helper.

    Args:
        model_dir: Directory holding the saved model and tokenizer. Defaults to
            ``OUTPUT_DIR / "final"``, the directory the training and save cells
            of this notebook write to.

    Returns:
        The ``ask_corrected(query, k=8, max_new_tokens=512)`` function, which
        retrieves review context for a query, generates a response and returns
        the trailing JSON object of that response (or the raw text if no JSON
        object is found).
    """
    if model_dir is None:
        # Anchor to the notebook's output directory instead of a cwd-relative path.
        model_dir = OUTPUT_DIR / "final"
    model_dir = str(model_dir)

    # Load the fine-tuned model
    ft_tok = AutoTokenizer.from_pretrained(model_dir)
    ft_model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True)
    ft_model = ft_model.to('cuda' if torch.cuda.is_available() else 'cpu').eval()

    def ask_corrected(query: str, k: int = 8, max_new_tokens: int = 512) -> str:
        """Answer ``query`` with the fine-tuned model, grounded in ``k`` retrieved reviews."""
        hits = retrieve(query, k=k)
        # Same context renderer as the SFT data, so the model sees the line format
        # it was trained on.
        context = build_context(hits)

        # The user turn mirrors the training prompt section by section
        # ([SYSTEM], [ALLOWED_PLACES], [USER_QUERY], [REVIEW_CONTEXT]).
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": (
                f"[SYSTEM]\n{SYSTEM_PROMPT}\n\n"
                f"[ALLOWED_PLACES]\n{', '.join(ALLOWED_PLACES)}\n\n"
                f"[USER_QUERY]\n{query}\n\n"
                f"[REVIEW_CONTEXT]\n{context}\n\n"
            )}
        ]

        prompt_text = ft_tok.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False  # Disable thinking for cleaner output
        )

        device = next(ft_model.parameters()).device
        inputs = ft_tok(prompt_text, return_tensors='pt').to(device)

        with torch.no_grad():
            out = ft_model.generate(
                **inputs,
                # The trained targets carry up to five citations with snippets of up
                # to 220 characters each, so the budget must be well above 200 tokens
                # for the JSON to be able to close.
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=ft_tok.eos_token_id
            )

        # Decode only the newly generated tokens. Splitting the full text on the
        # word "assistant" is fragile: the prompt itself contains that word and a
        # response may too.
        prompt_len = inputs['input_ids'].shape[1]
        text = ft_tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

        # Return the JSON object that ends the response, if there is one.
        m = re.search(r"\{[\s\S]*\}\s*$", text)
        return m.group(0) if m else text

    # Test with sample queries
    test_queries = [
        "best waterfalls and swimming spots",
        "family-friendly walks and sunset views",
        "places that are not too hot"
    ]

    print("🤖 Testing corrected fine-tuned model:")
    print("="*60)

    for i, query in enumerate(test_queries, 1):
        print(f"\n🗣️  Question {i}: {query}")
        print("🤖 Model response:")
        try:
            response = ask_corrected(query)
            print(response)
        except Exception as e:
            # Keep going so one failing query does not hide the others.
            print(f"❌ Error: {e}")
        print("\n" + "="*60)

    return ask_corrected

# Run the sample queries and keep the returned `ask_corrected` helper bound at
# module level.
ask_corrected = test_corrected_model()

### Step 8 — COMET evaluation
