# Generazione dinamica di few-shot

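Estrae fino a 15 esempi **complessi** da `data/oncomx/dev.json` e completa la selezione
con esempi **semplici** fino a un totale di 30 (15 e 15 quando i complessi disponibili
sono almeno 15), poi salva il risultato in `prompts/few_shot_examples.txt`.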

In [3]:
import json
import random
from pathlib import Path

# Parameters
# Input JSON file the examples are loaded from
DEV_PATH = Path('data/oncomx/dev.json')
# Output text file the selected few-shot examples are written to
OUT_PATH = Path('prompts/few_shot_examples.txt')
# Total number of examples to select
N_TOTAL = 30
# How many of those should be complex ones
N_COMPLEX = 15
# Substrings whose presence in the upper-cased SQL marks a query as complex
COMPLEX_KW = (
    'JOIN',
    'GROUP',
    'DISTINCT',
    'COUNT',
    'EXISTS',
    'IN (',
)

# Load every example from the input file
with DEV_PATH.open(encoding='utf-8') as f:
    data = json.load(f)

# Collect (question, sql) pairs, skipping entries where either part is empty
all_ex = []
for ex in data:
    # Trim the question and flatten it onto a single line
    raw_question = ex.get('question', '')
    q = raw_question.strip().replace('\n', ' ')

    # Use the 'query' field when present, otherwise fall back to 'sql';
    # trim surrounding whitespace and drop trailing semicolons
    fallback_sql = ex.get('sql', '')
    raw_sql = ex.get('query', fallback_sql)
    sql = raw_sql.strip().rstrip(';')

    if not (q and sql):
        continue
    all_ex.append((q, sql))

# Split the examples into complex and simple ones in a single pass.
# An example is complex when its upper-cased SQL contains any COMPLEX_KW entry;
# every other example is simple. Input order is preserved in both lists.
# NOTE: this is a plain substring test, so a keyword embedded in a longer
# identifier (e.g. COUNT inside ACCOUNT) also counts as a match.
complex_ex = []
simple_ex = []
for e in all_ex:
    # Upper-case once per example instead of once per keyword
    sql_upper = e[1].upper()
    if any(kw in sql_upper for kw in COMPLEX_KW):
        complex_ex.append(e)
    else:
        # Decided by the same predicate, avoiding a linear `e not in complex_ex`
        # scan for every example (quadratic overall)
        simple_ex.append(e)

# Random sampling with a fixed seed so the selection is reproducible
random.seed(42)

# Take up to N_COMPLEX complex examples (fewer if not enough are available)
n_complex = min(N_COMPLEX, len(complex_ex))
chosen = random.sample(complex_ex, n_complex)

# Fill the remaining slots, up to N_TOTAL, with simple examples
remain = N_TOTAL - len(chosen)
n_simple = min(remain, len(simple_ex))
chosen.extend(random.sample(simple_ex, n_simple))

# Write the few-shot file, creating the output directory first if needed
out_dir = OUT_PATH.parent
out_dir.mkdir(parents=True, exist_ok=True)

with OUT_PATH.open('w', encoding='utf-8') as f:
    for q, sql in chosen:
        # One entry per example: question line, SQL line, then a blank line
        entry = f"-- Question: {q}\n-- SQL: {sql}\n\n"
        f.write(entry)

n_written = len(chosen)
print(f"Written {n_written} examples to {OUT_PATH}")

Written 30 examples to prompts\few_shot_examples.txt
