In [1]:
#Requisitos (instalar antes)
#Tener en cuenta que para el uso maximo con gpu es necesario instalar cuda con la version de python y drivers de la gráfica. 
#!pip install -q -U rank-bm25 sentence-transformers torch faiss-cpu numpy llama-cpp-python

In [None]:
# RAG híbrido mejorado para PTIC2
# - Híbrido BM25 + e5 + CrossEncoder (reranker)
# - Facetas (tipo, bloque, piso, encargado, carrera, facultad, attrs)
# - Modo agente vs modo evaluación (prompts y temperatura distintas)
# - Normalización más robusta (singular/plural básico) para mejorar recall léxico
# - PRIORIDAD a señales fuertes (códigos / aliases) por encima de facetas

import sys, subprocess, importlib, json, re, unicodedata, time
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional

def ensure(pkg, pip_name=None):
    """Importa un paquete o lo instala en tiempo de ejecución."""
    try:
        return importlib.import_module(pkg)
    except ImportError:
        subprocess.check_call([
            sys.executable,
            "-m",
            "pip",
            "install",
            "--upgrade",
            pip_name or pkg,
            "-q",
        ])
        return importlib.import_module(pkg)

# --- deps
rank_bm25 = ensure("rank_bm25", "rank-bm25")
st = ensure("sentence_transformers", "sentence-transformers")
torch = ensure("torch", "torch")
numpy_mod = ensure("numpy", "numpy")
faiss = ensure("faiss", "faiss-cpu")
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer, CrossEncoder
import numpy as np

# llama.cpp
try:
    llama_cpp = ensure("llama_cpp", "llama-cpp-python")
    from llama_cpp import Llama
except Exception:
    Llama = None

engine = None  # para usar luego desde Gradio si quieres

# ============================ paths / modelos ============================

JSON_PATH = r"estructura_semantica_final_PTIC2.json"
GGUF_PATH = r"mistral-7b-instruct-v0.2.Q4_K_M.gguf"
PROMPT_PATH_AGENT = r"prompt_base.txt"
PROMPT_PATH_EVAL = r"prompt_base_ev.txt"  # modo evaluación canónica

EMB_MODEL_NAME = "intfloat/multilingual-e5-base"
RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"

# LLM cfg (valores por defecto, algunos se ajustan por modo)
N_GPU_LAYERS = 5
N_THREADS = 8
N_CTX = 4096
MAX_TOKENS = 256
TEMPERATURE_AGENT = 0.2
TEMPERATURE_EVAL = 0.0

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"[RAG] DEVICE = {DEVICE}")

# ============================ utils ============================

def norm(s: Any) -> str:
    """Normalización fuerte:
    - lower
    - sin tildes
    - colapsa espacios
    - normaliza plural simple (palabras largas terminadas en 's' -> sin 's')
    """
    if s is None:
        return ""
    if isinstance(s, list):
        s = " ".join(map(str, s))
    s = str(s).lower().strip()
    s = unicodedata.normalize("NFD", s)
    s = "".join(c for c in s if unicodedata.category(c) != "Mn")
    s = re.sub(r"\s+", " ", s)
    tokens = s.split()
    norm_tokens = []
    for t in tokens:
        if len(t) > 3 and t.endswith("s"):
            norm_tokens.append(t[:-1])
        else:
            norm_tokens.append(t)
    return " ".join(norm_tokens)

def tokenize_simple(s: str) -> List[str]:
    return re.findall(r"[a-z0-9]+", norm(s))

def char_trigrams(s: str) -> set:
    s = re.sub(r"\s+", " ", s)
    s = f" {s} "
    if len(s) < 3:
        return {s}
    return {s[i : i + 3] for i in range(len(s) - 2)}

def jaccard_trigram(a: str, b: str) -> float:
    A, B = char_trigrams(a), char_trigrams(b)
    if not A or not B:
        return 0.0
    inter = len(A & B)
    union = len(A | B)
    return inter / union if union else 0.0

def jaccard_tokens(a: str, b: str) -> float:
    A, B = set(tokenize_simple(a)), set(tokenize_simple(b))
    if not A or not B:
        return 0.0
    inter = len(A & B)
    union = len(A | B)
    return inter / union if union else 0.0

def dedup_type_name(tipo: Optional[str], nombre: Optional[str]) -> Tuple[str, str]:
    """Evita duplicar tipo en el nombre ("Laboratorio Laboratorio de X")."""
    t = (tipo or "").strip()
    n = (nombre or "").strip()
    if not n and not t:
        return "", ""
    tn = norm(t)
    nn = norm(n)
    base = n
    if tn and (nn.startswith(tn + " ") or nn == tn):
        if n.lower().startswith(t.lower()):
            base = n[len(t) :].lstrip()
    display = (f"{t} {base}".strip() if t else base).strip()
    return base, display

CODE_RE = re.compile(r"\b[a-z]?\d{2,4}[a-z\d]*\b", re.IGNORECASE)

# ============================ carga e index ============================

def load_docs(json_path: str | Path) -> List[Dict[str, Any]]:
    """Carga JSON semántico y construye docs solo de tipo 'espacio'."""
    data = json.load(open(json_path, "r", encoding="utf-8"))
    docs: List[Dict[str, Any]] = []

    def flatten_doc(d: Dict[str, Any], prefix: str = "", out: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        if out is None:
            out = {}
        for k, v in (d or {}).items():
            if str(k).startswith("_"):
                continue
            path = f"{prefix}.{k}" if prefix else str(k)
            if isinstance(v, dict):
                flatten_doc(v, path, out)
            elif isinstance(v, list):
                out[path] = v
            else:
                out[path] = v
        return out

    for e in data.get("espacios", []):
        attrs = e.get("attrs", {}) or {}
        personal = []
        for p in attrs.get("Personal", []) or []:
            nm = p.get("ConNombre") or p.get("nombre") or p.get("Encargado")
            if nm:
                personal.append(str(nm))

        bloque = e.get("bloque") or e.get("bloque_id") or ""
        if isinstance(bloque, str) and bloque.startswith("BL-"):
            bloque = bloque.replace("BL-", "")
        if isinstance(bloque, str) and bloque.isdigit():
            bloque = f"A{bloque}"

        pieza = {
            "id": str(e.get("id") or e.get("codigo") or e.get("nombre")),
            "kind": "espacio",
            "codigo": e.get("codigo"),
            "tipo": e.get("tipo"),
            "nombre": e.get("nombre"),
            "bloque": bloque,
            "piso": attrs.get("piso"),
            "direccionRelativa": attrs.get("direccionRelativa"),
            "direccionOrientativa": attrs.get("direccionOrientativa"),
            "carrera": attrs.get("AsignadoACarrera"),
            "facultad": attrs.get("Facultad") or attrs.get("facultad"),
            "capacidad": attrs.get("ConCapacidad"),
            "encargados": personal,
            "aliases": [str(a) for a in (e.get("aliases") or [])],
            "attrs": attrs,
        }

        fields_raw = [
            pieza.get("tipo"),
            pieza.get("nombre"),
            pieza.get("codigo"),
            str(bloque),
            (f"piso {pieza.get('piso')}" if pieza.get("piso") is not None else ""),
            pieza.get("direccionRelativa"),
            pieza.get("direccionOrientativa"),
            pieza.get("carrera"),
            pieza.get("facultad"),
            pieza.get("capacidad"),
            " ".join(pieza.get("aliases") or []),
            " ".join(pieza.get("encargados") or []),
            json.dumps(pieza.get("attrs", {}), ensure_ascii=False),
        ]
        pieza["_raw_text"] = " ".join([str(x) for x in fields_raw if x])
        pieza["_fulltext"] = norm(" ".join([str(x) for x in fields_raw if x]))

        name_base, name_display = dedup_type_name(pieza.get("tipo"), pieza.get("nombre"))
        pieza["_name_norm"] = norm(name_display)
        pieza["_aliases_norm"] = [norm(a) for a in (pieza.get("aliases") or [])]

        code_ft = norm(pieza.get("codigo") or "")
        name_ft = norm(name_display)
        aliases_ft = norm(" ".join(pieza.get("aliases") or []))
        encargados_ft = norm(" ".join(pieza.get("encargados") or []))
        rest_ft = pieza["_fulltext"]

        def rep(text, k):
            return " ".join([text] * k) if text else ""

        pieza["_bm25_text"] = " ".join(
            [
                rep(code_ft, 6),
                rep(name_ft, 4),
                rep(encargados_ft, 4),
                rep(aliases_ft, 3),
                rest_ft,
            ]
        ).strip()

        pieza["_flat_attrs"] = flatten_doc({"attrs": attrs})
        docs.append(pieza)

    return docs

def collect_attr_catalog(docs: List[Dict[str, Any]]) -> Dict[str, set]:
    values: Dict[str, set] = {}
    for d in docs:
        if d.get("kind") != "espacio":
            continue
        for path, val in (d.get("_flat_attrs") or {}).items():
            if path.startswith("_"):
                continue
            if path not in values:
                values[path] = set()

            def add_val(x):
                s = norm(x)
                if s:
                    values[path].add(s)

            if isinstance(val, list):
                for x in val:
                    add_val(x)
            elif isinstance(val, dict):
                for k in val.keys():
                    add_val(k)
            else:
                add_val(val)
    return values

# ============================ índices ============================

def build_bm25(docs: List[Dict[str, Any]]):
    tokens_all = [d["_bm25_text"].split() for d in docs]
    return BM25Okapi(tokens_all)

def build_embeddings(docs: List[Dict[str, Any]], model_name: str, M: int = 32):
    t0 = time.time()
    model = SentenceTransformer(model_name, device=DEVICE)
    corpus = [d["_raw_text"] for d in docs]
    vecs = model.encode(corpus, batch_size=64, normalize_embeddings=True, show_progress_bar=False)
    vecs = np.asarray(vecs, dtype="float32")
    d = vecs.shape[1]
    index = faiss.IndexHNSWFlat(d, M, faiss.METRIC_INNER_PRODUCT)
    faiss.normalize_L2(vecs)
    try:
        index.hnsw.efConstruction = 200
    except Exception:
        pass
    index.add(vecs)
    print(f"[TIMING][build_embeddings] total={(time.time() - t0):.3f}s")
    return model, index

# ============================ facets & parsing ============================

TIPO_SYNONYMS = {
    "lab": "laboratorio",
    "laboratorio": "laboratorio",
    "laboratorios": "laboratorio",
    "sala": "sala",
    "salas": "sala",
    "oficina": "oficina",
    "oficinas": "oficina",
    "departamento": "departamento",
    "departamentos": "departamento",
    "ducto": "ducto",
    "ductos": "ducto",
    "cuarto": "cuarto",
    "cuartos": "cuarto",
    "infraestructuravertical": "infraestructuravertical",
    "escalera": "infraestructuravertical",
    "escaleras": "infraestructuravertical",
    "ascensor": "infraestructuravertical",
    "ascensores": "infraestructuravertical",
    "bano": "baño",
    "banio": "baño",
    "banos": "baño",
    "banios": "baño",
    "baño": "baño",
    "baños": "baño",
    "sshh": "baño",
    "ssh": "baño",
    "wc": "baño",
    "servicio": "baño",
    "servicios": "baño",
    "higienico": "baño",
    "higienicos": "baño",
    "sanitario": "baño",
    "sanitarios": "baño",
}

def parse_piso(text: str) -> Optional[int]:
    ORD = {
        "primer": 1,
        "primero": 1,
        "primera": 1,
        "1er": 1,
        "1ro": 1,
        "1ra": 1,
        "1º": 1,
        "1°": 1,
        "1o": 1,
        "1a": 1,
        "segundo": 2,
        "segunda": 2,
        "2do": 2,
        "2da": 2,
        "2º": 2,
        "2°": 2,
        "2o": 2,
        "2a": 2,
        "tercero": 3,
        "tercera": 3,
        "tercer": 3,
        "3er": 3,
        "3ro": 3,
        "3ra": 3,
        "3º": 3,
        "3°": 3,
        "3o": 3,
        "3a": 3,
        "cuarto": 4,
        "cuarta": 4,
        "4to": 4,
        "4ta": 4,
        "4º": 4,
        "4°": 4,
        "4o": 4,
        "4a": 4,
        "quinto": 5,
        "quinta": 5,
        "5to": 5,
        "5ta": 5,
        "5º": 5,
        "5°": 5,
        "5o": 5,
        "5a": 5,
        "sexto": 6,
        "sexta": 6,
        "6to": 6,
        "6ta": 6,
        "6º": 6,
        "6°": 6,
        "6o": 6,
        "6a": 6,
        "septimo": 7,
        "septima": 7,
        "7mo": 7,
        "7ma": 7,
        "7º": 7,
        "7°": 7,
        "7o": 7,
        "7a": 7,
        "octavo": 8,
        "octava": 8,
        "8vo": 8,
        "8va": 8,
        "8º": 8,
        "8°": 8,
        "8o": 8,
        "8a": 8,
        "noveno": 9,
        "novena": 9,
        "9no": 9,
        "9na": 9,
        "9º": 9,
        "9°": 9,
        "9o": 9,
        "9a": 9,
        "decimo": 10,
        "decima": 10,
        "10mo": 10,
        "10ma": 10,
        "10º": 10,
        "10°": 10,
        "10o": 10,
        "10a": 10,
    }
    t = norm(text)
    m = re.search(r"\bpiso\s+(\d+)\b", t) or re.search(r"\b(\d+)\s*(?:do|da|º|°|o|a)?\s+piso\b", t)
    if m:
        return int(m.group(1))
    for w, n in ORD.items():
        if re.search(rf"\bpiso\s+{re.escape(w)}\b", t) or re.search(rf"\b{re.escape(w)}\s+piso\b", t):
            return n
    return None

def extract_person_query(text: str) -> str:
    """Intenta extraer el "núcleo" de la persona (nombre/apellidos) desde la query."""
    t = norm(text)
    t = re.sub(r"\b(ing\.?|msc\.?|phd\.?|dr\.?)\b", " ", t)
    stop = {
        "que", "sabe", "saber", "sabes", "sobre", "acerca",
        "del", "de", "la", "el", "los", "las", "en",
        "cuanto", "cuantos", "cuantas",
        "laboratorio", "lab", "laboratorios",
        "espacio", "espacios", "aula", "aulas",
        "sala", "salas", "quien", "quienes",
        "esta", "encargado", "encargada",
    }
    toks = [tok for tok in re.findall(r"[a-z0-9]+", t) if tok not in stop]
    if len(toks) >= 2:
        core = " ".join(toks[-3:])
    else:
        core = " ".join(toks)
    return core

def parse_facets_universal(query: str, docs: List[Dict[str, Any]], attr_catalog: Dict[str, set]) -> Dict[str, Any]:
    qn = norm(query)
    facets: Dict[str, Any] = {}
    toks = re.findall(r"[a-záéíóúñ\.]+", qn)

    tipos = []
    for tok in toks:
        base = TIPO_SYNONYMS.get(tok, None)
        if base and base not in tipos:
            tipos.append(base)
    if tipos:
        facets["tipo"] = tipos

    mb = re.search(r"\b(?:bloque|edificio)\s*([a-z]?\d+)\b", qn)
    if mb:
        b = mb.group(1).upper()
        if b.isdigit():
            b = "A" + b
        facets["bloque"] = b

    p = parse_piso(qn)
    if p is not None:
        facets["piso"] = p

    q_persona = extract_person_query(qn)
    people = set()
    if q_persona:
        for d in docs:
            if d.get("kind") != "espacio":
                continue
            for nm in (d.get("encargados") or []):
                if nm and jaccard_tokens(nm, q_persona) >= 0.3:
                    people.add(nm)
    if people:
        facets["encargado"] = sorted(people)

    carreras = set()
    facults = set()
    for d in docs:
        if d.get("kind") != "espacio":
            continue
        car = d.get("carrera")
        if isinstance(car, list):
            for x in car:
                if norm(x) in qn:
                    carreras.add(x)
        elif car and norm(car) in qn:
            carreras.add(car)
        fac = d.get("facultad")
        if fac and norm(fac) in qn:
            facults.add(fac)
    if carreras:
        facets["carrera"] = sorted(carreras)
    if facults:
        facets["facultad"] = sorted(facults)

    TRIGGERS = (" con ", " que tenga ", " equipado", " equipamiento", " horario", " capacidad")
    ALLOWED_ATTR_PREFIXES = (
        "attrs.Equip", "attrs.equip",
        "attrs.Horar", "attrs.horar",
        "attrs.Capacidad", "attrs.capacidad",
    )
    if any(t in f" {qn} " for t in TRIGGERS):
        for attr_path, known_vals in (attr_catalog or {}).items():
            if not any(norm(attr_path).startswith(norm(p)) for p in ALLOWED_ATTR_PREFIXES):
                continue
            hits = [v for v in known_vals if v and (v in qn or v.replace(" ", "") in qn.replace(" ", ""))]
            if hits:
                facets.setdefault("attrs", {})
                facets["attrs"][attr_path] = sorted(set(hits))

    return facets

# ============================ híbrido: bm25 + e5 + fuzzy/gate ============================

@dataclass
class RAGConfig:
    top_k: int = 12
    pool_k: int = 250
    bm25_cand: int = 500
    emb_cand: int = 300
    min_score_bm25: float = 0.0
    min_score_emb: float = 0.0
    ctx_chars: int = 2200
    use_embeddings: bool = True
    use_reranker: bool = True
    use_facets: bool = True
    faiss_M: int = 32
    faiss_ef_search: int = 150
    generate_enabled: bool = True
    rerank_top_k: int = 150
    rerank_alpha: float = 0.85
    rerank_bonus_weight: float = 0.15
    kind_prior_weight: float = 0.08
    person_prior_weight: float = 0.12
    enable_fuzzy_bonus: bool = True
    fuzzy_min_sim: float = 0.30
    enable_hard_gate: bool = False
    gate_min_len: int = 4
    gate_trigram_sim: float = 0.60
    facet_mode: str = "soft"
    min_faceted: int = 3
    debug: bool = True
    # UMBRAL global para decidir si embeddings son confiables
    # valores típicos e5: random ~0.2-0.3, relevantes >0.5
    min_best_emb_for_any: float = 0.45
    # UMBRAL de "distintividad" del mejor doc (best vs mediana)
    min_emb_gap: float = 0.05

def build_bm25_candidates(
    qn: str,
    bm25: BM25Okapi,
    k: int,
    min_score: float,
    cfg: RAGConfig,
    docs: List[Dict[str, Any]],
) -> List[Tuple[int, float]]:
    t0 = time.time()
    # usar tokenización robusta para que "i2tec?" -> "i2tec"
    toks = tokenize_simple(qn)

    if not toks:
        if cfg.debug:
            print("[BM25] Query vacía tras normalización/tokenización.")
        return []

    scores = bm25.get_scores(toks)
    max_score = float(np.max(scores)) if len(scores) else 0.0
    min_score_seen = float(np.min(scores)) if len(scores) else 0.0

    if cfg.debug:
        print(f"[BM25] toks_query={toks} max={max_score:.4f} min={min_score_seen:.4f} thr={min_score:.4f}")

    # Caso: ninguna palabra de la query aparece en ningún doc
    if max_score <= 0.0:
        if cfg.debug:
            print("[BM25] Sin solape léxico, no se devuelven candidatos.")
        return []

    if cfg.enable_fuzzy_bonus and toks:
        tokq = " ".join(toks)
        bonus = np.zeros_like(scores, dtype="float32")
        for i, d in enumerate(docs):
            smax = 0.0
            smax = max(smax, jaccard_trigram(tokq, d.get("_name_norm", "")))
            smax = max(smax, jaccard_trigram(tokq, norm(d.get("codigo") or "")))
            if smax >= cfg.fuzzy_min_sim:
                bonus[i] = 0.15 * smax
        scores = scores + bonus

    gated_idx = list(range(len(docs)))
    if cfg.enable_hard_gate and toks:
        key = " ".join([t for t in toks if len(t) >= cfg.gate_min_len])
        if key:
            keep = []
            for i, d in enumerate(docs):
                smax = 0.0
                smax = max(smax, jaccard_trigram(key, d.get("_name_norm", "")))
                smax = max(smax, jaccard_trigram(key, norm(d.get("codigo") or "")))
                if smax >= cfg.gate_trigram_sim:
                    keep.append(i)
            gated_idx = keep if keep else gated_idx

    order = np.argsort(scores)[::-1]
    out = []
    for i in order:
        if i not in gated_idx:
            continue
        s = float(scores[i])
        if s < min_score:
            continue
        out.append((int(i), s))
        if len(out) >= k:
            break
    if cfg.debug:
        kept_scores = [s for _, s in out]
        if kept_scores:
            print(
                f"[BM25][CANDS] n={len(out)} "
                f"max_kept={max(kept_scores):.4f} min_kept={min(kept_scores):.4f}"
            )
        print(f"[TIMING][bm25] elapsed={(time.time() - t0):.3f}s returned={len(out)}")
        if cfg.enable_hard_gate:
            print(f"[GATE] gated_len={len(gated_idx)}")
    return out

def build_embeddings_candidates(
    query: str,
    emb_model,
    emb_index,
    k: int,
    min_score: float,
    cfg: RAGConfig,
) -> List[Tuple[int, float]]:
    if emb_index is None or emb_model is None:
        return []
    t0 = time.time()
    qn = norm(query.strip())
    q = ("query: " + qn) if "e5" in EMB_MODEL_NAME.lower() else qn
    qv = emb_model.encode([q], normalize_embeddings=True, show_progress_bar=False)[0].astype("float32")
    faiss.normalize_L2(qv.reshape(1, -1))
    D, I = emb_index.search(qv.reshape(1, -1), k)
    out = [(int(idx), float(s)) for idx, s in zip(I[0].tolist(), D[0].tolist()) if idx >= 0 and s >= min_score]

    if cfg.debug:
        all_scores = D[0].tolist() if len(D) else []
        if all_scores:
            print(
                f"[EMB][RAW] max={max(all_scores):.4f} "
                f"min={min(all_scores):.4f} thr_min_score={min_score:.4f}"
            )
        kept_scores = [s for _, s in out]
        if kept_scores:
            print(
                f"[EMB][CANDS] n={len(out)} "
                f"max_kept={max(kept_scores):.4f} min_kept={min(kept_scores):.4f}"
            )
        print(f"[TIMING][emb] elapsed={(time.time() - t0):.3f}s returned={len(out)}")
    return out

def rrf_fuse(list_of_lists: List[List[Tuple[int, float]]], k: int, k_rrf: int = 60) -> List[int]:
    ranks: Dict[int, float] = {}
    for l in list_of_lists:
        for r, (idx, _) in enumerate(l):
            ranks[idx] = ranks.get(idx, 0.0) + 1.0 / (k_rrf + r + 1.0)
    fused = sorted(ranks.items(), key=lambda kv: -kv[1])
    return [i for i, _ in fused[:k]]

# ============================ render ============================

def snippet_compacto(d: Dict[str, Any]) -> Optional[str]:
    if d.get("kind") != "espacio":
        return None

    partes = []
    if d.get("codigo"):
        partes.append(f"Código={d['codigo']}")
    _, name_display = dedup_type_name(d.get("tipo"), d.get("nombre"))
    if name_display:
        partes.append(f"Nombre={name_display}")

    ubic = []
    if d.get("bloque"):
        ubic.append(f"Bloque {d['bloque']}")
    if d.get("piso") is not None:
        ubic.append(f"Piso {d['piso']}")
    if ubic:
        partes.append("Ubicación=" + " ; ".join(ubic))

    dirp = []
    if d.get("direccionRelativa"):
        dirp.append(d["direccionRelativa"])
    if d.get("direccionOrientativa"):
        dirp.append(d["direccionOrientativa"])
    if dirp:
        partes.append("Dirección=" + " ; ".join(dirp))

    if d.get("carrera"):
        partes.append(
            "Carrera="
            + (
                "; ".join(d["carrera"])
                if isinstance(d["carrera"], list)
                else str(d["carrera"])
            )
        )
    if d.get("facultad"):
        partes.append(f"Facultad={d['facultad']}")
    if d.get("capacidad"):
        partes.append(f"Capacidad={d['capacidad']}")
    if d.get("encargados"):
        partes.append("Encargados=" + "; ".join(d["encargados"]))

    attrs = d.get("_flat_attrs") or {}
    extra = []
    eqp = [k for k in attrs.keys() if "equip" in norm(k)]
    hor = [k for k in attrs.keys() if "horar" in norm(k)]

    def flat(v):
        if isinstance(v, list):
            return "; ".join(str(x) for x in v if x)
        if isinstance(v, dict):
            return "; ".join(str(k) for k in v.keys())
        return str(v) if v is not None else ""

    if eqp:
        vals = "; ".join([flat(attrs[k]) for k in eqp if flat(attrs[k])])
        if vals:
            extra.append(f"Equipamiento={vals}")
    if hor:
        vals = "; ".join([flat(attrs[k]) for k in hor if flat(attrs[k])])
        if vals:
            extra.append(f"Horario={vals}")
    if extra:
        partes.append("attrs{" + " ; ".join(extra) + "}")

    return " | ".join(partes)

def lexical_bonus(q: str, d: Dict[str, Any]) -> float:
    qn = norm(q)
    name = d.get("_name_norm") or ""
    code = norm(d.get("codigo") or "")
    sims = [jaccard_trigram(qn, name), jaccard_trigram(qn, code)]
    return max(sims) if sims else 0.0

# ============================ facet match ============================

def value_matches(doc_val, wanted_list_norm: List[str]) -> bool:
    if not wanted_list_norm:
        return True
    if doc_val is None:
        return False
    cand = [norm(x) for x in (doc_val if isinstance(doc_val, list) else [doc_val])]
    wanted = set(norm(v) for v in wanted_list_norm)
    return any(v in wanted for v in cand)

def doc_facet_match_score(d: Dict[str, Any], facets: Dict[str, Any]) -> float:
    if d.get("kind") != "espacio":
        return 0.0
    wants = 0
    hits = 0

    if "tipo" in facets:
        wants += 1
        hits += int(norm(d.get("tipo")) in [norm(x) for x in facets["tipo"]])

    if "bloque" in facets:
        wants += 1
        hits += int(norm(d.get("bloque")) == norm(facets["bloque"]))

    if "piso" in facets:
        wants += 1
        try:
            hits += int(int(d.get("piso")) == int(facets["piso"]))
        except Exception:
            pass

    if "carrera" in facets:
        wants += 1
        hits += int(value_matches(d.get("carrera"), facets["carrera"]))

    if "facultad" in facets:
        wants += 1
        hits += int(value_matches(d.get("facultad"), facets["facultad"]))

    if "encargado" in facets:
        wants += 1
        encs = [norm(x) for x in (d.get("encargados") or [])]
        qenc = [norm(x) for x in facets["encargado"]]
        hits += int(any(x in encs for x in qenc))

    if "attrs" in facets:
        for attr_path, vals in (facets["attrs"] or {}).items():
            wants += 1
            cur = (d.get("_flat_attrs") or {}).get(attr_path)
            ok = False
            if isinstance(cur, list):
                ok = any(norm(x) in [norm(v) for v in vals] for x in cur)
            elif isinstance(cur, dict):
                ok = any(norm(k) in [norm(v) for v in vals] for k in cur.keys())
            else:
                ok = value_matches(cur, vals)
            hits += int(ok)

    if wants == 0:
        return 0.0
    return hits / wants

# ============================ motor ============================

class RAGEngine:
    def __init__(
        self,
        json_path: str = JSON_PATH,
        prompt_path_agent: str = PROMPT_PATH_AGENT,
        prompt_path_eval: str = PROMPT_PATH_EVAL,
        gguf_path: str = GGUF_PATH,
        emb_model_name: str = EMB_MODEL_NAME,
        cfg: RAGConfig = None,
        evaluation_mode: bool = False,
    ):
        self.json_path = json_path
        self.prompt_path_agent = prompt_path_agent
        self.prompt_path_eval = prompt_path_eval
        self.gguf_path = gguf_path
        self.emb_model_name = emb_model_name
        self.cfg = cfg or RAGConfig()
        self.evaluation_mode = evaluation_mode

        self.docs: List[Dict[str, Any]] = []
        self.attr_catalog: Dict[str, set] = {}
        self.bm25 = None
        self.emb_model = None
        self.emb_index = None
        self.reranker = None
        self.llm = None
        self.known_codes_lower = set()

        self.prompt_agent = (
            Path(self.prompt_path_agent).read_text(encoding="utf-8")
            if Path(self.prompt_path_agent).exists()
            else "{{HECHOS}}\n\nPregunta: {{PREGUNTA}}"
        )
        self.prompt_eval = (
            Path(self.prompt_path_eval).read_text(encoding="utf-8")
            if Path(self.prompt_path_eval).exists()
            else "{{HECHOS}}\n\nPregunta: {{PREGUNTA}}"
        )

        self.rebuild_all()

    def rebuild_all(self):
        t0 = time.time()
        self.docs = load_docs(self.json_path)
        self.attr_catalog = collect_attr_catalog(self.docs)
        self.bm25 = build_bm25(self.docs)

        if self.cfg.use_embeddings:
            self.emb_model, self.emb_index = build_embeddings(self.docs, self.emb_model_name, M=self.cfg.faiss_M)
            try:
                if hasattr(self.emb_index, "hnsw"):
                    self.emb_index.hnsw.efSearch = self.cfg.faiss_ef_search
            except Exception:
                pass

        if self.cfg.use_reranker:
            self.reranker = CrossEncoder(RERANKER_MODEL, device=DEVICE)

        if Llama is not None and Path(self.gguf_path).exists():
            self.llm = Llama(
                model_path=str(self.gguf_path),
                n_ctx=N_CTX,
                n_threads=N_THREADS,
                n_gpu_layers=N_GPU_LAYERS,
                logits_all=False,
                verbose=False,
            )

        self.known_codes_lower = set(
            norm(d.get("codigo"))
            for d in self.docs
            if d.get("kind") == "espacio" and d.get("codigo")
        )
        print(
            f"[OK] Índices listos en {time.time() - t0:.2f}s. Docs: {len(self.docs)} | "
            f"LLM={'ON' if self.llm else 'OFF'}"
        )

    def pin_exact_ids(self, query: str) -> List[int]:
        q_raw = query or ""
        qn = norm(q_raw)
        q_codes = set(t.lower() for t in CODE_RE.findall(q_raw))
        hits = []
        for i, d in enumerate(self.docs):
            if d.get("kind") != "espacio":
                continue
            code = (d.get("codigo") or "").lower().strip()
            if code and code in q_codes:
                hits.append(i)
                continue
            for al in (d.get("_aliases_norm") or []):
                if al and (al in qn or jaccard_trigram(al, qn) >= 0.82):
                    hits.append(i)
                    break
        return list(dict.fromkeys(hits))

    def retrieve_pool(self, query: str) -> Tuple[List[int], Dict[str, Any], List[int]]:
        t0 = time.time()
        qn = norm(query)

        bm25_list = build_bm25_candidates(
            qn,
            self.bm25,
            self.cfg.bm25_cand,
            self.cfg.min_score_bm25,
            self.cfg,
            self.docs,
        )
        emb_list = (
            build_embeddings_candidates(
                query,
                self.emb_model,
                self.emb_index,
                self.cfg.emb_cand,
                self.cfg.min_score_emb,
                self.cfg,
            )
            if self.cfg.use_embeddings
            else []
        )

        # stats de embeddings para gate
        emb_scores = [s for _, s in emb_list]
        best_bm25 = max((s for _, s in bm25_list), default=0.0)
        best_emb = max(emb_scores, default=0.0)
        if emb_scores:
            sorted_emb = sorted(emb_scores, reverse=True)
            median_emb = sorted_emb[len(sorted_emb) // 2]
            gap_top_med = best_emb - median_emb
        else:
            median_emb = 0.0
            gap_top_med = 0.0

        if self.cfg.debug:
            print(
                f"[SCORES] best_bm25={best_bm25:.4f} "
                f"best_emb={best_emb:.4f} "
                f"median_emb={median_emb:.4f} "
                f"gap_top_med={gap_top_med:.4f} "
                f"thr_emb_any={self.cfg.min_best_emb_for_any:.4f} "
                f"thr_gap={self.cfg.min_emb_gap:.4f} "
                f"bm25_cand={len(bm25_list)} emb_cand={len(emb_list)}"
            )

        # UMBRAL GLOBAL:
        #  - si no hay BM25
        #  - y embeddings no producen un doc que destaque (gap pequeño)
        #  => se considera que la query es ruido tipo "hola" y NO se usa contexto.
        if not bm25_list and emb_list:
            if best_emb < self.cfg.min_best_emb_for_any or gap_top_med < self.cfg.min_emb_gap:
                if self.cfg.debug:
                    print(
                        f"[GATE][NO-CONTEXTO] best_emb={best_emb:.3f}, "
                        f"median={median_emb:.3f}, gap={gap_top_med:.3f} "
                        f"-> pool vacío"
                    )
                return [], {}, []

        if not bm25_list and not emb_list:
            if self.cfg.debug:
                print("[GATE][NO-CONTEXTO] Sin candidatos BM25 ni embeddings.")
            return [], {}, []

        fused_ids = rrf_fuse([bm25_list, emb_list], k=self.cfg.pool_k, k_rrf=60)
        pins = self.pin_exact_ids(query)
        pool = list(dict.fromkeys(pins + fused_ids))[: self.cfg.pool_k]

        if self.cfg.debug:
            print(
                f"[POOL] query='{query}' pins={pins[:6]} pool_len={len(pool)} "
                f"docs_total={len(self.docs)}"
            )
            print(f"[TIMING][retrieve] elapsed={(time.time() - t0):.3f}s")

        facets = parse_facets_universal(query, self.docs, self.attr_catalog) if self.cfg.use_facets else {}
        if self.cfg.debug:
            print(f"[DEBUG][facets] {facets}")
        return pool, facets, pins

    def rerank_order(self, query: str, idxs: List[int]) -> List[int]:
        if not self.cfg.use_reranker or not self.reranker or not idxs:
            return idxs
        R = min(self.cfg.rerank_top_k, len(idxs))
        to_rerank = idxs[:R]
        rest = idxs[R:]

        pairs = [(query, self.docs[i].get("_raw_text", "")) for i in to_rerank]
        s = np.asarray(
            self.reranker.predict(
                pairs, batch_size=(32 if DEVICE == "cuda" else 8)
            ),
            dtype="float32",
        )
        ce = (s - s.min()) / (s.max() - s.min() + 1e-8) if len(s) > 1 else np.array([1.0], dtype="float32")

        q_persona = extract_person_query(norm(query))
        enc_bonus = np.zeros_like(ce)
        if q_persona:
            for k, idx in enumerate(to_rerank):
                d = self.docs[idx]
                encs = " ".join(d.get("encargados") or [])
                if encs and jaccard_tokens(encs, q_persona) >= 0.3:
                    enc_bonus[k] = self.cfg.person_prior_weight

        bonus = np.zeros_like(ce)
        prior = np.zeros_like(ce)
        for k, idx in enumerate(to_rerank):
            d = self.docs[idx]
            bonus[k] = lexical_bonus(query, d)
            prior[k] = self.cfg.kind_prior_weight if d.get("kind") == "espacio" else 0.0

        final = self.cfg.rerank_alpha * ce + self.cfg.rerank_bonus_weight * bonus + prior + enc_bonus
        order = np.argsort(final)[::-1]
        return [to_rerank[i] for i in order] + rest

    def build_context_filtered_first(self, facets: Dict[str, Any], pool_ranked: List[int], pins: List[int]) -> str:
        t0 = time.time()

        pool = [i for i in pool_ranked if self.docs[i].get("kind") == "espacio"]

        if facets:
            scored = [(i, doc_facet_match_score(self.docs[i], facets)) for i in pool]
            ordered_by_facets = [i for i, _ in sorted(scored, key=lambda kv: kv[1], reverse=True)]
        else:
            ordered_by_facets = pool

        filtered = list(dict.fromkeys(pins + ordered_by_facets))

        max_docs = self.cfg.top_k if self.cfg.top_k > 0 else len(filtered)

        ctx_lines: List[str] = []
        total = 0
        for i in filtered[:max_docs]:
            s = snippet_compacto(self.docs[i])
            if not s:
                continue
            if total + len(s) + 1 > self.cfg.ctx_chars:
                break
            ctx_lines.append(s)
            total += len(s) + 1

        if self.cfg.debug:
            print(
                f"[TIMING][build_context] elapsed={(time.time() - t0):.3f}s "
                f"included={len(ctx_lines)} chars={total}"
            )

        if self.evaluation_mode:
            return "\n".join(ctx_lines)

        header_lines: List[str] = []
        if facets:
            header_lines.append(
                f"[FACETS] activos={list(facets.keys())} documentos_in={len(pool)}"
            )
        header = ("\n".join(header_lines) + "\n") if header_lines else ""
        return header + "\n".join(ctx_lines)

    def enforce_context_guard(self, answer: str, context_text: str) -> str:
        ctx_codes = set(t.lower() for t in re.findall(r"[a-z]?\d{2,4}[a-z\d]*", (context_text or "").lower()))
        ans_codes = set(t.lower() for t in re.findall(r"[a-z]?\d{2,4}[a-z\d]*", (answer or "").lower()))
        if ans_codes and not ans_codes.issubset(ctx_codes):
            return "NO ESTÁ EN EL CONTEXTO"
        return answer

    def build_prompt(self, context_text: str, query: str) -> str:
        tpl = self.prompt_eval if self.evaluation_mode else self.prompt_agent
        return tpl.replace("{{HECHOS}}", context_text).replace("{{PREGUNTA}}", query)

    def generate(self, prompt: str) -> str:
        if not self.llm:
            return "Gen OFF (LLM no disponible)."
        t0 = time.time()
        temp = TEMPERATURE_EVAL if self.evaluation_mode else TEMPERATURE_AGENT
        out = self.llm(
            prompt=prompt,
            max_tokens=MAX_TOKENS,
            temperature=temp,
            top_p=1.0,
            repeat_penalty=1.1,
        )
        if self.cfg.debug:
            print(f"[TIMING][generate] elapsed={(time.time() - t0):.3f}s")
        return out["choices"][0]["text"].strip()

    def answer(self, query: str, show_context: bool = True) -> str:
        total_t0 = time.time()

        pool, facets, pins = self.retrieve_pool(query)

        # Caso sin contexto confiable: NO se llama al LLM.
        if not pool and not pins:
            if show_context:
                print("=== CONTEXTO ===\n[sin resultados relevantes]\n=== RESPUESTA ===")
            text = "No consta en el contexto lo que se pregunta."
            if self.cfg.debug:
                print(f"[TIMING][total] {(time.time() - total_t0):.3f}s (sin contexto)")
            return text

        ranked = self.rerank_order(query, pool)
        ctx = self.build_context_filtered_first(facets, ranked, pins)

        if show_context:
            print("=== CONTEXTO ===\n" + ctx + "\n=== RESPUESTA ===")

        if self.cfg.generate_enabled:
            prompt = self.build_prompt(ctx, query)
            text = self.generate(prompt)
            if self.evaluation_mode:
                text = self.enforce_context_guard(text, ctx)
        else:
            text = "Contexto (LLM desactivado)."

        if self.cfg.debug:
            print(f"[TIMING][total] {(time.time() - total_t0):.3f}s")
        return text

    def peek(self, query: str, n: int = 5) -> str:
        pool, facets, pins = self.retrieve_pool(query)
        if not pool and not pins:
            return "[sin resultados relevantes]"
        ranked = self.rerank_order(query, pool)
        filtered = list(dict.fromkeys(pins + ranked))
        chosen = [self.docs[i] for i in filtered if self.docs[i].get("kind") == "espacio"][:n]
        return "\n".join(f"{k+1}. {snippet_compacto(d)}" for k, d in enumerate(chosen, 1))

# ============================ CLI ============================

CLI_HELP = """Comandos:
  :peek [n]                -> vista rápida (top-N)
  :k N | :pool N           -> ajusta top_k / pool_k
  :emb on|off              -> embeddings (dense) ON/OFF
  :rerank on|off           -> CrossEncoder ON/OFF (solo reordena)
  :gen on|off              -> generación LLM ON/OFF (retrieval no cambia)
  :ctx N                   -> tamaño de contexto (caracteres)
  :embmin X                -> min_score_emb (p.ej. 0.03)
  :bm25min X               -> min_score_bm25 (p.ej. 0.0)
  :fuzzy on|off            -> bono difuso por trigramas (typos)
  :fuzzylim X              -> similitud mínima para bono difuso (0.30..0.90)
  :gate on|off             -> hard gate léxico (más precisión)
  :gatelen N               -> longitud mínima del token clave (default 4)
  :gatesim X               -> similitud trigram para gate (p.ej. 0.60)
  :facetmode soft|strict   -> priorizar (soft) o filtrar (strict)
  :facetmin N              -> umbral mínimo para mantener strict (fallback si baja)
  :mode agent|eval         -> cambia entre modo agente y modo evaluación
  :debug on|off            -> activa/desactiva logs de depuración
  :config | :rebuild       -> ver config / reconstruir índices
  :exit                    -> salir
"""

def cli():
    global engine
    cfg = RAGConfig(
        top_k=12,
        pool_k=250,
        bm25_cand=500,
        emb_cand=300,
        min_score_bm25=0.0,
        min_score_emb=0.0,
        ctx_chars=2200,
        use_embeddings=True,
        use_reranker=True,
        use_facets=True,
        generate_enabled=True,
        rerank_top_k=(150 if DEVICE == "cuda" else 60),
        rerank_alpha=0.85,
        rerank_bonus_weight=0.15,
        kind_prior_weight=0.08,
        person_prior_weight=0.12,
        faiss_M=32,
        faiss_ef_search=150,
        facet_mode="soft",
        min_faceted=3,
        debug=True,
        # puedes ajustar estos umbrales si ves que son muy estrictos / laxos
        min_best_emb_for_any=0.45,
        min_emb_gap=0.05,
    )
    engine = RAGEngine(
        JSON_PATH,
        PROMPT_PATH_AGENT,
        PROMPT_PATH_EVAL,
        GGUF_PATH,
        EMB_MODEL_NAME,
        cfg,
        evaluation_mode=False,
    )

    print("RAG listo. (Enter vacío o 'salir' para terminar)\n")
    print(CLI_HELP)
    while True:
        try:
            q = input("Tú> ").strip()
        except EOFError:
            break
        if not q or q.lower() in {"salir", "exit", "quit", ":exit"}:
            break

        if q.startswith(":peek"):
            parts = q.split()
            n = int(parts[1]) if len(parts) >= 2 and parts[1].isdigit() else 5
            print(engine.peek(input("Consulta para peek> ").strip(), n))
            continue

        if q.startswith(":k "):
            try:
                engine.cfg.top_k = int(q.split()[1])
                print(f"[OK] top_k = {engine.cfg.top_k}")
            except Exception:
                print("[ERR] Uso: :k 12")
            continue

        if q.startswith(":pool "):
            try:
                engine.cfg.pool_k = int(q.split()[1])
                print(f"[OK] pool_k = {engine.cfg.pool_k}")
            except Exception:
                print("[ERR] Uso: :pool 80")
            continue

        if q.startswith(":emb "):
            val = q.split()[1].lower()
            engine.cfg.use_embeddings = val == "on"
            print(f"[OK] embeddings = {'ON' if engine.cfg.use_embeddings else 'OFF'}")
            continue

        if q.startswith(":rerank "):
            val = q.split()[1].lower()
            engine.cfg.use_reranker = val == "on"
            if engine.cfg.use_reranker and engine.reranker is None:
                engine.reranker = CrossEncoder(RERANKER_MODEL, device=DEVICE)
            print(f"[OK] re-ranker = {'ON' if engine.cfg.use_reranker else 'OFF'}")
            continue

        if q.startswith(":gen "):
            val = q.split()[1].lower()
            engine.cfg.generate_enabled = val == "on"
            print(f"[OK] generación LLM = {'ON' if engine.cfg.generate_enabled else 'OFF'}")
            continue

        if q.startswith(":ctx "):
            try:
                engine.cfg.ctx_chars = int(q.split()[1])
                print(f"[OK] ctx_chars = {engine.cfg.ctx_chars}")
            except Exception:
                print("[ERR] Uso: :ctx 2200")
            continue

        if q.startswith(":embmin "):
            try:
                engine.cfg.min_score_emb = float(q.split()[1])
                print(f"[OK] min_score_emb = {engine.cfg.min_score_emb:.3f}")
            except Exception:
                print("[ERR] Uso: :embmin 0.03")
            continue

        if q.startswith(":bm25min "):
            try:
                engine.cfg.min_score_bm25 = float(q.split()[1])
                print(f"[OK] min_score_bm25 = {engine.cfg.min_score_bm25:.3f}")
            except Exception:
                print("[ERR] Uso: :bm25min 0.0")
            continue

        if q.startswith(":fuzzy "):
            val = q.split()[1].lower()
            engine.cfg.enable_fuzzy_bonus = val == "on"
            print(f"[OK] fuzzy bonus = {'ON' if engine.cfg.enable_fuzzy_bonus else 'OFF'}")
            continue

        if q.startswith(":fuzzylim "):
            try:
                engine.cfg.fuzzy_min_sim = float(q.split()[1])
                print(f"[OK] fuzzy_min_sim = {engine.cfg.fuzzy_min_sim:.2f}")
            except Exception:
                print("[ERR] Uso: :fuzzylim 0.30")
            continue

        if q.startswith(":gate "):
            val = q.split()[1].lower()
            engine.cfg.enable_hard_gate = val == "on"
            print(f"[OK] hard gate = {'ON' if engine.cfg.enable_hard_gate else 'OFF'}")
            continue

        if q.startswith(":gatelen "):
            try:
                engine.cfg.gate_min_len = int(q.split()[1])
                print(f"[OK] gate_min_len = {engine.cfg.gate_min_len}")
            except Exception:
                print("[ERR] Uso: :gatelen 4")
            continue

        if q.startswith(":gatesim "):
            try:
                engine.cfg.gate_trigram_sim = float(q.split()[1])
                print(f"[OK] gate_trigram_sim = {engine.cfg.gate_trigram_sim:.2f}")
            except Exception:
                print("[ERR] Uso: :gatesim 0.60")
            continue

        if q.startswith(":facetmode "):
            val = q.split()[1].lower()
            if val in {"soft", "strict"}:
                engine.cfg.facet_mode = val
                print(f"[OK] facet_mode = {engine.cfg.facet_mode}")
            else:
                print("[ERR] Uso: :facetmode soft|strict")
            continue

        if q.startswith(":facetmin "):
            try:
                engine.cfg.min_faceted = int(q.split()[1])
                print(f"[OK] min_faceted = {engine.cfg.min_faceted}")
            except Exception:
                print("[ERR] Uso: :facetmin 3")
            continue

        if q.startswith(":mode "):
            val = q.split()[1].lower()
            if val in {"agent", "eval"}:
                engine.evaluation_mode = val == "eval"
                print(
                    f"[OK] modo = {'EVALUACIÓN' if engine.evaluation_mode else 'AGENTE'}"
                )
            else:
                print("[ERR] Uso: :mode agent|eval")
            continue

        if q.startswith(":debug "):
            val = q.split()[1].lower()
            engine.cfg.debug = val == "on"
            print(f"[OK] debug = {'ON' if engine.cfg.debug else 'OFF'}")
            continue

        if q == ":config":
            print(engine.cfg)
            continue

        if q == ":rebuild":
            engine.rebuild_all()
            continue

        ans = engine.answer(q, show_context=True)
        print(ans)

if __name__ == "__main__":
    try:
        cli()
    except KeyboardInterrupt:
        print()


[RAG] DEVICE = cuda
[TIMING][build_embeddings] total=7.454s
[OK] Índices listos en 23.21s. Docs: 25 | LLM=ON
RAG listo. (Enter vacío o 'salir' para terminar)

Comandos:
  :peek [n]                -> vista rápida (top-N)
  :k N | :pool N           -> ajusta top_k / pool_k
  :emb on|off              -> embeddings (dense) ON/OFF
  :rerank on|off           -> CrossEncoder ON/OFF (solo reordena)
  :gen on|off              -> generación LLM ON/OFF (retrieval no cambia)
  :ctx N                   -> tamaño de contexto (caracteres)
  :embmin X                -> min_score_emb (p.ej. 0.03)
  :bm25min X               -> min_score_bm25 (p.ej. 0.0)
  :fuzzy on|off            -> bono difuso por trigramas (typos)
  :fuzzylim X              -> similitud mínima para bono difuso (0.30..0.90)
  :gate on|off             -> hard gate léxico (más precisión)
  :gatelen N               -> longitud mínima del token clave (default 4)
  :gatesim X               -> similitud trigram para gate (p.ej. 0.60)
  :face

Tú>  hola, en que piso del bloque a2 se encuentra el ing pablo


[BM25] toks_query=['hola', 'en', 'que', 'piso', 'del', 'bloque', 'a2', 'se', 'encuentra', 'el', 'ing', 'pablo'] max=5.6121 min=0.6648 thr=0.0000
[BM25][CANDS] n=25 max_kept=5.6121 min_kept=0.6648
[TIMING][bm25] elapsed=0.003s returned=25
[EMB][RAW] max=0.8161 min=-340282346638528859811704183484516925440.0000 thr_min_score=0.0000
[EMB][CANDS] n=25 max_kept=0.8161 min_kept=0.7913
[TIMING][emb] elapsed=0.031s returned=25
[SCORES] best_bm25=5.6121 best_emb=0.8161 median_emb=0.8038 gap_top_med=0.0123 thr_emb_any=0.4500 thr_gap=0.0500 bm25_cand=25 emb_cand=25
[POOL] query='hola, en que piso del bloque a2 se encuentra el ing pablo' pins=[] pool_len=25 docs_total=25
[TIMING][retrieve] elapsed=0.034s
[DEBUG][facets] {'bloque': 'A2'}
[TIMING][build_context] elapsed=0.000s included=6 chars=2084
=== CONTEXTO ===
[FACETS] activos=['bloque'] documentos_in=25
Código=A214 | Nombre=Sala de Docencia | Ubicación=Bloque A2 ; Piso 1 | Dirección=Al frente del laboratorio A212 y despues de la sala A213 ; Lad

Tú>  hola, en que piso del bloque a2 se encuentra el ing pabel


[BM25] toks_query=['hola', 'en', 'que', 'piso', 'del', 'bloque', 'a2', 'se', 'encuentra', 'el', 'ing', 'pabel'] max=9.4716 min=0.6648 thr=0.0000
[BM25][CANDS] n=25 max_kept=9.4716 min_kept=0.6648
[TIMING][bm25] elapsed=0.002s returned=25
[EMB][RAW] max=0.8171 min=-340282346638528859811704183484516925440.0000 thr_min_score=0.0000
[EMB][CANDS] n=25 max_kept=0.8171 min_kept=0.7893
[TIMING][emb] elapsed=0.028s returned=25
[SCORES] best_bm25=9.4716 best_emb=0.8171 median_emb=0.7999 gap_top_med=0.0172 thr_emb_any=0.4500 thr_gap=0.0500 bm25_cand=25 emb_cand=25
[POOL] query='hola, en que piso del bloque a2 se encuentra el ing pabel' pins=[] pool_len=25 docs_total=25
[TIMING][retrieve] elapsed=0.031s
[DEBUG][facets] {'bloque': 'A2'}
[TIMING][build_context] elapsed=0.000s included=6 chars=2174
=== CONTEXTO ===
[FACETS] activos=['bloque'] documentos_in=25
Código=No Aplica | Nombre=Departamento de Investigacion e Innovación Tecnológica - I2TEC | Ubicación=Bloque A2 ; Piso 4 | Dirección=Al frente d

In [6]:
# ==================== BLOQUE 2: Evaluación con ROUGE-1 + Faithfulness ====================
import json, csv, unicodedata, re
from collections import defaultdict, Counter
from pathlib import Path
from typing import Any

EVAL_FILE = "eval.jsonl"

CSV_OUT_DETALLE        = "resultados_eval_detallado.csv"
CSV_OUT_RESUMEN_GLOBAL = "resultados_eval_resumen_global.csv"
CSV_OUT_RESUMEN_TIPO   = "resultados_eval_resumen_por_tipo.csv"
CSV_OUT_DEBUG          = "resultados_eval_debug.csv"  # opcional

# ---------------------------------------------------------
# 1. Carga de casos de evaluación
# ---------------------------------------------------------
eval_items = []
with open(EVAL_FILE, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        eval_items.append(json.loads(line))

print(f"Casos cargados: {len(eval_items)}")

# ---------------------------------------------------------
# 2. Instanciar motor en modo evaluación
# ---------------------------------------------------------
cfg_eval = RAGConfig(
    top_k=12,
    pool_k=250,
    bm25_cand=500,
    emb_cand=300,
    min_score_bm25=0.0,
    min_score_emb=0.0,
    ctx_chars=2200,
    use_embeddings=True,
    use_reranker=False,   
    use_facets=False,    
    generate_enabled=True,
    rerank_top_k=(150 if DEVICE == "cuda" else 60),
    rerank_alpha=0.85,
    rerank_bonus_weight=0.15,
    kind_prior_weight=0.08,
    person_prior_weight=0.12,
    faiss_M=32,
    faiss_ef_search=150,
    facet_mode="soft",
    min_faceted=3,
    debug=False,  # menos ruido en modo batch
)

engine_eval = RAGEngine(
    JSON_PATH,
    PROMPT_PATH_AGENT,
    PROMPT_PATH_EVAL,
    GGUF_PATH,
    EMB_MODEL_NAME,
    cfg_eval,
    evaluation_mode=True,
)

# ---------------------------------------------------------
# 3. Utilidades de normalización y ROUGE-1 clásico
# ---------------------------------------------------------
def strip_accents(s: str) -> str:
    s = unicodedata.normalize("NFD", s)
    return "".join(c for c in s if unicodedata.category(c) != "Mn")

def normalize_basic(text: str) -> str:
    """
    Normalización ligera, aceptable para ROUGE clásico:
    - lower
    - sin acentos
    - sin puntuación
    - colapsa espacios
    - quita 'respuesta:' al inicio (si aparece)
    """
    if text is None:
        return ""
    t = str(text).strip().lower()
    t = strip_accents(t)
    # quitar prefijo 'respuesta' (el modelo a veces lo pone solo por estilo)
    t = re.sub(r"^respuesta\s*:\s*", "", t)
    # eliminar puntuación (deja solo letras/números/espacios)
    t = re.sub(r"[^a-z0-9áéíóúñ]+", " ", t)
    t = re.sub(r"\s+", " ", t).strip()
    return t

def rouge1_scores(pred: str, gold: str) -> tuple[float, float, float, list[str], list[str]]:
    """
    ROUGE-1 clásico:
    - tokens = unigrams después de normalización básica
    - matching con multiset (Counter)
    - devuelve P, R, F1 + listas de tokens no coincidentes (para debug)
    """
    ng = normalize_basic(gold)
    npred = normalize_basic(pred)

    gold_tokens = ng.split() if ng else []
    pred_tokens = npred.split() if npred else []

    if not gold_tokens and not pred_tokens:
        return 1.0, 1.0, 1.0, [], []
    if not gold_tokens or not pred_tokens:
        # no hay solape posible
        return 0.0, 0.0, 0.0, pred_tokens, gold_tokens

    gold_counts = Counter(gold_tokens)
    pred_counts = Counter(pred_tokens)

    overlap = 0
    for tok, c in pred_counts.items():
        overlap += min(c, gold_counts.get(tok, 0))

    precision = overlap / sum(pred_counts.values()) if pred_counts else 0.0
    recall    = overlap / sum(gold_counts.values()) if gold_counts else 0.0
    if precision + recall == 0:
        f1 = 0.0
    else:
        f1 = 2 * precision * recall / (precision + recall)

    # tokens que se dijeron en pred y no aparecen en gold (para inspección)
    pred_only = sorted((pred_counts - gold_counts).elements())
    gold_only = sorted((gold_counts - pred_counts).elements())

    return precision, recall, f1, pred_only, gold_only

# ---------------------------------------------------------
# 4. Faithfulness = cobertura de evidencia en contexto
# ---------------------------------------------------------
STOPWORDS = {
    "el","la","los","las","un","una","unos","unas",
    "de","del","al","a","y","o","u","en","por","para","con",
    "que","qué","quien","quién","donde","dónde","como","cómo",
    "es","son","está","estan","esta","están",
    "laboratorio","laboratorios","sala","salas","departamento","departamentos",
    "baño","banio","banos","banios","cuarto","cuartos",
    "piso","pisos","bloque","bloques","lado","lados",
    "tiene","hay","esta","está","se","encuentra","queda",
    "si","sí","no","respuesta"
}

def tokenize_content(text: str) -> list[str]:
    t = normalize_basic(text)
    toks = t.split()
    return [tok for tok in toks if tok not in STOPWORDS]

def faithfulness_score(pred: str, context: str) -> tuple[float, list[str], list[str]]:
    """
    Faithfulness sencilla:
    - tokens de contenido de la respuesta
    - tokens del contexto
    - score = (#tokens_de_respuesta_que_aparecen_en_contexto) / (#tokens_de_respuesta)
    Devuelve score, lista soportados, lista NO soportados.
    """
    ans_tokens = tokenize_content(pred)
    ctx_tokens = set(tokenize_content(context))

    if not ans_tokens:
        # si la respuesta no tiene contenido (ej. "Sí." / "No."),
        # no penalizamos por faithfulness
        return 1.0, [], []

    supported = [t for t in ans_tokens if t in ctx_tokens]
    unsupported = [t for t in ans_tokens if t not in ctx_tokens]

    score = len(supported) / len(ans_tokens) if ans_tokens else 0.0
    return score, supported, unsupported

# ---------------------------------------------------------
# 5. Bucle de evaluación
# ---------------------------------------------------------
global_rouge_f1_sum = 0.0
global_rouge_p_sum  = 0.0
global_faith_sum    = 0.0
n_cases = 0

# guardamos por tipo: (precision, f1, faithfulness)
per_type_scores: dict[str, list[tuple[float, float, float]]] = defaultdict(list)

# detalle legible por caso
rows_for_csv: list[dict[str, Any]] = []
# debug opcional muy detallado
debug_rows_for_csv: list[dict[str, Any]] = []

for idx, item in enumerate(eval_items, start=1):
    qid   = item.get("id", f"case_{idx}")
    q     = item.get("question") or item.get("query") or ""
    gold  = item.get("gold", "")
    qtype = item.get("type", "OTRO")

    print(f"\n==================== Caso {idx} / {len(eval_items)} ====================")
    print(f"ID: {qid}")
    print(f"PREGUNTA: {q}")
    print(f"GOLD: {gold}")

    # --- Ejecutar pipeline manual para poder capturar contexto ---
    pool, facets, pins = engine_eval.retrieve_pool(q)
    ranked = engine_eval.rerank_order(q, pool)
    ctx = engine_eval.build_context_filtered_first(facets, ranked, pins)
    prompt = engine_eval.build_prompt(ctx, q)
    raw_answer = engine_eval.generate(prompt)
    pred = engine_eval.enforce_context_guard(raw_answer, ctx)

    print(f"PRED : {pred}")

    # --- ROUGE-1 clásico ---
    r_p, r_r, r_f1, pred_only, gold_only = rouge1_scores(pred, gold)

    # --- Faithfulness ---
    faith, supported_tokens, unsupported_tokens = faithfulness_score(pred, ctx)

    global_rouge_f1_sum += r_f1
    global_rouge_p_sum  += r_p
    global_faith_sum    += faith
    n_cases += 1

    per_type_scores[qtype].append((r_p, r_f1, faith))

    print(f"ROUGE-1 precision: {r_p:.4f}")
    print(f"ROUGE-1 F1       : {r_f1:.4f}")
    print(f"Faithfulness     : {faith:.4f}")
    if unsupported_tokens:
        print(f"TOKENS RESPUESTA NO RESPALDADOS POR CONTEXTO: {unsupported_tokens}")

    avg_r_f1 = global_rouge_f1_sum / n_cases
    avg_r_p  = global_rouge_p_sum  / n_cases
    avg_f    = global_faith_sum    / n_cases
    print("----------------------------------------------")
    print(f"PROMEDIO ROUGE-1 PRECISION HASTA AHORA: {avg_r_p:.4f}")
    print(f"PROMEDIO ROUGE-1 F1 HASTA AHORA       : {avg_r_f1:.4f}")
    print(f"PROMEDIO FAITHFULNESS HASTA AHORA     : {avg_f:.4f}")

    # --- guardar fila detallada (pero legible) para Excel ---
    rows_for_csv.append({
        "id": qid,
        "type": qtype,
        "question": q,
        "gold": gold,
        "pred": pred,
        "rouge1_precision": r_p,
        "rouge1_recall": r_r,
        "rouge1_f1": r_f1,
        "faithfulness": faith,
        # señal simple de si hay tokens no respaldados por contexto
        "tiene_tokens_no_respaldo": int(len(unsupported_tokens) > 0),
    })

    # --- (Opcional) debug súper detallado por caso ---
    debug_rows_for_csv.append({
        "id": qid,
        "type": qtype,
        "question": q,
        "gold": gold,
        "pred": pred,
        "rouge1_f1": r_f1,
        "faithfulness": faith,
        "rouge_pred_only_tokens": " ".join(pred_only),
        "rouge_gold_only_tokens": " ".join(gold_only),
        "faith_supported_tokens": " ".join(supported_tokens),
        "faith_unsupported_tokens": " ".join(unsupported_tokens),
        "context_used": ctx,
    })

# ---------------------------------------------------------
# 6. Resumen global y por tipo
# ---------------------------------------------------------
print("\n==================== RESUMEN FINAL ====================")
print(f"Casos evaluados: {n_cases}")
avg_rouge_f1 = global_rouge_f1_sum / n_cases if n_cases else 0.0
avg_rouge_p  = global_rouge_p_sum  / n_cases if n_cases else 0.0
avg_faith    = global_faith_sum    / n_cases if n_cases else 0.0
print(f"ROUGE-1 precision promedio: {avg_rouge_p:.4f}")
print(f"ROUGE-1 F1 promedio       : {avg_rouge_f1:.4f}")
print(f"Faithfulness promedio     : {avg_faith:.4f}")

# Estructura para CSV de resumen global
resumen_global = [{
    "casos_evaluados": n_cases,
    "rouge1_precision_promedio": avg_rouge_p,
    "rouge1_f1_promedio": avg_rouge_f1,
    "faithfulness_promedio": avg_faith,
}]

print("\n==================== PROMEDIOS POR TIPO ====================")
resumen_por_tipo: list[dict[str, Any]] = []
for t, vals in per_type_scores.items():
    if not vals:
        continue
    p_mean = sum(v[0] for v in vals) / len(vals)  # precision
    f1_mean = sum(v[1] for v in vals) / len(vals) # f1
    faith_mean = sum(v[2] for v in vals) / len(vals)
    resumen_por_tipo.append({
        "type": t,
        "casos": len(vals),
        "rouge1_precision_promedio": p_mean,
        "rouge1_f1_promedio": f1_mean,
        "faithfulness_promedio": faith_mean,
    })
    print(
        f"Tipo {t}: casos={len(vals)} | "
        f"ROUGE-1 precision={p_mean:.4f} | "
        f"ROUGE-1 F1={f1_mean:.4f} | "
        f"Faithfulness={faith_mean:.4f}"
    )

# ---------------------------------------------------------
# 7. Guardar CSVs legibles
# ---------------------------------------------------------

# 7.1 Detalle por caso
if rows_for_csv:
    detalle_fields = list(rows_for_csv[0].keys())
    with open(CSV_OUT_DETALLE, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=detalle_fields)
        writer.writeheader()
        writer.writerows(rows_for_csv)
    print(f"Resultados detallados guardados en: {CSV_OUT_DETALLE}")

# 7.2 Resumen global
if resumen_global:
    with open(CSV_OUT_RESUMEN_GLOBAL, "w", encoding="utf-8", newline="") as f:
        fieldnames = list(resumen_global[0].keys())
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(resumen_global)
    print(f"Resumen global guardado en: {CSV_OUT_RESUMEN_GLOBAL}")

# 7.3 Resumen por tipo de pregunta
if resumen_por_tipo:
    with open(CSV_OUT_RESUMEN_TIPO, "w", encoding="utf-8", newline="") as f:
        fieldnames = list(resumen_por_tipo[0].keys())
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(resumen_por_tipo)
    print(f"Resumen por tipo guardado en: {CSV_OUT_RESUMEN_TIPO}")

# 7.4 (Opcional) CSV de debug súper detallado
if debug_rows_for_csv:
    debug_fields = list(debug_rows_for_csv[0].keys())
    with open(CSV_OUT_DEBUG, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=debug_fields)
        writer.writeheader()
        writer.writerows(debug_rows_for_csv)
    print(f"Debug detallado guardado en: {CSV_OUT_DEBUG}")


Casos cargados: 69
[TIMING][build_embeddings] total=11.265s
[OK] Índices listos en 12.20s. Docs: 25 | LLM=ON

ID: NOMBRE_A211
PREGUNTA: ¿Cómo se llama el laboratorio A211?
GOLD: Laboratorio Integrado de Manufactura.
PRED : Laboratorio Integrado de Manufactura.
ROUGE-1 precision: 1.0000
ROUGE-1 F1       : 1.0000
Faithfulness     : 1.0000
----------------------------------------------
PROMEDIO ROUGE-1 PRECISION HASTA AHORA: 1.0000
PROMEDIO ROUGE-1 F1 HASTA AHORA       : 1.0000
PROMEDIO FAITHFULNESS HASTA AHORA     : 1.0000

ID: NOMBRE_A212
PREGUNTA: ¿Cómo se llama el laboratorio A212?
GOLD: Laboratorio de Energia y Fluidos.
PRED : Laboratorio de Energia y Fluidos.
ROUGE-1 precision: 1.0000
ROUGE-1 F1       : 1.0000
Faithfulness     : 1.0000
----------------------------------------------
PROMEDIO ROUGE-1 PRECISION HASTA AHORA: 1.0000
PROMEDIO ROUGE-1 F1 HASTA AHORA       : 1.0000
PROMEDIO FAITHFULNESS HASTA AHORA     : 1.0000

ID: NOMBRE_A221
PREGUNTA: ¿Cómo se llama el laboratorio A221?


In [2]:
import gradio as gr

def preguntar_rag(pregunta: str) -> str:
    if not pregunta.strip():
        return "Por favor escribe una pregunta"
    if engine is None:
        return "El motor RAG todavía no está inicializado. Ejecuta primero el bloque 1 (CLI) y sal con :exit."
    respuesta = engine.answer(pregunta, show_context=False)
    return respuesta

titulo = "Asistente de Localización PTIC2 – UNL"
descripcion = (
    "Escribe una pregunta sobre los espacios del edificio 2 "
    "de la UNL (laboratorios, salas, baños, etc.)."
)

demo = gr.Interface(
    fn=preguntar_rag,
    inputs=gr.Textbox(lines=3, label="Pregunta"),
    outputs=gr.Textbox(lines=8, label="Respuesta"),
    title=titulo,
    description=descripcion,
)

demo.launch(server_name="0.0.0.0", server_port=7000)


* Running on local URL:  http://0.0.0.0:7000
* To create a public link, set `share=True` in `launch()`.




[TIMING][bm25] elapsed=0.003s returned=25
[TIMING][emb] elapsed=12.444s returned=25
[POOL] query='Hola' pins=[] pool_len=25 docs_total=25
[TIMING][retrieve] elapsed=12.488s
[DEBUG][facets] {}
[TIMING][build_context] elapsed=0.001s included=5 chars=1879
[TIMING][generate] elapsed=63.482s
[TIMING][total] 83.014s
[TIMING][bm25] elapsed=0.002s returned=25
[TIMING][emb] elapsed=11.818s returned=25
[POOL] query='Hola, en donde esta computacion aplicada?' pins=[12] pool_len=25 docs_total=25
[TIMING][retrieve] elapsed=11.999s
[DEBUG][facets] {'carrera': ['Computación']}
[TIMING][build_context] elapsed=0.002s included=7 chars=2133
[TIMING][generate] elapsed=39.721s
[TIMING][total] 60.729s
Created dataset file at: .gradio\flagged\dataset1.csv
[TIMING][bm25] elapsed=0.000s returned=25
[TIMING][emb] elapsed=11.394s returned=25
[POOL] query='Hola' pins=[] pool_len=25 docs_total=25
[TIMING][retrieve] elapsed=11.406s
[DEBUG][facets] {}
[TIMING][build_context] elapsed=0.002s included=5 chars=1879
[TIM

In [5]:
#import sys
#!{sys.executable} -m pip install gradio
import socket
socket.gethostbyname(socket.gethostname())


'192.168.1.9'