In [None]:
#@title install
#@markdown Instala PYD (Pydantic), DLG (Docling), Qwen stack y **sqlite-vec==0.1.6**. Agrega **PyMuPDF** como _fallback_ para PDFs escaneados.

import sys, subprocess, os
# Opt in to accelerated Hub downloads. Releases of huggingface_hub that honour this flag
# need the `hf_transfer` package to be importable, so it is installed below as well.
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

pkgs = [
    "pydantic>=2.6",
    "docling",
    "transformers>=4.42.0",
    "accelerate>=0.31.0",
    "torch",
    "huggingface_hub>=0.23.0",
    "sqlite-vec==0.1.6",
    "scikit-learn>=1.4",
    "pymupdf>=1.24.2",  # robust fallback parser
    "hf_transfer",  # backend required when HF_HUB_ENABLE_HF_TRANSFER=1
    "easyocr",  # imported directly by the configuration cell
]
# Install one requirement at a time so the log shows which package is being processed;
# check_call raises CalledProcessError and stops the cell on the first failure.
for p in pkgs:
    print("Installing", p)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", p])
print("✅ Instalación completada")


Installing pydantic>=2.6
Installing docling
Installing transformers>=4.42.0
Installing accelerate>=0.31.0
Installing torch
Installing huggingface_hub>=0.23.0
Installing sqlite-vec==0.1.6
Installing scikit-learn>=1.4
Installing pymupdf>=1.24.2
✅ Instalación completada


In [None]:
#@title load dependencies
#@markdown Configuración base, constantes y `meta-data.json`.
from __future__ import annotations
import os, json, math, sqlite3, hashlib
from dataclasses import dataclass
from typing import List, Tuple, Optional, Dict, Any
from pathlib import Path

import json, sqlite3

import numpy as np
from pydantic import BaseModel, Field

# Torch/Transformers
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

# Root directory for every artifact written by this cell (created if missing).
WORKDIR = "/content"
os.makedirs(WORKDIR, exist_ok=True)

# File name and full path of the vector database, stored inside WORKDIR.
DB_NAME = "embeddings.db"
DB_PATH = os.path.join(WORKDIR, DB_NAME)

# Hugging Face repository ids for the embedding, reranking and generation models.
MODEL_EMB_REPO = "Qwen/Qwen3-Embedding-0.6B"
MODEL_RERANK_REPO = "Qwen/Qwen3-Reranker-0.6B"
MODEL_LLM_REPO = "Qwen/Qwen3-4B-Instruct-2507"

# Local directories the models are loaded from.
# NOTE: unlike the first two paths, the LLM path keeps the "Qwen/" prefix and therefore
# resolves to a nested directory under MODELS_DIR.
MODELS_DIR = "/content/models"
EMB_LOCAL_PATH = os.path.join(MODELS_DIR, "Qwen3-Embedding-0.6B")
RER_LOCAL_PATH = os.path.join(MODELS_DIR, "Qwen3-Reranker-0.6B")
LLM_LOCAL_PATH = os.path.join(MODELS_DIR, "Qwen/Qwen3-4B-Instruct-2507")

# Width of the embedding vectors (presumably the embedding model's output size — confirm).
EMB_DIM = 1024
# "cuda" when a CUDA device is visible to torch, otherwise "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Maps a "YYYY-MM" key to the PDF file names expected for that month.
PACKAGE_INDEX = {
    "2025-05": ["05. Manual Red Comercial Mayo 2025.pdf"],
    "2025-06": ["06. Manual Red Comercial Junio 2025.pdf"],
    "2025-07": [
        "07. Manual Multiplicadores Red Comercial y Remota Julio 2025.pdf",
        "07. Manual Red Comercial y Asesor Digital Julio 2025.pdf",
    ],
}

# Persist the package index and the database path as UTF-8 JSON (non-ASCII kept as is).
META_PATH = os.path.join(WORKDIR, "meta-data.json")
with open(META_PATH, "w", encoding="utf-8") as f:
    json.dump({"PACKAGE_INDEX": PACKAGE_INDEX, "db_path": DB_PATH}, f, ensure_ascii=False, indent=2)

print("✅ meta-data.json:", META_PATH)
print("VDB:", DB_PATH)

# --- EasyOCR: modelos locales + prewarm silencioso ------------------------
import logging
import easyocr

# Model directory (persists for as long as the session lives)
EASYOCR_MODEL_DIR = os.path.join(MODELS_DIR, ".EasyOCR")
os.makedirs(EASYOCR_MODEL_DIR, exist_ok=True)

# Raise the easyocr loggers to ERROR and detach them from the root logger, so
# lower-severity records from these loggers are not emitted.
for _name in ("easyocr", "easyocr.easyocr"):
    _lg = logging.getLogger(_name)
    _lg.setLevel(logging.ERROR)
    _lg.propagate = False

def ensure_easyocr_ready(langs=("es","en"), gpu=None, do_prewarm=True):
    """
    Instantiate an ``easyocr.Reader`` once so its model files are present and loadable.

    Args:
        langs: Language codes handed to the reader (converted to a list).
        gpu: Whether the reader should use the GPU; ``None`` means "use CUDA if torch
            reports it as available".
        do_prewarm: When false the function does nothing.

    Returns:
        None. The reader is built with downloads enabled and ``verbose=False``, stores
        its models under ``EASYOCR_MODEL_DIR`` and is discarded afterwards.
    """
    if do_prewarm:
        use_gpu = torch.cuda.is_available() if gpu is None else gpu
        reader_options = {
            "gpu": use_gpu,
            "model_storage_directory": EASYOCR_MODEL_DIR,
            "download_enabled": True,
            "verbose": False,
        }
        # Only the construction side effect matters here; the instance is not kept.
        easyocr.Reader(list(langs), **reader_options)

# The prewarm can be skipped by exporting EASYOCR_PREWARM=0 (any other value, or an
# unset variable, keeps it enabled).
_do_prewarm = os.environ.get("EASYOCR_PREWARM", "1") != "0"
ensure_easyocr_ready(do_prewarm=_do_prewarm)
# --------------------------------------------------------------------------


✅ meta-data.json: /content/meta-data.json
VDB: /content/embeddings.db


In [None]:
#@title Define Programs
#@markdown Define los *system prompts* y *builders* (español) para Reranker, SHORT, LARGE, Keywords y Fallback.

# ============================
# RERANKER (yes/no en inglés)
# ============================
# System message for the relevance check: Spanish instructions, English "yes"/"no" answer.
RERANK_SYSTEM_ES = (
    "Eres un verificador de relevancia. Debes decidir si el Documento responde la Consulta, "
    "siguiendo la Instrucción. Responde ÚNICAMENTE con \"yes\" o \"no\" (en minúsculas, en inglés). "
    "No expliques tu decisión. Considera el Documento tal cual está (no inventes)."
)

def build_rerank_prompt_es(instruccion: str, consulta: str, documento_md: str) -> str:
    """Build the chat-formatted reranking prompt.

    The result is a system turn carrying ``RERANK_SYSTEM_ES``, a user turn with the
    instruction, query and document on one labelled line each, and an opened
    assistant turn for the model to complete.
    """
    system_turn = "<|im_start|>system\n" + RERANK_SYSTEM_ES + "<|im_end|>\n"
    user_lines = [
        f"<Instrucción>: {instruccion}",
        f"<Consulta>: {consulta}",
        f"<Documento>: {documento_md}",
    ]
    user_turn = "<|im_start|>user\n" + "\n".join(user_lines) + "\n<|im_end|>\n"
    return system_turn + user_turn + "<|im_start|>assistant\n"

# ===================================
# LLM SHORT (respuesta ejecutiva ES)
# ===================================
# System message for the short answer.
LLM_SYSTEM_SHORT_ES = (
    "Eres un asistente de BBVA. Responde en español, directo y verificable. "
    "Usa EXCLUSIVAMENTE los contextos proporcionados. "
    "Cita página(s) en línea con el formato: (Manual <MES> <AÑO>, p. X). "
    "Si un dato no aparece explícitamente, indica: 'No aparece explícitamente en los contextos.' "
    "No incluyas explicaciones de tu proceso ni meta-comentarios."
)

def build_short_prompt_es(consulta: str, contextos_md):
    """Build the chat-formatted prompt for the short answer.

    Each context is labelled ``[CTX-n]`` (numbered from 1) and the labelled contexts
    are separated by blank lines. The user turn holds the answering instructions, the
    query and the contexts; the prompt ends with an opened assistant turn.
    """
    labelled = []
    for numero, texto in enumerate(contextos_md, start=1):
        labelled.append(f"[CTX-{numero}]\n{texto}")
    ctx = "\n\n".join(labelled)

    instrucciones = (
        "Responde en 1–3 frases o viñetas, sólo con información sustentada en los contextos.\n"
        "Incluye las páginas entre paréntesis junto a cada hecho clave.\n\n"
    )
    usuario = f"{instrucciones}Consulta:\n{consulta}\n\nContextos:\n{ctx}"

    pieces = [
        "<|im_start|>system\n", LLM_SYSTEM_SHORT_ES, "<|im_end|>\n",
        "<|im_start|>user\n", usuario, "<|im_end|>\n",
        "<|im_start|>assistant\n",
    ]
    return "".join(pieces)

# ================================
# LLM LARGE (resumen + detalle ES)
# ================================
# System message for the long, structured answer.
LLM_SYSTEM_LARGE_ES = (
    "Eres un asistente de BBVA. Responde en español de forma exhaustiva pero clara. "
    "Usa EXCLUSIVAMENTE los contextos proporcionados. "
    "Cita página(s) en línea en cada afirmación clave con el formato: (Manual <MES> <AÑO>, p. X). "
    "Si hay reglas por segmento/mes, diferéncialas con viñetas y cita páginas. "
    "Si un dato no aparece, indica explícitamente que no está en los contextos. "
    "No incluyas tu razonamiento ni meta-comentarios."
)

def build_large_prompt_es(consulta: str, contextos_md):
    """Build the chat-formatted prompt for the long, structured answer.

    Contexts are labelled ``[CTX-n]`` (numbered from 1) and joined with blank lines.
    The user turn lists the required answer structure, then the query and the
    contexts; the prompt ends with an opened assistant turn.
    """
    ctx = "\n\n".join(
        [f"[CTX-{posicion}]\n{contexto}" for posicion, contexto in enumerate(contextos_md, 1)]
    )
    estructura = "".join([
        "Estructura la respuesta así:\n",
        "1) Resumen (máx. 2 frases).\n",
        "2) Detalle en viñetas con citas de página por punto.\n",
        "3) (Opcional) Aclaraciones/Exclusiones si aplica, con cita.\n\n",
    ])
    usuario = estructura + f"Consulta:\n{consulta}\n\n" + f"Contextos:\n{ctx}"

    system_turn = f"<|im_start|>system\n{LLM_SYSTEM_LARGE_ES}<|im_end|>\n"
    user_turn = f"<|im_start|>user\n{usuario}<|im_end|>\n"
    return system_turn + user_turn + "<|im_start|>assistant\n"

# =========================================
# KEYWORDS dinámicas (boost recuperación ES)
# =========================================
# System message for keyword generation.
KEYWORDS_SYSTEM_ES = (
    "Eres un generador de palabras clave. En español y SIN texto extra, "
    "devuelve de 6 a 12 keywords separadas por comas, relevantes para buscar la respuesta en los documentos. "
    "Evita términos genéricos como '2025', 'pdf', 'manual'. Prioriza términos de negocio (producto, regla, canal, condición)."
)

def build_keywords_prompt_es(consulta: str, contextos_md):
    """Build the chat-formatted prompt that asks for comma-separated keywords.

    Contexts are labelled ``[CTX-n]`` (numbered from 1) and joined with blank lines.
    The user turn states the output format, then the query and the contexts; the
    prompt ends with an opened assistant turn.
    """
    secciones = (f"[CTX-{n}]\n{fragmento}" for n, fragmento in enumerate(contextos_md, start=1))
    ctx = "\n\n".join(secciones)

    cabecera = (
        "Genera keywords sólo a partir de la consulta y los contextos.\n"
        "Formato: palabra1, palabra2, ...\n\n"
    )
    usuario = cabecera + "Consulta:\n" + f"{consulta}" + "\n\nContextos:\n" + ctx

    return "<|im_start|>system\n{}<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n".format(
        KEYWORDS_SYSTEM_ES, usuario
    )

# ==================
# FALLBACK seguro ES
# ==================
# Canned Spanish reply stating that the answer is not explicit in the supplied contexts.
SAFE_FALLBACK_ES = (
    "No aparece explícitamente en los contextos proporcionados. "
    "Revisa el manual y la página específica del mes correspondiente, o ajusta la búsqueda/paginación."
)

print("✅ Prompts definidos (RERANK, SHORT, LARGE, KEYWORDS, FALLBACK).")


✅ Prompts definidos (RERANK, SHORT, LARGE, KEYWORDS, FALLBACK).


In [None]:
#@title Download Models
#@markdown Descarga los 3 modelos a **/content/models/** con salida limpia (solo prints).
import os, json, warnings, logging, contextlib
from huggingface_hub import snapshot_download

# Silence progress bars, logs and warnings.
# NOTE(review): huggingface_hub has already been imported when these variables are set;
# if the library reads them at import time they have no effect here — confirm.
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
for name in ("huggingface_hub", "tqdm", "filelock", "urllib3"):
    logging.getLogger(name).setLevel(logging.CRITICAL)

# Make sure the download root exists before any model is fetched.
os.makedirs(MODELS_DIR, exist_ok=True)

def _dl(repo_id: str, out_dir: str):
    """Download a Hub repository snapshot into ``out_dir`` with quiet output.

    Besides redirecting stdout/stderr and ignoring warnings, the Hub progress bars are
    switched off through the library API for the duration of the download; setting an
    environment variable after ``huggingface_hub`` has been imported does not hide them.
    The previous progress-bar state is restored afterwards.

    Args:
        repo_id: Repository id on the Hugging Face Hub.
        out_dir: Directory the files are written to.

    Returns:
        The local directory reported by ``snapshot_download``.
    """
    # Function-scope import: the cell only imports ``snapshot_download`` at the top.
    from huggingface_hub.utils import (
        are_progress_bars_disabled,
        disable_progress_bars,
        enable_progress_bars,
    )

    print(f"⬇️  Descargando {repo_id} → {out_dir}")
    bars_already_off = are_progress_bars_disabled()
    disable_progress_bars()
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            with open(os.devnull, "w") as devnull, \
                 contextlib.redirect_stdout(devnull), \
                 contextlib.redirect_stderr(devnull):
                local_dir = snapshot_download(
                    repo_id=repo_id,
                    local_dir=out_dir,           # download straight into the target directory
                )
    finally:
        # Leave the global progress-bar setting the way it was found.
        if not bars_already_off:
            enable_progress_bars()
    return local_dir

# Download each model only when its target directory does not exist yet.
# NOTE: the check is directory existence only, so a directory left behind by an
# interrupted download is treated as already complete.
if not os.path.isdir(EMB_LOCAL_PATH):
    emb_dir = _dl(MODEL_EMB_REPO, EMB_LOCAL_PATH)
    print(f"   ✅ listo: {emb_dir}")
else:
    print(f"✓ ya existe: {EMB_LOCAL_PATH}")

if not os.path.isdir(RER_LOCAL_PATH):
    rer_dir = _dl(MODEL_RERANK_REPO, RER_LOCAL_PATH)
    print(f"   ✅ listo: {rer_dir}")
else:
    print(f"✓ ya existe: {RER_LOCAL_PATH}")

if not os.path.isdir(LLM_LOCAL_PATH):
    llm_dir = _dl(MODEL_LLM_REPO, LLM_LOCAL_PATH)
    print()  # intentional blank line to match the expected output
    print(f"   ✅ listo: {llm_dir}")
else:
    print(f"✓ ya existe: {LLM_LOCAL_PATH}")

# Save an index of the local models (source repository and local path for each role)
MODELS_INDEX = {
    "embedding": {"repo": MODEL_EMB_REPO, "path": EMB_LOCAL_PATH},
    "reranker": {"repo": MODEL_RERANK_REPO, "path": RER_LOCAL_PATH},
    "llm": {"repo": MODEL_LLM_REPO, "path": LLM_LOCAL_PATH},
}
with open(os.path.join(MODELS_DIR, "models.json"), "w") as f:
    json.dump(MODELS_INDEX, f, indent=2)

print("✅ models.json:", os.path.join(MODELS_DIR, "models.json"))


⬇️  Descargando Qwen/Qwen3-Embedding-0.6B → /content/models/Qwen3-Embedding-0.6B


.gitattributes: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

   ✅ listo: /content/models/Qwen3-Embedding-0.6B
⬇️  Descargando Qwen/Qwen3-Reranker-0.6B → /content/models/Qwen3-Reranker-0.6B


.gitattributes: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/214 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

   ✅ listo: /content/models/Qwen3-Reranker-0.6B
⬇️  Descargando Qwen/Qwen3-4B-Instruct-2507 → /content/models/Qwen/Qwen3-4B-Instruct-2507


.gitattributes: 0.00B [00:00, ?B/s]

LICENSE: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/238 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/99.6M [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]


   ✅ listo: /content/models/Qwen/Qwen3-4B-Instruct-2507
✅ models.json: /content/models/models.json


In [None]:
#@title PDF
#@markdown Extrae elementos con Docling; si no hay items (escaneado/ocr), usa **PyMuPDF fallback** por bloques.

from typing import Iterable
import fitz  # PyMuPDF

class ElementNode(BaseModel):
    """One piece of content extracted from a PDF page, linked to its page neighbors."""
    # Identifier built by the parsers as "<file name>::p<page>::<sequence>::<text hash>".
    id: str
    # 1-based page number.
    page: int
    # Element type: lower-cased item class name (Docling path) or "paragraph" (PyMuPDF path).
    kind: str
    # Extracted text / markdown of the element.
    md: str
    # Bounding box as four floats; the PyMuPDF parser stores (x0, y0, x1, y1).
    bbox: Optional[Tuple[float,float,float,float]] = None
    # Ids of the previous / next node on the same page (None at the page edges).
    prev_id: Optional[str] = None
    next_id: Optional[str] = None
    # Path of the PDF the element was extracted from.
    source_path: str

class Chunk(BaseModel):
    """Retrieval unit: one element's markdown plus the markdown of its page neighbors."""
    # Built by build_node_k_neighbors as "<element id>::k<k>".
    chunk_id: str
    # Id of the ElementNode the chunk is centered on.
    element_id: str
    # 1-based page number of that element.
    page: int
    # Markdown of the element followed by the markdown of its neighbors.
    md: str
    # Chunk ids (not element ids) of the neighbors included in this chunk.
    neighbor_ids: List[str] = Field(default_factory=list)
    # Path of the PDF the chunk comes from.
    source_path: str

def sha1(s: str) -> str:
    """Return the first 10 hex characters of the SHA-1 digest of ``s`` encoded as UTF-8."""
    from hashlib import sha1 as _sha1_hash
    digest = _sha1_hash(s.encode("utf-8"))
    return digest.hexdigest()[:10]

def resolve_package_files(package_index: Dict[str, List[str]], search_dirs: Optional[List[str]] = None) -> Dict[str, List[str]]:
    """Locate the files of each month on disk.

    Args:
        package_index: Mapping of month key to a list of file names.
        search_dirs: Directories to probe, in priority order. Defaults to
            ``/content``, ``/mnt/data`` and ``/content/drive/MyDrive``.

    Returns:
        Mapping of month key to the paths that exist. Every file name contributes at
        most one path (the first directory in which it is found); names found nowhere
        are left out without an error.
    """
    roots = ["/content", "/mnt/data", "/content/drive/MyDrive"] if search_dirs is None else search_dirs

    def _first_hit(filename):
        # First existing candidate in search order, or None when nothing matches.
        for root in roots:
            candidate = os.path.join(root, filename)
            if os.path.exists(candidate):
                return candidate
        return None

    resolved = {}
    for month, filenames in package_index.items():
        hits = (_first_hit(filename) for filename in filenames)
        resolved[month] = [hit for hit in hits if hit is not None]
    return resolved

def _parse_with_docling(pdf_path: str):
    """Extract ElementNode objects from a PDF through Docling.

    Returns:
        ``(nodes, total_pages)``. ``([], 0)`` is returned when Docling cannot be
        imported or when the conversion raises.

    NOTE(review): the code probes ``pages`` on the object returned by ``convert`` and
    ``items`` / ``elements`` / ``blocks`` on every page purely by attribute name. If the
    installed Docling release exposes its content differently, no nodes are produced
    and ``parse_pdf_to_nodes`` silently falls back to PyMuPDF — confirm against the
    Docling API in use.
    """
    try:
        from docling.document_converter import DocumentConverter
    except Exception as e:
        # Docling unavailable: report "nothing parsed" so the caller can fall back.
        return [], 0
    conv = DocumentConverter()
    try:
        doc = conv.convert(pdf_path)
    except Exception:
        # Conversion failures are treated the same way as a missing library.
        return [], 0
    total_pages = len(getattr(doc, "pages", []))
    nodes: List[ElementNode] = []
    node_seq = 0  # running counter across the whole document, embedded in node ids
    for page_idx, page in enumerate(getattr(doc, "pages", []), start=1):
        # different builds may call it items/elements/blocks; the first attribute
        # holding a non-empty list wins
        candidates = []
        for attr in ["items", "elements", "blocks"]:
            if hasattr(page, attr):
                arr = getattr(page, attr)
                if isinstance(arr, list) and len(arr) > 0:
                    candidates = arr
                    break
        for it in candidates:
            kind = it.__class__.__name__.lower()
            md = ""
            try:
                # Preferred order: export_to_markdown(doc), then to_markdown(), then .text
                if hasattr(it, "export_to_markdown"):
                    md = it.export_to_markdown(doc)
                elif hasattr(it, "to_markdown"):
                    md = it.to_markdown()
                elif hasattr(it, "text"):
                    md = it.text
            except Exception:
                # Best effort: fall back to the raw text when the export fails.
                md = getattr(it, "text", "") or ""
            if not md.strip():
                continue
            # NOTE(review): bbox is passed through untouched while ElementNode declares a
            # 4-float tuple — confirm the item's bbox type validates.
            bbox = getattr(it, "bbox", None)
            node_id = f"{Path(pdf_path).name}::p{page_idx}::{node_seq}::{sha1(md)[:6]}"
            nodes.append(ElementNode(id=node_id, page=page_idx, kind=kind, md=md, bbox=bbox, source_path=pdf_path))
            node_seq += 1
    # link every node to the previous / next node on the same page
    by_page: Dict[int, List[ElementNode]] = {}
    for n in nodes:
        by_page.setdefault(n.page, []).append(n)
    for page_nodes in by_page.values():
        for i, n in enumerate(page_nodes):
            n.prev_id = page_nodes[i-1].id if i > 0 else None
            n.next_id = page_nodes[i+1].id if i < len(page_nodes)-1 else None
    return nodes, total_pages

def _parse_with_pymupdf(pdf_path: str):
    """Fallback parser: one ElementNode per non-empty PyMuPDF text block.

    The document handle is closed before returning, also when parsing raises, so the
    underlying file is not kept open.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        ``(nodes, total_pages)`` where nodes carry 1-based page numbers, the kind
        ``"paragraph"``, the block's (x0, y0, x1, y1) box and prev/next links to the
        adjacent node on the same page.
    """
    doc = fitz.open(pdf_path)
    try:
        total_pages = len(doc)
        nodes: List[ElementNode] = []
        node_seq = 0  # running counter across the whole document, embedded in node ids
        for page_idx in range(total_pages):
            page_no = page_idx + 1
            # Each block is a tuple starting with (x0, y0, x1, y1, text, ...).
            for block in doc[page_idx].get_text("blocks"):
                if len(block) < 5:
                    continue
                x0, y0, x1, y1, text = block[:5]
                text = (text or "").strip()
                if not text:
                    continue
                node_id = f"{Path(pdf_path).name}::p{page_no}::{node_seq}::{sha1(text)[:6]}"
                nodes.append(ElementNode(
                    id=node_id,
                    page=page_no,
                    kind="paragraph",
                    md=text,
                    bbox=(x0, y0, x1, y1),
                    source_path=pdf_path,
                ))
                node_seq += 1
    finally:
        doc.close()

    # Link every node to the previous / next node on the same page.
    by_page: Dict[int, List[ElementNode]] = {}
    for n in nodes:
        by_page.setdefault(n.page, []).append(n)
    for page_nodes in by_page.values():
        last = len(page_nodes) - 1
        for i, n in enumerate(page_nodes):
            n.prev_id = page_nodes[i - 1].id if i > 0 else None
            n.next_id = page_nodes[i + 1].id if i < last else None
    return nodes, total_pages

def parse_pdf_to_nodes(pdf_path: str):
    """Parse a PDF into ``(nodes, total_pages)``.

    Docling is tried first; when it yields no nodes at all, the PyMuPDF block parser
    is used instead and its result (including its page count) is returned.
    """
    parsed = _parse_with_docling(pdf_path)
    if not parsed[0]:
        # No elements from Docling -> switch to the block-based fallback.
        parsed = _parse_with_pymupdf(pdf_path)
    nodes, total_pages = parsed
    return nodes, total_pages

# Resolve the configured file names against the default search directories and list,
# per month, the names of the PDFs that were found.
RESOLVED_INDEX = resolve_package_files(PACKAGE_INDEX)
print("PDFs resueltos por mes:")
for k,v in RESOLVED_INDEX.items():
    print(" ", k, "=>", [Path(x).name for x in v])


PDFs resueltos por mes:
  2025-05 => []
  2025-06 => []
  2025-07 => ['07. Manual Multiplicadores Red Comercial y Remota Julio 2025.pdf', '07. Manual Red Comercial y Asesor Digital Julio 2025.pdf']


In [None]:
#@title TEXT&LAYOUT
#@markdown Construye **NODO + K_VECINOS** y genera `manuales.md`. Ahora muestra páginas, nodos, chunks y k-vecindades.

# Colab form field: number of same-page nodes taken on each side of a node for its chunk.
k_neighbors = 3 #@param {type:"slider", "label":"k_neighbors", "min":0, "max":3, "step":1}

def build_node_k_neighbors(nodes: List[ElementNode], k: int) -> List[Chunk]:
    """
    Build one chunk per node, bundling up to ``k`` nodes before and after it on the
    same page.

    IMPORTANT: ``neighbor_ids`` holds CHUNK ids (not element ids), i.e.
    ``f"{neighbor_element.id}::k{k}"``, so that ``expand_pagination`` can resolve them.

    The chunk markdown starts with the node itself under a ``NODE`` HTML comment,
    followed by each neighbor under a ``NEIGHBOR`` comment, in page order.
    """
    pages: Dict[int, List[ElementNode]] = {}
    for node in nodes:
        pages.setdefault(node.page, []).append(node)

    chunks: List[Chunk] = []
    for page, page_nodes in pages.items():
        total = len(page_nodes)
        for pos, node in enumerate(page_nodes):
            lo = max(0, pos - k)
            hi = min(total, pos + k + 1)
            # Everything inside the window except the node itself.
            neighbors = [
                candidate
                for offset, candidate in enumerate(page_nodes[lo:hi], start=lo)
                if offset != pos
            ]

            sections = [f"<!-- NODE {node.id} ({node.kind}) -->\n{node.md}"]
            sections.extend(
                f"\n<!-- NEIGHBOR {other.id} ({other.kind}) -->\n{other.md}"
                for other in neighbors
            )

            chunks.append(Chunk(
                chunk_id=f"{node.id}::k{k}",
                element_id=node.id,
                page=page,
                md="\n".join(sections),
                neighbor_ids=[f"{other.id}::k{k}" for other in neighbors],  # chunk ids
                source_path=node.source_path,
            ))
    return chunks

# Accumulators over every resolved PDF; reset each time the cell runs.
ALL_NODES: List[ElementNode] = []
ALL_CHUNKS: List[Chunk] = []
TOTAL_PAGES = 0
TOTAL_NEIGHBOR_LINKS = 0

# Parse every resolved PDF, build its chunks and dump its nodes to manuales.md.
MANUALES_MD_PATH = os.path.join(WORKDIR, "manuales.md")
with open(MANUALES_MD_PATH, "w", encoding="utf-8") as fmd:
    for month, paths in RESOLVED_INDEX.items():
        for p in paths:
            # A failure in one PDF is printed and the loop moves on to the next file.
            try:
                nodes, n_pages = parse_pdf_to_nodes(p)
                chunks = build_node_k_neighbors(nodes, k_neighbors)
                ALL_NODES.extend(nodes); ALL_CHUNKS.extend(chunks)
                TOTAL_PAGES += n_pages
                TOTAL_NEIGHBOR_LINKS += sum(len(c.neighbor_ids) for c in chunks)

                # Write per-page markdown for human inspection
                fmd.write(f"\n# Manual: {Path(p).name}\n")
                by_page: Dict[int, List[ElementNode]] = {}
                for n in nodes:
                    by_page.setdefault(n.page, []).append(n)
                for pg in sorted(by_page.keys()):
                    fmd.write(f"\n## Página {pg}\n")
                    for n in by_page[pg]:
                        # NOTE: blocks are written in full; no truncation is applied here
                        fmd.write(f"\n<!-- {n.kind} {n.id} -->\n{n.md}\n")

                print(f"OK: {Path(p).name} -> {n_pages} pages / {len(nodes)} nodes / {len(chunks)} chunks")
            except Exception as e:
                print(f"ERROR procesando {p}: {e}")

print(f"Total páginas: {TOTAL_PAGES} | Total nodes: {len(ALL_NODES)} | Total chunks: {len(ALL_CHUNKS)} | Total k-vecindades: {TOTAL_NEIGHBOR_LINKS}")
print("📝 Se generó manuales.md en:", MANUALES_MD_PATH)


OK: 07. Manual Multiplicadores Red Comercial y Remota Julio 2025.pdf -> 32 pages / 603 nodes / 603 chunks
OK: 07. Manual Red Comercial y Asesor Digital Julio 2025.pdf -> 116 pages / 1970 nodes / 1970 chunks
Total páginas: 148 | Total nodes: 2573 | Total chunks: 2573 | Total k-vecindades: 4852
📝 Se generó manuales.md en: /content/manuales.md


In [None]:
#@title EMBEDDING
#@markdown Carga **Qwen/Qwen3-Embedding-0.6B** desde `/content/models` y genera embeddings.

def load_embedder():
    """Load the embedding tokenizer and model from ``EMB_LOCAL_PATH``.

    Returns:
        ``(tokenizer, model)``. The model is loaded with ``device_map="auto"`` and in
        float16 when CUDA or MPS is available, float32 otherwise.
    """
    def _select_dtype():
        # Half precision on CUDA or Apple MPS; full precision is the safe CPU choice.
        half_precision = torch.cuda.is_available()
        if not half_precision:
            try:
                half_precision = bool(torch.backends.mps.is_available())
            except Exception:
                half_precision = False
        return torch.float16 if half_precision else torch.float32

    tok = AutoTokenizer.from_pretrained(EMB_LOCAL_PATH, trust_remote_code=True)
    model_options = {
        "trust_remote_code": True,
        "torch_dtype": _select_dtype(),
        "device_map": "auto",
    }
    mdl = AutoModel.from_pretrained(EMB_LOCAL_PATH, **model_options)
    return tok, mdl


def _last_token_index(attn_mask: torch.Tensor) -> torch.Tensor:
    return attn_mask.sum(dim=1) - 1

def _mean_pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # mean pooling con máscara (evita promediar padding)
    mask = attention_mask.unsqueeze(-1).type_as(last_hidden_state)
    summed = (last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return torch.nn.functional.normalize(summed / counts, dim=1)

@torch.inference_mode()
def embed_texts(texts: List[str], tok, mdl, max_length: int = 4096) -> np.ndarray:
    """Embed a batch of texts with masked mean pooling.

    Args:
        texts: Strings to embed.
        tok: Tokenizer, called with padding and truncation to ``max_length``.
        mdl: Model whose ``last_hidden_state`` is pooled; inputs are moved to ``mdl.device``.
        max_length: Truncation length in tokens.

    Returns:
        float32 NumPy array with one row per text. An empty input gives an array of
        shape ``(0, EMB_DIM)``.
    """
    if texts:
        encoded = tok(texts, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
        on_device = {name: tensor.to(mdl.device) for name, tensor in encoded.items()}
        hidden = mdl(**on_device).last_hidden_state  # [B, T, H]
        pooled = _mean_pool(hidden, on_device["attention_mask"])  # [B, H]
        return pooled.float().cpu().numpy()
    return np.zeros((0, EMB_DIM), dtype="float32")



# Collect the markdown of every chunk, in the same order as ALL_CHUNKS
CHUNK_TEXTS = [c.md for c in ALL_CHUNKS]
print("Chunks a embeder:", len(CHUNK_TEXTS))

# Load the embedding tokenizer and model once and report the device the model sits on.
_tok_emb, _mdl_emb = load_embedder()
print("✅ Embedder cargado en:", _mdl_emb.device)


Chunks a embeder: 2573


`torch_dtype` is deprecated! Use `dtype` instead!


✅ Embedder cargado en: cuda:0


In [None]:
#@title VECTOR DATABASE
#@markdown Crea/abre **sqlite-vec==0.1.6** y persiste en `embeddings.db`.

import sqlite3, json

def init_sqlite_vec(db_path: str, dim: int = 1024) -> sqlite3.Connection:
    """Open (or create) the database and make sure the vector and metadata tables exist.

    Extension loading is disabled by default on ``sqlite3`` connections, so it is
    enabled just for the ``sqlite_vec.load`` call and switched off again afterwards.

    Args:
        db_path: Path of the SQLite file.
        dim: Number of float components of the ``embedding`` column.

    Returns:
        An open connection with the extension loaded and both tables created:
        ``v_chunks`` (vec0 virtual table) and ``chunks_meta`` (one row per chunk,
        sharing its rowid with the vector).
    """
    import sqlite_vec  # ships the loadable vec0 extension
    conn = sqlite3.connect(db_path)
    try:
        conn.enable_load_extension(True)
        try:
            sqlite_vec.load(conn)  # registers vec0
        finally:
            conn.enable_load_extension(False)
        # int() keeps anything but a plain integer out of the interpolated DDL.
        conn.execute(f"""
        CREATE VIRTUAL TABLE IF NOT EXISTS v_chunks
        USING vec0(embedding FLOAT[{int(dim)}]);
    """)
        conn.execute("""
        CREATE TABLE IF NOT EXISTS chunks_meta (
            rowid INTEGER PRIMARY KEY,
            chunk_id TEXT UNIQUE,
            element_id TEXT,
            page INTEGER,
            md TEXT,
            neighbors TEXT,
            source_path TEXT
        );
    """)
    except Exception:
        # Do not leak the connection when setup fails.
        conn.close()
        raise
    return conn

def _pack_f32(arr: np.ndarray) -> bytes:
    return arr.astype("float32").tobytes()

def upsert_embeddings(conn: sqlite3.Connection, chunks: List[Chunk], embs: np.ndarray):
    """Insert or refresh the metadata row and the vector of every chunk.

    Metadata is a real upsert keyed on ``chunk_id``: an existing row keeps its rowid
    and gets its other columns refreshed, so text and vector cannot drift apart when
    the cell is run again. The vector is stored under the same rowid; any previous
    vector is deleted first instead of relying on ``INSERT OR REPLACE``, which a
    virtual table is not guaranteed to honour.

    All writes happen in one transaction: it is committed on success and rolled back
    if any statement fails.

    Args:
        conn: Connection returned by ``init_sqlite_vec``.
        chunks: Chunks to store.
        embs: One embedding per chunk, in the same order.

    Raises:
        AssertionError: if ``chunks`` and ``embs`` differ in length.
    """
    assert len(chunks) == len(embs), "chunks and embeddings must have the same length"
    with conn:  # commit on success, rollback on error
        cur = conn.cursor()
        for ch, emb in zip(chunks, embs):
            cur.execute("""
                INSERT INTO chunks_meta(chunk_id, element_id, page, md, neighbors, source_path)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(chunk_id) DO UPDATE SET
                    element_id = excluded.element_id,
                    page = excluded.page,
                    md = excluded.md,
                    neighbors = excluded.neighbors,
                    source_path = excluded.source_path;
            """, (ch.chunk_id, ch.element_id, ch.page, ch.md, json.dumps(ch.neighbor_ids), ch.source_path))
            rowid = cur.execute(
                "SELECT rowid FROM chunks_meta WHERE chunk_id=?;", (ch.chunk_id,)
            ).fetchone()[0]
            # Replace the vector stored under the same rowid.
            cur.execute("DELETE FROM v_chunks WHERE rowid=?;", (rowid,))
            cur.execute(
                "INSERT INTO v_chunks(rowid, embedding) VALUES (?, ?);",
                (rowid, _pack_f32(emb)),
            )

def search_vec(conn: sqlite3.Connection, query_vec: np.ndarray, topN: int = 10) -> list[tuple[int, float]]:
    """Return the ``topN`` nearest rows of ``v_chunks`` as ``(rowid, distance)``,
    closest first.

    The query first uses the ``k = ?`` constraint; if SQLite raises an
    ``OperationalError`` it is retried with a literal ``LIMIT``.
    """
    blob = _pack_f32(query_vec.reshape(-1))
    limit = int(topN)
    # Preferred form for sqlite-vec 0.1.6: 'k = ?' inside the WHERE clause
    knn_sql = """
            SELECT rowid, distance
            FROM v_chunks
            WHERE embedding MATCH ? AND k = ?
            ORDER BY distance ASC;
        """
    try:
        cursor = conn.execute(knn_sql, (blob, limit))
        return cursor.fetchall()
    except sqlite3.OperationalError:
        # Fallback for builds that need a literal (non-parameterized) LIMIT
        limit_sql = f"""
            SELECT rowid, distance
            FROM v_chunks
            WHERE embedding MATCH ?
            ORDER BY distance ASC
            LIMIT {limit};
        """
        return conn.execute(limit_sql, (blob,)).fetchall()

def fetch_meta(conn: sqlite3.Connection, rowids: List[int]) -> List[Dict[str, Any]]:
    """Fetch the metadata rows for ``rowids``, in the order the rowids were given.

    A plain ``IN (...)`` query returns rows in whatever order SQLite picks, which
    would drop the ranking carried by the input. The rows are therefore re-ordered
    to follow ``rowids``; rowids with no matching row are skipped.

    Args:
        conn: Open connection holding the ``chunks_meta`` table.
        rowids: Rowids to look up, typically ranked by distance.

    Returns:
        One dict per rowid that was found, with the keys ``rowid``, ``chunk_id``,
        ``element_id``, ``page``, ``md``, ``neighbors`` and ``source_path``.
    """
    if not rowids:
        return []
    cols = ["rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path"]
    q = f"SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path FROM chunks_meta WHERE rowid IN ({','.join('?'*len(rowids))})"
    rows = conn.execute(q, rowids).fetchall()
    by_rowid = {row[0]: dict(zip(cols, row)) for row in rows}
    return [by_rowid[rid] for rid in rowids if rid in by_rowid]

# ==== Build VDB ====
conn = init_sqlite_vec(DB_PATH, dim=EMB_DIM)

# Embed the chunk texts in batches of BATCH and stack the per-batch arrays; with no
# chunks at all an empty (0, EMB_DIM) float32 array is used instead.
BATCH = 16
vecs_parts = []
for i in range(0, len(CHUNK_TEXTS), BATCH):
    vecs_parts.append(embed_texts(CHUNK_TEXTS[i:i+BATCH], _tok_emb, _mdl_emb))
EMBEDS = np.vstack(vecs_parts) if vecs_parts else np.zeros((0, EMB_DIM), dtype="float32")

# Store metadata and vectors for every chunk.
upsert_embeddings(conn, ALL_CHUNKS, EMBEDS)
print(f"✅ VDB listo: {DB_PATH} | {len(ALL_CHUNKS)} chunks indexados")


✅ VDB listo: /content/embeddings.db | 2573 chunks indexados


In [None]:
#@title RANKER
#@markdown Carga **Qwen/Qwen3-Reranker-0.6B** desde `/content/models` y calcula *scores* yes/no.

def load_reranker():
    """Load the reranker tokenizer and model from ``RER_LOCAL_PATH``.

    The yes/no token ids are looked up for the bare tokens ``"yes"`` and ``"no"``.
    The rerank prompt ends right after ``assistant\\n``, so the first generated token
    has no leading space; the ids of ``" yes"`` / ``" no"`` belong to different
    vocabulary entries and would score the wrong logits.

    Returns:
        ``(tokenizer, model, yes_id, no_id)``. The model is loaded with
        ``device_map="auto"`` and in float16 when CUDA or MPS is available, float32
        otherwise.

    Raises:
        ValueError: if the tokenizer has no single token for ``"yes"`` or ``"no"``.
    """
    def _select_dtype():
        if torch.cuda.is_available():
            return torch.float16
        try:
            if torch.backends.mps.is_available():
                return torch.float16
        except Exception:
            pass
        return torch.float32

    tok = AutoTokenizer.from_pretrained(RER_LOCAL_PATH, trust_remote_code=True)
    mdl = AutoModelForCausalLM.from_pretrained(
        RER_LOCAL_PATH,
        trust_remote_code=True,
        torch_dtype=_select_dtype(),
        device_map="auto"
    )
    yes_id = tok.convert_tokens_to_ids("yes")
    no_id = tok.convert_tokens_to_ids("no")
    if yes_id is None or no_id is None:
        raise ValueError("The reranker tokenizer has no single-token 'yes'/'no' entries")
    return tok, mdl, yes_id, no_id


def _rerank_prompt(instruction: str, query: str, document: str) -> str:
    """Build the rerank prompt: Spanish template, English yes/no answer."""
    return build_rerank_prompt_es(instruction, query, document)

@torch.inference_mode()
def rerank(query: str, candidates_md: List[str], tok, mdl, yes_id: int, no_id: int, instruction: str = "Evalúa si el documento contiene evidencia explícita y suficiente para responder la consulta con precisión."):
    """Score every candidate with the probability of "yes" versus "no".

    For each candidate the model is run on the rerank prompt and the next-token
    logits of ``yes_id`` and ``no_id`` are turned into a two-way softmax. The softmax
    is computed by torch in float32, which stays finite for large logits and, unlike
    ``exp(a) / (exp(a) + exp(b) + eps)``, is not distorted when both logits are very
    negative.

    Args:
        query: User question.
        candidates_md: Candidate documents (markdown).
        tok, mdl: Reranker tokenizer and model.
        yes_id, no_id: Vocabulary ids of the "yes" and "no" tokens.
        instruction: Task instruction placed in the prompt.

    Returns:
        A list of floats in [0, 1], one per candidate, in input order.
    """
    scores = []
    for md in candidates_md:
        prompt = _rerank_prompt(instruction, query, md)
        batch = tok(prompt, return_tensors="pt").to(mdl.device)
        next_token_logits = mdl(**batch).logits[0, -1, :]
        yes_no = torch.stack((next_token_logits[yes_id], next_token_logits[no_id])).float()
        scores.append(torch.softmax(yes_no, dim=0)[0].item())
    return scores

# Load the reranker once; the yes/no token ids are kept for scoring.
_tok_rer, _mdl_rer, _yes_id, _no_id = load_reranker()
print("✅ Reranker cargado en:", _mdl_rer.device)


✅ Reranker cargado en: cuda:0


In [None]:
#@title LANGUAGE
#@markdown Carga **Qwen/Qwen3-4B** desde `/content/models` para generar respuestas informadas.

def load_llm():
    """Load the generation tokenizer and model from ``LLM_LOCAL_PATH``.

    Returns:
        ``(tokenizer, model)``. The model is loaded with ``device_map="auto"`` and in
        float16 when CUDA or MPS is available, float32 otherwise.
    """
    def _select_dtype():
        # On GPUs with bfloat16 support, torch.bfloat16 is a possible alternative.
        use_half = torch.cuda.is_available()
        if not use_half:
            try:
                use_half = bool(torch.backends.mps.is_available())
            except Exception:
                use_half = False
        return torch.float16 if use_half else torch.float32

    tok = AutoTokenizer.from_pretrained(LLM_LOCAL_PATH, trust_remote_code=True)
    load_options = dict(
        trust_remote_code=True,
        torch_dtype=_select_dtype(),
        device_map="auto",
    )
    mdl = AutoModelForCausalLM.from_pretrained(LLM_LOCAL_PATH, **load_options)
    return tok, mdl


@torch.inference_mode()
def generate_answer_short(query: str, contexts_md, tok, mdl, max_new_tokens=320) -> str:
    """Generate the short answer with greedy decoding.

    The prompt comes from ``build_short_prompt_es``. Only the newly generated tokens
    are decoded (special tokens skipped) and the text is stripped.
    """
    encoded = tok(build_short_prompt_es(query, contexts_md), return_tensors="pt").to(mdl.device)
    prompt_len = encoded["input_ids"].shape[1]
    generated = mdl.generate(**encoded, max_new_tokens=max_new_tokens, do_sample=False)
    completion_ids = generated[0][prompt_len:]  # drop the echoed prompt
    text = tok.decode(completion_ids, skip_special_tokens=True)
    return text.strip()

@torch.inference_mode()
def generate_answer_large(query: str, contexts_md, tok, mdl, max_new_tokens=640) -> str:
    """Generate the long, structured answer with greedy decoding.

    The prompt comes from ``build_large_prompt_es``. Only the newly generated tokens
    are decoded (special tokens skipped) and the text is stripped.
    """
    model_inputs = tok(build_large_prompt_es(query, contexts_md), return_tensors="pt").to(mdl.device)
    n_prompt_tokens = model_inputs["input_ids"].shape[1]
    sequences = mdl.generate(**model_inputs, max_new_tokens=max_new_tokens, do_sample=False)
    new_tokens = sequences[0][n_prompt_tokens:]  # drop the echoed prompt
    answer = tok.decode(new_tokens, skip_special_tokens=True)
    return answer.strip()

# Load the generation model once and report the device it was placed on.
_tok_llm, _mdl_llm = load_llm()
print("✅ LLM cargado en:", _mdl_llm.device)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ LLM cargado en: cuda:0


In [None]:
#@title CHAT
#@markdown Genera SIEMPRE ambas salidas (SHORT y LARGE) y las muestra con **rich** en el formato solicitado.

# Form parameters (the assignment must come BEFORE the #@param marker)
query = "¿Cuándo cuenta Portabilidad para Banquero en julio y qué condición aplica a Open Market?"  #@param {type:"string", "label":"query", "placeholder":"Escribe tu pregunta BBVA..."}
pagination_neighbors = 1  #@param {type:"slider", "label":"pagination_neighbors", "min":0, "max":2, "step":1}
top_k = 5  #@param {type:"slider", "label":"top_k (LARGE)", "min":3, "max":8, "step":1}

# ===== Rich (installed on the fly when it is missing) =====
try:
    from rich.console import Console
    from rich.panel import Panel
    from rich.markdown import Markdown
    from rich.table import Table
    from rich.text import Text
except Exception:
    # Any failure while importing triggers a quiet pip install followed by a retry.
    import sys, subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "rich"])
    from rich.console import Console
    from rich.panel import Panel
    from rich.markdown import Markdown
    from rich.table import Table
    from rich.text import Text

# Fixed-width console used for the rendered output.
console = Console(width=120)

# ===== Helpers (apoyan al pipeline ya definido) =====
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np, json, sqlite3
from pathlib import Path

def _format_query_for_embedding(q: str) -> str:
    # Si definiste format_query en EMBEDDING (instruct-aware), úsalo.
    try:
        return format_query(q)  # opcional
    except NameError:
        return q

def embed_query(text: str) -> np.ndarray:
    """Embed a single query string and return its vector.

    The text goes through ``_format_query_for_embedding`` and is then embedded
    as a one-element batch with the module-level embedding tokenizer/model;
    the first (only) entry of the batch result is returned.
    """
    prepared = _format_query_for_embedding(text)
    batch_vectors = embed_texts([prepared], _tok_emb, _mdl_emb)
    return batch_vectors[0]

def retrieve(conn, query: str, topN: int = 10):
    """Run the vector search for ``query`` and load metadata for the hits.

    Returns ``(metas, nn)``: ``nn`` is the raw result of ``search_vec`` for the
    query embedding, and ``metas`` is ``fetch_meta`` applied to the first
    element of every ``nn`` row.
    """
    # search_vec is expected to apply the 'k = ?' limit internally.
    hits = search_vec(conn, embed_query(query), topN)
    hit_keys = [hit[0] for hit in hits]
    return fetch_meta(conn, hit_keys), hits

def describe_chunks(chunks_list):
    """Build a rich ``Table`` with one row per chunk: position, chunk_id, page, source.

    Rows are numbered from 1 in input order; the source column shows only the
    file-name component of each chunk's ``source_path``.
    """
    column_specs = (
        ("Rank/Item", {"justify": "right", "style": "cyan", "no_wrap": True}),
        ("chunk_id", {"overflow": "fold"}),
        ("page", {"justify": "right"}),
        ("source", {}),
    )
    table = Table(show_header=True, header_style="bold")
    for header, options in column_specs:
        table.add_column(header, **options)
    for offset, chunk in enumerate(chunks_list):
        table.add_row(
            str(offset + 1),
            chunk["chunk_id"],
            str(chunk["page"]),
            Path(chunk["source_path"]).name,
        )
    return table

def snippets_for(chunks_list, limit=420):
    """Return one preview string per chunk: its stripped ``md`` cut to ``limit`` characters.

    A missing or falsy ``md`` yields an empty string.
    """
    return [(chunk.get("md") or "").strip()[:limit] for chunk in chunks_list]

def fetch_by_chunk_ids(conn: sqlite3.Connection, chunk_ids: list[str]) -> list[dict]:
    """Look up ``chunks_meta`` rows by ``chunk_id`` and return them in input order.

    Ids without a matching row are skipped. An empty ``chunk_ids`` returns
    ``[]`` without touching the database.
    """
    if not chunk_ids:
        return []
    columns = ("rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path")
    # One '?' per id keeps the statement fully parameterized.
    placeholders = ','.join('?'*len(chunk_ids))
    q = f"""
    SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path
    FROM chunks_meta
    WHERE chunk_id IN ({placeholders})
    """
    by_chunk_id = {}
    for row in conn.execute(q, chunk_ids).fetchall():
        by_chunk_id[row[1]] = dict(zip(columns, row))
    # Re-walk the requested ids so the caller's ordering is preserved.
    found = []
    for cid in chunk_ids:
        if cid in by_chunk_id:
            found.append(by_chunk_id[cid])
    return found

def expand_pagination(conn: sqlite3.Connection, meta: dict, window: int = 1) -> list[dict]:
    """Return ``[meta, *neighbors]`` using the neighbor list stored on the chunk.

    ``meta["neighbors"]`` is expected to hold a JSON array of chunk_ids. At
    most ``window`` of them (taken from the start of the array) are fetched
    through ``fetch_by_chunk_ids``. A value that is missing, not valid JSON,
    or valid JSON but not an array (e.g. ``null`` or an object) is treated as
    "no neighbors", so the result is just ``[meta]``.
    """
    try:
        neighbor_ids = json.loads(meta.get("neighbors") or "[]")
    except (TypeError, ValueError):
        neighbor_ids = []
    # json.loads can legally return None/dict/str/number; only a list can be sliced below.
    if not isinstance(neighbor_ids, list):
        neighbor_ids = []
    neighbor_ids = neighbor_ids[: max(0, int(window))]
    if not neighbor_ids:
        # Nothing to look up: skip the database round-trip.
        return [meta]
    return [meta] + fetch_by_chunk_ids(conn, neighbor_ids)

# ===== Query pipeline =====
# Retrieve the initial candidate pool: max(10, 2 * top_k) nearest chunks.
metas, _ = retrieve(conn, query, topN=max(10, top_k * 2))

if not metas:
    # No candidates: emit the same section headings with placeholder content.
    console.rule("[bold red]RAG BBVA[/bold red]")
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("RESPONSE PAGINATION (TOP-1)\n-")
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("PAGINATION TOP-K\n-")
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):")
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)\n-")
    console.print("Dynamic Keywords (boost)\n-")
    console.print("SNIPPETS (preview)\n-")
else:
    # Point-wise yes/no rerank, then order candidates by descending score.
    scores = rerank(query, [m['md'] for m in metas], _tok_rer, _mdl_rer, _yes_id, _no_id)
    order = np.argsort(scores)[::-1]

    # === SHORT: top-1 chunk plus its neighbors ===
    top1 = metas[order[0]]
    expanded_top1 = expand_pagination(conn, top1, window=pagination_neighbors)
    contexts_short = [m['md'] for m in expanded_top1]
    resp_short = generate_answer_short(query, contexts_short, _tok_llm, _mdl_llm, max_new_tokens=320)

    # === LARGE: top-k chunks plus the neighbors of each ===
    topk_idx = order[:top_k]
    topk = [metas[i] for i in topk_idx]
    expanded_sets = []
    contexts_large = []
    seen_chunk_ids = set()
    for m in topk:
        expanded = expand_pagination(conn, m, window=pagination_neighbors)
        expanded_sets.append(expanded)  # kept complete for the per-hit display below
        for x in expanded:
            # Neighborhoods of different hits can overlap; pass each chunk to
            # the LLM only once so the prompt is not padded with repeats.
            if x['chunk_id'] in seen_chunk_ids:
                continue
            seen_chunk_ids.add(x['chunk_id'])
            contexts_large.append(x['md'])
    resp_large = generate_answer_large(query, contexts_large, _tok_llm, _mdl_llm, max_new_tokens=640)

    # === MINI-DKB: keywords + snippets (from the top-k hits themselves, not their neighbors) ===
    # NOTE(review): this keeps the alphabetically first 12 of the (up to 24)
    # vocabulary terms rather than ranking them by weight — confirm that is intended.
    try:
        vec = TfidfVectorizer(ngram_range=(1,2), max_features=24)
        _ = vec.fit_transform([m['md'] for m in topk])
        dyn_keywords = sorted(vec.vocabulary_.keys())[:12]
    except Exception:
        # Best-effort: a keyword-extraction failure only blanks the keyword table.
        dyn_keywords = []
    snips = snippets_for(topk, limit=420)

    # ======== FORMATTED OUTPUT ========
    console.rule("[bold blue]RAG BBVA[/bold blue]")

    # 1) USER QUERY
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))

    # 2) SYSTEM RESPONSE SHORT
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print(Panel(Markdown(resp_short), title="SHORT", border_style="green"))

    # 3) RESPONSE PAGINATION (TOP-1)
    console.print("RESPONSE PAGINATION (TOP-1)", style="bold")
    console.print(describe_chunks(expanded_top1))

    # 4) SYSTEM RESPONSE LARGE
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print(Panel(Markdown(resp_large), title="LARGE", border_style="magenta"))

    # 5) PAGINATION TOP-K (one panel per hit, showing its full neighborhood)
    console.print("PAGINATION TOP-K", style="bold")
    for i, ex in enumerate(expanded_sets, 1):
        console.print(Panel(describe_chunks(ex), title=f"Top-{i} vecindario", border_style="cyan"))

    # 6) MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB)
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):", style="bold")

    # 6.1) Top-k as ordered by the reranker
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)", style="bold")
    console.print(describe_chunks(topk))

    # 6.2) Dynamic keywords
    console.print("Dynamic Keywords (boost)", style="bold")
    if dyn_keywords:
        kw_table = Table(show_header=False)
        for k in dyn_keywords:
            kw_table.add_row(k)
        console.print(kw_table)
    else:
        console.print("-")

    # 6.3) Snippet previews (an empty snippet is shown as "-")
    console.print("SNIPPETS (preview)", style="bold")
    for i, sn in enumerate(snips, 1):
        console.print(Panel.fit(Markdown(sn if sn.strip() else "-"), title=f"Snippet {i}", border_style="yellow"))


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [None]:
#@title CHAT
#@markdown Genera SIEMPRE ambas salidas (SHORT y LARGE) y las muestra con **rich** en el formato solicitado.

# Colab form parameters (each assignment must come BEFORE its #@param annotation).
# Slider bounds and the text placeholder are declared in the annotations themselves.
query = "¿Cuándo cuenta Portabilidad para Banquero en julio y qué condición aplica a Open Market?"  #@param {type:"string", "label":"query", "placeholder":"Escribe tu pregunta BBVA..."}
pagination_neighbors = 1  #@param {type:"slider", "label":"pagination_neighbors", "min":0, "max":2, "step":1}
top_k = 5  #@param {type:"slider", "label":"top_k (LARGE)", "min":3, "max":8, "step":1}

# ===== Rich (instalar on-the-fly si no está) =====
# Probe for rich; install it on the fly only when the package is really missing.
# Catching ImportError (not Exception) keeps unrelated failures visible instead
# of answering them with a pip install.
try:
    import rich  # noqa: F401  (availability probe only)
except ImportError:
    import importlib, subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "rich"])
    # A package installed after interpreter start may be missed by the import
    # system's cached directory listings; refresh them before importing.
    importlib.invalidate_caches()

from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.table import Table
from rich.text import Text

# Shared console for all formatted output in this cell, fixed at 120 columns.
console = Console(width=120)

# ===== Helpers (apoyan al pipeline ya definido) =====
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np, json, sqlite3
from pathlib import Path

def _format_query_for_embedding(q: str) -> str:
    # Si definiste format_query en EMBEDDING (instruct-aware), úsalo.
    try:
        return format_query(q)  # opcional
    except NameError:
        return q

def embed_query(text: str) -> np.ndarray:
    """Embed a single query string and return its vector.

    The text goes through ``_format_query_for_embedding`` and is then embedded
    as a one-element batch with the module-level embedding tokenizer/model;
    the first (only) entry of the batch result is returned.
    """
    prepared = _format_query_for_embedding(text)
    batch_vectors = embed_texts([prepared], _tok_emb, _mdl_emb)
    return batch_vectors[0]

def retrieve(conn, query: str, topN: int = 10):
    """Run the vector search for ``query`` and load metadata for the hits.

    Returns ``(metas, nn)``: ``nn`` is the raw result of ``search_vec`` for the
    query embedding, and ``metas`` is ``fetch_meta`` applied to the first
    element of every ``nn`` row.
    """
    # search_vec is expected to apply the 'k = ?' limit internally.
    hits = search_vec(conn, embed_query(query), topN)
    hit_keys = [hit[0] for hit in hits]
    return fetch_meta(conn, hit_keys), hits

def describe_chunks(chunks_list):
    """Build a rich ``Table`` with one row per chunk: position, chunk_id, page, source.

    Rows are numbered from 1 in input order; the source column shows only the
    file-name component of each chunk's ``source_path``.
    """
    column_specs = (
        ("Rank/Item", {"justify": "right", "style": "cyan", "no_wrap": True}),
        ("chunk_id", {"overflow": "fold"}),
        ("page", {"justify": "right"}),
        ("source", {}),
    )
    table = Table(show_header=True, header_style="bold")
    for header, options in column_specs:
        table.add_column(header, **options)
    for offset, chunk in enumerate(chunks_list):
        table.add_row(
            str(offset + 1),
            chunk["chunk_id"],
            str(chunk["page"]),
            Path(chunk["source_path"]).name,
        )
    return table

def snippets_for(chunks_list, limit=420):
    """Return one preview string per chunk: its stripped ``md`` cut to ``limit`` characters.

    A missing or falsy ``md`` yields an empty string.
    """
    return [(chunk.get("md") or "").strip()[:limit] for chunk in chunks_list]

def fetch_by_chunk_ids(conn: sqlite3.Connection, chunk_ids: list[str]) -> list[dict]:
    """Look up ``chunks_meta`` rows by ``chunk_id`` and return them in input order.

    Ids without a matching row are skipped. An empty ``chunk_ids`` returns
    ``[]`` without touching the database.
    """
    if not chunk_ids:
        return []
    columns = ("rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path")
    # One '?' per id keeps the statement fully parameterized.
    placeholders = ','.join('?'*len(chunk_ids))
    q = f"""
    SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path
    FROM chunks_meta
    WHERE chunk_id IN ({placeholders})
    """
    by_chunk_id = {}
    for row in conn.execute(q, chunk_ids).fetchall():
        by_chunk_id[row[1]] = dict(zip(columns, row))
    # Re-walk the requested ids so the caller's ordering is preserved.
    found = []
    for cid in chunk_ids:
        if cid in by_chunk_id:
            found.append(by_chunk_id[cid])
    return found

def expand_pagination(conn: sqlite3.Connection, meta: dict, window: int = 1) -> list[dict]:
    """Return ``[meta, *neighbors]`` using the neighbor list stored on the chunk.

    ``meta["neighbors"]`` is expected to hold a JSON array of chunk_ids. At
    most ``window`` of them (taken from the start of the array) are fetched
    through ``fetch_by_chunk_ids``. A value that is missing, not valid JSON,
    or valid JSON but not an array (e.g. ``null`` or an object) is treated as
    "no neighbors", so the result is just ``[meta]``.
    """
    try:
        neighbor_ids = json.loads(meta.get("neighbors") or "[]")
    except (TypeError, ValueError):
        neighbor_ids = []
    # json.loads can legally return None/dict/str/number; only a list can be sliced below.
    if not isinstance(neighbor_ids, list):
        neighbor_ids = []
    neighbor_ids = neighbor_ids[: max(0, int(window))]
    if not neighbor_ids:
        # Nothing to look up: skip the database round-trip.
        return [meta]
    return [meta] + fetch_by_chunk_ids(conn, neighbor_ids)

# ===== Query pipeline =====
# Retrieve the initial candidate pool: max(10, 2 * top_k) nearest chunks.
metas, _ = retrieve(conn, query, topN=max(10, top_k * 2))

if not metas:
    # No candidates: emit the same section headings with placeholder content.
    console.rule("[bold red]RAG BBVA[/bold red]")
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("RESPONSE PAGINATION (TOP-1)\n-")
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("PAGINATION TOP-K\n-")
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):")
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)\n-")
    console.print("Dynamic Keywords (boost)\n-")
    console.print("SNIPPETS (preview)\n-")
else:
    # Point-wise yes/no rerank, then order candidates by descending score.
    scores = rerank(query, [m['md'] for m in metas], _tok_rer, _mdl_rer, _yes_id, _no_id)
    order = np.argsort(scores)[::-1]

    # === SHORT: top-1 chunk plus its neighbors ===
    top1 = metas[order[0]]
    expanded_top1 = expand_pagination(conn, top1, window=pagination_neighbors)
    contexts_short = [m['md'] for m in expanded_top1]
    resp_short = generate_answer_short(query, contexts_short, _tok_llm, _mdl_llm, max_new_tokens=320)

    # === LARGE: top-k chunks plus the neighbors of each ===
    topk_idx = order[:top_k]
    topk = [metas[i] for i in topk_idx]
    expanded_sets = []
    contexts_large = []
    seen_chunk_ids = set()
    for m in topk:
        expanded = expand_pagination(conn, m, window=pagination_neighbors)
        expanded_sets.append(expanded)  # kept complete for the per-hit display below
        for x in expanded:
            # Neighborhoods of different hits can overlap; pass each chunk to
            # the LLM only once so the prompt is not padded with repeats.
            if x['chunk_id'] in seen_chunk_ids:
                continue
            seen_chunk_ids.add(x['chunk_id'])
            contexts_large.append(x['md'])
    resp_large = generate_answer_large(query, contexts_large, _tok_llm, _mdl_llm, max_new_tokens=640)

    # === MINI-DKB: keywords + snippets (from the top-k hits themselves, not their neighbors) ===
    # NOTE(review): this keeps the alphabetically first 12 of the (up to 24)
    # vocabulary terms rather than ranking them by weight — confirm that is intended.
    try:
        vec = TfidfVectorizer(ngram_range=(1,2), max_features=24)
        _ = vec.fit_transform([m['md'] for m in topk])
        dyn_keywords = sorted(vec.vocabulary_.keys())[:12]
    except Exception:
        # Best-effort: a keyword-extraction failure only blanks the keyword table.
        dyn_keywords = []
    snips = snippets_for(topk, limit=420)

    # ======== FORMATTED OUTPUT ========
    console.rule("[bold blue]RAG BBVA[/bold blue]")

    # 1) USER QUERY
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))

    # 2) SYSTEM RESPONSE SHORT
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print(Panel(Markdown(resp_short), title="SHORT", border_style="green"))

    # 3) RESPONSE PAGINATION (TOP-1)
    console.print("RESPONSE PAGINATION (TOP-1)", style="bold")
    console.print(describe_chunks(expanded_top1))

    # 4) SYSTEM RESPONSE LARGE
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print(Panel(Markdown(resp_large), title="LARGE", border_style="magenta"))

    # 5) PAGINATION TOP-K (one panel per hit, showing its full neighborhood)
    console.print("PAGINATION TOP-K", style="bold")
    for i, ex in enumerate(expanded_sets, 1):
        console.print(Panel(describe_chunks(ex), title=f"Top-{i} vecindario", border_style="cyan"))

    # 6) MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB)
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):", style="bold")

    # 6.1) Top-k as ordered by the reranker
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)", style="bold")
    console.print(describe_chunks(topk))

    # 6.2) Dynamic keywords
    console.print("Dynamic Keywords (boost)", style="bold")
    if dyn_keywords:
        kw_table = Table(show_header=False)
        for k in dyn_keywords:
            kw_table.add_row(k)
        console.print(kw_table)
    else:
        console.print("-")

    # 6.3) Snippet previews (an empty snippet is shown as "-")
    console.print("SNIPPETS (preview)", style="bold")
    for i, sn in enumerate(snips, 1):
        console.print(Panel.fit(Markdown(sn if sn.strip() else "-"), title=f"Snippet {i}", border_style="yellow"))


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [None]:
#@title CHAT
#@markdown Genera SIEMPRE ambas salidas (SHORT y LARGE) y las muestra con **rich** en el formato solicitado.

# Colab form parameters (each assignment must come BEFORE its #@param annotation).
# Slider bounds and the text placeholder are declared in the annotations themselves.
query = "¿Qué tres condiciones operativas debo cuidar para que TDC Open Market cuente en tablero/GanaMás en julio?"  #@param {type:"string", "label":"query", "placeholder":"Escribe tu pregunta BBVA..."}
pagination_neighbors = 1  #@param {type:"slider", "label":"pagination_neighbors", "min":0, "max":2, "step":1}
top_k = 5  #@param {type:"slider", "label":"top_k (LARGE)", "min":3, "max":8, "step":1}

# ===== Rich (instalar on-the-fly si no está) =====
# Probe for rich; install it on the fly only when the package is really missing.
# Catching ImportError (not Exception) keeps unrelated failures visible instead
# of answering them with a pip install.
try:
    import rich  # noqa: F401  (availability probe only)
except ImportError:
    import importlib, subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "rich"])
    # A package installed after interpreter start may be missed by the import
    # system's cached directory listings; refresh them before importing.
    importlib.invalidate_caches()

from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.table import Table
from rich.text import Text

# Shared console for all formatted output in this cell, fixed at 120 columns.
console = Console(width=120)

# ===== Helpers (apoyan al pipeline ya definido) =====
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np, json, sqlite3
from pathlib import Path

def _format_query_for_embedding(q: str) -> str:
    # Si definiste format_query en EMBEDDING (instruct-aware), úsalo.
    try:
        return format_query(q)  # opcional
    except NameError:
        return q

def embed_query(text: str) -> np.ndarray:
    """Embed a single query string and return its vector.

    The text goes through ``_format_query_for_embedding`` and is then embedded
    as a one-element batch with the module-level embedding tokenizer/model;
    the first (only) entry of the batch result is returned.
    """
    prepared = _format_query_for_embedding(text)
    batch_vectors = embed_texts([prepared], _tok_emb, _mdl_emb)
    return batch_vectors[0]

def retrieve(conn, query: str, topN: int = 10):
    """Run the vector search for ``query`` and load metadata for the hits.

    Returns ``(metas, nn)``: ``nn`` is the raw result of ``search_vec`` for the
    query embedding, and ``metas`` is ``fetch_meta`` applied to the first
    element of every ``nn`` row.
    """
    # search_vec is expected to apply the 'k = ?' limit internally.
    hits = search_vec(conn, embed_query(query), topN)
    hit_keys = [hit[0] for hit in hits]
    return fetch_meta(conn, hit_keys), hits

def describe_chunks(chunks_list):
    """Build a rich ``Table`` with one row per chunk: position, chunk_id, page, source.

    Rows are numbered from 1 in input order; the source column shows only the
    file-name component of each chunk's ``source_path``.
    """
    column_specs = (
        ("Rank/Item", {"justify": "right", "style": "cyan", "no_wrap": True}),
        ("chunk_id", {"overflow": "fold"}),
        ("page", {"justify": "right"}),
        ("source", {}),
    )
    table = Table(show_header=True, header_style="bold")
    for header, options in column_specs:
        table.add_column(header, **options)
    for offset, chunk in enumerate(chunks_list):
        table.add_row(
            str(offset + 1),
            chunk["chunk_id"],
            str(chunk["page"]),
            Path(chunk["source_path"]).name,
        )
    return table

def snippets_for(chunks_list, limit=420):
    """Return one preview string per chunk: its stripped ``md`` cut to ``limit`` characters.

    A missing or falsy ``md`` yields an empty string.
    """
    return [(chunk.get("md") or "").strip()[:limit] for chunk in chunks_list]

def fetch_by_chunk_ids(conn: sqlite3.Connection, chunk_ids: list[str]) -> list[dict]:
    """Look up ``chunks_meta`` rows by ``chunk_id`` and return them in input order.

    Ids without a matching row are skipped. An empty ``chunk_ids`` returns
    ``[]`` without touching the database.
    """
    if not chunk_ids:
        return []
    columns = ("rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path")
    # One '?' per id keeps the statement fully parameterized.
    placeholders = ','.join('?'*len(chunk_ids))
    q = f"""
    SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path
    FROM chunks_meta
    WHERE chunk_id IN ({placeholders})
    """
    by_chunk_id = {}
    for row in conn.execute(q, chunk_ids).fetchall():
        by_chunk_id[row[1]] = dict(zip(columns, row))
    # Re-walk the requested ids so the caller's ordering is preserved.
    found = []
    for cid in chunk_ids:
        if cid in by_chunk_id:
            found.append(by_chunk_id[cid])
    return found

def expand_pagination(conn: sqlite3.Connection, meta: dict, window: int = 1) -> list[dict]:
    """Return ``[meta, *neighbors]`` using the neighbor list stored on the chunk.

    ``meta["neighbors"]`` is expected to hold a JSON array of chunk_ids. At
    most ``window`` of them (taken from the start of the array) are fetched
    through ``fetch_by_chunk_ids``. A value that is missing, not valid JSON,
    or valid JSON but not an array (e.g. ``null`` or an object) is treated as
    "no neighbors", so the result is just ``[meta]``.
    """
    try:
        neighbor_ids = json.loads(meta.get("neighbors") or "[]")
    except (TypeError, ValueError):
        neighbor_ids = []
    # json.loads can legally return None/dict/str/number; only a list can be sliced below.
    if not isinstance(neighbor_ids, list):
        neighbor_ids = []
    neighbor_ids = neighbor_ids[: max(0, int(window))]
    if not neighbor_ids:
        # Nothing to look up: skip the database round-trip.
        return [meta]
    return [meta] + fetch_by_chunk_ids(conn, neighbor_ids)

# ===== Query pipeline =====
# Retrieve the initial candidate pool: max(10, 2 * top_k) nearest chunks.
metas, _ = retrieve(conn, query, topN=max(10, top_k * 2))

if not metas:
    # No candidates: emit the same section headings with placeholder content.
    console.rule("[bold red]RAG BBVA[/bold red]")
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("RESPONSE PAGINATION (TOP-1)\n-")
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("PAGINATION TOP-K\n-")
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):")
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)\n-")
    console.print("Dynamic Keywords (boost)\n-")
    console.print("SNIPPETS (preview)\n-")
else:
    # Point-wise yes/no rerank, then order candidates by descending score.
    scores = rerank(query, [m['md'] for m in metas], _tok_rer, _mdl_rer, _yes_id, _no_id)
    order = np.argsort(scores)[::-1]

    # === SHORT: top-1 chunk plus its neighbors ===
    top1 = metas[order[0]]
    expanded_top1 = expand_pagination(conn, top1, window=pagination_neighbors)
    contexts_short = [m['md'] for m in expanded_top1]
    resp_short = generate_answer_short(query, contexts_short, _tok_llm, _mdl_llm, max_new_tokens=320)

    # === LARGE: top-k chunks plus the neighbors of each ===
    topk_idx = order[:top_k]
    topk = [metas[i] for i in topk_idx]
    expanded_sets = []
    contexts_large = []
    seen_chunk_ids = set()
    for m in topk:
        expanded = expand_pagination(conn, m, window=pagination_neighbors)
        expanded_sets.append(expanded)  # kept complete for the per-hit display below
        for x in expanded:
            # Neighborhoods of different hits can overlap; pass each chunk to
            # the LLM only once so the prompt is not padded with repeats.
            if x['chunk_id'] in seen_chunk_ids:
                continue
            seen_chunk_ids.add(x['chunk_id'])
            contexts_large.append(x['md'])
    resp_large = generate_answer_large(query, contexts_large, _tok_llm, _mdl_llm, max_new_tokens=640)

    # === MINI-DKB: keywords + snippets (from the top-k hits themselves, not their neighbors) ===
    # NOTE(review): this keeps the alphabetically first 12 of the (up to 24)
    # vocabulary terms rather than ranking them by weight — confirm that is intended.
    try:
        vec = TfidfVectorizer(ngram_range=(1,2), max_features=24)
        _ = vec.fit_transform([m['md'] for m in topk])
        dyn_keywords = sorted(vec.vocabulary_.keys())[:12]
    except Exception:
        # Best-effort: a keyword-extraction failure only blanks the keyword table.
        dyn_keywords = []
    snips = snippets_for(topk, limit=420)

    # ======== FORMATTED OUTPUT ========
    console.rule("[bold blue]RAG BBVA[/bold blue]")

    # 1) USER QUERY
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))

    # 2) SYSTEM RESPONSE SHORT
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print(Panel(Markdown(resp_short), title="SHORT", border_style="green"))

    # 3) RESPONSE PAGINATION (TOP-1)
    console.print("RESPONSE PAGINATION (TOP-1)", style="bold")
    console.print(describe_chunks(expanded_top1))

    # 4) SYSTEM RESPONSE LARGE
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print(Panel(Markdown(resp_large), title="LARGE", border_style="magenta"))

    # 5) PAGINATION TOP-K (one panel per hit, showing its full neighborhood)
    console.print("PAGINATION TOP-K", style="bold")
    for i, ex in enumerate(expanded_sets, 1):
        console.print(Panel(describe_chunks(ex), title=f"Top-{i} vecindario", border_style="cyan"))

    # 6) MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB)
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):", style="bold")

    # 6.1) Top-k as ordered by the reranker
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)", style="bold")
    console.print(describe_chunks(topk))

    # 6.2) Dynamic keywords
    console.print("Dynamic Keywords (boost)", style="bold")
    if dyn_keywords:
        kw_table = Table(show_header=False)
        for k in dyn_keywords:
            kw_table.add_row(k)
        console.print(kw_table)
    else:
        console.print("-")

    # 6.3) Snippet previews (an empty snippet is shown as "-")
    console.print("SNIPPETS (preview)", style="bold")
    for i, sn in enumerate(snips, 1):
        console.print(Panel.fit(Markdown(sn if sn.strip() else "-"), title=f"Snippet {i}", border_style="yellow"))


In [None]:
#@title CHAT
#@markdown Genera SIEMPRE ambas salidas (SHORT y LARGE) y las muestra con **rich** en el formato solicitado.

# Colab form parameters (each assignment must come BEFORE its #@param annotation).
# Slider bounds and the text placeholder are declared in the annotations themselves.
query = "¿Cuál es la regla de IGBC (día, vigencia, límite) para TDC Preaprobada en julio?"  #@param {type:"string", "label":"query", "placeholder":"Escribe tu pregunta BBVA..."}
pagination_neighbors = 1  #@param {type:"slider", "label":"pagination_neighbors", "min":0, "max":2, "step":1}
top_k = 5  #@param {type:"slider", "label":"top_k (LARGE)", "min":3, "max":8, "step":1}

# ===== Rich (instalar on-the-fly si no está) =====
# Probe for rich; install it on the fly only when the package is really missing.
# Catching ImportError (not Exception) keeps unrelated failures visible instead
# of answering them with a pip install.
try:
    import rich  # noqa: F401  (availability probe only)
except ImportError:
    import importlib, subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "rich"])
    # A package installed after interpreter start may be missed by the import
    # system's cached directory listings; refresh them before importing.
    importlib.invalidate_caches()

from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.table import Table
from rich.text import Text

# Shared console for all formatted output in this cell, fixed at 120 columns.
console = Console(width=120)

# ===== Helpers (apoyan al pipeline ya definido) =====
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np, json, sqlite3
from pathlib import Path

def _format_query_for_embedding(q: str) -> str:
    # Si definiste format_query en EMBEDDING (instruct-aware), úsalo.
    try:
        return format_query(q)  # opcional
    except NameError:
        return q

def embed_query(text: str) -> np.ndarray:
    """Embed a single query string and return its vector.

    The text goes through ``_format_query_for_embedding`` and is then embedded
    as a one-element batch with the module-level embedding tokenizer/model;
    the first (only) entry of the batch result is returned.
    """
    prepared = _format_query_for_embedding(text)
    batch_vectors = embed_texts([prepared], _tok_emb, _mdl_emb)
    return batch_vectors[0]

def retrieve(conn, query: str, topN: int = 10):
    """Run the vector search for ``query`` and load metadata for the hits.

    Returns ``(metas, nn)``: ``nn`` is the raw result of ``search_vec`` for the
    query embedding, and ``metas`` is ``fetch_meta`` applied to the first
    element of every ``nn`` row.
    """
    # search_vec is expected to apply the 'k = ?' limit internally.
    hits = search_vec(conn, embed_query(query), topN)
    hit_keys = [hit[0] for hit in hits]
    return fetch_meta(conn, hit_keys), hits

def describe_chunks(chunks_list):
    """Build a rich ``Table`` with one row per chunk: position, chunk_id, page, source.

    Rows are numbered from 1 in input order; the source column shows only the
    file-name component of each chunk's ``source_path``.
    """
    column_specs = (
        ("Rank/Item", {"justify": "right", "style": "cyan", "no_wrap": True}),
        ("chunk_id", {"overflow": "fold"}),
        ("page", {"justify": "right"}),
        ("source", {}),
    )
    table = Table(show_header=True, header_style="bold")
    for header, options in column_specs:
        table.add_column(header, **options)
    for offset, chunk in enumerate(chunks_list):
        table.add_row(
            str(offset + 1),
            chunk["chunk_id"],
            str(chunk["page"]),
            Path(chunk["source_path"]).name,
        )
    return table

def snippets_for(chunks_list, limit=420):
    """Return one preview string per chunk: its stripped ``md`` cut to ``limit`` characters.

    A missing or falsy ``md`` yields an empty string.
    """
    return [(chunk.get("md") or "").strip()[:limit] for chunk in chunks_list]

def fetch_by_chunk_ids(conn: sqlite3.Connection, chunk_ids: list[str]) -> list[dict]:
    """Look up ``chunks_meta`` rows by ``chunk_id`` and return them in input order.

    Ids without a matching row are skipped. An empty ``chunk_ids`` returns
    ``[]`` without touching the database.
    """
    if not chunk_ids:
        return []
    columns = ("rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path")
    # One '?' per id keeps the statement fully parameterized.
    placeholders = ','.join('?'*len(chunk_ids))
    q = f"""
    SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path
    FROM chunks_meta
    WHERE chunk_id IN ({placeholders})
    """
    by_chunk_id = {}
    for row in conn.execute(q, chunk_ids).fetchall():
        by_chunk_id[row[1]] = dict(zip(columns, row))
    # Re-walk the requested ids so the caller's ordering is preserved.
    found = []
    for cid in chunk_ids:
        if cid in by_chunk_id:
            found.append(by_chunk_id[cid])
    return found

def expand_pagination(conn: sqlite3.Connection, meta: dict, window: int = 1) -> list[dict]:
    """Return ``[meta, *neighbors]`` using the neighbor list stored on the chunk.

    ``meta["neighbors"]`` is expected to hold a JSON array of chunk_ids. At
    most ``window`` of them (taken from the start of the array) are fetched
    through ``fetch_by_chunk_ids``. A value that is missing, not valid JSON,
    or valid JSON but not an array (e.g. ``null`` or an object) is treated as
    "no neighbors", so the result is just ``[meta]``.
    """
    try:
        neighbor_ids = json.loads(meta.get("neighbors") or "[]")
    except (TypeError, ValueError):
        neighbor_ids = []
    # json.loads can legally return None/dict/str/number; only a list can be sliced below.
    if not isinstance(neighbor_ids, list):
        neighbor_ids = []
    neighbor_ids = neighbor_ids[: max(0, int(window))]
    if not neighbor_ids:
        # Nothing to look up: skip the database round-trip.
        return [meta]
    return [meta] + fetch_by_chunk_ids(conn, neighbor_ids)

# ===== Query pipeline =====
# Retrieve the initial candidate pool: max(10, 2 * top_k) nearest chunks.
metas, _ = retrieve(conn, query, topN=max(10, top_k * 2))

if not metas:
    # No candidates: emit the same section headings with placeholder content.
    console.rule("[bold red]RAG BBVA[/bold red]")
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("RESPONSE PAGINATION (TOP-1)\n-")
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("PAGINATION TOP-K\n-")
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):")
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)\n-")
    console.print("Dynamic Keywords (boost)\n-")
    console.print("SNIPPETS (preview)\n-")
else:
    # Point-wise yes/no rerank, then order candidates by descending score.
    scores = rerank(query, [m['md'] for m in metas], _tok_rer, _mdl_rer, _yes_id, _no_id)
    order = np.argsort(scores)[::-1]

    # === SHORT: top-1 chunk plus its neighbors ===
    top1 = metas[order[0]]
    expanded_top1 = expand_pagination(conn, top1, window=pagination_neighbors)
    contexts_short = [m['md'] for m in expanded_top1]
    resp_short = generate_answer_short(query, contexts_short, _tok_llm, _mdl_llm, max_new_tokens=320)

    # === LARGE: top-k chunks plus the neighbors of each ===
    topk_idx = order[:top_k]
    topk = [metas[i] for i in topk_idx]
    expanded_sets = []
    contexts_large = []
    seen_chunk_ids = set()
    for m in topk:
        expanded = expand_pagination(conn, m, window=pagination_neighbors)
        expanded_sets.append(expanded)  # kept complete for the per-hit display below
        for x in expanded:
            # Neighborhoods of different hits can overlap; pass each chunk to
            # the LLM only once so the prompt is not padded with repeats.
            if x['chunk_id'] in seen_chunk_ids:
                continue
            seen_chunk_ids.add(x['chunk_id'])
            contexts_large.append(x['md'])
    resp_large = generate_answer_large(query, contexts_large, _tok_llm, _mdl_llm, max_new_tokens=640)

    # === MINI-DKB: keywords + snippets (from the top-k hits themselves, not their neighbors) ===
    # NOTE(review): this keeps the alphabetically first 12 of the (up to 24)
    # vocabulary terms rather than ranking them by weight — confirm that is intended.
    try:
        vec = TfidfVectorizer(ngram_range=(1,2), max_features=24)
        _ = vec.fit_transform([m['md'] for m in topk])
        dyn_keywords = sorted(vec.vocabulary_.keys())[:12]
    except Exception:
        # Best-effort: a keyword-extraction failure only blanks the keyword table.
        dyn_keywords = []
    snips = snippets_for(topk, limit=420)

    # ======== FORMATTED OUTPUT ========
    console.rule("[bold blue]RAG BBVA[/bold blue]")

    # 1) USER QUERY
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))

    # 2) SYSTEM RESPONSE SHORT
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print(Panel(Markdown(resp_short), title="SHORT", border_style="green"))

    # 3) RESPONSE PAGINATION (TOP-1)
    console.print("RESPONSE PAGINATION (TOP-1)", style="bold")
    console.print(describe_chunks(expanded_top1))

    # 4) SYSTEM RESPONSE LARGE
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print(Panel(Markdown(resp_large), title="LARGE", border_style="magenta"))

    # 5) PAGINATION TOP-K (one panel per hit, showing its full neighborhood)
    console.print("PAGINATION TOP-K", style="bold")
    for i, ex in enumerate(expanded_sets, 1):
        console.print(Panel(describe_chunks(ex), title=f"Top-{i} vecindario", border_style="cyan"))

    # 6) MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB)
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):", style="bold")

    # 6.1) Top-k as ordered by the reranker
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)", style="bold")
    console.print(describe_chunks(topk))

    # 6.2) Dynamic keywords
    console.print("Dynamic Keywords (boost)", style="bold")
    if dyn_keywords:
        kw_table = Table(show_header=False)
        for k in dyn_keywords:
            kw_table.add_row(k)
        console.print(kw_table)
    else:
        console.print("-")

    # 6.3) Snippet previews (an empty snippet is shown as "-")
    console.print("SNIPPETS (preview)", style="bold")
    for i, sn in enumerate(snips, 1):
        console.print(Panel.fit(Markdown(sn if sn.strip() else "-"), title=f"Snippet {i}", border_style="yellow"))


In [None]:
#@title CHAT
#@markdown Genera SIEMPRE ambas salidas (SHORT y LARGE) y las muestra con **rich** en el formato solicitado.

# Colab form parameters (each assignment must come BEFORE its #@param annotation).
# Slider bounds and the text placeholder are declared in the annotations themselves.
query = "¿qué cambios importantes hubo entre mayo y junio para la Banca Comercial?"  #@param {type:"string", "label":"query", "placeholder":"Escribe tu pregunta BBVA..."}
pagination_neighbors = 1  #@param {type:"slider", "label":"pagination_neighbors", "min":0, "max":2, "step":1}
top_k = 5  #@param {type:"slider", "label":"top_k (LARGE)", "min":3, "max":8, "step":1}

# ===== Rich (instalar on-the-fly si no está) =====
# Probe for rich; install it on the fly only when the package is really missing.
# Catching ImportError (not Exception) keeps unrelated failures visible instead
# of answering them with a pip install.
try:
    import rich  # noqa: F401  (availability probe only)
except ImportError:
    import importlib, subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "rich"])
    # A package installed after interpreter start may be missed by the import
    # system's cached directory listings; refresh them before importing.
    importlib.invalidate_caches()

from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.table import Table
from rich.text import Text

# Shared console for all formatted output in this cell, fixed at 120 columns.
console = Console(width=120)

# ===== Helpers (apoyan al pipeline ya definido) =====
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np, json, sqlite3
from pathlib import Path

def _format_query_for_embedding(q: str) -> str:
    # Si definiste format_query en EMBEDDING (instruct-aware), úsalo.
    try:
        return format_query(q)  # opcional
    except NameError:
        return q

def embed_query(text: str) -> np.ndarray:
    """Embed a single query string and return its vector.

    The text goes through ``_format_query_for_embedding`` and is then embedded
    as a one-element batch with the module-level embedding tokenizer/model;
    the first (only) entry of the batch result is returned.
    """
    prepared = _format_query_for_embedding(text)
    batch_vectors = embed_texts([prepared], _tok_emb, _mdl_emb)
    return batch_vectors[0]

def retrieve(conn, query: str, topN: int = 10):
    """Embed ``query``, run the vector search and load metadata for the hits.

    Args:
        conn: Database connection handed to ``search_vec`` and ``fetch_meta``.
        query: User query text.
        topN: Number of neighbours requested from ``search_vec``.

    Returns:
        ``(metas, nn)`` where ``nn`` is the raw ``search_vec`` result and
        ``metas`` is what ``fetch_meta`` returns for the first field of each
        hit (presumably the row id -- confirm against ``search_vec``).
    """
    query_vector = embed_query(query)
    # search_vec is expected to apply the 'k = ?' constraint internally.
    hits = search_vec(conn, query_vector, topN)
    hit_keys = [hit[0] for hit in hits]
    return fetch_meta(conn, hit_keys), hits

def describe_chunks(chunks_list):
    """Build a rich table with one row per chunk: rank, chunk_id, page, source.

    Each item must provide the keys ``chunk_id``, ``page`` and ``source_path``;
    only the file name of ``source_path`` is shown. Ranks start at 1 and follow
    the order of ``chunks_list``.
    """
    column_specs = (
        ("Rank/Item", {"justify": "right", "style": "cyan", "no_wrap": True}),
        ("chunk_id", {"overflow": "fold"}),
        ("page", {"justify": "right"}),
        ("source", {}),
    )
    listing = Table(show_header=True, header_style="bold")
    for header, options in column_specs:
        listing.add_column(header, **options)
    for position, chunk in enumerate(chunks_list, start=1):
        listing.add_row(
            str(position),
            chunk["chunk_id"],
            str(chunk["page"]),
            Path(chunk["source_path"]).name,
        )
    return listing

def snippets_for(chunks_list, limit=420):
    """Return a preview string for each chunk.

    The ``md`` value of every chunk (a missing or falsy value counts as the
    empty string) is stripped of surrounding whitespace and cut to at most
    ``limit`` characters. Output order matches ``chunks_list``.
    """
    return [((chunk.get("md") or "").strip())[:limit] for chunk in chunks_list]

def fetch_by_chunk_ids(conn: sqlite3.Connection, chunk_ids: list[str]) -> list[dict]:
    """Load ``chunks_meta`` rows for the given chunk ids, keeping input order.

    Args:
        conn: Open SQLite connection that contains the ``chunks_meta`` table.
        chunk_ids: Chunk ids to look up; bound as SQL parameters.

    Returns:
        One dict per requested id that exists in the table, with the keys
        ``rowid``, ``chunk_id``, ``element_id``, ``page``, ``md``,
        ``neighbors`` and ``source_path``. Ids without a matching row are
        dropped; an empty ``chunk_ids`` returns ``[]`` without querying.
    """
    if not chunk_ids:
        return []

    columns = ("rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path")
    placeholders = ",".join("?" for _ in chunk_ids)
    sql = f"""
    SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path
    FROM chunks_meta
    WHERE chunk_id IN ({placeholders})
    """

    # Index the rows by chunk_id (second selected column) so the result can be
    # re-emitted in the order the caller asked for.
    by_chunk_id = {}
    for row in conn.execute(sql, chunk_ids).fetchall():
        by_chunk_id[row[1]] = dict(zip(columns, row))

    found = []
    for cid in chunk_ids:
        if cid in by_chunk_id:
            found.append(by_chunk_id[cid])
    return found

def expand_pagination(conn: sqlite3.Connection, meta: dict, window: int = 1) -> list[dict]:
    """
    Return ``[meta, *neighbors]`` for one chunk.

    Neighbor ids come from ``meta["neighbors"]``, which may be a JSON-encoded
    list or an already-decoded list/tuple. Anything else (missing value,
    ``None``, invalid JSON, or JSON that is not a list) is treated as "no
    neighbors" rather than raising. The ids are cut to the first ``window``
    entries and resolved with ``fetch_by_chunk_ids``.

    Args:
        conn: Open SQLite connection passed through to ``fetch_by_chunk_ids``.
        meta: Metadata dict of the main chunk.
        window: Maximum number of neighbors to add; values <= 0 add none.

    Returns:
        A list whose first element is ``meta`` itself, followed by the
        metadata dicts of the neighbors that were found.
    """
    raw_neighbors = meta.get("neighbors", "[]")
    if isinstance(raw_neighbors, (list, tuple)):
        # Already decoded upstream: use it as-is instead of failing in json.loads.
        neighbor_ids = list(raw_neighbors)
    else:
        try:
            neighbor_ids = json.loads(raw_neighbors)
        except (TypeError, ValueError):
            # TypeError: non-string input such as None; ValueError: invalid JSON.
            neighbor_ids = []
        if not isinstance(neighbor_ids, list):
            # Valid JSON that is not a list (object, number, null) cannot be sliced as ids.
            neighbor_ids = []

    neighbor_ids = neighbor_ids[: max(0, int(window))]
    if not neighbor_ids:
        # Nothing to look up; skip the database round trip.
        return [meta]
    return [meta] + fetch_by_chunk_ids(conn, neighbor_ids)

# ===== Query pipeline =====
# Retrieve the initial candidates. At least 10 (or 2 * top_k, whichever is
# larger) are requested so the reranker sees more candidates than the final cut.
metas, _ = retrieve(conn, query, topN=max(10, top_k * 2))

if not metas:
    # Nothing was retrieved: print every section header with a placeholder body.
    console.rule("[bold red]RAG BBVA[/bold red]")
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("RESPONSE PAGINATION (TOP-1)\n-")
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("PAGINATION TOP-K\n-")
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):")
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)\n-")
    console.print("Dynamic Keywords (boost)\n-")
    console.print("SNIPPETS (preview)\n-")
else:
    # Point-wise yes/no rerank: one score per candidate, highest score first.
    scores = rerank(query, [m['md'] for m in metas], _tok_rer, _mdl_rer, _yes_id, _no_id)
    order = np.argsort(scores)[::-1]

    # === SHORT: top-1 chunk plus its neighborhood ===
    top1 = metas[order[0]]
    expanded_top1 = expand_pagination(conn, top1, window=pagination_neighbors)
    contexts_short = [m['md'] for m in expanded_top1]
    resp_short = generate_answer_short(query, contexts_short, _tok_llm, _mdl_llm, max_new_tokens=320)

    # === LARGE: top-k chunks, each expanded with its neighborhood ===
    topk_idx = order[:top_k]
    topk = [metas[i] for i in topk_idx]
    expanded_sets = [expand_pagination(conn, m, window=pagination_neighbors) for m in topk]

    # Neighborhoods of different top-k chunks can overlap (a neighbor may itself
    # be a top-k hit), so each chunk is sent to the LLM only once; the first
    # occurrence wins, which keeps the reranker order. Chunks without a
    # chunk_id are never treated as duplicates.
    contexts_large = []
    seen_chunk_ids = set()
    for expanded in expanded_sets:
        for x in expanded:
            cid = x.get("chunk_id")
            if cid is not None:
                if cid in seen_chunk_ids:
                    continue
                seen_chunk_ids.add(cid)
            contexts_large.append(x['md'])
    resp_large = generate_answer_large(query, contexts_large, _tok_llm, _mdl_llm, max_new_tokens=640)

    # === MINI-DKB: keywords + snippets (over the main top-k chunks, not the expanded sets) ===
    try:
        vec = TfidfVectorizer(ngram_range=(1,2), max_features=24)
        tfidf = vec.fit_transform([(m.get('md') or "") for m in topk])
        # Rank terms by their summed TF-IDF weight across the top-k chunks and
        # keep the 12 heaviest. Taking the alphabetically first 12 vocabulary
        # entries would drop terms purely because of their spelling.
        term_weights = np.asarray(tfidf.sum(axis=0)).ravel()
        terms = vec.get_feature_names_out()
        # Stable sort on the negated weights: ties keep the alphabetical
        # order of the feature names.
        ranked = np.argsort(-term_weights, kind="stable")[:12]
        dyn_keywords = [str(terms[i]) for i in ranked]
    except ValueError:
        # Raised by TfidfVectorizer when no vocabulary can be built (e.g. no
        # documents or only stop words); keywords are optional, so show none.
        dyn_keywords = []
    snips = snippets_for(topk, limit=420)

    # ======== FORMATTED OUTPUT ========
    console.rule("[bold blue]RAG BBVA[/bold blue]")

    # 1) USER QUERY
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))

    # 2) SYSTEM RESPONSE SHORT
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print(Panel(Markdown(resp_short), title="SHORT", border_style="green"))

    # 3) RESPONSE PAGINATION (TOP-1)
    console.print("RESPONSE PAGINATION (TOP-1)", style="bold")
    console.print(describe_chunks(expanded_top1))

    # 4) SYSTEM RESPONSE LARGE
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print(Panel(Markdown(resp_large), title="LARGE", border_style="magenta"))

    # 5) PAGINATION TOP-K (one panel per top-k chunk, showing its full neighborhood)
    console.print("PAGINATION TOP-K", style="bold")
    for i, ex in enumerate(expanded_sets, 1):
        console.print(Panel(describe_chunks(ex), title=f"Top-{i} vecindario", border_style="cyan"))

    # 6) MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB)
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):", style="bold")

    # 6.1) Reranker top-k
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)", style="bold")
    console.print(describe_chunks(topk))

    # 6.2) Dynamic keywords
    console.print("Dynamic Keywords (boost)", style="bold")
    if dyn_keywords:
        kw_table = Table(show_header=False)
        for k in dyn_keywords:
            kw_table.add_row(k)
        console.print(kw_table)
    else:
        console.print("-")

    # 6.3) Snippets (preview); an empty snippet is rendered as "-"
    console.print("SNIPPETS (preview)", style="bold")
    for i, sn in enumerate(snips, 1):
        console.print(Panel.fit(Markdown(sn if sn.strip() else "-"), title=f"Snippet {i}", border_style="yellow"))


In [None]:
#@title CHAT
#@markdown Genera SIEMPRE ambas salidas (SHORT y LARGE) y las muestra con **rich** en el formato solicitado.

# Form parameters (the assignment must come BEFORE the #@param marker)
query = "¿qué cambios importantes hubo entre junio y julio para la Banca Comercial?"  #@param {type:"string", "label":"query", "placeholder":"Escribe tu pregunta BBVA..."}
pagination_neighbors = 1  #@param {type:"slider", "label":"pagination_neighbors", "min":0, "max":2, "step":1}
top_k = 5  #@param {type:"slider", "label":"top_k (LARGE)", "min":3, "max":8, "step":1}

# ===== Rich (installed on the fly if it is missing) =====
try:
    from rich.console import Console
    from rich.panel import Panel
    from rich.markdown import Markdown
    from rich.table import Table
    from rich.text import Text
except Exception:
    # Any failure while importing rich triggers a quiet pip install into the
    # interpreter running this kernel, after which the imports are retried once.
    import sys, subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "rich"])
    from rich.console import Console
    from rich.panel import Panel
    from rich.markdown import Markdown
    from rich.table import Table
    from rich.text import Text

# Console used for every print in this cell; width is fixed at 120 columns.
console = Console(width=120)

# ===== Helpers (apoyan al pipeline ya definido) =====
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np, json, sqlite3
from pathlib import Path

def _format_query_for_embedding(q: str) -> str:
    """Apply the optional ``format_query`` hook to a query before embedding.

    ``format_query`` is looked up in the module namespace at call time. It is
    optional (the original note says it may be defined by the embedding cell
    to build an instruct-aware query); when it is absent, or not callable, the
    query is returned unchanged.

    The hook is resolved first and called outside of any exception handler, so
    a ``NameError`` raised *inside* ``format_query`` propagates instead of being
    mistaken for "hook not defined" and silently falling back to the raw query.

    Args:
        q: Raw user query.

    Returns:
        The value returned by ``format_query(q)``, or ``q`` itself when no hook
        is available.
    """
    formatter = globals().get("format_query")
    if not callable(formatter):
        return q
    return formatter(q)

def embed_query(text: str) -> np.ndarray:
    """Embed a single query string.

    The text goes through ``_format_query_for_embedding`` and is then sent to
    ``embed_texts`` as a one-element batch, using the module-level ``_tok_emb``
    and ``_mdl_emb``. The first (only) entry of that result is returned.
    """
    prepared = _format_query_for_embedding(text)
    batch = embed_texts([prepared], _tok_emb, _mdl_emb)
    return batch[0]

def retrieve(conn, query: str, topN: int = 10):
    """Embed ``query``, run the vector search and load metadata for the hits.

    Args:
        conn: Database connection handed to ``search_vec`` and ``fetch_meta``.
        query: User query text.
        topN: Number of neighbours requested from ``search_vec``.

    Returns:
        ``(metas, nn)`` where ``nn`` is the raw ``search_vec`` result and
        ``metas`` is what ``fetch_meta`` returns for the first field of each
        hit (presumably the row id -- confirm against ``search_vec``).
    """
    query_vector = embed_query(query)
    # search_vec is expected to apply the 'k = ?' constraint internally.
    hits = search_vec(conn, query_vector, topN)
    hit_keys = [hit[0] for hit in hits]
    return fetch_meta(conn, hit_keys), hits

def describe_chunks(chunks_list):
    """Build a rich table with one row per chunk: rank, chunk_id, page, source.

    Each item must provide the keys ``chunk_id``, ``page`` and ``source_path``;
    only the file name of ``source_path`` is shown. Ranks start at 1 and follow
    the order of ``chunks_list``.
    """
    column_specs = (
        ("Rank/Item", {"justify": "right", "style": "cyan", "no_wrap": True}),
        ("chunk_id", {"overflow": "fold"}),
        ("page", {"justify": "right"}),
        ("source", {}),
    )
    listing = Table(show_header=True, header_style="bold")
    for header, options in column_specs:
        listing.add_column(header, **options)
    for position, chunk in enumerate(chunks_list, start=1):
        listing.add_row(
            str(position),
            chunk["chunk_id"],
            str(chunk["page"]),
            Path(chunk["source_path"]).name,
        )
    return listing

def snippets_for(chunks_list, limit=420):
    """Return a preview string for each chunk.

    The ``md`` value of every chunk (a missing or falsy value counts as the
    empty string) is stripped of surrounding whitespace and cut to at most
    ``limit`` characters. Output order matches ``chunks_list``.
    """
    return [((chunk.get("md") or "").strip())[:limit] for chunk in chunks_list]

def fetch_by_chunk_ids(conn: sqlite3.Connection, chunk_ids: list[str]) -> list[dict]:
    """Load ``chunks_meta`` rows for the given chunk ids, keeping input order.

    Args:
        conn: Open SQLite connection that contains the ``chunks_meta`` table.
        chunk_ids: Chunk ids to look up; bound as SQL parameters.

    Returns:
        One dict per requested id that exists in the table, with the keys
        ``rowid``, ``chunk_id``, ``element_id``, ``page``, ``md``,
        ``neighbors`` and ``source_path``. Ids without a matching row are
        dropped; an empty ``chunk_ids`` returns ``[]`` without querying.
    """
    if not chunk_ids:
        return []

    columns = ("rowid", "chunk_id", "element_id", "page", "md", "neighbors", "source_path")
    placeholders = ",".join("?" for _ in chunk_ids)
    sql = f"""
    SELECT rowid, chunk_id, element_id, page, md, neighbors, source_path
    FROM chunks_meta
    WHERE chunk_id IN ({placeholders})
    """

    # Index the rows by chunk_id (second selected column) so the result can be
    # re-emitted in the order the caller asked for.
    by_chunk_id = {}
    for row in conn.execute(sql, chunk_ids).fetchall():
        by_chunk_id[row[1]] = dict(zip(columns, row))

    found = []
    for cid in chunk_ids:
        if cid in by_chunk_id:
            found.append(by_chunk_id[cid])
    return found

def expand_pagination(conn: sqlite3.Connection, meta: dict, window: int = 1) -> list[dict]:
    """
    Return ``[meta, *neighbors]`` for one chunk.

    Neighbor ids come from ``meta["neighbors"]``, which may be a JSON-encoded
    list or an already-decoded list/tuple. Anything else (missing value,
    ``None``, invalid JSON, or JSON that is not a list) is treated as "no
    neighbors" rather than raising. The ids are cut to the first ``window``
    entries and resolved with ``fetch_by_chunk_ids``.

    Args:
        conn: Open SQLite connection passed through to ``fetch_by_chunk_ids``.
        meta: Metadata dict of the main chunk.
        window: Maximum number of neighbors to add; values <= 0 add none.

    Returns:
        A list whose first element is ``meta`` itself, followed by the
        metadata dicts of the neighbors that were found.
    """
    raw_neighbors = meta.get("neighbors", "[]")
    if isinstance(raw_neighbors, (list, tuple)):
        # Already decoded upstream: use it as-is instead of failing in json.loads.
        neighbor_ids = list(raw_neighbors)
    else:
        try:
            neighbor_ids = json.loads(raw_neighbors)
        except (TypeError, ValueError):
            # TypeError: non-string input such as None; ValueError: invalid JSON.
            neighbor_ids = []
        if not isinstance(neighbor_ids, list):
            # Valid JSON that is not a list (object, number, null) cannot be sliced as ids.
            neighbor_ids = []

    neighbor_ids = neighbor_ids[: max(0, int(window))]
    if not neighbor_ids:
        # Nothing to look up; skip the database round trip.
        return [meta]
    return [meta] + fetch_by_chunk_ids(conn, neighbor_ids)

# ===== Query pipeline =====
# Retrieve the initial candidates. At least 10 (or 2 * top_k, whichever is
# larger) are requested so the reranker sees more candidates than the final cut.
metas, _ = retrieve(conn, query, topN=max(10, top_k * 2))

if not metas:
    # Nothing was retrieved: print every section header with a placeholder body.
    console.rule("[bold red]RAG BBVA[/bold red]")
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("RESPONSE PAGINATION (TOP-1)\n-")
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print("No hay candidatos.\n")
    console.print("PAGINATION TOP-K\n-")
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):")
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)\n-")
    console.print("Dynamic Keywords (boost)\n-")
    console.print("SNIPPETS (preview)\n-")
else:
    # Point-wise yes/no rerank: one score per candidate, highest score first.
    scores = rerank(query, [m['md'] for m in metas], _tok_rer, _mdl_rer, _yes_id, _no_id)
    order = np.argsort(scores)[::-1]

    # === SHORT: top-1 chunk plus its neighborhood ===
    top1 = metas[order[0]]
    expanded_top1 = expand_pagination(conn, top1, window=pagination_neighbors)
    contexts_short = [m['md'] for m in expanded_top1]
    resp_short = generate_answer_short(query, contexts_short, _tok_llm, _mdl_llm, max_new_tokens=320)

    # === LARGE: top-k chunks, each expanded with its neighborhood ===
    topk_idx = order[:top_k]
    topk = [metas[i] for i in topk_idx]
    expanded_sets = [expand_pagination(conn, m, window=pagination_neighbors) for m in topk]

    # Neighborhoods of different top-k chunks can overlap (a neighbor may itself
    # be a top-k hit), so each chunk is sent to the LLM only once; the first
    # occurrence wins, which keeps the reranker order. Chunks without a
    # chunk_id are never treated as duplicates.
    contexts_large = []
    seen_chunk_ids = set()
    for expanded in expanded_sets:
        for x in expanded:
            cid = x.get("chunk_id")
            if cid is not None:
                if cid in seen_chunk_ids:
                    continue
                seen_chunk_ids.add(cid)
            contexts_large.append(x['md'])
    resp_large = generate_answer_large(query, contexts_large, _tok_llm, _mdl_llm, max_new_tokens=640)

    # === MINI-DKB: keywords + snippets (over the main top-k chunks, not the expanded sets) ===
    try:
        vec = TfidfVectorizer(ngram_range=(1,2), max_features=24)
        tfidf = vec.fit_transform([(m.get('md') or "") for m in topk])
        # Rank terms by their summed TF-IDF weight across the top-k chunks and
        # keep the 12 heaviest. Taking the alphabetically first 12 vocabulary
        # entries would drop terms purely because of their spelling.
        term_weights = np.asarray(tfidf.sum(axis=0)).ravel()
        terms = vec.get_feature_names_out()
        # Stable sort on the negated weights: ties keep the alphabetical
        # order of the feature names.
        ranked = np.argsort(-term_weights, kind="stable")[:12]
        dyn_keywords = [str(terms[i]) for i in ranked]
    except ValueError:
        # Raised by TfidfVectorizer when no vocabulary can be built (e.g. no
        # documents or only stop words); keywords are optional, so show none.
        dyn_keywords = []
    snips = snippets_for(topk, limit=420)

    # ======== FORMATTED OUTPUT ========
    console.rule("[bold blue]RAG BBVA[/bold blue]")

    # 1) USER QUERY
    console.print("USER QUERY", style="bold")
    console.print(Panel.fit(Text(query), border_style="blue"))

    # 2) SYSTEM RESPONSE SHORT
    console.print("SYSTEM RESPONSE SHORT: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + TOP-1 + PAGINATION TOP-1)", style="bold")
    console.print(Panel(Markdown(resp_short), title="SHORT", border_style="green"))

    # 3) RESPONSE PAGINATION (TOP-1)
    console.print("RESPONSE PAGINATION (TOP-1)", style="bold")
    console.print(describe_chunks(expanded_top1))

    # 4) SYSTEM RESPONSE LARGE
    console.print("SYSTEM RESPONSE LARGE: RESPONSE ONLY (CONSTRUCCIÓN = QUERY + ANÁLISIS TOP-K + PAGINATION TOP-K)", style="bold")
    console.print(Panel(Markdown(resp_large), title="LARGE", border_style="magenta"))

    # 5) PAGINATION TOP-K (one panel per top-k chunk, showing its full neighborhood)
    console.print("PAGINATION TOP-K", style="bold")
    for i, ex in enumerate(expanded_sets, 1):
        console.print(Panel(describe_chunks(ex), title=f"Top-{i} vecindario", border_style="cyan"))

    # 6) MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB)
    console.print("MINI-DOCUMENT-KNOWLEDGE-BASE (MINIDKB):", style="bold")

    # 6.1) Reranker top-k
    console.print("TOP-K DEL RERANKER (ordenado tras re-ranking)", style="bold")
    console.print(describe_chunks(topk))

    # 6.2) Dynamic keywords
    console.print("Dynamic Keywords (boost)", style="bold")
    if dyn_keywords:
        kw_table = Table(show_header=False)
        for k in dyn_keywords:
            kw_table.add_row(k)
        console.print(kw_table)
    else:
        console.print("-")

    # 6.3) Snippets (preview); an empty snippet is rendered as "-"
    console.print("SNIPPETS (preview)", style="bold")
    for i, sn in enumerate(snips, 1):
        console.print(Panel.fit(Markdown(sn if sn.strip() else "-"), title=f"Snippet {i}", border_style="yellow"))
