# 🧠 Generador IA de bancos Moodle XML + Equilibrado y PDF

Este cuaderno:
1) **Sube un documento** base (PDF/DOCX/TXT).
2) La IA **genera preguntas MCQ** (autocontenidas, 1 correcta + 2 distractores plausibles).
3) **Equilibra** longitudes de opciones (±4 palabras) y **baraja** opciones.
4) Exporta **Moodle XML** listo para importar y un **PDF** de revisión (✅ en la correcta).

**Uso:** ejecuta las celdas en orden. Cuando pida *subir archivo*, selecciona tu documento fuente.


In [None]:
# Install the third-party packages used by the cells below (openai is pinned
# to the 1.x series); --quiet suppresses pip's progress output.
!pip install openai==1.* beautifulsoup4 lxml reportlab pypdf python-docx tqdm --quiet

In [None]:
# 🔑 Establece tu API key de OpenAI de forma segura (no queda guardada en el cuaderno)
import os
from getpass import getpass

# Ask for the key interactively only when the environment does not already
# hold a non-empty OPENAI_API_KEY; getpass keeps the value out of the output.
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass(
        "Pega tu OPENAI_API_KEY y pulsa Enter: "
    )
print("‚úÖ API key configurada en el entorno de ejecuci√≥n.")

‚úÖ API key configurada en el entorno de ejecuci√≥n.


In [None]:
# =========================
#  Generar ítems con IA + equilibrar + exportar XML + PDF
# =========================
import os, json, random, re, traceback
from tqdm import tqdm
from pypdf import PdfReader
from docx import Document
from google.colab import files
from bs4 import BeautifulSoup
from bs4.element import Tag
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from lxml import etree
from openai import OpenAI

# ===== Parameters =====
THRESH_DIFF = 4            # max word-count gap allowed between the correct option and a distractor
RANDOM_SEED = 42           # set to None for non-deterministic randomness
ITEMS_PER_BLOCK = 6        # questions requested from the model per text block
CHUNK_MAX_CHARS = 6000     # approximate size of each block of the source document
OPENAI_MODEL = "gpt-4o-mini"  # model used for generation

# Seed the module-level generator so runs are repeatable when a seed is given.
if RANDOM_SEED is not None:
    random.seed(RANDOM_SEED)

# ===== Load the source document =====
print("üìÅ Sube tu documento base (PDF/DOCX/TXT)")
up = files.upload()
if not up:
    # Nothing was uploaded: fail with a clear message instead of the opaque
    # IndexError that indexing an empty key list would produce.
    raise RuntimeError(
        "No se subio ningun archivo: vuelve a ejecutar la celda y selecciona un documento."
    )
# Only the first uploaded file is used as the source document.
SRC = next(iter(up))
print(f"‚úÖ Cargado: {SRC}")

def load_text(path):
    """Return the plain text of the document at *path*.

    The reader is chosen from the (case-insensitive) file extension:
    ``.pdf`` goes through ``PdfReader``, ``.docx`` through ``Document``, and
    anything else is read as UTF-8 text with undecodable bytes dropped.
    Pages / paragraphs are joined with newlines.
    """
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        pages = PdfReader(path).pages
        # extract_text() may yield nothing for a page; treat that as empty.
        return "\n".join(page.extract_text() or "" for page in pages)

    if lowered.endswith(".docx"):
        return "\n".join(par.text for par in Document(path).paragraphs)

    with open(path, "r", encoding="utf-8", errors="ignore") as handle:
        contents = handle.read()
    return contents

# Extract the text of the uploaded file and stop early when nothing usable
# (only whitespace) came out of it.
raw_text = load_text(SRC)
assert raw_text.strip(), "El documento parece vac√≠o o no se pudo extraer texto."

def chunk_text(text, max_chars=10000):
    """Group the non-blank lines of *text* into newline-joined chunks.

    Lines are stripped, blank ones are dropped, and consecutive lines are
    packed greedily so that each chunk stays within *max_chars* characters.

    Args:
        text: Source text; each line is treated as one paragraph.
        max_chars: Soft upper bound on chunk length. A single paragraph
            longer than this is kept whole as its own chunk (never split).

    Returns:
        A list of non-empty chunk strings (empty list for blank input).
    """
    paragraphs = [line.strip() for line in text.split("\n") if line.strip()]
    chunks = []
    current = ""
    for paragraph in paragraphs:
        if not current:
            # Start a chunk unconditionally: flushing here would emit an
            # empty chunk whenever the paragraph alone exceeds max_chars.
            current = paragraph
        elif len(current) + 1 + len(paragraph) <= max_chars:
            current = f"{current}\n{paragraph}"
        else:
            chunks.append(current)
            current = paragraph
    if current:
        chunks.append(current)
    return chunks

# Split the document into blocks of roughly CHUNK_MAX_CHARS characters; each
# block is later sent to the model as the content for one request.
chunks = chunk_text(raw_text, max_chars=CHUNK_MAX_CHARS)
print(f"üß© Bloques de texto creados: {len(chunks)} (‚âà{CHUNK_MAX_CHARS} chars cada uno)")

# ===== OpenAI client =====
# The key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# System message sent with every generation request.
SYSTEM_PROMPT = (
    "Eres un generador de √≠tems universitarios, preciso, en espa√±ol, y devuelves JSON v√°lido."
)

# User-message template, filled in with str.format(n=..., content=...);
# literal braces of the JSON example are therefore doubled.
# The rest of the notebook indexes "options" with "correct_index" directly,
# so the prompt states explicitly that the index is 0-based.
USER_PROMPT_TMPL = """
Eres un generador de preguntas universitarias en psicolog√≠a del lenguaje/lectura.
Crea {n} preguntas tipo test (MCQ) AUTOCONTENIDAS a partir del CONTENIDO. Nivel: universitario.
- 1 correcta + 2 distractores plausibles (sin 'todas las anteriores' / 'ninguna').
- "correct_index" es la posicion BASE 0 (0, 1 o 2) de la opcion correcta dentro de "options".
- Redacci√≥n clara, sin ambig√ºedad, NO dependas de "seg√∫n el texto".
- Incluye justificaci√≥n breve (1‚Äì2 frases) para la correcta.
- RESPONDE √öNICAMENTE con JSON. NO incluyas explicaciones, NO uses ```json ni ning√∫n fence.

Estructura EXACTA:
{{
  "items": [
    {{
      "id": "BLOQUE1-Q1",
      "stem": "ENUNCIADO AUTOCONTENIDO...",
      "options": ["A...", "B...", "C..."],
      "correct_index": 1,
      "justification": "Por qu√© es correcta...",
      "difficulty": "media",
      "tags": ["efectos de priming","l√©xico"]
    }}
  ]
}}

CONTENIDO:
{content}
"""




def wc(s):
    """Count the whitespace-separated words in *s* (``None`` counts as empty)."""
    collapsed = re.sub(r"\s+", " ", s or "")
    tokens = collapsed.strip().split(" ")
    return sum(1 for token in tokens if token)

import json, re

def _strip_code_fences(s: str) -> str:
    # quita ```json ... ``` o ``` ... ```
    s = s.strip()
    fence = re.compile(r"^```(?:json)?\s*([\s\S]*?)\s*```$", re.IGNORECASE)
    m = fence.match(s)
    return m.group(1).strip() if m else s

def _extract_json_object(s: str) -> str:
    # intenta extraer el primer objeto { ... } bien balanceado
    start = s.find("{")
    if start == -1:
        return s
    depth = 0
    for i, ch in enumerate(s[start:], start=start):
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return s[start:i+1]
    return s  # si no balancea, devolvemos original

def llm_items_from_text(content, block_id="B1", n=6, debug=False):
    """Ask the chat model for *n* multiple-choice items about *content*.

    Args:
        content: Source text the questions must be based on.
        block_id: Prefix used to build item ids (``"{block_id}-Q{i}"``).
        n: Number of questions requested from the model.
        debug: When true, print the raw and cleaned reply if the first
            JSON parse attempt fails.

    Returns:
        A list of item dicts as produced by the model. Every item gets an
        id derived from *block_id*: ids echoed by the model tend to copy the
        example in the prompt and would repeat from one block to the next.

    Raises:
        RuntimeError: The API reports ``insufficient_quota``; retrying or
            moving to another block cannot succeed.
        ValueError: The reply is valid JSON but not an object with a list
            under ``"items"``.
        json.JSONDecodeError: The reply cannot be parsed as JSON even after
            cleaning. Other API errors propagate unchanged.
    """
    import openai  # local import: the file only imports the OpenAI class

    prompt = USER_PROMPT_TMPL.format(content=content, n=n)
    try:
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            temperature=0.4,
            # remove the next line if your account/model rejects response_format
            response_format={"type": "json_object"},
        )
    except openai.RateLimitError as exc:
        out_of_quota = (
            getattr(exc, "code", None) == "insufficient_quota"
            or "insufficient_quota" in str(exc)
        )
        if out_of_quota:
            raise RuntimeError(
                "Cuota de OpenAI agotada (insufficient_quota); "
                "revisa tu plan de pago antes de reintentar."
            ) from exc
        raise

    # The message content can be missing; treat that as an empty reply so the
    # failure surfaces as a JSON decode error rather than an AttributeError.
    raw = resp.choices[0].message.content or ""
    try:
        # attempt 1: the reply is already plain JSON
        data = json.loads(raw)
    except Exception:
        # attempt 2: drop Markdown fences; attempt 3: cut out the first {...}
        cleaned = _extract_json_object(_strip_code_fences(raw))
        if debug:
            print("DEBUG raw:", raw[:400])
            print("DEBUG cleaned:", cleaned[:400])
        data = json.loads(cleaned)  # a failure here propagates to the caller

    if not isinstance(data, dict):
        raise ValueError(
            f"La respuesta del modelo no es un objeto JSON: {type(data).__name__}"
        )

    # Tolerate keys with stray spaces/newlines around 'items'.
    if "items" not in data:
        for key in list(data.keys()):
            if "items" in key.replace("\n", "").replace(" ", ""):
                data["items"] = data.pop(key)
                break

    items = data.get("items") or []
    if not isinstance(items, list):
        raise ValueError("La clave 'items' de la respuesta no es una lista.")

    # Keep only JSON objects and give each one a block-scoped id.
    items = [it for it in items if isinstance(it, dict)]
    for i, it in enumerate(items, start=1):
        it["id"] = f"{block_id}-Q{i}"
    return items

def balance_and_shuffle(item, diff_threshold=4, seed=None):
    """Pad short distractors and shuffle the options of *item* in place.

    A distractor whose word count is more than *diff_threshold* words below
    the correct option's gets one canned sentence appended (a single
    sentence, so the gap is reduced but not necessarily closed). Distractors
    longer than the correct option are left alone. The options are then
    shuffled and ``correct_index`` is updated to follow the correct one.

    Args:
        item: Dict with ``"options"`` (list of str) and ``"correct_index"``
            (valid position in that list). It is modified in place.
        diff_threshold: Maximum tolerated word deficit of a distractor.
        seed: Seed for a private generator. With ``None`` the module-level
            ``random`` generator is used, so an earlier ``random.seed()``
            call keeps the run reproducible.

    Returns:
        The same *item* object, updated.
    """
    rnd = random if seed is None else random.Random(seed)

    options = list(item["options"])
    ci = item["correct_index"]
    correct_len = wc(options[ci])

    fillers = [
        " Este patr√≥n se ha descrito en estudios de priming y decisi√≥n l√©xica.",
        " La literatura lo vincula con activaci√≥n competitiva y control inhibitorio.",
        " Se replica en lectores con distintos niveles de proficiencia.",
    ]
    for i, opt in enumerate(options):
        if i != ci and (correct_len - wc(opt)) > diff_threshold:
            options[i] = opt.strip() + rnd.choice(fillers)

    # Carry an "is correct" flag through the shuffle to recover the new index.
    pairs = [(opt, i == ci) for i, opt in enumerate(options)]
    rnd.shuffle(pairs)
    item["options"] = [text for text, _ in pairs]
    item["correct_index"] = next(
        i for i, (_, is_correct) in enumerate(pairs) if is_correct
    )
    return item

def validate_item(it):
    """Check that *it* is a usable three-option question.

    Malformed model output (wrong types, missing keys) is reported as
    invalid instead of raising.

    Args:
        it: Candidate item; expected to be a dict with ``"options"``,
            ``"correct_index"`` and ``"stem"``.

    Returns:
        A ``(ok, reasons)`` tuple: *ok* is True when every check passed and
        *reasons* lists a message for each failed check. Note the tuple
        itself is always truthy; callers must unpack it.
    """
    if not isinstance(it, dict):
        return False, ["El item no es un objeto."]

    reasons = []

    options = it.get("options", [])
    options_ok = isinstance(options, (list, tuple)) and len(options) == 3
    if not options_ok:
        reasons.append("No hay 3 opciones.")
    elif not all(isinstance(opt, str) and opt.strip() for opt in options):
        options_ok = False
        reasons.append("Hay opciones vacias o que no son texto.")

    ci = it.get("correct_index", -1)
    # bool is a subclass of int; True/False are not acceptable positions.
    index_ok = isinstance(ci, int) and not isinstance(ci, bool) and 0 <= ci < 3
    if not index_ok:
        reasons.append("√çndice de correcta inv√°lido.")

    # Duplicate detection only makes sense once the basic shape is right.
    if options_ok and index_ok:
        distinct = {opt.strip().lower() for opt in options}
        if len(distinct) < 3:
            reasons.append("Opciones duplicadas o id√©nticas.")

    stem = it.get("stem", "")
    if not isinstance(stem, str) or wc(stem) < 6:
        reasons.append("Enunciado demasiado corto.")

    return not reasons, reasons

def to_moodle_xml(items, xml_path="equilibrado_IA.xml"):
    """Write *items* to *xml_path* as a Moodle XML quiz of multichoice questions.

    Each question carries a ``<name>`` (the item id, or ``Q<n>`` when the
    item has none), which the Moodle XML format expects, followed by the
    question text and one ``<answer>`` per option with fraction 100 for the
    correct option and 0 for the others.

    Args:
        items: Iterable of dicts with ``"stem"``, ``"options"`` and
            ``"correct_index"``; ``"id"`` is optional.
        xml_path: Destination file path.

    Returns:
        *xml_path*.
    """
    soup = BeautifulSoup('<?xml version="1.0" encoding="UTF-8"?><quiz></quiz>', "xml")
    quiz = soup.find("quiz")
    for n, it in enumerate(items, start=1):
        q = soup.new_tag("question", type="multichoice")

        name = soup.new_tag("name")
        name_text = soup.new_tag("text")
        name_text.string = str(it.get("id") or f"Q{n}")
        name.append(name_text)
        q.append(name)

        qt = soup.new_tag("questiontext", format="html")
        qt_text = soup.new_tag("text")
        qt_text.string = it["stem"]
        qt.append(qt_text)
        q.append(qt)

        for i, opt in enumerate(it["options"]):
            fraction = "100" if i == it["correct_index"] else "0"
            ans = soup.new_tag("answer", fraction=fraction)
            at = soup.new_tag("text")
            at.string = opt
            ans.append(at)
            q.append(ans)
        quiz.append(q)

    # Re-parse with lxml in recovery mode and serialize with a declaration
    # and indentation.
    xml_str = str(soup)
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(xml_str.encode("utf-8"), parser=parser)
    xml_bytes = etree.tostring(root, encoding="utf-8", xml_declaration=True, pretty_print=True)
    with open(xml_path, "wb") as f:
        f.write(xml_bytes)
    return xml_path

def to_pdf(items, pdf_path="equilibrado_IA.pdf"):
    """Render *items* to a review PDF at *pdf_path*.

    Each question is printed with its lettered options (the correct one
    carries a mark) and, when present, its justification. Item text is
    XML-escaped before being placed inside paragraph markup, so characters
    such as ``<`` or ``&`` in model output are shown literally instead of
    being parsed as tags.

    Args:
        items: Iterable of dicts with ``"stem"``, ``"options"`` and
            ``"correct_index"``; ``"justification"`` is optional.
        pdf_path: Destination file path.

    Returns:
        *pdf_path*.
    """
    from xml.sax.saxutils import escape  # stdlib; not imported at file level

    doc = SimpleDocTemplate(pdf_path, pagesize=A4)
    styles = getSampleStyleSheet()
    normal = styles["Normal"]
    story = [Paragraph("<b>Banco de preguntas (IA)</b>", styles["Title"]), Spacer(1,10)]
    for i, it in enumerate(items, start=1):
        stem = escape(str(it["stem"]))
        story.append(Paragraph(f"<b>{i}. {stem}</b>", normal))
        for j, opt in enumerate(it["options"]):
            mark = " ‚úÖ" if j == it["correct_index"] else ""
            story.append(Paragraph(f"{chr(97+j)}) {escape(str(opt))}{mark}", normal))
        justification = it.get("justification")
        if justification:
            story.append(
                Paragraph(f"<i>Justificaci√≥n:</i> {escape(str(justification))}", normal)
            )
        story.append(Spacer(1,8))
    doc.build(story)
    return pdf_path

# ===== Block-by-block generation =====
all_items = []
discarded = 0   # items rejected by validation or too malformed to process
processed = 0   # running item counter; gives every item its own shuffle seed
for bi, ch in enumerate(tqdm(chunks, desc="Generando √≠tems IA"), start=1):
    try:
        items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
    except Exception as e:
        print(f"‚ö†Ô∏è Bloque {bi}: error de generaci√≥n ‚Üí", e)
        if "insufficient_quota" in str(e):
            # Quota exhaustion is not retriable: every remaining block would
            # fail the same way, so stop instead of repeating the request.
            print("Cuota de la API agotada: se detiene la generacion.")
            break
        traceback.print_exc()
        continue

    for it in items:
        processed += 1
        try:
            # Validate first so a malformed item is skipped on its own
            # instead of aborting the rest of its block.
            ok, reasons = validate_item(it)
            if not ok:
                discarded += 1
                continue
            # Reusing one seed for every item would apply the same
            # permutation to all of them; offset it per item instead.
            item_seed = None if RANDOM_SEED is None else RANDOM_SEED + processed
            all_items.append(
                balance_and_shuffle(it, diff_threshold=THRESH_DIFF, seed=item_seed)
            )
        except (AttributeError, IndexError, KeyError, TypeError) as e:
            discarded += 1
            print(f"Item descartado en bloque {bi} (formato inesperado):", e)

print(f"‚úÖ √çtems v√°lidos totales: {len(all_items)}")
if discarded:
    print(f"Items descartados: {discarded}")

# ===== Export =====
if all_items:
    xml_path = to_moodle_xml(all_items, xml_path="equilibrado_IA.xml")
    pdf_path = to_pdf(all_items, pdf_path="equilibrado_IA.pdf")
    print("üì¶ XML:", xml_path)
    print("üìÑ PDF:", pdf_path)

    # Downloads
    files.download(xml_path)
    files.download(pdf_path)
else:
    # Nothing to export: skip writing and downloading empty files.
    print("No se genero ningun item valido: no se exporta XML ni PDF.")


üìÅ Sube tu documento base (PDF/DOCX/TXT)


Saving Neurociencia-del-lenguaje.pdf to Neurociencia-del-lenguaje (2).pdf
‚úÖ Cargado: Neurociencia-del-lenguaje (2).pdf
üß© Bloques de texto creados: 14 (‚âà6000 chars cada uno)


Generando √≠tems IA:   0%|          | 0/14 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
            

‚ö†Ô∏è Bloque 1: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 2: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 3: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 4: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 5: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 6: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 7: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 8: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 9: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 10: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 11: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 12: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 13: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


Traceback (most recent call last):
  File "/tmp/ipython-input-2654049910.py", line 243, in <cell line: 0>
    items = llm_items_from_text(ch, block_id=f"B{bi}", n=ITEMS_PER_BLOCK)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2654049910.py", line 128, in llm_items_from_text
    resp = client.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_utils/_utils.py", line 286, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/resources/chat/completions/completions.py", line 1147, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ö†Ô∏è Bloque 14: error de generaci√≥n ‚Üí Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
‚úÖ √çtems v√°lidos totales: 0
üì¶ XML: equilibrado_IA.xml
üìÑ PDF: equilibrado_IA.pdf





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# =========================
#  Generación por LOTES y objetivo total (p.ej., 200 preguntas)
#  - Pide ITEMS_PER_CALL ítems por llamada (p.ej., 20)
#  - Recorre los chunks del documento en round-robin
#  - Acumula hasta TARGET_ITEMS
#  - Exporta un único XML+PDF con todo
# =========================

# Batch and target parameters
TARGET_ITEMS    = 200   # total number of items wanted in the bank
ITEMS_PER_CALL  = 20    # items requested from the model per call
ROUND_ROBIN     = True  # True: cycle over the chunks; False: single pass, stop at the end
ALLOW_DUPLICATE_STEMS = False  # when False, repeated stems are not accumulated

# (Optional) lower the consumption if needed:
# ITEMS_PER_CALL = 10  # fewer items per call
# TARGET_ITEMS   = 100 # smaller bank

assert 'chunks' in globals() and len(chunks) > 0, "No hay 'chunks' cargados. Sube antes tu documento y divide en chunks."
assert 'llm_items_from_text' in globals(), "Falta la funci√≥n llm_items_from_text (pega primero los helpers)."

# Give up after this many consecutive calls that add nothing (errors or only
# rejected/duplicate items); without a bound, round-robin mode never ends
# when the API keeps failing or the document is exhausted.
MAX_IDLE_CALLS = max(3, len(chunks))

collected = []
seen_stems = set()

def _norm_stem(s: str) -> str:
    """Return *s* lower-cased with whitespace runs collapsed, for duplicate detection."""
    return re.sub(r"\s+", " ", (s or "").strip().lower())

calls = 0        # successful model calls so far
idx = 0          # index of the next chunk to use
idle_calls = 0   # consecutive calls that added no item
bar = tqdm(total=TARGET_ITEMS, desc="Acumulando √≠tems", unit="itm")

try:
    while len(collected) < TARGET_ITEMS:
        # Pick the chunk
        if idx >= len(chunks):
            if ROUND_ROBIN:
                idx = 0
            else:
                print("‚èπÔ∏è Fin de documento y objetivo no alcanzado.")
                break

        if idle_calls >= MAX_IDLE_CALLS:
            print(f"Sin progreso tras {idle_calls} llamadas seguidas: se detiene la generacion.")
            break

        ch = chunks[idx]
        remaining = TARGET_ITEMS - len(collected)
        n_this_call = min(ITEMS_PER_CALL, remaining)

        try:
            # Generate one batch
            items = llm_items_from_text(ch, block_id=f"L{calls+1}", n=n_this_call)
        except RuntimeError as e:
            # e.g. insufficient_quota reported by a helper that raises RuntimeError
            print(str(e))
            break
        except Exception as e:
            print(f"‚ö†Ô∏è Error en llamada {calls+1} (chunk {idx+1}):", e)
            if "insufficient_quota" in str(e):
                # Not retriable: later calls would fail identically.
                break
            traceback.print_exc()
            # Move on to the next chunk/batch
            idx += 1
            idle_calls += 1
            continue

        # Post-processing and filtering
        added_now = 0
        for it in items:
            try:
                # validate_item returns (ok, reasons); the tuple is always
                # truthy, so it must be unpacked before testing.
                ok, _reasons = validate_item(it)
                if not ok:
                    continue
                if not ALLOW_DUPLICATE_STEMS:
                    key = _norm_stem(it.get("stem",""))
                    if key in seen_stems:
                        continue
                    seen_stems.add(key)
                it = balance_and_shuffle(it)
            except (AttributeError, IndexError, KeyError, TypeError) as e:
                # Malformed item: skip it without losing the rest of the batch.
                print("Item descartado (formato inesperado):", e)
                continue

            collected.append(it)
            added_now += 1
            bar.update(1)
            if len(collected) >= TARGET_ITEMS:
                break

        # Progress bookkeeping
        calls += 1
        idx += 1
        idle_calls = 0 if added_now else idle_calls + 1

        # A chunk that yields little is simply left behind; with ROUND_ROBIN
        # it is revisited on the next pass.

finally:
    bar.close()
    print(f"‚úÖ Total acumulado: {len(collected)} √≠tems en {calls} llamadas")

# Trim to the exact target (in case the last batch overshoots)
if len(collected) > TARGET_ITEMS:
    collected = collected[:TARGET_ITEMS]

# Export a single bank
from datetime import datetime
if collected:
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    xml_name = f"banco_IA_{len(collected)}_{stamp}.xml"
    pdf_name = f"banco_IA_{len(collected)}_{stamp}.pdf"

    xml_path = to_moodle_xml(collected, xml_path=xml_name)
    pdf_path = to_pdf(collected, pdf_path=pdf_name)
    print("üì¶ XML:", xml_path)
    print("üìÑ PDF:", pdf_path)

    # Downloads
    from google.colab import files
    files.download(xml_path)
    files.download(pdf_path)
else:
    # Nothing was collected: skip writing and downloading empty files.
    print("No se acumulo ningun item: no se exporta XML ni PDF.")
