In [None]:
!pip install beautifulsoup4 lxml reportlab --quiet


In [None]:
# =========================
#  Equilibrar bancos Moodle XML y generar PDF
# =========================
import random
import traceback
from xml.sax.saxutils import escape

from bs4 import BeautifulSoup
from bs4.element import Tag
from google.colab import files
from lxml import etree
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer

# ===== Parameters =====
THRESH_DIFF = 4  # extend a distractor when the correct answer is more than 4 words longer
RANDOM_SEED = 42  # set to None for non-deterministic runs
SHUFFLE_ANSWERS = True
SHUFFLE_QUESTIONS = False  # reorder whole questions (keep False if the XML uses categories)

# Filler sentences appended to short distractors; one is drawn at random each
# time so the same sentence is not repeated everywhere. Every entry starts with
# a space because it is concatenated directly onto the existing answer text.
# The accented characters were previously mis-encoded and would have been
# written into the question bank as garbage.
EXTENSIONS_POOL = [
    " Este matiz ha sido descrito en investigaciones sobre lectura, priming y competencia léxica.",
    " Tal patrón se observa en tareas de decisión léxica y paradigmas de facilitación enmascarada.",
    " La literatura reporta efectos consistentes en estudios ERP y medidas oculomotoras.",
    " Se ha documentado en poblaciones bilingües y en lectores con diferentes niveles de proficiencia.",
    " Los resultados son congruentes con modelos de activación competitiva y control inhibitorio.",
]

# Seed the global RNG once so extension choices and shuffles are reproducible.
if RANDOM_SEED is not None:
    random.seed(RANDOM_SEED)

# ===== Upload file =====
print("📁 Selecciona tu XML Moodle (por ejemplo: EFECTOS_proc_filtrado_longitud.xml)")
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the upload widget (e.g. the dialog was dismissed):
    # stop with a clear message instead of an IndexError on the lookup below.
    raise RuntimeError("No se subió ningún archivo; vuelve a ejecutar la celda y selecciona un XML.")
SRC = next(iter(uploaded))  # only the first uploaded file is processed
print(f"✅ Cargado: {SRC}")

# The uploaded file is read back from disk as UTF-8 text.
with open(SRC, "r", encoding="utf-8") as f:
    xml_in = f.read()

# ===== Helpers =====
def get_text_or_empty(tag: Tag) -> str:
    """Return the visible text of ``tag``, or ``""`` when ``tag`` is None.

    The element's own direct ``<text>`` child is preferred, so that a nested
    ``<text>`` (for instance one inside a ``<feedback>`` child that happens to
    come first) is not mistaken for the element's content. If there is no
    direct ``<text>`` child, the first ``<text>`` descendant is used; if there
    is none at all, the text of ``tag`` itself is returned.

    Text fragments are joined with single spaces and stripped.
    """
    if tag is None:
        return ""
    text_node = tag.find("text", recursive=False)
    if text_node is None:
        # Fall back to the broader lookup for elements without a direct <text>.
        text_node = tag.find("text")
    # Compare against None explicitly rather than relying on truthiness.
    source = text_node if text_node is not None else tag
    return source.get_text(" ", strip=True)

def set_text_safely(soup, parent_tag: Tag, new_text: str):
    """Store ``new_text`` in the ``<text>`` child of ``parent_tag``.

    The direct ``<text>`` child is targeted first so that a nested ``<text>``
    (e.g. inside a ``<feedback>`` child) is never overwritten by mistake. If
    there is no direct child, the first ``<text>`` descendant is reused. If
    ``parent_tag`` holds no ``<text>`` at all, its contents are cleared and a
    fresh ``<text>`` element created through ``soup.new_tag`` becomes its
    only child.

    Args:
        soup: Document object used to create new tags.
        parent_tag: Element whose text should be replaced.
        new_text: Replacement text.
    """
    txt = parent_tag.find("text", recursive=False)
    if txt is None:
        txt = parent_tag.find("text")
    if txt is None:
        # No <text> anywhere: replace the whole content with a new <text> node.
        txt = soup.new_tag("text")
        parent_tag.clear()
        parent_tag.append(txt)
    else:
        txt.clear()
    txt.append(new_text)

def wcount(s: str) -> int:
    """Return the number of whitespace-separated words in ``s``.

    Any run of whitespace (spaces, newlines, tabs, non-breaking spaces, ...)
    counts as a single separator; an empty or ``None`` input yields 0.
    """
    return len((s or "").split())

# ===== Process =====
soup = BeautifulSoup(xml_in, "xml")


def _fraction_of(answer: Tag) -> float:
    """Return the answer's ``fraction`` attribute as a float (0.0 if missing or malformed)."""
    try:
        return float(answer.get("fraction", "0"))
    except (TypeError, ValueError):
        return 0.0


# Optional: shuffle whole questions.
# NOTE: every multichoice question is detached and re-appended at the end of
# <quiz>, so the shuffled questions end up after any other <question> entries
# (including category markers). Keep SHUFFLE_QUESTIONS False when the bank
# relies on categories.
if SHUFFLE_QUESTIONS:
    all_questions = soup.find_all("question", {"type": "multichoice"})
    parent_quiz = soup.find("quiz")
    if parent_quiz is not None and all_questions:
        for q in all_questions:
            q.extract()
        random.shuffle(all_questions)
        for q in all_questions:
            parent_quiz.append(q)

questions = soup.find_all("question", {"type": "multichoice"})
print("🔎 multichoice encontradas:", len(questions))

changed_q = 0  # questions where at least one distractor was extended
skipped_q = 0  # questions left untouched (unexpected shape or processing error)
for q in questions:
    try:
        # Prefer direct <answer> children; fall back to any descendant.
        answers = q.find_all("answer", recursive=False) or q.find_all("answer")
        if len(answers) != 3:
            skipped_q += 1
            continue

        # Index of the first answer worth full credit. The fraction is compared
        # numerically so spellings such as "100.00000" are recognised too.
        correct_idx = next(
            (i for i, a in enumerate(answers) if _fraction_of(a) == 100.0),
            None,
        )
        if correct_idx is None:
            skipped_q += 1
            continue

        # Word counts are taken once, before any extension is applied.
        lens = [wcount(get_text_or_empty(a)) for a in answers]
        correct_len = lens[correct_idx]
        extended_here = False
        for i, a in enumerate(answers):
            if i == correct_idx:
                continue
            if correct_len - lens[i] > THRESH_DIFF:
                ext = random.choice(EXTENSIONS_POOL)
                set_text_safely(soup, a, get_text_or_empty(a) + ext)
                extended_here = True
        if extended_here:
            changed_q += 1

        # Reorder the options by moving the original <answer> elements, so
        # their attributes and child elements (e.g. <feedback>) are preserved
        # instead of being rebuilt from plain text.
        if SHUFFLE_ANSWERS:
            shuffled = list(answers)
            random.shuffle(shuffled)
            for a in answers:
                a.extract()
            for a in shuffled:
                q.append(a)

    except Exception as e:
        # Best effort: report the failure and continue with the next question.
        skipped_q += 1
        print("⚠️  Error al procesar una pregunta:", e)
        traceback.print_exc()

print(f"✍️ Preguntas con extensión aplicada: {changed_q} | ⏭️ omitidas: {skipped_q}")

# ===== Save XML (round-tripped through lxml) =====
xml_str = str(soup)
# recover=True asks lxml to keep going past malformed fragments instead of raising.
parser = etree.XMLParser(recover=True)
root = etree.fromstring(xml_str.encode("utf-8"), parser=parser)
if root is None:
    # In recovery mode the parser may hand back no root element at all;
    # stop here rather than failing inside tostring() with an opaque error.
    raise ValueError("El XML resultante no se pudo analizar; no se generó ningún archivo.")
xml_bytes = etree.tostring(root, encoding="utf-8", xml_declaration=True, pretty_print=True)

OUT_XML = "equilibrado.xml"
with open(OUT_XML, "wb") as f:
    f.write(xml_bytes)
print("📦 XML guardado:", OUT_XML)

# ===== Build PDF =====
# Re-read the file that was just written so the PDF reflects what was saved.
with open(OUT_XML, "rb") as f:
    soup2 = BeautifulSoup(f.read(), "xml")


def _is_marked_correct(answer) -> bool:
    """Return True when the answer's ``fraction`` attribute parses to 100."""
    try:
        return float(answer.get("fraction", "0")) == 100.0
    except (TypeError, ValueError):
        return False


pdf_path = "equilibrado.pdf"
doc = SimpleDocTemplate(pdf_path, pagesize=A4)
styles = getSampleStyleSheet()
story = [Paragraph("<b>Cuestionario equilibrado</b>", styles["Title"]), Spacer(1, 10)]

printed = 0
for idx, q in enumerate(soup2.find_all("question", {"type": "multichoice"}), start=1):
    # Paragraph interprets its text as markup, so literal "&", "<" and ">"
    # coming from the question bank are escaped before interpolation.
    qtxt = escape(get_text_or_empty(q.find("questiontext")) or "Sin enunciado")
    story.append(Paragraph(f"<b>{idx}. {qtxt}</b>", styles["Normal"]))
    for i, a in enumerate(q.find_all("answer")):
        # NOTE(review): the default PDF font may lack a glyph for the check
        # mark — confirm how it renders in the generated file.
        mark = " ✅" if _is_marked_correct(a) else ""
        atxt = escape(get_text_or_empty(a))
        # Options are labelled a), b), c), ...
        story.append(Paragraph(f"{chr(97 + i)}) {atxt}{mark}", styles["Normal"]))
    story.append(Spacer(1, 6))
    printed += 1

doc.build(story)
print("📄 PDF generado:", pdf_path, "| preguntas impresas:", printed)

# ===== Downloads =====
# Send both generated files to the browser: the XML first, then the PDF.
for artifact in (OUT_XML, pdf_path):
    files.download(artifact)


In [None]:
import os
import shutil

from google.colab import drive

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# Destination folder inside the mounted Drive; created if it does not exist.
DEST = "/content/drive/MyDrive/MoodleXML/"
os.makedirs(DEST, exist_ok=True)
shutil.copy("equilibrado.xml", DEST)
shutil.copy("equilibrado.pdf", DEST)
print("✅ Copiados a", DEST)
