In [None]:
# @title Celda 0: Montar Google Drive
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read requirements.txt and
# logo.png from /content/drive/MyDrive/mole_v1/.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# @title Celda 1: Instalar Dependencias
!pip install -r /content/drive/MyDrive/mole_v1/requirements.txt

Collecting gradio (from -r /content/drive/MyDrive/mole_v1/requirements.txt (line 2))
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu (from -r /content/drive/MyDrive/mole_v1/requirements.txt (line 4))
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting PyMuPDF (from -r /content/drive/MyDrive/mole_v1/requirements.txt (line 10))
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting python-docx (from -r /content/drive/MyDrive/mole_v1/requirements.txt (line 11))
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio->-r /content/drive/MyDrive/mole_v1/requirements.txt (line 2))
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio->-r /content/drive/MyDrive/mole_v1/requirements.txt (line 2))
  Downloading fastapi-0.115.12-py3-none-any.whl.me

In [None]:
# -------------------------------
# 📦 CORE LOGIC FOR NABU RAG CHATBOT
# -------------------------------

import os
import re
import numpy as np
import docx
import fitz
import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# ✅ Global state shared by the processing, search and chat cells
# Sentence-embedding model used to encode both document fragments and queries.
embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")
# FAISS index over the fragment embeddings; stays None until files are processed.
index = None
# Cleaned text fragments, stored in the same order as the vectors added to `index`.
documents = []

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# ✅ Hugging Face LLaMA 3
# Read the access token from the environment instead of hardcoding it in the
# notebook (notebooks and their outputs are routinely shared). When HF_TOKEN is
# unset, None lets the library fall back to a previously stored login.
hf_token = os.environ.get("HF_TOKEN") or None
llama3_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# `token=` replaces the deprecated `use_auth_token=` keyword.
llama_tokenizer = AutoTokenizer.from_pretrained(llama3_model_id, token=hf_token)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama3_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    token=hf_token,
)



tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [None]:
# -------------------------------
# 📂 File Loading Utilities
# -------------------------------
def load_txt(file_path):
    """Return the full contents of a UTF-8 encoded text file as a single string."""
    with open(file_path, mode="r", encoding="utf-8") as text_file:
        contents = text_file.read()
    return contents

def load_docx(file_path):
    """Return the text of every paragraph in a .docx file, joined with newlines."""
    document = docx.Document(file_path)
    paragraph_texts = [para.text for para in document.paragraphs]
    return "\n".join(paragraph_texts)

def load_pdf(file_path):
    """Return the text of every page of a PDF, joined with newlines.

    The document is opened as a context manager so the underlying file handle
    is released even if text extraction fails part-way through.
    """
    with fitz.open(file_path) as pdf:
        return "\n".join(page.get_text() for page in pdf)

def load_file(file_path):
    """Load a document by dispatching on its (case-insensitive) file extension.

    Supports .txt, .docx and .pdf; any other extension raises ValueError.
    """
    _, suffix = os.path.splitext(file_path)
    ext = suffix.lower()
    if ext == ".txt":
        return load_txt(file_path)
    if ext == ".docx":
        return load_docx(file_path)
    if ext == ".pdf":
        return load_pdf(file_path)
    raise ValueError(f"❌ Unsupported format: {ext}")

In [None]:
# -------------------------------
# 🧹 Cleaning Function
# -------------------------------
# Markers of front-matter / publisher boilerplate. The pattern is written in
# lower case because it is matched against lower-cased text.
_BOILERPLATE_RE = re.compile(
    r"(universidad|autor|traducci[oó]n|isbn|©|derechos|publicado por|editorial|"
    r"john c\. hull|miguel [aá]ngel|pearson|atlacomulco|impreso en méxico|"
    r"cámara nacional|registro)"
)


def clean_paragraph(paragraph, min_length=100):
    """Return the stripped paragraph if it looks like body text, else None.

    A paragraph is rejected when it is blank, contains a boilerplate marker
    (see `_BOILERPLATE_RE`), consists of more than 50% upper-case characters,
    or is shorter than `min_length` characters.

    Args:
        paragraph: Raw paragraph text.
        min_length: Minimum number of characters (after stripping) to keep.

    Returns:
        The stripped paragraph, or None when it is filtered out.
    """
    text = paragraph.strip()
    if not text:
        return None
    # Match on lower-cased text so "ISBN", "Isbn" and "isbn" are all caught.
    if _BOILERPLATE_RE.search(text.lower()):
        return None
    if sum(ch.isupper() for ch in text) / max(1, len(text)) > 0.5:
        return None
    if len(text) < min_length:
        return None
    return text

In [None]:
# -------------------------------
# ⚙️ Main Processing Function
# -------------------------------
def process_uploaded_files(file_objs):
    """Load, clean and index the uploaded files for retrieval.

    Each file is split on newlines, every line is filtered through
    `clean_paragraph`, and the surviving fragments are embedded and stored in
    a fresh inner-product FAISS index over L2-normalised vectors.

    The globals `documents` and `index` are replaced together, and only after
    the new index has been built, so a failure never leaves them out of sync.

    Args:
        file_objs: Iterable of uploaded files; each item is either an object
            exposing the path as `.name` or a plain path string.

    Returns:
        A status message for the UI (success or "❌ Error: ...").
    """
    global documents, index
    if not file_objs:
        return "❌ Error: No files were uploaded."
    try:
        fragments = []
        for file_obj in file_objs:
            # Accept both objects carrying the path in `.name` and bare paths.
            path = getattr(file_obj, "name", file_obj)
            for line in load_file(path).split("\n"):
                cleaned = clean_paragraph(line)
                if cleaned:
                    fragments.append(cleaned)

        if not fragments:
            return "❌ Error: No usable text fragments were found in the uploaded files."

        embeddings = np.ascontiguousarray(
            embedder.encode(fragments, convert_to_tensor=False), dtype="float32"
        )
        faiss.normalize_L2(embeddings)
        new_index = faiss.IndexFlatIP(embeddings.shape[1])
        new_index.add(embeddings)

        # Publish both pieces of state at once.
        documents, index = fragments, new_index
        return f"✅ Processing completed. Fragments loaded: {len(documents)}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

In [None]:
# -------------------------------
# 🔍 Search Utility
# -------------------------------
def search_documents(query, k=3):
    """Return up to `k` (fragment, score) pairs most similar to `query`.

    The query is prefixed with a retrieval instruction, embedded,
    L2-normalised and searched against the global FAISS `index`.

    Args:
        query: User question.
        k: Maximum number of fragments to return.

    Returns:
        List of (fragment_text, similarity_score) tuples; empty when no index
        has been built yet.
    """
    if index is None or not documents:
        return []
    prompt_query = "Represent this query for retrieval: " + query
    query_vec = np.ascontiguousarray(
        embedder.encode([prompt_query], convert_to_tensor=False), dtype="float32"
    )
    faiss.normalize_L2(query_vec)
    scores, indices = index.search(query_vec, k)
    # FAISS pads missing results with label -1 when fewer than k vectors are
    # available; without the lower bound, -1 would silently pick documents[-1].
    return [
        (documents[doc_id], scores[0][rank])
        for rank, doc_id in enumerate(indices[0])
        if 0 <= doc_id < len(documents)
    ]

In [None]:
# -------------------------------
# 💬 Generation Functions
# -------------------------------
import re

def clean_model_output(raw_output):
    """
    Strip echoed prompt text from a model output.

    Everything up to and including the last '### Answer:' marker is dropped;
    when the marker is absent the text is returned unchanged apart from
    surrounding whitespace being trimmed.
    """
    _before, _marker, answer = raw_output.rpartition("### Answer:")
    return answer.strip()

def generate_response_insightful(context, question, max_new_tokens=600):
    """Answer `question` from `context` in a clear, pedagogical tone.

    Builds the "insightful" prompt (persona, context/question, instructions)
    and delegates generation to `_generate_llama3_response`.
    """
    persona = (
        "Eres un asistente bilingüe (español e inglés), experto y pedagógico.\n"
        "⚠️ Siempre responde **solo** en el idioma de la pregunta.\n"
        "- Si la pregunta está en español, responde **únicamente** en español.\n"
        "- Si la pregunta está en inglés, responde **únicamente** in English.\n"
        "No hagas traducciones, ni mezcles idiomas.\n\n"
    )
    grounding = f"### Context:\n{context}\n\n### Question:\n{question}\n\n"
    guidance = (
        "### Instructions:\n"
        "- Explicación clara y detallada.\n"
        "- Usa ejemplos o analogías prácticas cuando corresponda.\n"
        "- Evita jerga técnica innecesaria; explica términos complejos.\n"
        "- Concluye de forma natural.\n\n"
        "### Answer:"
    )
    full_prompt = "".join((persona, grounding, guidance))
    return _generate_llama3_response(full_prompt, max_new_tokens)

def generate_response_expert(context, question, max_new_tokens=600):
    """Answer `question` from `context` in a rigorous, technical tone.

    Builds the "expert" prompt (persona, context/question, instructions) and
    delegates generation to `_generate_llama3_response`.
    """
    persona = (
        "Eres un asistente bilingüe (español e inglés), de nivel experto técnico.\n"
        "⚠️ Responde **solo** en el idioma de la pregunta recibida.\n"
        "- Si la pregunta está en español, responde **únicamente** en español.\n"
        "- If the question is in English, answer **only** in English.\n"
        "No mezcles idiomas ni ofrezcas traducciones.\n\n"
    )
    grounding = f"### Context:\n{context}\n\n### Question:\n{question}\n\n"
    guidance = (
        "### Instructions:\n"
        "- Tono profesional y riguroso.\n"
        "- Emplea términos técnicos y definiciones avanzadas.\n"
        "- Asume que el usuario tiene nivel intermedio-avanzado.\n"
        "- Finaliza con un resumen claro.\n\n"
        "### Answer:"
    )
    full_prompt = "".join((persona, grounding, guidance))
    return _generate_llama3_response(full_prompt, max_new_tokens)

def _generate_llama3_response(prompt, max_new_tokens):
    """Sample a completion for `prompt` and return only the newly generated text.

    The prompt tokens are sliced off the output before decoding, so the answer
    does not depend on locating a marker in the echoed prompt. Previously a
    repeated '### Answer:' inside the completion caused everything before it,
    including the real answer, to be discarded.

    Args:
        prompt: Fully formatted prompt text.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated completion with special tokens removed and surrounding
        whitespace stripped.
    """
    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    prompt_token_count = inputs["input_ids"].shape[1]
    output = llama_model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.4,
        top_p=0.9,
        do_sample=True,
        pad_token_id=llama_tokenizer.eos_token_id,
        eos_token_id=llama_tokenizer.eos_token_id,
    )
    # generate() returns prompt + completion; keep only the completion tokens.
    completion_ids = output[0][prompt_token_count:]
    decoded = llama_tokenizer.decode(completion_ids, skip_special_tokens=True)
    return decoded.strip()



In [None]:
import re
import traceback
import logging

def generate_academic_summary(query: str, chat_history: list, context: str) -> str:
    """
    Generate a structured academic summary of `context` with the Llama 3 model.

    Args:
        query: Summary requirement inserted into the user turn of the prompt.
        chat_history: Accepted for interface compatibility; not used.
        context: Document text to summarise; must be a string of at least
            50 characters.

    Returns:
        The cleaned summary text, or an error message string when validation
        fails or generation raises (generation errors are caught and logged).
    """
    logger = logging.getLogger(__name__)
    logger.info("--- Running generate_academic_summary (corrected version) ---")

    # Validate the argument first. len() is only evaluated for real strings so
    # that a None/non-sized context is reported instead of raising TypeError.
    if not isinstance(context, str) or len(context) < 50:
        context_length = len(context) if isinstance(context, str) else "n/a"
        logger.error(f"Invalid context. Type: {type(context)}, Length: {context_length}")
        return "Error: Invalid or too short context."

    if 'llama_model' not in globals() or 'llama_tokenizer' not in globals():
        logger.error("Model or tokenizer not initialized")
        return "Critical error: Model or tokenizer not initialized."

    # Token budget derived from the model's configured context window.
    model_max_length = llama_model.config.max_position_embeddings
    max_new_tokens_target = 1024
    safe_max_length = model_max_length - max_new_tokens_target - 50  # safety margin

    # Llama 3 chat format: every turn (including the system turn) ends with
    # <|eot_id|>, and each header is followed by a blank line.
    prompt_prefix = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are an expert in academic synthesis and critical analysis. Your task is to generate a structured, graduate-level summary **strictly** based on the PROVIDED DOCUMENT CONTEXT. You must not use external knowledge or information not present in the text.
You speak Spanish and English fluently. Always respond in the same language in which the question is asked.
# Essential Guidelines:
1. **Deep Analysis**: Identify the central arguments, methodologies, results, and conclusions presented in the context. Look for relationships and patterns.
2. **Logical Structure**: Organize the summary with a clear introduction, coherent thematic development (using numbered sections with descriptive titles), and well-supported conclusions.
3. **Rigorous Style**: Use formal, precise academic language. Implicitly cite ideas by referring to the context content. Clearly distinguish between content description and your critical analysis.<|eot_id|><|start_header_id|>user<|end_header_id|>

[DOCUMENT CONTEXT]
"""
    prompt_suffix = f"""

[SUMMARY REQUIREMENT]
{query}

[FINAL INSTRUCTIONS]
1. Generate an academic summary of approximately 500–700 words at a graduate level, based **solely** on the above [DOCUMENT CONTEXT].
2. Structure the summary using numbered sections with descriptive titles (Begin **exactly** with '1. Introduction').
3. In the Critical Analysis section, briefly evaluate the strengths or limitations of the arguments/methodologies presented *in the context*.
4. In the Conclusions, synthesize the main points and mention practical implications or knowledge gaps identified *within the context*.
5. **DO NOT** generate a “Concept Map” as a list or text.
6. Ensure **ALL** content in the summary derives directly from the [DOCUMENT CONTEXT]. Do not fabricate information or make unsupported generalizations.
7. Begin your response **immediately after** the marker `<|start_header_id|>assistant<|end_header_id|>` with the section '1. Introduction'. Do not repeat any instructions or the prior context.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

    try:
        # Truncate the DOCUMENT, never the whole prompt: right-truncating the
        # full prompt would cut off the instructions and the assistant header.
        # add_special_tokens=False avoids a second <|begin_of_text|>, since the
        # prompt already contains one.
        prefix_len = len(llama_tokenizer(prompt_prefix, add_special_tokens=False)["input_ids"])
        suffix_len = len(llama_tokenizer(prompt_suffix, add_special_tokens=False)["input_ids"])
        context_budget = safe_max_length - prefix_len - suffix_len
        if context_budget < 50:
            logger.error("Document too long to generate summary")
            return "Error: The document exceeds the maximum length limit."

        context_ids = llama_tokenizer(context, add_special_tokens=False)["input_ids"]
        if len(context_ids) > context_budget:
            logger.warning(
                f"Context truncated from {len(context_ids)} to {context_budget} tokens"
            )
            context = llama_tokenizer.decode(
                context_ids[:context_budget],
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )

        full_prompt = prompt_prefix + context + prompt_suffix
        inputs = llama_tokenizer(
            full_prompt,
            return_tensors="pt",
            add_special_tokens=False
        ).to(llama_model.device)

        input_token_count = inputs['input_ids'].shape[1]
        logger.info(f"Processed tokens: {input_token_count}/{model_max_length}")

        # Re-check the budget on the final prompt (re-tokenising the truncated
        # text can shift the count by a few tokens).
        remaining_tokens = model_max_length - input_token_count
        actual_max_new_tokens = min(max_new_tokens_target, remaining_tokens - 10)

        if actual_max_new_tokens < 50:
            logger.error("Document too long to generate summary")
            return "Error: The document exceeds the maximum length limit."

        # Stop on end-of-turn as well as end-of-text so the model does not keep
        # generating past its answer.
        stop_token_ids = [llama_tokenizer.eos_token_id]
        eot_token_id = llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
        if isinstance(eot_token_id, int) and eot_token_id not in stop_token_ids:
            stop_token_ids.append(eot_token_id)

        output_sequences = llama_model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=actual_max_new_tokens,
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=llama_tokenizer.eos_token_id,
            eos_token_id=stop_token_ids,
            repetition_penalty=1.1
        )

        # Decode only the newly generated tokens; the prompt is sliced off.
        new_token_ids = output_sequences[0][input_token_count:]
        generated_text = llama_tokenizer.decode(new_token_ids, skip_special_tokens=False)

        # Cut at the first end-of-turn BEFORE removing special tokens, otherwise
        # the marker is gone and any trailing text would be kept.
        summary = generated_text.split('<|eot_id|>')[0]
        summary = re.sub(r'<\|.*?\|>', '', summary)   # Remaining special tokens
        summary = re.sub(r'\[.*?\]', '', summary)     # Remove bracketed notes
        summary = re.sub(r'\n{3,}', '\n\n', summary).strip()  # Normalize line breaks

        # Minimum content validation (reported through the except branch below)
        if len(summary.split()) < 100:
            raise ValueError("Summary too short or incomplete")

        logger.info(f"Summary generated successfully ({len(summary.split())} words)")
        return summary

    except Exception as e:
        error_type = type(e).__name__
        error_trace = traceback.format_exc()
        logger.error(f"Generation error ({error_type}): {str(e)}\nTrace: {error_trace}")
        return f"Error: Could not generate the summary. ({error_type}: {str(e)})"


In [None]:
# -------------------------------
# 🎚️ Mode Switcher
# -------------------------------
def generate_response_mode_switcher(context, question, mode, max_new_tokens=600):
    """Route the request to the generator matching `mode`.

    `mode` must be "Insightful" or "Expert"; anything else raises ValueError.
    """
    if mode not in ("Insightful", "Expert"):
        raise ValueError(f"❌ Unknown mode selected: {mode}")
    responder = (
        generate_response_insightful if mode == "Insightful" else generate_response_expert
    )
    return responder(context, question, max_new_tokens)

In [None]:
# -------------------------------
# 🔗 Chatbot Flow (for Gradio) - MODIFIED RETURN VALUE
# -------------------------------
def rag_chatbot_conversation(message, history, mode):
    """
    Handle one chat turn for the Gradio chat interface.

    Args:
        message: The user's question.
        history: Conversation history supplied by Gradio; received but not
            used when building the RAG answer.
        mode: Response style, "Insightful" or "Expert".

    Returns:
        Only the bot's response string (an answer, a warning when input or
        index is missing, or an error message if retrieval/generation fails).
    """
    global index, documents
    print(f"\n[Chatbot Flow] Received Message: '{message}', Mode: {mode}")

    # Reject blank input before spending time on retrieval and generation.
    if not isinstance(message, str) or not message.strip():
        return "⚠️ Por favor, escribe una pregunta antes de enviar."

    # The index only exists after files have been processed.
    if index is None or not documents:
        print("❌ Error: El índice FAISS o los documentos no están listos. Procesa archivos primero.")
        return "⚠️ Por favor, primero carga y procesa documentos antes de chatear."

    try:
        print("[Chatbot Flow] Searching documents...")
        top_fragments = search_documents(message, k=3)

        if not top_fragments:
            print("[Chatbot Flow] No relevant fragments found.")
            # Tell the LLM explicitly that there is no specific context.
            combined_context = "No relevant context found in the documents."
        else:
            print(f"[Chatbot Flow] Found {len(top_fragments)} fragments.")
            # Concatenate the retrieved fragment texts; scores are not used here.
            combined_context = "\n\n".join([frag for frag, _score in top_fragments])

        print("[Chatbot Flow] Generating response...")
        response = generate_response_mode_switcher(combined_context, message, mode)
        print(f"[Chatbot Flow] Generated Response: {response[:100]}...")  # Log start of the response

        # Return ONLY the response text.
        return response

    except Exception as e:
        # Report any failure to the user as a chat message instead of raising.
        import traceback
        print(f"❌ Error interno en rag_chatbot_conversation: {str(e)}\n{traceback.format_exc()}")
        return f"🤖 ❌ Lo siento, ocurrió un error interno al procesar tu solicitud: {str(e)}"

In [None]:
# %% ==============================================
# %% ✅ Celda 4: Interfaz Gradio para Mole V2 (logo inline en Base64)
# %% ==============================================

import os
import base64
import logging
import traceback          # <-- import necesario para la función de resumen
import gradio as gr

# —————————————————————————————————————————
# Asegúrate de haber ejecutado antes las celdas del backend V2,
# donde defines:
#   process_uploaded_files, rag_chatbot_conversation,
#   generate_academic_summary, embedder, llama_model,
#   search_documents, generate_response_insightful,
#   generate_response_expert, _generate_llama3_response, y la lista global documents.
# —————————————————————————————————————————

# —————————————————————————————————————————
# Codifica tu logo.png a Base64 para incrustarlo
# —————————————————————————————————————————
# Read the logo from Drive and Base64-encode it so it can be embedded inline.
logo_path = "/content/drive/MyDrive/mole_v1/logo.png"
# Fail early with an explicit message when the logo file is missing.
if not os.path.isfile(logo_path):
    raise FileNotFoundError(f"No encontré el archivo de logo en: {logo_path}")
with open(logo_path, "rb") as f:
    logo_b64 = base64.b64encode(f.read()).decode("utf-8")

# —————————————————————————————————————————
# Build the <img> tag that embeds the Base64-encoded logo as a data URI
# —————————————————————————————————————————
logo_img_html = f'<img src="data:image/png;base64,{logo_b64}" class="mole-logo" alt="Mole Logo" />'

# —————————————————————————————————————————
# CSS y tema personalizado
# —————————————————————————————————————————
# Colour palette interpolated into the custom CSS below.
COLOR_BACKGROUND_GRADIENT = "linear-gradient(135deg, #3D2C8D 0%, #6A1B9A 50%, #9C27B0 100%)"
COLOR_PRIMARY_ACCENT       = "#D81B60"
COLOR_TEXT_ON_DARK_BG      = "#EDE7F6"
COLOR_CONTAINER_BACKGROUND = "#FFFFFF"
COLOR_BORDER_LIGHT         = "#DDDDDD"

# Stylesheet passed to gr.Blocks(css=...). It is an f-string, so literal CSS
# braces are doubled ({{ }}) and single braces interpolate the colour constants.
custom_css_v2_compact = f"""
body, .gradio-container {{
    background: {COLOR_BACKGROUND_GRADIENT} !important;
    color: {COLOR_TEXT_ON_DARK_BG} !important;
    font-family: 'Inter', sans-serif !important;
    height: 100vh !important;
    overflow-y: hidden !important;
}}
.gradio-container {{
    max-width: 100vw !important;
    margin: 0 !important;
    padding: 0.5rem !important;
    height: calc(100vh - 1rem) !important;
}}
.column-container {{ height: calc(100vh - 100px) !important; }}
.left-column-group {{ display: flex; flex-direction: column; height: 100%; }}
.left-column-group > .gr-form {{ flex-grow: 1; display: flex; flex-direction: column; }}
.left-column-group > .gr-form > .summary-section {{ margin-top: auto; }}

/* Header con logo */
.mole-header {{
    position: fixed !important;
    top: 0;
    left: 0;
    width: 100% !important;
    z-index: 1000 !important;
    margin: 0 !important;
    padding: 5px 10px !important;
    background: transparent !important;
}}
.mole-logo {{
    height: 145px !important;
    width: auto !important;
    filter: drop-shadow(0 2px 4px rgba(0,0,0,0.1));
}}
.header-text {{ display: flex; flex-direction: column; gap: 3px; }}
.mole-slogan {{
    font-size: 1.0em !important;
    opacity: 0.85 !important;
    color: {COLOR_TEXT_ON_DARK_BG} !important;
    margin: 0 !important;
    font-style: italic;
}}

.gr-panel, .gr-group {{
    background-color: {COLOR_CONTAINER_BACKGROUND} !important;
    border-radius: 8px !important;
    border: 1px solid {COLOR_BORDER_LIGHT} !important;
    box-shadow: 0 2px 4px rgba(0,0,0,0.06) !important;
    padding: 10px !important;
    margin: 5px 0 !important;
}}
.gr-button-primary {{
    background: {COLOR_PRIMARY_ACCENT} !important;
    border-color: {COLOR_PRIMARY_ACCENT} !important;
    color: #FFFFFF !important;
}}
@media (max-width: 700px) {{
    .column-container {{ height: auto !important; }}
    .mole-logo {{ height: 35px !important; }}
    .mole-slogan {{ font-size: 0.8em !important; }}
}}
"""

# Gradio theme passed to gr.Blocks(theme=...): pink/purple/gray hues, with the
# Inter Google font first in the font list.
theme_v2_compact = gr.themes.Base(
    primary_hue    = gr.themes.colors.pink,
    secondary_hue  = gr.themes.colors.purple,
    neutral_hue    = gr.themes.colors.gray,
    font           = [gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"]
)

# —————————————————————————————————————————
# Wrapper para el resumen académico (sin tocar)
# —————————————————————————————————————————
def trigger_academic_summary_wrapper():
    """Build the summary input from the processed fragments and run the summariser.

    Joins the global `documents` with blank lines, caps the text at a fixed
    number of characters and passes it to `generate_academic_summary`.

    Returns:
        The summary (or the summariser's own error string), "Error: No
        document processed" when nothing has been indexed yet, or
        "Internal error" if an unexpected exception occurs.
    """
    global documents
    logger = logging.getLogger(__name__)
    # Character cap on the text handed to the summariser.
    max_context_chars = 30000
    if 'documents' not in globals() or not documents:
        return "Error: No document processed"
    try:
        full = "\n\n".join(documents)[:max_context_chars]
        return generate_academic_summary(
            query="Generate a detailed and structured academic summary of the following content.",
            chat_history=[],
            context=full
        )
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception(f"Resume error: {e}")
        return "Internal error"

# —————————————————————————————————————————
# Construcción de la interfaz Gradio
# —————————————————————————————————————————
# Assemble the UI: header with logo, left column (upload, processing status,
# summary) and right column (RAG chat), then wire the button callbacks.
with gr.Blocks(
    theme=theme_v2_compact,
    css=custom_css_v2_compact,
    title="Mole V2 – Dig into It!"
) as demo_v2_compact:

    # Header with the inline logo
    with gr.Row():
        gr.HTML(f"""
        <div class="mole-header">
            {logo_img_html}
            <div class="header-text">
                <!-- opcional: podrías poner aquí un slogan -->
            </div>
        </div>
        """)

    # Main layout: left (upload + summary), right (chat)
    with gr.Row(equal_height=True, elem_classes="layout-row"):

        # Left column: upload and summary
        with gr.Column(scale=1, min_width=300, elem_classes="column-container"):
            with gr.Group(elem_classes="left-column-group"):
                file_input_v2 = gr.File(
                    label="Upload files (.txt, .pdf, .docx)",
                    file_count="multiple"
                )
                process_btn_v2 = gr.Button("🚀 Process", variant="primary")
                status_output_v2 = gr.Textbox(
                    label="Progress",
                    lines=3, interactive=False, max_lines=3
                )
                with gr.Group(elem_classes="summary-section"):
                    gr.Markdown(
                        "### 3. Academic Resume",
                        elem_classes="markdown-section-title"
                    )
                    summary_btn = gr.Button("📚 Resume!", variant="secondary", size="sm")
                    summary_output = gr.Textbox(
                        label="Resume:",
                        interactive=False, lines=10, max_lines=20,
                        elem_classes="summary-textbox"
                    )

        # Right column: RAG chat
        with gr.Column(scale=2, min_width=600, elem_classes="column-container"):
            with gr.Group(elem_classes="right-column-chat-interface"):
                gr.Markdown(
                    "### 2. Chat with Mole",
                    elem_classes="markdown-section-title"
                )
                # The radio in `additional_inputs` is passed to the chat
                # function as its third argument (`mode`).
                chat_interface_v2_compact = gr.ChatInterface(
                    fn=rag_chatbot_conversation,
                    additional_inputs_accordion=gr.Accordion("⚙️ Mode", open=False),
                    additional_inputs=[
                        gr.Radio(
                            choices=["Insightful", "Expert"],
                            value="Insightful",
                            label=None, show_label=False,
                            elem_classes="mode-radio-buttons"
                        )
                    ],
                    chatbot=gr.Chatbot(
                        label="Chat", height=400,
                        show_label=False, elem_id="mole_chatbot",
                        type='messages'
                    ),
                    textbox=gr.Textbox(
                        placeholder="Escribe tu pregunta aquí...",
                        show_label=False, lines=1, max_lines=3
                    ),
                    examples=[
                        ["What is the main topic of the document?", "Insightful"],
                        ["Explain to me in a few words what the document is about?", "Insightful"],
                        ["What are the main concepts of the document? List them.", "Expert"]
                    ],
                    cache_examples=False,
                )

    # Callbacks
    process_btn_v2.click(
        fn=process_uploaded_files,
        inputs=file_input_v2,
        outputs=status_output_v2,
        api_name="process_documents_v2_compact"
    )
    summary_btn.click(
        fn=trigger_academic_summary_wrapper,
        inputs=None,                   # the function takes no arguments
        outputs=summary_output,        # a single output component
        api_name="generate_summary_v2_logged"
    )

# —————————————————————————————————————————
# Lanzamiento de la app
# —————————————————————————————————————————
# Start the Gradio app. share=True requests a public *.gradio.live URL, and
# show_error=True surfaces callback errors in the UI.
if __name__ == "__main__":
    demo_v2_compact.launch(share=True, debug=False, show_error=True)





Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6e204fdc499a86106c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
