<a href="https://colab.research.google.com/github/Kevin-2099/resumen-ia-demo/blob/main/Resumidor_de_documentos_IA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================
# Instalar dependencias (Colab o Spaces)
# ============================================

# Notebook shell escape (leading "!"): installs the third-party packages that
# the imports below rely on. --quiet suppresses most of pip's output.
!pip install transformers pdfplumber gradio --quiet

# ============================================
# Importar librerías
# ============================================

from transformers import pipeline
import pdfplumber
import gradio as gr
import csv
import os
from datetime import datetime

# ============================================
# Cargar modelo Hugging Face
# ============================================

# Build the summarization pipeline once, at module level, so that every call to
# resumir_archivo reuses the same loaded model (facebook/bart-large-cnn)
# instead of reloading it per request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# ============================================
# Guardar logs de resúmenes generados
# ============================================

def guardar_log(nombre_archivo, resumen):
    """Append one entry to the CSV log of generated summaries.

    The log lives in ``resumenes_log.csv`` in the current working directory.
    A header row is written first when the file does not exist yet.

    Args:
        nombre_archivo: Name (or path) of the summarized document.
        resumen: Full summary text; only its first 120 characters are logged,
            with newlines turned into spaces so the excerpt stays on one line.
    """
    ruta_log = "resumenes_log.csv"

    # Must be checked before opening: append mode creates the file.
    necesita_cabecera = not os.path.isfile(ruta_log)

    extracto = resumen[:120].replace("\n", " ")
    marca_tiempo = datetime.now().isoformat()

    with open(ruta_log, mode="a", newline='', encoding='utf-8') as salida:
        escritor = csv.writer(salida)
        if necesita_cabecera:
            escritor.writerow(["fecha", "archivo", "resumen"])
        escritor.writerow([marca_tiempo, nombre_archivo, extracto])

# ============================================
# Función principal para resumir PDFs
# ============================================

def resumir_archivo(archivo):
    """Summarize an uploaded PDF and return the summary plus a download file.

    Text is extracted from at most the first 10 pages, flattened onto a single
    line, cut into 700-character chunks (at most 5 are kept), and each chunk is
    summarized with the module-level ``summarizer`` pipeline. The joined
    summaries are written to ``resumen_salida.txt`` and an excerpt is logged
    through ``guardar_log``.

    Args:
        archivo: Value supplied by the file-upload input: an object exposing
            the file path as ``.name``, a plain path string, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(text, file_path)`` pair, one value per output component of the
        interface. On success ``text`` is the summary and ``file_path`` is
        ``"resumen_salida.txt"``. On failure ``text`` is the user-facing error
        message and ``file_path`` is ``None``.
    """
    if archivo is None:
        # Nothing was uploaded; report it the same way as an unreadable file.
        return "❌ Error: No se pudo procesar el archivo PDF.", None

    # Accept both upload objects (with .name) and plain path strings.
    ruta_pdf = getattr(archivo, "name", archivo)

    paginas = []
    try:
        with pdfplumber.open(ruta_pdf) as pdf:
            for pagina in pdf.pages[:10]:  # Limit to the first 10 pages
                contenido = pagina.extract_text()
                if contenido:
                    paginas.append(contenido)
    except Exception:
        # UI boundary: any parsing/IO failure becomes a friendly message.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return "❌ Error: No se pudo procesar el archivo PDF.", None

    texto = "\n".join(paginas).replace("\n", " ").strip()
    if len(texto) < 300:
        return "❌ El documento es demasiado corto para generar un resumen.", None

    # Split into fixed-size character chunks; cap the count to bound runtime.
    chunks = [texto[i:i + 700] for i in range(0, len(texto), 700)][:5]
    resumenes = summarizer(chunks, max_length=100, min_length=30, do_sample=False)
    resumen_total = "\n".join(r['summary_text'] for r in resumenes)

    with open("resumen_salida.txt", "w", encoding="utf-8") as f:
        f.write(resumen_total)

    guardar_log(ruta_pdf, resumen_total)
    return resumen_total, "resumen_salida.txt"

# ============================================
# Interfaz visual con Gradio
# ============================================

# Build the Gradio UI: one PDF upload as input and two outputs (the summary
# text and a downloadable file), fed by resumir_archivo.
# No `theme` argument is passed: the legacy "compact" name is not a built-in
# theme in current Gradio releases, which then try (and fail) to fetch it as a
# Hub theme before falling back to the default theme anyway.
interface = gr.Interface(
    fn=resumir_archivo,
    inputs=gr.File(label="📄 Sube tu documento PDF (en español o inglés)"),
    outputs=[
        gr.Textbox(label="🧠 Resumen generado por IA"),
        gr.File(label="⬇️ Descargar resumen"),
    ],
    title="📚 Resumen Inteligente de Documentos con IA",
    description="Sube un documento PDF y obtén un resumen automático de alta calidad usando el modelo BART de Facebook.",
)

# ============================================
# Lanzar la app (Colab o Hugging Face Spaces)
# ============================================

# Start serving the UI. share=True additionally requests a temporary public
# URL, so the app can be reached from outside the notebook host.
interface.launch(share=True)


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu

Sorry, we can't find the page you are looking for.


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1bbacac3210a5571f2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


