# In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pypdf
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

# Fetch the NLTK data this module relies on; quiet=True is passed so the
# downloader does not print its progress output.
#   - 'punkt':     tokenizer models used by word_tokenize on older NLTK releases.
#   - 'punkt_tab': the resource newer NLTK releases load for word_tokenize
#                  instead of 'punkt'; without it word_tokenize raises
#                  LookupError there.
#   - 'stopwords': stop-word lists used to filter out common words.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)

def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A single string with the text of all pages in page order; a newline
        is appended after each page's text.

    Raises:
        OSError: If the file cannot be opened.
        Any exception pypdf raises while parsing the file or extracting
        text is propagated unchanged.
    """
    with open(file_path, 'rb') as file:
        reader = pypdf.PdfReader(file)
        # Build the result with a single join instead of repeated `+=`.
        # `or ''` makes a page that yields no text (None or empty) contribute
        # just its newline rather than raising TypeError on concatenation.
        # The join runs inside the `with` so the file is still open while
        # the pages are being read.
        return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)

def extract_topics(text, num_topics=5, *, language='spanish'):
    """Return the most frequent non-stop-word tokens found in *text*.

    The text is lower-cased and tokenized with NLTK; tokens that are stop
    words for *language* or that are not purely alphanumeric (punctuation,
    symbols) are discarded, and the remaining tokens are ranked by frequency.

    Args:
        text: The text to analyze.
        num_topics: Maximum number of tokens to return.
        language: NLTK language name passed to both the tokenizer and the
            stop-word corpus. Defaults to 'spanish', the previously
            hard-coded value.

    Returns:
        A list of at most *num_topics* tokens, most frequent first. The list
        is empty when no token survives the filtering.
    """
    tokens = word_tokenize(text.lower(), language=language)
    # A set gives O(1) membership checks while filtering every token.
    stop_words = set(stopwords.words(language))
    word_freq = FreqDist(
        token for token in tokens
        if token not in stop_words and token.isalnum()
    )
    return [word for word, _ in word_freq.most_common(num_topics)]

class PDFAnalyzerApp(tk.Tk):
    """Main window: lets the user pick a PDF and lists its most frequent words."""

    def __init__(self):
        """Create the root window, set its title and size, and build the widgets."""
        super().__init__()
        self.title("Analizador de PDF - Demo")
        self.geometry("400x300")
        self.create_widgets()

    def create_widgets(self):
        """Build the heading label, the action buttons and the result text box."""
        tk.Label(self, text="Analizador de PDF", font=("Helvetica", 16)).pack(pady=10)
        tk.Button(self, text="Seleccionar PDF", command=self.analyze_pdf).pack(pady=10)
        # Kept as an attribute because analyze_pdf writes the results into it.
        self.result_text = tk.Text(self, height=10, width=40)
        self.result_text.pack(pady=10)
        # destroy() instead of quit(): quit() only stops the event loop and
        # leaves the window on screen (unresponsive) when the interpreter
        # keeps running, e.g. in a notebook or an interactive session.
        # destroy() closes the window, which also ends mainloop().
        tk.Button(self, text="Salir", command=self.destroy).pack(pady=10)

    def analyze_pdf(self):
        """Ask for a PDF file, extract its main topics and display them.

        Does nothing if the file dialog is cancelled. Any error raised while
        reading or analyzing the file is reported in an error dialog and the
        previous contents of the result box are left untouched.
        """
        file_path = filedialog.askopenfilename(filetypes=[("PDF files", "*.pdf")])
        if not file_path:
            # Dialog cancelled: nothing to analyze.
            return

        try:
            text = read_pdf(file_path)
            topics = extract_topics(text)
        except Exception as e:
            # UI boundary: show any failure to the user instead of letting it
            # surface only as a traceback on the console.
            messagebox.showerror("Error", f"No se pudo procesar el archivo: {e}")
            return

        if topics:
            result = "Temas principales:\n" + "\n".join(topics)
        else:
            # E.g. a scanned PDF with no text layer: say so explicitly rather
            # than showing a header followed by nothing.
            result = "No se encontró texto analizable en el PDF."

        # Text widget indices are "line.column" strings; "1.0" is the start.
        self.result_text.delete("1.0", tk.END)
        self.result_text.insert(tk.END, result)

# Launch the GUI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    app = PDFAnalyzerApp()
    # Runs the Tk event loop; this call blocks until the event loop is stopped.
    app.mainloop()