In [1]:
%pip install PyPDF2

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import PyPDF2
import re
from googletrans import Translator
import pyttsx3
from pathlib import Path
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext, simpledialog
from tkinter import ttk
from google.cloud import translate_v2 as translate

# Root window for the GUI; the annotation buttons created further down in this
# file are gridded onto the object bound to `root` at that point.
root = tk.Tk()  # Ensure this is at the beginning of your GUI code
root.title("PDF Bot")
root.geometry("800x600")

# In-memory annotation store: add_annotation() appends to it and
# save_annotations() writes it out. start_idx/end_idx are stored exactly as
# returned by the text widget's index() call in annotate_text().
annotations = {}  # A dictionary to store annotations. Format: {page_num: [(start_idx, end_idx, annotation)]}

def annotate_text():
    """Attach a user-entered annotation to the text currently selected in ``results_text``.

    Prompts for the annotation text and then for the page number, and stores
    the result through ``add_annotation``. Nothing is stored when there is no
    selection or when the user cancels either dialog.
    """
    # Capture the selection bounds up front: a missing selection raises
    # TclError here, and reading them before any modal dialog opens means the
    # stored indices always describe the text the user actually selected.
    try:
        start_idx = results_text.index(tk.SEL_FIRST)
        end_idx = results_text.index(tk.SEL_LAST)
        selected_text = results_text.get(start_idx, end_idx).strip()
    except tk.TclError:
        selected_text = ""

    if not selected_text:
        messagebox.showinfo("PDF Bot", "Please select some text to annotate first.")
        return

    annotation = simpledialog.askstring("Annotate", "Enter your annotation:")
    if not annotation:
        return  # cancelled or left empty

    # askinteger validates the input itself and returns None on cancel, so a
    # cancelled dialog no longer crashes in int(None) and a non-numeric entry
    # is no longer misreported as "no text selected". Pages are numbered from
    # 1 by read_pdf(), hence minvalue=1.
    page_num = simpledialog.askinteger(
        "Page Number",
        "Enter the page number of the selected text:",
        minvalue=1,
    )
    if page_num is None:
        return

    add_annotation(page_num, start_idx, end_idx, annotation)

def add_annotation(page_num, start_idx, end_idx, annotation):
    """Record one annotation for ``page_num`` in the module-level ``annotations`` dict.

    The entry is stored as a ``(start_idx, end_idx, annotation)`` tuple and a
    confirmation dialog is shown afterwards.
    """
    entry = (start_idx, end_idx, annotation)
    # setdefault creates the per-page list on first use.
    annotations.setdefault(page_num, []).append(entry)
    messagebox.showinfo("Annotation", "Annotation added successfully!")

def save_annotations():
    """Ask for a destination file and write every stored annotation to it.

    Annotations are grouped by page; each page section is followed by a blank
    line. Does nothing if the save dialog is cancelled.
    """
    destination = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")],
    )
    if not destination:
        return

    with open(destination, 'w', encoding='utf-8') as out:
        for page, entries in annotations.items():
            out.write(f"Page {page} Annotations:\n")
            out.writelines(
                f"  Text ({first}-{last}): {note}\n" for first, last, note in entries
            )
            out.write("\n")
    messagebox.showinfo("Success", "Annotations saved successfully!")

# Buttons that trigger annotate_text() and save_annotations(); both are placed
# on grid row 4 of whatever window `root` refers to at this point.
ttk.Button(root, text="Annotate Selected Text", command=annotate_text).grid(row=4, column=0, pady=10)
ttk.Button(root, text="Save Annotations", command=save_annotations).grid(row=4, column=1, pady=10)


def translate_text(text, target_language="en"):
    """Translate ``text`` into ``target_language`` using a fresh ``Translator``.

    Returns the translated string on success. Any exception is logged to
    stdout and reported to the caller as an ``"Error: ..."`` string instead of
    being raised.
    """
    try:
        return Translator().translate(text, dest=target_language).text
    except Exception as exc:
        failure = f"Error: {exc}"
        print(f"Error during translation: {exc}")  # Debug log
        return failure




def read_pdf(file_path):
    """Extract the text of every page of the PDF at ``file_path``.

    Returns a list of ``(page_number, text)`` tuples with page numbers
    starting at 1; pages that yield no text get a placeholder string. On any
    failure an ``"Error: ..."`` string is returned instead of raising.
    """
    placeholder = "[No text found on this page]"
    try:
        with open(file_path, 'rb') as stream:
            pages = PyPDF2.PdfReader(stream).pages
            return [
                (number, page.extract_text() or placeholder)
                for number, page in enumerate(pages, start=1)
            ]
    except Exception as exc:
        return f"Error: {exc}"



def get_pdf_metadata(file_path):
    """Return the ``metadata`` attribute of a ``PdfReader`` opened on ``file_path``.

    On any failure an ``"Error: ..."`` string is returned instead of raising.
    """
    try:
        with open(file_path, 'rb') as stream:
            # Read the attribute while the underlying file is still open.
            info = PyPDF2.PdfReader(stream).metadata
        return info
    except Exception as exc:
        return f"Error: {exc}"

def highlight_query(snippet, query):
    """Wrap every case-insensitive occurrence of ``query`` in ``snippet`` with ANSI codes.

    Each hit is prefixed with the bold-green sequence ``ESC[1;32m`` and
    followed by the reset sequence ``ESC[0m``. ``query`` is matched literally
    (regex metacharacters are escaped).
    """
    literal = re.compile(re.escape(query), re.IGNORECASE)

    def _wrap(hit):
        return f"\033[1;32m{hit.group(0)}\033[0m"

    return literal.sub(_wrap, snippet)

def search_query(content_by_page, query, context_before=100, context_after=400):
    """Find every case-insensitive, literal occurrence of ``query`` in the PDF text.

    Args:
        content_by_page: Iterable of ``(page_number, page_text)`` tuples.
        query: Text to look for; regex metacharacters are matched literally.
        context_before: Number of characters kept before each hit.
        context_after: Number of characters kept from the start of each hit
            onwards (the hit itself is included in this count).

    Returns:
        A list of ``(page_number, highlighted_snippet)`` tuples, one per hit,
        in page order. Returns an empty list when ``query`` or
        ``content_by_page`` is empty/None.
    """
    # An empty pattern matches at every character position, which would
    # produce one bogus "result" per character; treat it as no results.
    if not query or not content_by_page:
        return []

    # Compile once instead of re-escaping and re-compiling for every page.
    pattern = re.compile(re.escape(query), re.IGNORECASE)

    results = []
    for page_num, page_text in content_by_page:
        for match in pattern.finditer(page_text):
            start_idx = match.start()
            snippet = page_text[max(0, start_idx - context_before):start_idx + context_after]
            results.append((page_num, highlight_query(snippet, query)))
    return results

def save_results(results):
    """Ask for a destination file and write the search results to it as plain text.

    Args:
        results: List of ``(page_number, snippet)`` tuples as produced by
            ``search_query``.

    Snippets carry ANSI colour sequences added by ``highlight_query``; those
    are stripped before writing so the text file does not contain raw escape
    characters. Nothing is written when there are no results or when the save
    dialog is cancelled.
    """
    if not results:
        # Avoid creating an empty file and reporting it as a success.
        messagebox.showinfo("PDF Bot", "No search results to save.")
        return

    file_path = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")],
    )
    if not file_path:
        return

    ansi_sgr = re.compile(r"\x1b\[[0-9;]*m")  # ESC[...m colour/style sequences
    with open(file_path, 'w', encoding='utf-8') as file:
        for i, (page_num, snippet) in enumerate(results, 1):
            file.write(f"Result {i} (Page {page_num}):\n{ansi_sgr.sub('', snippet)}...\n\n")
    messagebox.showinfo("Success", "Search results saved successfully.")

def read_aloud(text):
    """Speak ``text`` through a pyttsx3 engine, blocking until playback finishes.

    Shows an info dialog when ``text`` is blank, and an error dialog if
    anything in the text-to-speech path raises.
    """
    try:
        if not text.strip():
            messagebox.showinfo("PDF Bot", "No text available to read aloud.")
            return

        speaker = pyttsx3.init()
        # rate = speed of speech, volume is in the range 0.0 to 1.0
        for setting, value in (('rate', 150), ('volume', 0.9)):
            speaker.setProperty(setting, value)

        first_voice = speaker.getProperty('voices')[0]  # first available voice
        speaker.setProperty('voice', first_voice.id)

        speaker.say(text)
        speaker.runAndWait()
    except Exception as exc:
        messagebox.showerror("Error", f"Text-to-Speech failed: {exc}")

# Reuse the root window created at the top of this cell rather than calling
# tk.Tk() a second time. A second Tk instance would orphan the first window
# (which holds the annotation buttons) and place the text widgets those
# buttons operate on in a different window.
root.title("Enhanced PDF Chatbot")
root.geometry("1000x800")
root.configure(bg="#f0f0f0")

# Search-history panel, to the right of the results/metadata panes.
history_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=30, height=10)
history_text.grid(row=5, column=2, rowspan=2, padx=10, pady=10)
history_text.insert(tk.END, "Search History:")

# Results pane: annotate_text() and handle_translation() read the selection
# from this widget.
results_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=20)
results_text.grid(row=5, column=0, columnspan=2, padx=10, pady=10)

# Metadata pane, below the results.
metadata_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=8)
metadata_text.grid(row=6, column=0, columnspan=2, padx=10, pady=10)


def handle_translation():
    """Translate the text selected in ``results_text`` and append the result to it.

    Asks the user for a target language code; does nothing further if that
    dialog is cancelled. Shows an info dialog when nothing is selected.
    """
    try:
        selection = results_text.get(tk.SEL_FIRST, tk.SEL_LAST).strip()
        if selection:
            print(f"Selected Text: {selection}")  # Debug log
    except (tk.TclError, ValueError):
        # No selection exists in the widget (or the debug log failed).
        selection = ""

    if not selection:
        messagebox.showinfo("PDF Bot", "Please select some text first to translate.")
        return

    language = simpledialog.askstring("Translate", "Enter target language (e.g., 'fr' for French):")
    if not language:
        return

    translation = translate_text(selection, language)
    results_text.insert(tk.END, f"\nTranslated Text:\n{translation}\n")





def show_message(title, message, dark_mode):
    """Open a small popup window showing ``message`` with an OK button.

    ``dark_mode`` selects between the dark and the light colour pair for the
    popup background and the label text.
    """
    if dark_mode:
        background, foreground = "#2e2e2e", "white"
    else:
        background, foreground = "#f0f0f0", "black"

    popup = tk.Toplevel()
    popup.title(title)
    popup.geometry("300x150")
    popup.configure(bg=background)

    tk.Label(
        popup,
        text=message,
        bg=background,
        fg=foreground,
        font=("Arial", 12),
    ).pack(pady=20)

    # The OK button simply closes the popup.
    ttk.Button(popup, text="OK", command=popup.destroy).pack(pady=10)

def summarize_content(content_by_page, max_lines=5):
    """Build a naive summary made of the first lines of every page.

    Args:
        content_by_page: Iterable of ``(page_number, page_text)`` tuples.
        max_lines: How many leading lines of each page to keep (default 5).

    Returns:
        One string containing, for each page, a ``"Page N Summary:"`` header,
        the kept lines, and a trailing blank line. Empty input yields ``""``.

    Raises:
        ValueError: If ``max_lines`` is negative (a negative slice bound
            would silently drop lines from the end of the page instead).
    """
    if max_lines < 0:
        raise ValueError("max_lines must be non-negative")

    sections = []
    for page_num, page_text in content_by_page:
        leading_lines = page_text.split('\n')[:max_lines]
        sections.append(f"Page {page_num} Summary:\n" + "\n".join(leading_lines) + "\n\n")
    return "".join(sections)

def download_summary(content_by_page):
    """Summarize the loaded PDF content and save the summary to a user-chosen file.

    Shows an error dialog when no content is loaded; does nothing if the save
    dialog is cancelled.
    """
    if not content_by_page:
        messagebox.showerror("Error", "No valid PDF content loaded to summarize.")
        return

    summary = summarize_content(content_by_page)
    destination = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")],
    )
    if not destination:
        return

    with open(destination, 'w', encoding='utf-8') as out:
        out.write(summary)
    messagebox.showinfo("Success", "Summary saved successfully.")



def chat_with_pdf(content_by_page):
    """Build the PDF chatbot window and run its Tk event loop.

    The window offers PDF upload, case-insensitive search with highlighted
    snippets, a search-history panel, translation of selected text,
    read-aloud, a dark-mode toggle, and export of results or a per-page
    summary.

    Args:
        content_by_page: List of ``(page_number, page_text)`` tuples as
            returned by ``read_pdf``, or ``None``/empty to start with no
            document loaded (searching then reports an error until a PDF is
            uploaded).

    Returns:
        None. Blocks in ``root.mainloop()`` until the window is closed or
        "Exit" is pressed.
    """
    search_history = []
    search_results = []

    # highlight_query() wraps each hit in ANSI "bold green ... reset" codes.
    # A Tk text widget would show those as literal junk characters, so they
    # are converted into a text tag when results are displayed.
    ansi_highlight = re.compile(r"\x1b\[1;32m(.*?)\x1b\[0m", re.DOTALL)

    root = tk.Tk()
    root.title("Enhanced PDF Chatbot")
    root.geometry("1000x800")
    root.configure(bg="#f0f0f0")

    # Bind the style and the checkbox variable to THIS root. Without an
    # explicit master they attach to the default (first-created) Tk instance,
    # and widgets in this window would not see them.
    style = ttk.Style(root)
    style.configure("TButton", font=("Arial", 12), padding=5)
    style.configure("TLabel", font=("Arial", 12), background="#f0f0f0")
    dark_mode_var = tk.BooleanVar(master=root)

    # --- Widgets the handlers below read from / write to ------------------
    query_entry = ttk.Entry(root, width=50)
    query_entry.grid(row=2, column=0, columnspan=3, padx=10, pady=10)

    results_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=20)
    results_text.grid(row=5, column=0, columnspan=2, padx=10, pady=10)
    results_text.tag_configure("match", foreground="#1e9e4a")

    metadata_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=8)
    metadata_text.grid(row=6, column=0, columnspan=2, padx=10, pady=10)

    # The history panel lives in this window too, so search history is shown
    # next to the results instead of in some other window.
    history_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=30, height=10)
    history_text.grid(row=5, column=2, rowspan=2, padx=10, pady=10)
    history_text.insert(tk.END, "Search History:")

    # --- Handlers ---------------------------------------------------------
    def toggle_dark_mode():
        """Switch window, labels and text panes between dark and light colours."""
        if dark_mode_var.get():
            window_bg, pane_bg, text_fg = "#2e2e2e", "#2e2e2e", "white"
        else:
            window_bg, pane_bg, text_fg = "#f0f0f0", "white", "black"
        root.configure(bg=window_bg)
        style.configure("TLabel", background=window_bg, foreground=text_fg)
        for pane in (results_text, history_text, metadata_text):
            pane.configure(bg=pane_bg, fg=text_fg, insertbackground=text_fg)

    def insert_snippet(snippet):
        """Append a snippet to the results pane, rendering ANSI-wrapped hits with the 'match' tag."""
        cursor = 0
        for hit in ansi_highlight.finditer(snippet):
            results_text.insert(tk.END, snippet[cursor:hit.start()])
            results_text.insert(tk.END, hit.group(1), "match")
            cursor = hit.end()
        results_text.insert(tk.END, snippet[cursor:])

    def handle_search():
        """Run the query from the entry box and show the matching snippets."""
        nonlocal search_results
        query = query_entry.get()
        if not query:
            messagebox.showinfo("PDF Bot", "Please enter a query.", parent=root)
            return
        if not content_by_page:
            messagebox.showerror("Error", "No valid PDF content loaded.", parent=root)
            return
        search_results = search_query(content_by_page, query)
        results_text.delete("1.0", tk.END)
        if not search_results:
            results_text.insert(tk.END, "PDF Bot: Sorry, nothing found.")
            return
        # Only queries that produced hits are recorded in the history.
        search_history.append(query)
        history_text.delete("1.0", tk.END)
        history_text.insert(tk.END, "Search History:\n" + "\n".join(search_history))
        for i, (page_num, snippet) in enumerate(search_results, 1):
            results_text.insert(tk.END, f"\nResult {i} (Page {page_num}):\n")
            insert_snippet(snippet)
            results_text.insert(tk.END, "...\n")

    def handle_clear():
        """Clear the results pane and reset the search history."""
        results_text.delete("1.0", tk.END)
        history_text.delete("1.0", tk.END)
        search_history.clear()
        history_text.insert(tk.END, "Search History:")

    def handle_read_aloud():
        """Read the current contents of the results pane aloud."""
        text = results_text.get("1.0", tk.END).strip()
        if text:
            read_aloud(text)
        else:
            messagebox.showinfo("PDF Bot", "No search results to read aloud.", parent=root)

    def handle_translate_selection():
        """Translate the text selected in THIS window's results pane and append the result."""
        try:
            selected_text = results_text.get(tk.SEL_FIRST, tk.SEL_LAST).strip()
        except tk.TclError:  # raised when nothing is selected
            selected_text = ""
        if not selected_text:
            messagebox.showinfo("PDF Bot", "Please select some text first to translate.", parent=root)
            return
        target_language = simpledialog.askstring(
            "Translate", "Enter target language (e.g., 'fr' for French):", parent=root
        )
        if target_language:
            translated_text = translate_text(selected_text, target_language)
            results_text.insert(tk.END, f"\nTranslated Text:\n{translated_text}\n")

    def show_metadata(metadata):
        """Render the result of get_pdf_metadata() in the metadata pane."""
        metadata_text.delete("1.0", tk.END)
        metadata_text.insert(tk.END, "PDF Metadata:\n")
        if isinstance(metadata, str):
            # get_pdf_metadata() reports failures as an "Error: ..." string.
            metadata_text.insert(tk.END, f"{metadata}\n")
        elif metadata:
            for key, value in metadata.items():
                metadata_text.insert(tk.END, f"{key}: {value}\n")
        else:
            # The reader may expose no metadata at all (None / empty).
            metadata_text.insert(tk.END, "[No metadata found]\n")

    def upload_pdf():
        """Load another PDF into this window, replacing the current document."""
        nonlocal content_by_page, search_results
        file_path = filedialog.askopenfilename(parent=root, filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return
        loaded = read_pdf(file_path)
        if isinstance(loaded, str):  # read_pdf() returns "Error: ..." on failure
            messagebox.showerror("Error", loaded, parent=root)
            return
        # Swap the document in place instead of recursively opening a new
        # root window with a nested mainloop.
        content_by_page = loaded
        search_results = []
        results_text.delete("1.0", tk.END)  # old hits refer to the previous PDF
        show_metadata(get_pdf_metadata(file_path))

    def handle_exit():
        """Stop the event loop and close the window."""
        # quit() ends mainloop() even if other Tk windows still exist in the
        # process; destroy() then removes this window instead of leaving it
        # on screen unresponsive.
        root.quit()
        root.destroy()

    # --- Layout -----------------------------------------------------------
    ttk.Label(root, text="Upload your PDF:").grid(row=0, column=0, columnspan=3, pady=10)
    ttk.Button(root, text="Upload PDF", command=upload_pdf).grid(row=1, column=0, pady=10)

    ttk.Button(root, text="Search", command=handle_search).grid(row=3, column=0, pady=10)
    ttk.Button(root, text="Clear", command=handle_clear).grid(row=3, column=1, pady=10)
    # The lambdas read search_results / content_by_page at click time, so
    # they always see the latest search and the latest uploaded document.
    ttk.Button(root, text="Save Results", command=lambda: save_results(search_results)).grid(row=3, column=2, pady=10)

    ttk.Button(root, text="Translate Selected Text", command=handle_translate_selection).grid(row=4, column=0, pady=10)
    ttk.Button(root, text="Read Aloud", command=handle_read_aloud).grid(row=4, column=1, pady=10)
    dark_mode_toggle = ttk.Checkbutton(root, text="Dark Mode", variable=dark_mode_var, command=toggle_dark_mode)
    dark_mode_toggle.grid(row=4, column=2, pady=10)

    ttk.Button(root, text="Download Summary", command=lambda: download_summary(content_by_page)).grid(row=7, column=0, pady=10)
    ttk.Button(root, text="Exit", command=handle_exit).grid(row=7, column=1, pady=10)

    # Closing via the window manager behaves the same as pressing "Exit".
    root.protocol("WM_DELETE_WINDOW", handle_exit)

    root.mainloop()

# Entry point: open the chatbot window with no PDF preloaded. Searching is
# rejected with "No valid PDF content loaded." until content is available.
if __name__ == "__main__":
    chat_with_pdf(None)
