In [1]:
%pip install PyPDF2

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import tkinter as tk
from tkinter import messagebox, simpledialog, filedialog, scrolledtext, ttk
from PyPDF2 import PdfReader
from googletrans import Translator
import re
import threading
import pyttsx3

# Initialize the text-to-speech engine
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # Speed of speech
engine.setProperty('volume', 0.9)  # Volume (0.0 to 1.0)
voices = engine.getProperty('voices')
if voices:
    # Use the first available voice. When the driver reports no voices,
    # keep the engine's default instead of failing with IndexError.
    engine.setProperty('voice', voices[0].id)

# Global variables for annotations
annotations = {}  # Format: {page_num: [(start_idx, end_idx, annotation)]}

# Function to read a PDF file
def read_pdf(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open in binary mode.

    Returns:
        On success, a list of ``(page_number, page_text)`` tuples with page
        numbers starting at 1; pages whose extraction yields nothing carry the
        placeholder ``"[No text found on this page]"``.
        On any failure, a string of the form ``"Error: <exception message>"``
        (callers distinguish the two cases by type).
    """
    try:
        with open(file_path, 'rb') as stream:
            return [
                (number, page.extract_text() or "[No text found on this page]")
                for number, page in enumerate(PdfReader(stream).pages, start=1)
            ]
    except Exception as exc:
        # Failures are reported as a value rather than raised.
        return f"Error: {exc}"

# Function to get PDF metadata
def get_pdf_metadata(file_path):
    """Return the metadata object of a PDF file.

    Args:
        file_path: Path of the PDF file to open in binary mode.

    Returns:
        Whatever ``PdfReader(...).metadata`` yields for the file on success
        (NOTE(review): this may be ``None`` for documents without an info
        dictionary -- confirm against the PyPDF2 version in use).
        On any failure, a string of the form ``"Error: <exception message>"``.
    """
    try:
        with open(file_path, 'rb') as stream:
            # Read the attribute while the underlying file is still open.
            info = PdfReader(stream).metadata
    except Exception as exc:
        return f"Error: {exc}"
    return info

# Function to search for a query in the PDF content
def search_query(content_by_page, query, snippet_length=100, case_sensitive=False, whole_word=False):
    """Find every occurrence of ``query`` and return a highlighted snippet for each.

    Args:
        content_by_page: Iterable of ``(page_number, page_text)`` pairs.
        query: Literal text to look for (regex metacharacters are escaped).
        snippet_length: Number of context characters kept before and after
            each match.
        case_sensitive: When False (default) the search ignores case.
        whole_word: When True, a match must not be directly preceded or
            followed by a word character.

    Returns:
        A list of ``(page_number, snippet)`` tuples, one per match, in page
        order. Every occurrence of the query inside a snippet is wrapped in
        square brackets. An empty query yields an empty list.
    """
    if not query:
        # An empty pattern would match at every position of every page.
        return []

    literal = re.escape(query)
    # Lookarounds instead of \b...\b: \b needs a word character on one side,
    # so it can never match queries that begin or end with punctuation
    # (e.g. "C++"). For ordinary words the two forms are equivalent.
    pattern = rf"(?<!\w){literal}(?!\w)" if whole_word else literal
    matcher = re.compile(pattern, 0 if case_sensitive else re.IGNORECASE)

    results = []
    for page_num, page_text in content_by_page:
        for match in matcher.finditer(page_text):
            window_start = max(0, match.start() - snippet_length)
            # Measure trailing context from the END of the match so that a
            # match longer than snippet_length is never cut off.
            window_end = match.end() + snippet_length
            snippet = page_text[window_start:window_end]
            # Re-run the pattern on the snippet so neighbouring occurrences
            # inside the window are highlighted as well.
            highlighted_snippet = matcher.sub(lambda m: f"[{m.group(0)}]", snippet)
            results.append((page_num, highlighted_snippet))
    return results

# Function to save search results
def save_results(results):
    """Ask the user for a destination file and write the search results to it.

    Args:
        results: Iterable of ``(page_number, snippet)`` pairs.

    Each result is written as a numbered entry (numbering starts at 1) in a
    UTF-8 text file, and a confirmation dialog is shown afterwards. Nothing is
    written when the save dialog returns a falsy path.
    """
    destination = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")],
    )
    if not destination:
        return

    with open(destination, 'w', encoding='utf-8') as out:
        out.writelines(
            f"Result {position} (Page {page_num}):\n{snippet}...\n\n"
            for position, (page_num, snippet) in enumerate(results, 1)
        )
    messagebox.showinfo("Success", "Search results saved successfully.")

# Function to read text aloud
def read_aloud(text):
    """Speak ``text`` through the module-level ``engine`` on a background thread.

    Args:
        text: The string to read aloud.

    The call returns immediately after starting a daemon thread. Inside that
    thread, blank text produces an informational dialog instead of speech, and
    any exception raised while speaking is reported in an error dialog.
    """
    def _speak_in_background():
        try:
            if text.strip():
                # Halt the current utterance before queuing the new one.
                engine.stop()
                engine.say(text)
                engine.runAndWait()
            else:
                messagebox.showinfo("PDF Bot", "No text available to read aloud.")
        except Exception as exc:
            messagebox.showerror("Error", f"Text-to-Speech failed: {exc}")

    worker = threading.Thread(target=_speak_in_background, daemon=True)
    worker.start()

# Function to handle translation
def handle_translation(results_text):
    """Translate the text currently selected in ``results_text`` and append the result.

    Args:
        results_text: A Tk text widget; its current selection is the input and
            the translation is appended at its end with the ``"translated"`` tag.

    The user is asked for a target language code. If nothing is selected an
    informational dialog is shown; if the dialog is cancelled nothing happens;
    if the translation itself fails (for example an unknown language code or a
    network problem) an error dialog reports the cause.
    """
    try:
        selected_text = results_text.get(tk.SEL_FIRST, tk.SEL_LAST).strip()
    except tk.TclError:
        # Tk raises TclError when the widget has no selection.
        selected_text = ""
    if not selected_text:
        messagebox.showinfo("PDF Bot", "Please select some text first to translate.")
        return

    target_language = simpledialog.askstring("Translate", "Enter target language code (e.g., 'fr' for French):")
    target_language = target_language.strip() if target_language else ""
    if not target_language:
        return

    # Keep translation failures separate from the "no selection" case: the
    # translator can raise ValueError for an invalid language code, which
    # must not be reported as a missing selection.
    try:
        translator = Translator()
        translated_text = translator.translate(selected_text, dest=target_language).text
    except Exception as e:
        messagebox.showerror("Error", f"Translation failed: {e}")
        return

    results_text.insert(tk.END, f"\n\n[Translated Text ({target_language})]:\n{translated_text}\n", "translated")
    results_text.tag_config("translated", foreground="blue", font=("Arial", 12, "italic"))

# Main application class
class PDFBotApp:
    """Tkinter window for loading a PDF and searching, saving, reading aloud or translating its text.

    State kept on the instance:
        search_history: Queries that produced at least one result.
        search_results: ``(page_number, snippet)`` pairs of the latest search.
        pdf_content_by_page: ``(page_number, page_text)`` pairs of the loaded
            PDF, or None while no PDF has been loaded successfully.
    """

    def __init__(self, root):
        """Configure the top-level window and build the widgets.

        Args:
            root: The Tk root window that hosts the application.
        """
        self.root = root
        self.root.title("PDF Bot")
        self.root.geometry("800x600")

        # Initialize UI components
        self.setup_ui()

    def setup_ui(self):
        """Create and lay out all widgets and initialise the search state."""
        # Upload PDF button
        ttk.Label(self.root, text="Upload your PDF:").grid(row=0, column=0, columnspan=3, pady=10)
        ttk.Button(self.root, text="Upload PDF", command=self.upload_pdf).grid(row=1, column=0, pady=10)

        # Search bar
        self.query_entry = ttk.Entry(self.root, width=50)
        self.query_entry.grid(row=2, column=0, columnspan=3, padx=10, pady=10)

        # Buttons
        ttk.Button(self.root, text="Search", command=self.handle_search).grid(row=3, column=0, pady=10)
        ttk.Button(self.root, text="Clear", command=self.handle_clear).grid(row=3, column=1, pady=10)
        # The lambdas look up the attributes at click time, so they always see
        # the latest results / the text widget created further down.
        ttk.Button(self.root, text="Save Results", command=lambda: save_results(self.search_results)).grid(row=3, column=3, pady=10)
        ttk.Button(self.root, text="Read Aloud", command=self.handle_read_aloud).grid(row=3, column=4, pady=10)
        ttk.Button(self.root, text="Translate Selected Text", command=lambda: handle_translation(self.results_text)).grid(row=4, column=0, pady=10)

        # Results text area
        self.results_text = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, width=90, height=20)
        self.results_text.grid(row=5, column=0, columnspan=2, padx=10, pady=10)

        # Metadata text area
        self.metadata_text = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, width=90, height=8)
        self.metadata_text.grid(row=6, column=0, columnspan=2, padx=10, pady=10)

        # Exit button. destroy() ends the main loop AND closes the window;
        # quit() only stops the loop, which leaves the window open when the
        # interpreter keeps running (e.g. inside a notebook kernel).
        ttk.Button(self.root, text="Exit", command=self.root.destroy).grid(row=7, column=1, pady=10)

        # Initialize search history, results and the loaded document
        self.search_history = []
        self.search_results = []
        self.pdf_content_by_page = None

    def upload_pdf(self):
        """Let the user pick a PDF, load its text and display its metadata.

        If reading fails, an error dialog is shown and any previously loaded
        document stays active.
        """
        file_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return

        content = read_pdf(file_path)
        if isinstance(content, str):
            # read_pdf reports failure as an "Error: ..." string. Do not store
            # it as document content, otherwise a later search would try to
            # unpack its characters as (page, text) pairs.
            messagebox.showerror("Error", content)
            return
        self.pdf_content_by_page = content

        metadata = get_pdf_metadata(file_path)
        self.metadata_text.delete(1.0, tk.END)
        self.metadata_text.insert(tk.END, "PDF Metadata:\n")
        if isinstance(metadata, str):
            # get_pdf_metadata also reports failure as an "Error: ..." string.
            self.metadata_text.insert(tk.END, f"{metadata}\n")
        elif not metadata:
            # None or an empty mapping: nothing to list.
            self.metadata_text.insert(tk.END, "No metadata available.\n")
        else:
            for key, value in metadata.items():
                self.metadata_text.insert(tk.END, f"{key}: {value}\n")

    def handle_search(self):
        """Run the query from the entry box against the loaded PDF and show the hits."""
        query = self.query_entry.get()
        if not query.strip():
            messagebox.showinfo("PDF Bot", "Please enter a query.")
            return
        # getattr keeps this safe for instances on which the attribute was never set.
        content = getattr(self, 'pdf_content_by_page', None)
        if not content or isinstance(content, str):
            messagebox.showerror("Error", "No valid PDF content loaded.")
            return
        self.search_results = search_query(content, query)
        self.results_text.delete(1.0, tk.END)
        if self.search_results:
            # Only queries that produced results are remembered.
            self.search_history.append(query)
            for i, (page_num, snippet) in enumerate(self.search_results, 1):
                self.results_text.insert(tk.END, f"\nResult {i} (Page {page_num}):\n{snippet}...\n")
        else:
            self.results_text.insert(tk.END, "PDF Bot: Sorry, nothing found.")

    def handle_clear(self):
        """Empty the results area and forget the search history and last results."""
        self.results_text.delete(1.0, tk.END)
        self.search_history.clear()
        # Drop the stored hits too, so "Save Results" cannot write results
        # that are no longer displayed.
        self.search_results = []

    def handle_read_aloud(self):
        """Read the current contents of the results area aloud, if there are any."""
        text = self.results_text.get(1.0, tk.END).strip()
        if text:
            read_aloud(text)
        else:
            messagebox.showinfo("PDF Bot", "No search results to read aloud.")

# Run the application
if __name__ == "__main__":
    # Create the top-level Tk window that hosts the application.
    root = tk.Tk()
    # Build the UI on that window; the instance is kept in `app`.
    app = PDFBotApp(root)
    # Hand control to the Tk event loop; this call does not return until the loop ends.
    root.mainloop()

ModuleNotFoundError: No module named 'googletrans'