In [14]:
# Version 1 (working baseline): manual rectangle-selection redaction.

import fitz
import tkinter as tk
from tkinter import filedialog, messagebox
import io

class PDFRedactor:
    """Tkinter front end for manually redacting rectangular areas of a PDF.

    The control window offers "Open PDF", "Redact Selected Areas" and page
    navigation. Each page is rendered with PyMuPDF (``fitz``) into a separate
    viewer window where the user drags rectangles with the left mouse button.
    Rectangles are stored per page in ``self.selected_rectangles`` as
    normalised ``(x0, y0, x1, y1)`` tuples and turned into redaction
    annotations when "Redact Selected Areas" is pressed.

    Note: constructing the class enters the Tk main loop, so ``PDFRedactor()``
    blocks until the main window is closed.
    """

    def __init__(self):
        """Build the control window and run the Tk main loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")

        # Improved Layout
        self.window.geometry("400x300")
        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 16))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        self.redact_button = tk.Button(self.window, text="Redact Selected Areas", command=self.redact)
        self.redact_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        # One list of rectangles per page; populated by open_pdf().
        self.selected_rectangles = []

        self.window.mainloop()

    def _has_document(self):
        """Return True when a PDF is open; otherwise warn the user and return False."""
        if getattr(self, "pdf_document", None) is None:
            messagebox.showwarning("No PDF", "Please open a PDF file first.")
            return False
        return True

    def open_pdf(self):
        """Ask for a PDF file, open it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return

        try:
            document = fitz.open(file_path)
        except Exception as exc:
            # The concrete exception type depends on the PyMuPDF version, so
            # report whatever was raised instead of letting the callback die.
            messagebox.showerror("Error", f"Could not open PDF: {exc}")
            return

        if document.page_count == 0:
            document.close()
            messagebox.showwarning("Empty PDF", "The selected PDF has no pages.")
            return

        # Release the previously opened document before replacing it.
        previous = getattr(self, "pdf_document", None)
        if previous is not None:
            previous.close()

        self.pdf_document = document
        self.total_pages = document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]
        self.display_pdf()

    def display_pdf(self):
        """Render the current page into a fresh viewer window."""
        if not self._has_document():
            return

        self.page_image = self.pdf_document[self.current_page].get_pixmap()
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # Display PDF page: the viewer window is rebuilt on every call.
        if hasattr(self, 'pdf_window'):
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # Keep a reference on self so the image is not garbage collected.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        # Re-draw rectangles already selected on this page; they are still
        # queued for redaction, so they should stay visible after navigation.
        for x0, y0, x1, y1 in self.selected_rectangles[self.current_page]:
            self.canvas.create_rectangle(x0, y0, x1, y1, outline="red")

        # Binding mouse events
        self.canvas.bind("<ButtonPress-1>", self.on_button_press)
        self.canvas.bind("<ButtonRelease-1>", self.on_button_release)

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

    def on_button_press(self, event):
        """Remember where the drag started (canvas coordinates)."""
        self.start_x = event.x
        self.start_y = event.y

    def on_button_release(self, event):
        """Finish a drag: store the normalised rectangle and outline it.

        The corners are sorted so that dragging right-to-left or bottom-to-top
        still yields a valid ``(x0, y0, x1, y1)`` with ``x0 < x1`` and
        ``y0 < y1``. Zero-area selections (plain clicks) are ignored.
        """
        if getattr(self, "start_x", None) is None or getattr(self, "start_y", None) is None:
            return  # release without a matching press

        x0, x1 = sorted((self.start_x, event.x))
        y0, y1 = sorted((self.start_y, event.y))
        self.start_x = self.start_y = None

        if x0 == x1 or y0 == y1:
            return

        self.selected_rectangles[self.current_page].append((x0, y0, x1, y1))
        self.canvas.create_rectangle(x0, y0, x1, y1, outline="red")

    def redact(self):
        """Apply all selected rectangles as redactions and save the result.

        The output is written to ``redacted_output.pdf`` in the current
        working directory.
        """
        if not self._has_document():
            return

        output_pdf = "redacted_output.pdf"
        for i, page in enumerate(self.pdf_document):
            rects = self.selected_rectangles[i]
            if not rects:
                continue
            for rect in rects:
                page.add_redact_annot(fitz.Rect(rect))
            # Applying once per page covers every annotation added above.
            page.apply_redactions()
        self.pdf_document.save(output_pdf)
        messagebox.showinfo("Success", f"Redacted PDF saved as: {output_pdf}")

    def redact_area(self, page, rect):
        """Redact a single rectangle on ``page`` immediately.

        Args:
            page: PyMuPDF page object to modify.
            rect: Four coordinates ``(x0, y0, x1, y1)`` or a ``fitz.Rect``.
        """
        rect_fitz = fitz.Rect(rect)
        page.add_redact_annot(rect_fitz)
        page.apply_redactions()

    def next_page(self):
        """Show the next page, if there is one."""
        if not self._has_document():
            return
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def previous_page(self):
        """Show the previous page, if there is one."""
        if not self._has_document():
            return
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

# Launch the GUI only when this cell/file is executed directly, not on import.
if __name__ == "__main__":
    # The constructor enters the Tk main loop itself, so this call blocks
    # until the main window is closed.
    PDFRedactor()

In [None]:
# Improvement 1: adds zoom, jump-to-page, and regex/keyword-based automatic redaction.

In [22]:
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import io
import re

class PDFRedactor:
    """Tkinter front end for manual and pattern-based redaction of a PDF.

    Pages are rendered with PyMuPDF (``fitz``) at ``self.zoom_level`` into a
    viewer window. The user can drag rectangles to mark areas, or run an
    automatic pass that redacts every text match of ``self.patterns``
    (email addresses, 10-digit numbers and user supplied keywords).

    All rectangles in ``self.selected_rectangles`` are kept in page
    coordinates (zoom factor 1.0), so they stay valid when the zoom changes.

    Note: constructing the class enters the Tk main loop, so ``PDFRedactor()``
    blocks until the main window is closed.
    """

    # Built-in regular expressions applied by auto_redact().
    DEFAULT_PATTERNS = (
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",  # for Email pattern
        r"\b\d{10}\b",   # for phone no. pattern
    )

    def __init__(self):
        """Build the control window and run the Tk main loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")

        # Improved Layout
        self.window.geometry("400x400")
        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 16))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        self.redact_button = tk.Button(self.window, text="Redact Selected Areas", command=self.redact)
        self.redact_button.pack(pady=10)

        self.auto_redact_button = tk.Button(self.window, text="Auto Redact Sensitive Info", command=self.auto_redact)
        self.auto_redact_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        self.jump_button = tk.Button(self.page_control_frame, text="Jump to Page", command=self.jump_to_page)
        self.jump_button.grid(row=1, column=1, pady=5)

        self.zoom_in_button = tk.Button(self.page_control_frame, text="Zoom In", command=self.zoom_in)
        self.zoom_in_button.grid(row=2, column=0, pady=5)

        self.zoom_out_button = tk.Button(self.page_control_frame, text="Zoom Out", command=self.zoom_out)
        self.zoom_out_button.grid(row=2, column=2, pady=5)

        self.selected_rectangles = []
        self.zoom_level = 1.0
        # Per-instance copy so custom keywords never leak into the class default.
        self.patterns = list(self.DEFAULT_PATTERNS)

        self.window.mainloop()

    def _has_document(self):
        """Return True when a PDF is open; otherwise warn the user and return False."""
        if getattr(self, "pdf_document", None) is None:
            messagebox.showwarning("No PDF", "Please open a PDF file first.")
            return False
        return True

    @staticmethod
    def _keyword_patterns(keywords):
        """Turn a comma separated keyword string into whole-word regex patterns.

        Blank entries (e.g. from ``"a,,b"`` or a trailing comma) are dropped,
        because an empty keyword would produce a pattern that matches the
        empty string at every word boundary.
        """
        words = (word.strip() for word in keywords.split(','))
        return [r'\b' + re.escape(word) + r'\b' for word in words if word]

    @staticmethod
    def _find_sensitive_strings(page_text, patterns):
        """Return the distinct, non-empty strings in ``page_text`` matched by ``patterns``.

        Order of first appearance (pattern by pattern) is preserved.
        """
        found = []
        seen = set()
        for pattern in patterns:
            for match in re.finditer(pattern, page_text):
                text = match.group()
                if text and text not in seen:
                    seen.add(text)
                    found.append(text)
        return found

    def open_pdf(self):
        """Ask for a PDF file, open it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return

        try:
            document = fitz.open(file_path)
        except Exception as exc:
            # The concrete exception type depends on the PyMuPDF version, so
            # report whatever was raised instead of letting the callback die.
            messagebox.showerror("Error", f"Could not open PDF: {exc}")
            return

        if document.page_count == 0:
            document.close()
            messagebox.showwarning("Empty PDF", "The selected PDF has no pages.")
            return

        # Release the previously opened document before replacing it.
        previous = getattr(self, "pdf_document", None)
        if previous is not None:
            previous.close()

        self.pdf_document = document
        self.total_pages = document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]  # List for rectangles for each page
        self.display_pdf()

    def display_pdf(self):
        """Render the current page at the current zoom into a fresh viewer window."""
        if not self._has_document():
            return

        zoom = self.zoom_level
        self.page_image = self.pdf_document[self.current_page].get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # The viewer window is rebuilt on every call.
        if hasattr(self, 'pdf_window'):
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # Keep a reference on self so the image is not garbage collected.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        # Re-draw stored rectangles (page coordinates) scaled to the canvas.
        for x0, y0, x1, y1 in self.selected_rectangles[self.current_page]:
            self.canvas.create_rectangle(x0 * zoom, y0 * zoom, x1 * zoom, y1 * zoom, outline="red")

        self.canvas.bind("<ButtonPress-1>", self.on_button_press)
        self.canvas.bind("<ButtonRelease-1>", self.on_button_release)

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

    def on_button_press(self, event):
        """Remember where the drag started (canvas coordinates)."""
        self.start_x = event.x
        self.start_y = event.y

    def on_button_release(self, event):
        """Finish a drag: store the rectangle in page coordinates and outline it.

        Canvas pixels are divided by the zoom factor so that the stored
        rectangle addresses the same page area regardless of zoom. Corners are
        sorted so any drag direction yields a valid rectangle; zero-area
        selections (plain clicks) are ignored.
        """
        if getattr(self, "start_x", None) is None or getattr(self, "start_y", None) is None:
            return  # release without a matching press

        x0, x1 = sorted((self.start_x, event.x))
        y0, y1 = sorted((self.start_y, event.y))
        self.start_x = self.start_y = None

        if x0 == x1 or y0 == y1:
            return

        zoom = self.zoom_level
        self.selected_rectangles[self.current_page].append((x0 / zoom, y0 / zoom, x1 / zoom, y1 / zoom))
        self.canvas.create_rectangle(x0, y0, x1, y1, outline="red")

    def redact(self):
        """Apply all selected rectangles as redactions and save the result.

        The output is written to ``redacted_output.pdf`` in the current
        working directory.
        """
        if not self._has_document():
            return

        output_pdf = "redacted_output.pdf"
        for i, page in enumerate(self.pdf_document):
            rects = self.selected_rectangles[i]
            if not rects:
                continue
            for rect in rects:
                page.add_redact_annot(fitz.Rect(rect))
            # Applying once per page covers every annotation added above.
            page.apply_redactions()
        self.pdf_document.save(output_pdf)
        messagebox.showinfo("Success", f"Redacted PDF saved as: {output_pdf}")

    def redact_area(self, page, rect):
        """Redact a single rectangle on ``page`` immediately.

        Args:
            page: PyMuPDF page object to modify.
            rect: Four page coordinates ``(x0, y0, x1, y1)`` or a ``fitz.Rect``.
        """
        rect_fitz = fitz.Rect(rect)
        page.add_redact_annot(rect_fitz)
        page.apply_redactions()

    def next_page(self):
        """Show the next page, if there is one."""
        if not self._has_document():
            return
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def previous_page(self):
        """Show the previous page, if there is one."""
        if not self._has_document():
            return
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

    def jump_to_page(self):
        """Ask for a 1-based page number and display that page."""
        if not self._has_document():
            return
        page_number = simpledialog.askinteger("Jump to Page", f"Enter page number (1-{self.total_pages}):", minvalue=1, maxvalue=self.total_pages)
        # askinteger returns None when the dialog is cancelled.
        if page_number:
            self.current_page = page_number - 1
            self.display_pdf()

    def zoom_in(self):
        """Increase the zoom factor by 0.2 and re-render."""
        if not self._has_document():
            return
        self.zoom_level += 0.2
        self.display_pdf()

    def zoom_out(self):
        """Decrease the zoom factor by 0.2 (never below 1.0) and re-render."""
        if not self._has_document():
            return
        self.zoom_level = max(1.0, self.zoom_level - 0.2)
        self.display_pdf()

    def auto_redact(self):
        """Redact every text match of ``self.patterns`` and save the result.

        The user is first asked for optional comma separated keywords; these
        are added to ``self.patterns`` as whole-word patterns (duplicates are
        skipped) and therefore also apply to later runs. The output is written
        to ``auto_redacted_output.pdf`` in the current working directory.
        """
        if not self._has_document():
            return

        keywords = simpledialog.askstring("Keywords", "Enter custom keywords to redact (comma separated):")
        if keywords:
            for pattern in self._keyword_patterns(keywords):
                if pattern not in self.patterns:
                    self.patterns.append(pattern)

        for page_num in range(self.total_pages):
            page = self.pdf_document.load_page(page_num)
            page_text = page.get_text("text")
            page_changed = False
            for text in self._find_sensitive_strings(page_text, self.patterns):
                # NOTE(review): search_for locates the literal string, so it may
                # also hit occurrences the regex itself would not have matched
                # (e.g. inside a longer token) - confirm this is acceptable.
                for area in page.search_for(text):
                    self.selected_rectangles[page_num].append(area)
                    page.add_redact_annot(fitz.Rect(area))
                    page_changed = True
            if page_changed:
                # Applying once per page covers every annotation added above.
                page.apply_redactions()

        output_pdf = "auto_redacted_output.pdf"
        self.pdf_document.save(output_pdf)
        messagebox.showinfo("Success", f"Automatically redacted PDF saved as: {output_pdf}")

# Launch the GUI only when this cell/file is executed directly, not on import.
if __name__ == "__main__":
    # The constructor enters the Tk main loop itself, so this call blocks
    # until the main window is closed.
    PDFRedactor()


In [27]:
# Improvement 2: adds selectable redaction levels (low / medium / high); acceptable for now.

In [None]:
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import io
import re

class PDFRedactor:
    """Tkinter front end for manual and level-based automatic PDF redaction.

    Pages are rendered with PyMuPDF (``fitz``) at ``self.zoom_level`` into a
    viewer window. The user can drag rectangles to mark areas, or run an
    automatic pass whose scope depends on the chosen redaction level:

    * ``low``    - email addresses and 10-digit numbers
    * ``medium`` - ``low`` plus user supplied keywords
    * ``high``   - every pattern in ``self.patterns`` (adds names and dates)

    All rectangles in ``self.selected_rectangles`` are kept in page
    coordinates (zoom factor 1.0), so they stay valid when the zoom changes.

    Note: constructing the class enters the Tk main loop, so ``PDFRedactor()``
    blocks until the main window is closed.
    """

    # Built-in patterns; 'custom' holds a list of keyword patterns.
    DEFAULT_PATTERNS = {
        'email': r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        'phone': r"\b\d{10}\b",
        'custom': [],
        'name': r"\b[A-Z][a-z]* [A-Z][a-z]*\b",
        'date': r"\b\d{2}/\d{2}/\d{4}\b",
    }

    VALID_LEVELS = ('high', 'medium', 'low')

    def __init__(self):
        """Build the control window and run the Tk main loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")

        self.window.geometry("400x500")
        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 16))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        self.redact_button = tk.Button(self.window, text="Redact Selected Areas", command=self.redact)
        self.redact_button.pack(pady=10)

        self.auto_redact_button = tk.Button(self.window, text="Auto Redact Sensitive Info", command=self.select_redaction_level)
        self.auto_redact_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        self.jump_button = tk.Button(self.page_control_frame, text="Jump to Page", command=self.jump_to_page)
        self.jump_button.grid(row=1, column=1, pady=5)

        self.zoom_in_button = tk.Button(self.page_control_frame, text="Zoom In", command=self.zoom_in)
        self.zoom_in_button.grid(row=2, column=0, pady=5)

        self.zoom_out_button = tk.Button(self.page_control_frame, text="Zoom Out", command=self.zoom_out)
        self.zoom_out_button.grid(row=2, column=2, pady=5)

        self.selected_rectangles = []
        self.zoom_level = 1.0

        # Per-instance copy (including the nested list) so that custom
        # keywords never leak into the class-level defaults.
        self.patterns = {
            key: (list(value) if isinstance(value, list) else value)
            for key, value in self.DEFAULT_PATTERNS.items()
        }

        self.redaction_level = 'low'

        self.window.mainloop()

    def _has_document(self):
        """Return True when a PDF is open; otherwise warn the user and return False."""
        if getattr(self, "pdf_document", None) is None:
            messagebox.showwarning("No PDF", "Please open a PDF file first.")
            return False
        return True

    @staticmethod
    def _keyword_patterns(keywords):
        """Turn a comma separated keyword string into whole-word regex patterns.

        Blank entries (e.g. from ``"a,,b"`` or a trailing comma) are dropped,
        because an empty keyword would produce a pattern that matches the
        empty string at every word boundary.
        """
        words = (word.strip() for word in keywords.split(','))
        return [r'\b' + re.escape(word) + r'\b' for word in words if word]

    @staticmethod
    def _find_sensitive_strings(page_text, patterns):
        """Return the distinct, non-empty strings in ``page_text`` matched by ``patterns``.

        Order of first appearance (pattern by pattern) is preserved.
        """
        found = []
        seen = set()
        for pattern in patterns:
            for match in re.finditer(pattern, page_text):
                text = match.group()
                if text and text not in seen:
                    seen.add(text)
                    found.append(text)
        return found

    def _patterns_for_level(self, level):
        """Return the flat list of regex strings that apply at ``level``.

        ``self.patterns`` mixes plain strings with the ``'custom'`` list, so
        for ``'high'`` the values are flattened; passing the nested list
        straight to ``re.finditer`` would raise ``TypeError``. Unknown levels
        yield an empty list.
        """
        base = [self.patterns['email'], self.patterns['phone']]
        if level == 'low':
            return base
        if level == 'medium':
            return base + list(self.patterns['custom'])
        if level == 'high':
            flattened = []
            for value in self.patterns.values():
                if isinstance(value, str):
                    flattened.append(value)
                else:
                    flattened.extend(value)
            return flattened
        return []

    def open_pdf(self):
        """Ask for a PDF file, open it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return

        try:
            document = fitz.open(file_path)
        except Exception as exc:
            # The concrete exception type depends on the PyMuPDF version, so
            # report whatever was raised instead of letting the callback die.
            messagebox.showerror("Error", f"Could not open PDF: {exc}")
            return

        if document.page_count == 0:
            document.close()
            messagebox.showwarning("Empty PDF", "The selected PDF has no pages.")
            return

        # Release the previously opened document before replacing it.
        previous = getattr(self, "pdf_document", None)
        if previous is not None:
            previous.close()

        self.pdf_document = document
        self.total_pages = document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]
        self.display_pdf()

    def display_pdf(self):
        """Render the current page at the current zoom into a fresh viewer window."""
        if not self._has_document():
            return

        zoom = self.zoom_level
        self.page_image = self.pdf_document[self.current_page].get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # Display the PDF page: the viewer window is rebuilt on every call.
        if hasattr(self, 'pdf_window'):
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # Keep a reference on self so the image is not garbage collected.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        # Re-draw stored rectangles (page coordinates) scaled to the canvas.
        for x0, y0, x1, y1 in self.selected_rectangles[self.current_page]:
            self.canvas.create_rectangle(x0 * zoom, y0 * zoom, x1 * zoom, y1 * zoom, outline="green")

        self.canvas.bind("<ButtonPress-1>", self.on_button_press)
        self.canvas.bind("<ButtonRelease-1>", self.on_button_release)

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

    def on_button_press(self, event):
        """Remember where the drag started (canvas coordinates)."""
        self.start_x = event.x
        self.start_y = event.y

    def on_button_release(self, event):
        """Finish a drag: store the rectangle in page coordinates and outline it.

        Canvas pixels are divided by the zoom factor so that the stored
        rectangle addresses the same page area regardless of zoom. Corners are
        sorted so any drag direction yields a valid rectangle; zero-area
        selections (plain clicks) are ignored.
        """
        if getattr(self, "start_x", None) is None or getattr(self, "start_y", None) is None:
            return  # release without a matching press

        x0, x1 = sorted((self.start_x, event.x))
        y0, y1 = sorted((self.start_y, event.y))
        self.start_x = self.start_y = None

        if x0 == x1 or y0 == y1:
            return

        zoom = self.zoom_level
        self.selected_rectangles[self.current_page].append((x0 / zoom, y0 / zoom, x1 / zoom, y1 / zoom))
        self.canvas.create_rectangle(x0, y0, x1, y1, outline="green")

    def redact(self):
        """Apply all selected rectangles as black redactions and save the result.

        The output is written to ``redacted_output.pdf`` in the current
        working directory.
        """
        if not self._has_document():
            return

        output_pdf = "redacted_output.pdf"
        for i, page in enumerate(self.pdf_document):
            rects = self.selected_rectangles[i]
            if not rects:
                continue
            for rect in rects:
                page.add_redact_annot(fitz.Rect(rect), fill=(0, 0, 0))
            # Applying once per page covers every annotation added above.
            page.apply_redactions()
        self.pdf_document.save(output_pdf)
        messagebox.showinfo("Success", f"Redacted PDF saved as: {output_pdf}")

    def redact_area(self, page, rect):
        """Redact a single rectangle on ``page`` immediately with a black fill.

        Args:
            page: PyMuPDF page object to modify.
            rect: Four page coordinates ``(x0, y0, x1, y1)`` or a ``fitz.Rect``.
        """
        rect_fitz = fitz.Rect(rect)
        page.add_redact_annot(rect_fitz, fill=(0, 0, 0))
        page.apply_redactions()

    def next_page(self):
        """Show the next page, if there is one."""
        if not self._has_document():
            return
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def previous_page(self):
        """Show the previous page, if there is one."""
        if not self._has_document():
            return
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

    def jump_to_page(self):
        """Ask for a 1-based page number and display that page."""
        if not self._has_document():
            return
        page_number = simpledialog.askinteger("Jump to Page", f"Enter page number (1-{self.total_pages}):", minvalue=1, maxvalue=self.total_pages)
        # askinteger returns None when the dialog is cancelled.
        if page_number:
            self.current_page = page_number - 1
            self.display_pdf()

    def zoom_in(self):
        """Increase the zoom factor by 0.2 and re-render."""
        if not self._has_document():
            return
        self.zoom_level += 0.2
        self.display_pdf()

    def zoom_out(self):
        """Decrease the zoom factor by 0.2 (never below 1.0) and re-render."""
        if not self._has_document():
            return
        self.zoom_level = max(1.0, self.zoom_level - 0.2)
        self.display_pdf()

    def select_redaction_level(self):
        """Ask for a redaction level and, if valid, run the automatic redaction.

        Cancelling the dialog does nothing. An invalid answer shows a warning
        and leaves ``self.redaction_level`` unchanged.
        """
        if not self._has_document():
            return

        answer = simpledialog.askstring("Redaction Level", "Enter redaction level (high, medium, low):")
        if answer is None:
            return  # dialog cancelled

        level = answer.strip().lower()
        if level not in self.VALID_LEVELS:
            messagebox.showwarning("Invalid Input", "Please select a valid redaction level: high, medium, or low.")
            return

        self.redaction_level = level
        self.auto_redact()

    def auto_redact(self):
        """Redact every text match for the current redaction level and save.

        For ``medium`` and ``high`` the user is first asked for optional comma
        separated keywords, which replace ``self.patterns['custom']`` when at
        least one is given. The output is written to
        ``auto_redacted_output_<level>.pdf`` in the current working directory.
        """
        if not self._has_document():
            return

        if self.redaction_level in ['medium', 'high']:
            keywords = simpledialog.askstring("Keywords", "Enter custom keywords to redact (comma separated):")
            if keywords:
                self.patterns['custom'] = self._keyword_patterns(keywords)

        # The pattern list does not depend on the page, so build it once.
        patterns_to_apply = self._patterns_for_level(self.redaction_level)

        for page_num in range(self.total_pages):
            page = self.pdf_document.load_page(page_num)
            page_text = page.get_text("text")
            page_changed = False
            for text in self._find_sensitive_strings(page_text, patterns_to_apply):
                # NOTE(review): search_for locates the literal string, so it may
                # also hit occurrences the regex itself would not have matched
                # (e.g. inside a longer token) - confirm this is acceptable.
                for area in page.search_for(text):
                    self.selected_rectangles[page_num].append(area)
                    page.add_redact_annot(fitz.Rect(area), fill=(0, 0, 0))
                    page_changed = True
            if page_changed:
                # Applying once per page covers every annotation added above.
                page.apply_redactions()

        output_pdf = f"auto_redacted_output_{self.redaction_level}.pdf"
        self.pdf_document.save(output_pdf)
        messagebox.showinfo("Success", f"Automatically redacted PDF saved as: {output_pdf}")

# Launch the GUI only when this cell/file is executed directly, not on import.
if __name__ == "__main__":
    # The constructor enters the Tk main loop itself, so this call blocks
    # until the main window is closed.
    PDFRedactor()
