In [4]:
#########  Background:  #########
# Easy to use and secure redaction tool “RE-DACT” which allows redaction/masking/anonymization on various input formats based on a gradational scale defined by the user and providing customized output. 👇🏼
# Over time, the model will learn and gain the ability to generate realistic synthetic data in any requested format.

######### Description: #########
# The proposed solution is a natural language processing (machine learning) based redaction tool.👇🏼
# The tool will redact or obfuscate from original data leaving the output structurally/logically the same but stripped of key identifiers and 👇🏼
# other content which may in any way allow the identity, actual data, markers or issues in the input content to be revealed. 👇🏼
# The correlational logic may be appropriately obfuscated based on the degree of redaction. 👇🏼
# This will have an easy to use GUI and will be available for use on online and offline systems. 👇🏼
# The degree of the redaction will be up to the user- the higher the degree set by the user, the more the degree of redaction. 👇🏼
# This will work with all different commonly used formats for text and data sets. 👇🏼
# Security of data will be assured by ensuring that the input data is not stored or retrievable in any fashion by third party entities.👇🏼
# User will have complete control over the input data. 👇🏼
# It is also an important aspect that sometimes data may be required to be stored or submitted, however specific sensitive details may not necessarily be required. 👇🏼
# In such a situation, anonymized data authenticated as having been redacted from the original would suffice. 👇🏼
# Declassification processes are long and arduous; anonymization is largely manual or custom script driven.👇🏼
# By providing a gradational redaction option, ordinary users can strip away the specificity to the extent of liking-from merely name removal/anonymization to completely synthetic data with only faint traces of original structure/pattern. 👇🏼
# This can allow generation of large number of databases with realistic but anonymized data that can be shared for learning, growth and commercial ventures.👇🏼
# Expected Solution: Problem Statement: Easy to use and secure redaction tool “RE-DACT” which allows redaction/masking/anonymization on various input formats based on a gradational scale defined by the user and providing customized output.👇🏼
# Over time, the model will learn and gain the ability to generate realistic synthetic data in any requested format.

# Stage 1 Data: Curate and use own data set for building PoC Task/Result: Input/Output: Supports common input formats (text files, images) and basic output formats (redacted files, logs). 👇🏼
# Web based version Training Dataset: Publicly available dataset can be used for the purpose. Metrics: Precision, Recall, F1 Score on Open Source Testing dataset.

# Stage 2 Data: Dataset will be provided in Grand Finale Task/Result: Input/Output: Expands to handle more data formats (e.g., PDFs, videos) and advanced output options (e.g., redacted versions with annotations). 👇🏼
# Training Dataset: Diverse data set prepared from commonly used formats Metrics: Precision, Recall, F1 Score on Open Source Testing dataset. Performance/Evaluation Criteria: • PoC will be preferred over just concept or presentation.👇🏼
# The performance may be ascertained on the following metrics: • Efficacy of the redaction/anonymization- whether appropriate data has been redacted • Gradational effect achieved based on user preference and ability to calibrate. • Ability to work on a variety of input sources • Security of the input data by minimal retention • Speed • Optimized computing usage and ability to operate at scale. • Ease of use, UI, UX. • Performance benchmarked against COTS solutions. • Web Based and Offline solution. • Minimal API dependency • Use of Secure Coding Practices and cybersecurity built in design....

In [24]:
# Improvement 01 ==> Major feature: automatic redaction.

In [10]:
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import io
import re

class PDFRedactor:
    """Tkinter tool that automatically redacts regex-matched text in a PDF.

    The user opens a PDF, picks a redaction level (low / medium / high) and the
    tool blacks out every text span matching the patterns enabled for that
    level, then saves the result next to the working directory.

    Constructing an instance builds the control window and enters the Tk main
    loop, so the constructor only returns once the window has been closed.
    """

    # Entities redacted at the 'low' and 'medium' levels. The 'high' level is
    # not listed because it always uses every key of ``self.patterns``.
    LEVEL_ENTITIES = {
        'low': ('mobile', 'account_number'),
        'medium': ('mobile', 'account_number', 'aadhaar', 'name', 'dob'),
    }
    VALID_LEVELS = ('high', 'medium', 'low')

    def __init__(self):
        """Create the widgets, define patterns/colours and run the event loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")
        self.window.geometry("400x500")

        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 16))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        # Asks for a redaction level and then redacts the whole document.
        self.redact_button = tk.Button(self.window, text="Redact Automatically", command=self.select_redaction_level)
        self.redact_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        self.selected_rectangles = []
        self.zoom_level = 1.0

        # Regular expressions for the limited set of entities the tool knows.
        self.patterns = {
            'name': r"\b[A-Z][a-z]* [A-Z][a-z]*\b",
            'registration': r"\b[A-Z0-9]{10,15}\b",
            'mobile': r"\b\d{10}\b",
            'aadhaar': r"\b\d{4} \d{4} \d{4}\b",
            'address': r"\b\d+\s+\w+\s+\w+",
            'dob': r"\b\d{2}/\d{2}/\d{4}\b",
            'signature': r"\b(Signature|Signed by)\b",
            'account_number': r"\b\d{9,18}\b"
        }

        # RGB colours (components in 0..1) used to highlight each entity.
        self.colors = {
            'name': (1, 0, 0),
            'registration': (0, 1, 0),
            'mobile': (0, 0, 1),
            'aadhaar': (1, 1, 0),
            'address': (1, 0.5, 0),
            'dob': (0, 1, 1),
            'signature': (1, 0, 1),
            'account_number': (0.5, 0, 0.5)
        }

        self.redaction_level = 'low'

        # Document state is initialised up front so the navigation and
        # redaction buttons are safe to press before any PDF has been opened.
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0

        self.window.mainloop()

    def open_pdf(self):
        """Ask for a PDF file, load it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return  # dialog cancelled

        new_document = fitz.open(file_path)
        # Release the previously opened document instead of leaking its handle.
        if self.pdf_document is not None:
            self.pdf_document.close()
        self.pdf_document = new_document
        self.total_pages = self.pdf_document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]
        self.display_pdf()

    def display_pdf(self):
        """Render the current page into a fresh viewer window."""
        if self.pdf_document is None:
            return

        zoom = fitz.Matrix(self.zoom_level, self.zoom_level)
        self.page_image = self.pdf_document[self.current_page].get_pixmap(matrix=zoom)
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # Only one viewer window is kept; replace the previous one if present.
        if getattr(self, 'pdf_window', None) is not None:
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # The PhotoImage is stored on self so it is not garbage collected while
        # the canvas is still displaying it.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

    def select_redaction_level(self):
        """Prompt for a redaction level and, if valid, redact the document.

        Cancelling the dialog or entering an unknown level leaves the
        previously selected level untouched.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        answer = simpledialog.askstring("Redaction Level", "Enter redaction level (high, medium, low):")
        if answer is None:
            return  # askstring returns None when the dialog is cancelled

        level = answer.strip().lower()
        if level not in self.VALID_LEVELS:
            messagebox.showwarning("Invalid Input", "Please select a valid redaction level: high, medium, or low.")
            return

        self.redaction_level = level
        self.auto_highlight_and_redact()

    def _entities_for_level(self, level):
        """Return the list of pattern names to apply for ``level``."""
        if level == 'high':
            return list(self.patterns.keys())
        return list(self.LEVEL_ENTITIES.get(level, ()))

    def auto_highlight_and_redact(self):
        """Highlight and redact every pattern match on every page, then save.

        Redaction annotations are collected for a whole page and applied in a
        single pass, and each distinct matched string is searched only once per
        page (the first entity that matches it decides its colour).
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        entities = self._entities_for_level(self.redaction_level)

        for page_num in range(self.total_pages):
            page = self.pdf_document.load_page(page_num)
            page_text = page.get_text("text")

            handled = set()   # matched strings already located on this page
            pending = False   # True once at least one redaction was queued

            for entity in entities:
                color = self.colors[entity]
                for match in re.finditer(self.patterns[entity], page_text):
                    needle = match.group()
                    if not needle or needle in handled:
                        continue
                    handled.add(needle)
                    for area in page.search_for(needle):
                        page.add_freetext_annot(area, "", text_color=color, fill_color=color)
                        self.redact_area(page, area, apply=False)
                        pending = True

            if pending:
                page.apply_redactions()

        output_pdf = f"auto_redacted_output_{self.redaction_level}.pdf"
        # garbage/deflate make the saved file drop unreferenced objects and
        # compress streams rather than carrying them over from the input.
        self.pdf_document.save(output_pdf, garbage=4, deflate=True)
        messagebox.showinfo("Success", f"Automatically redacted PDF saved as: {output_pdf}")

    def redact_area(self, page, rect, apply=True):
        """Queue a black redaction over ``rect`` on ``page``.

        Args:
            page: The PyMuPDF page to redact.
            rect: Rectangle (or rect-like sequence) to black out.
            apply: When True (default) the redaction is applied immediately;
                pass False to batch several areas and apply them together.
        """
        rect_fitz = fitz.Rect(rect)
        page.add_redact_annot(rect_fitz, fill=(0, 0, 0))
        if apply:
            page.apply_redactions()

    def next_page(self):
        """Show the following page, if there is one."""
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def previous_page(self):
        """Show the preceding page, if there is one."""
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

# Start the tool only when this code is executed directly. Constructing
# PDFRedactor builds the window and enters the Tk main loop, so this call
# blocks until the window is closed.
if __name__ == "__main__":
    PDFRedactor()


In [None]:
# Improvement 02 > Major feature: manual redaction + automatic redaction in one program.
# Redacts English-language PDFs only.

In [16]:
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import io
import re

class PDFRedactor:
    """Tkinter tool combining automatic (regex) and manual (drawn) PDF redaction.

    The user opens a PDF, optionally drags rectangles over areas of the page
    viewer, then picks a redaction level. Every pattern match enabled for that
    level and every drawn rectangle is blacked out and the result is saved.

    Constructing an instance builds the control window and enters the Tk main
    loop, so the constructor only returns once the window has been closed.
    """

    # Entities redacted at the 'low' and 'medium' levels. The 'high' level is
    # not listed because it always uses every key of ``self.patterns``.
    LEVEL_ENTITIES = {
        'low': ('mobile', 'account_number'),
        'medium': ('mobile', 'account_number', 'aadhaar', 'name', 'dob'),
    }
    VALID_LEVELS = ('high', 'medium', 'low')

    def __init__(self):
        """Create the widgets, define patterns/colours and run the event loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")
        self.window.geometry("400x500")

        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 18))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        self.redact_button = tk.Button(self.window, text="Select Redaction Level", command=self.select_redaction_level)
        self.redact_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        # One list of manual rectangles per page (filled in by open_pdf).
        self.selected_rectangles = []
        self.manual_selection_enabled = True
        self.zoom_level = 1.0

        # State of the rectangle currently being dragged, if any.
        self.rect = None
        self.rect_start = None

        # Regular expressions for the limited set of entities the tool knows.
        self.patterns = {
            'name': r"\b[A-Z][a-z]* [A-Z][a-z]*\b",
            'registration': r"\b[A-Z0-9]{10,15}\b",
            'mobile': r"\b\d{10}\b",
            'aadhaar': r"\b\d{4} \d{4} \d{4}\b",
            'address': r"\b\d+\s+\w+\s+\w+",
            'dob': r"\b\d{2}/\d{2}/\d{4}\b",
            'signature': r"\b(Signature|Signed by)\b",
            'account_number': r"\b\d{9,18}\b"
        }

        # RGB colours (components in 0..1) associated with each entity.
        self.colors = {
            'name': (1, 0, 0),
            'registration': (0, 1, 0),
            'mobile': (0, 0, 1),
            'aadhaar': (1, 1, 0),
            'address': (1, 0.5, 0),
            'dob': (0, 1, 1),
            'signature': (1, 0, 1),
            'account_number': (0.5, 0, 0.5)
        }

        self.redaction_level = 'low'
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0

        self.window.mainloop()

    def open_pdf(self):
        """Ask for a PDF file, load it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return  # dialog cancelled

        new_document = fitz.open(file_path)
        # Release the previously opened document instead of leaking its handle.
        if self.pdf_document is not None:
            self.pdf_document.close()
        self.pdf_document = new_document
        self.total_pages = self.pdf_document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]
        self.display_pdf()

    def display_pdf(self):
        """Render the current page into a fresh viewer window with drag bindings."""
        if self.pdf_document is None:
            return

        zoom = fitz.Matrix(self.zoom_level, self.zoom_level)
        self.page_image = self.pdf_document[self.current_page].get_pixmap(matrix=zoom)
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # Only one viewer window is kept; replace the previous one if present.
        if getattr(self, 'pdf_window', None) is not None:
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # The PhotoImage is stored on self so it is not garbage collected while
        # the canvas is still displaying it.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

        # Mouse bindings for drawing manual redaction rectangles.
        self.canvas.bind("<ButtonPress-1>", self.start_rectangle)
        self.canvas.bind("<B1-Motion>", self.update_rectangle)
        self.canvas.bind("<ButtonRelease-1>", self.end_rectangle)

    def select_redaction_level(self):
        """Prompt for a redaction level and, if valid, redact the document.

        Cancelling the dialog or entering an unknown level leaves the
        previously selected level untouched.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        answer = simpledialog.askstring("Redaction Level", "Enter redaction level (high, medium, low):")
        if answer is None:
            return  # askstring returns None when the dialog is cancelled

        level = answer.strip().lower()
        if level not in self.VALID_LEVELS:
            messagebox.showwarning("Invalid Input", "Please select a valid redaction level: high, medium, or low.")
            return

        self.redaction_level = level
        self.auto_redact()

    def _entities_for_level(self, level):
        """Return the list of pattern names to apply for ``level``."""
        if level == 'high':
            return list(self.patterns.keys())
        return list(self.LEVEL_ENTITIES.get(level, ()))

    def _canvas_to_pdf_rect(self, x0, y0, x1, y1):
        """Convert a dragged canvas rectangle to PDF coordinates.

        The page is rendered with ``Matrix(zoom_level, zoom_level)``, so canvas
        pixels are divided by the zoom factor. Corners are ordered so the
        result is (left, top, right, bottom) regardless of drag direction.

        Returns:
            A 4-tuple of floats, or None when the rectangle has no area.
        """
        zoom = self.zoom_level or 1.0
        left, right = sorted((x0 / zoom, x1 / zoom))
        top, bottom = sorted((y0 / zoom, y1 / zoom))
        if left == right or top == bottom:
            return None
        return (left, top, right, bottom)

    def auto_redact(self):
        """Redact pattern matches and manual rectangles on every page, then save.

        Redaction annotations are collected for a whole page and applied in a
        single pass, and each distinct matched string is searched only once
        per page.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        entities = self._entities_for_level(self.redaction_level)

        for page_num in range(self.total_pages):
            page = self.pdf_document.load_page(page_num)
            page_text = page.get_text("text")

            handled = set()   # matched strings already located on this page
            pending = False   # True once at least one redaction was queued

            for entity in entities:
                for match in re.finditer(self.patterns[entity], page_text):
                    needle = match.group()
                    if not needle or needle in handled:
                        continue
                    handled.add(needle)
                    for area in page.search_for(needle):
                        page.add_redact_annot(area, fill=(0, 0, 0))
                        pending = True

            # Rectangles the user drew on this page.
            for rect_coords in self.selected_rectangles[page_num]:
                x0, y0, x1, y1 = rect_coords
                page.add_redact_annot(fitz.Rect(x0, y0, x1, y1), fill=(0, 0, 0))
                pending = True

            if pending:
                page.apply_redactions()

        output_pdf = f"redacted_output_{self.redaction_level}.pdf"
        # garbage/deflate make the saved file drop unreferenced objects and
        # compress streams rather than carrying them over from the input.
        self.pdf_document.save(output_pdf, garbage=4, deflate=True)
        messagebox.showinfo("Success", f"Redacted PDF saved as: {output_pdf}")

    def start_rectangle(self, event):
        """Begin dragging a manual redaction rectangle at the mouse position."""
        if self.manual_selection_enabled:
            self.rect_start = (event.x, event.y)
            self.rect = self.canvas.create_rectangle(self.rect_start[0], self.rect_start[1], event.x, event.y, outline="red")

    def update_rectangle(self, event):
        """Stretch the rectangle being dragged to follow the mouse."""
        if self.manual_selection_enabled and self.rect is not None:
            self.canvas.coords(self.rect, self.rect_start[0], self.rect_start[1], event.x, event.y)

    def end_rectangle(self, event):
        """Finish the drag and record the rectangle for the current page.

        A plain click (zero-area rectangle) is discarded because it cannot
        cover any content.
        """
        if not self.manual_selection_enabled or self.rect is None:
            return

        pdf_rect = self._canvas_to_pdf_rect(self.rect_start[0], self.rect_start[1], event.x, event.y)
        if pdf_rect is not None:
            self.selected_rectangles[self.current_page].append(pdf_rect)
        self.canvas.delete(self.rect)
        self.rect = None

    def next_page(self):
        """Show the following page, if there is one."""
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def previous_page(self):
        """Show the preceding page, if there is one."""
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

# Start the tool only when this code is executed directly. Constructing
# PDFRedactor builds the window and enters the Tk main loop, so this call
# blocks until the window is closed.
if __name__ == "__main__":
    PDFRedactor()


In [16]:
# Improvement 02 > Major feature: manual redaction + automatic redaction in one program, plus optional password protection.
# Redacts English-language PDFs only.

In [21]:
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import io
import re
from PyPDF2 import PdfReader, PdfWriter

class PDFRedactor:
    """Tkinter tool for automatic and manual PDF redaction with optional password.

    The user opens a PDF, optionally drags rectangles over areas of the page
    viewer, then picks a redaction level. Every pattern match enabled for that
    level and every drawn rectangle is blacked out, the result is saved, and
    the user may additionally write a password-protected copy.

    Constructing an instance builds the control window and enters the Tk main
    loop, so the constructor only returns once the window has been closed.
    """

    # Entities redacted at the 'low' and 'medium' levels. The 'high' level is
    # not listed because it always uses every key of ``self.patterns``.
    LEVEL_ENTITIES = {
        'low': ('mobile', 'account_number', 'hindi_text'),
        'medium': ('mobile', 'account_number', 'aadhaar', 'name', 'dob', 'hindi_text'),
    }
    VALID_LEVELS = ('high', 'medium', 'low')

    def __init__(self):
        """Create the widgets, define patterns/colours and run the event loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")
        self.window.geometry("400x500")

        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 18))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        self.redact_button = tk.Button(self.window, text="Select Redaction Level", command=self.select_redaction_level)
        self.redact_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        self.jump_button = tk.Button(self.window, text="Jump to Page", command=self.jump_to_page)
        self.jump_button.pack(pady=10)

        # One list of manual rectangles per page (filled in by open_pdf).
        self.selected_rectangles = []
        self.manual_selection_enabled = True
        self.zoom_level = 1.0

        # State of the rectangle currently being dragged, if any.
        self.rect = None
        self.rect_start = None

        # Regular expressions for the entities the tool knows.
        self.patterns = {
            'name': r"\b[A-Z][a-z]* [A-Z][a-z]*\b",
            'registration': r"\b[A-Z0-9]{10,15}\b",
            'mobile': r"\b\d{10}\b",
            'aadhaar': r"\b\d{4} \d{4} \d{4}\b",
            'address': r"\b\d+\s+\w+\s+\w+",
            'dob': r"\b\d{2}/\d{2}/\d{4}\b",
            'signature': r"\b(Signature|Signed by)\b",
            'account_number': r"\b\d{9,18}\b",
            'hindi_text': r"[\u0900-\u097F]+"
        }

        # RGB colours (components in 0..1) associated with each entity.
        self.colors = {
            'name': (1, 0, 0),
            'registration': (0, 1, 0),
            'mobile': (0, 0, 1),
            'aadhaar': (1, 1, 0),
            'address': (1, 0.5, 0),
            'dob': (0, 1, 1),
            'signature': (1, 0, 1),
            'account_number': (0.5, 0, 0.5),
            'hindi_text': (1, 0.65, 0)
        }

        self.redaction_level = 'low'
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0
        self.window.mainloop()

    def open_pdf(self):
        """Ask for a PDF file, load it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return  # dialog cancelled

        new_document = fitz.open(file_path)
        # Release the previously opened document instead of leaking its handle.
        if self.pdf_document is not None:
            self.pdf_document.close()
        self.pdf_document = new_document
        self.total_pages = self.pdf_document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]
        self.display_pdf()

    def display_pdf(self):
        """Render the current page into a fresh viewer window with drag bindings."""
        if self.pdf_document is None:
            return

        zoom = fitz.Matrix(self.zoom_level, self.zoom_level)
        self.page_image = self.pdf_document[self.current_page].get_pixmap(matrix=zoom)
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # Only one viewer window is kept; replace the previous one if present.
        if getattr(self, 'pdf_window', None) is not None:
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # The PhotoImage is stored on self so it is not garbage collected while
        # the canvas is still displaying it.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

        # Mouse bindings for drawing manual redaction rectangles.
        self.canvas.bind("<ButtonPress-1>", self.start_rectangle)
        self.canvas.bind("<B1-Motion>", self.update_rectangle)
        self.canvas.bind("<ButtonRelease-1>", self.end_rectangle)

    def select_redaction_level(self):
        """Prompt for a redaction level and, if valid, redact the document.

        Cancelling the dialog or entering an unknown level leaves the
        previously selected level untouched.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        answer = simpledialog.askstring("Redaction Level", "Enter redaction level (high, medium, low):")
        if answer is None:
            return  # askstring returns None when the dialog is cancelled

        level = answer.strip().lower()
        if level not in self.VALID_LEVELS:
            messagebox.showwarning("Invalid Input", "Please select a valid redaction level: high, medium, or low.")
            return

        self.redaction_level = level
        self.auto_redact()

    def _entities_for_level(self, level):
        """Return the list of pattern names to apply for ``level``."""
        if level == 'high':
            return list(self.patterns.keys())
        return list(self.LEVEL_ENTITIES.get(level, ()))

    def _canvas_to_pdf_rect(self, x0, y0, x1, y1):
        """Convert a dragged canvas rectangle to PDF coordinates.

        The page is rendered with ``Matrix(zoom_level, zoom_level)``, so canvas
        pixels are divided by the zoom factor. Corners are ordered so the
        result is (left, top, right, bottom) regardless of drag direction.

        Returns:
            A 4-tuple of floats, or None when the rectangle has no area.
        """
        zoom = self.zoom_level or 1.0
        left, right = sorted((x0 / zoom, x1 / zoom))
        top, bottom = sorted((y0 / zoom, y1 / zoom))
        if left == right or top == bottom:
            return None
        return (left, top, right, bottom)

    def auto_redact(self):
        """Redact pattern matches and manual rectangles on every page, then save.

        Redaction annotations are collected for a whole page and applied in a
        single pass, and each distinct matched string is searched only once
        per page. After saving, the user is offered password protection.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        entities = self._entities_for_level(self.redaction_level)

        for page_num in range(self.total_pages):
            page = self.pdf_document.load_page(page_num)
            page_text = page.get_text("text")

            handled = set()   # matched strings already located on this page
            pending = False   # True once at least one redaction was queued

            for entity in entities:
                for match in re.finditer(self.patterns[entity], page_text):
                    needle = match.group()
                    if not needle or needle in handled:
                        continue
                    handled.add(needle)
                    for area in page.search_for(needle):
                        page.add_redact_annot(area, fill=(0, 0, 0))
                        pending = True

            # Rectangles the user drew on this page.
            for rect_coords in self.selected_rectangles[page_num]:
                x0, y0, x1, y1 = rect_coords
                page.add_redact_annot(fitz.Rect(x0, y0, x1, y1), fill=(0, 0, 0))
                pending = True

            if pending:
                page.apply_redactions()

        output_pdf = f"redacted_output_{self.redaction_level}.pdf"
        # garbage/deflate make the saved file drop unreferenced objects and
        # compress streams rather than carrying them over from the input.
        self.pdf_document.save(output_pdf, garbage=4, deflate=True)

        self.ask_for_password_protection(output_pdf)

    def ask_for_password_protection(self, pdf_path):
        """Ask whether the saved PDF at ``pdf_path`` should get a password."""
        response = messagebox.askyesno("Password Protection", "Do you want to set a password for the PDF?")
        if response:  # the user opted for a password
            self.add_password_protection(pdf_path)
        else:
            messagebox.showinfo("PDF Saved", f"PDF saved without password as: {pdf_path}")

    def add_password_protection(self, pdf_path):
        """Write an encrypted copy of ``pdf_path`` named ``protected_<pdf_path>``.

        NOTE(review): the unencrypted file at ``pdf_path`` is left on disk next
        to the protected copy; confirm whether it should be removed.
        """
        password = simpledialog.askstring("Set Password", "Enter a password to protect the PDF:")

        if password:
            reader = PdfReader(pdf_path)
            writer = PdfWriter()

            for source_page in reader.pages:
                writer.add_page(source_page)

            writer.encrypt(password)

            # Save the password-protected copy.
            protected_pdf_path = f"protected_{pdf_path}"
            with open(protected_pdf_path, "wb") as f_out:
                writer.write(f_out)

            messagebox.showinfo("Success", f"Password-protected PDF saved as: {protected_pdf_path}")
        else:
            messagebox.showwarning("No Password", "No password set. PDF saved without password protection.")

    def jump_to_page(self):
        """Ask for a 1-based page number and display that page."""
        page_number = simpledialog.askinteger("Jump to Page", "Enter page number:")
        if page_number is None:
            return  # dialog cancelled: not an error
        if 1 <= page_number <= self.total_pages:
            self.current_page = page_number - 1
            self.display_pdf()
        else:
            messagebox.showerror("Invalid Page", "Please enter a valid page number.")

    def previous_page(self):
        """Show the preceding page, if there is one."""
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

    def next_page(self):
        """Show the following page, if there is one."""
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def start_rectangle(self, event):
        """Begin dragging a manual redaction rectangle at the mouse position."""
        if self.manual_selection_enabled:
            self.rect_start = (event.x, event.y)
            self.rect = self.canvas.create_rectangle(self.rect_start[0], self.rect_start[1], event.x, event.y, outline="green")

    def update_rectangle(self, event):
        """Stretch the rectangle being dragged to follow the mouse."""
        if self.manual_selection_enabled and self.rect is not None:
            self.canvas.coords(self.rect, self.rect_start[0], self.rect_start[1], event.x, event.y)

    def end_rectangle(self, event):
        """Finish the drag and record the rectangle for the current page.

        The corners are ordered before storing, so dragging right-to-left or
        bottom-to-top still yields a valid rectangle. A plain click
        (zero-area rectangle) is discarded.
        """
        if not self.manual_selection_enabled or self.rect is None:
            return

        pdf_rect = self._canvas_to_pdf_rect(self.rect_start[0], self.rect_start[1], event.x, event.y)
        if pdf_rect is not None:
            self.selected_rectangles[self.current_page].append(pdf_rect)
        self.canvas.delete(self.rect)
        self.rect = None

# Start the tool only when this code is executed directly. Constructing
# PDFRedactor builds the window and enters the Tk main loop, so this call
# blocks until the window is closed.
if __name__ == "__main__":
    PDFRedactor()

In [None]:

# Improvement 03 > Major features: manual redaction + automatic redaction in one program, customizable redaction colours, and optional password protection.
# Redacts English-language PDFs only.

In [25]:
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog, colorchooser
import io
import re
from PyPDF2 import PdfReader, PdfWriter


class PDFRedactor:
    """Tkinter tool for coloured automatic and manual PDF redaction.

    The user opens a PDF, optionally customises the per-entity colours and
    drags rectangles over areas of the page viewer, then picks a redaction
    level. Pattern matches are covered in their entity colour, drawn
    rectangles in black; the result is saved and may additionally be written
    as a password-protected copy.

    Constructing an instance builds the control window and enters the Tk main
    loop, so the constructor only returns once the window has been closed.
    """

    # Entities redacted at the 'low' and 'medium' levels. The 'high' level is
    # not listed because it always uses every key of ``self.patterns``.
    LEVEL_ENTITIES = {
        'low': ('mobile', 'account_number', 'hindi_text'),
        'medium': ('mobile', 'account_number', 'aadhaar', 'name', 'dob', 'hindi_text'),
    }
    VALID_LEVELS = ('high', 'medium', 'low')

    def __init__(self):
        """Create the widgets, define patterns/colours and run the event loop."""
        self.window = tk.Tk()
        self.window.title("PDF Redaction Tool")
        self.window.geometry("400x500")

        self.label = tk.Label(self.window, text="PDF Redaction Tool", font=("Helvetica", 18))
        self.label.pack(pady=10)

        self.open_button = tk.Button(self.window, text="Open PDF", command=self.open_pdf)
        self.open_button.pack(pady=10)

        self.redact_button = tk.Button(self.window, text="Select Redaction Level", command=self.select_redaction_level)
        self.redact_button.pack(pady=10)

        self.color_button = tk.Button(self.window, text="Customize Colors", command=self.customize_colors)
        self.color_button.pack(pady=10)

        self.page_control_frame = tk.Frame(self.window)
        self.page_control_frame.pack(pady=10)

        self.prev_button = tk.Button(self.page_control_frame, text="Previous Page", command=self.previous_page)
        self.prev_button.grid(row=0, column=0, padx=5)

        self.page_label = tk.Label(self.page_control_frame, text="Page 1")
        self.page_label.grid(row=0, column=1)

        self.next_button = tk.Button(self.page_control_frame, text="Next Page", command=self.next_page)
        self.next_button.grid(row=0, column=2, padx=5)

        self.jump_button = tk.Button(self.window, text="Jump to Page", command=self.jump_to_page)
        self.jump_button.pack(pady=10)

        # One list of manual rectangles per page (filled in by open_pdf).
        self.selected_rectangles = []
        self.manual_selection_enabled = True
        self.zoom_level = 1.0

        # State of the rectangle currently being dragged, if any.
        self.rect = None
        self.start_x = 0
        self.start_y = 0

        # Regular expressions for the entities the tool knows.
        self.patterns = {
            'name': r"\b[A-Z][a-z]* [A-Z][a-z]*\b",
            'registration': r"\b[A-Z0-9]{10,15}\b",
            'mobile': r"\b\d{10}\b",
            'aadhaar': r"\b\d{4} \d{4} \d{4}\b",
            'address': r"\b\d+\s+\w+\s+\w+",
            'dob': r"\b\d{2}/\d{2}/\d{4}\b",
            'signature': r"\b(Signature|Signed by)\b",
            'account_number': r"\b\d{9,18}\b",
            'hindi_text': r"[\u0900-\u097F]+"
        }

        # RGB fill colours (components in 0..1) used when redacting each entity.
        self.colors = {
            'name': (1, 0, 0),
            'registration': (0, 1, 0),
            'mobile': (0, 0, 1),
            'aadhaar': (1, 1, 0),
            'address': (1, 0.5, 0),
            'dob': (0, 1, 1),
            'signature': (1, 0, 1),
            'account_number': (0.5, 0, 0.5),
            'hindi_text': (1, 0.65, 0)
        }

        self.redaction_level = 'low'
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0
        self.window.mainloop()

    def open_pdf(self):
        """Ask for a PDF file, load it and show its first page."""
        file_path = filedialog.askopenfilename(title="Select a PDF File", filetypes=[("PDF Files", "*.pdf")])
        if not file_path:
            return  # dialog cancelled

        new_document = fitz.open(file_path)
        # Release the previously opened document instead of leaking its handle.
        if self.pdf_document is not None:
            self.pdf_document.close()
        self.pdf_document = new_document
        self.total_pages = self.pdf_document.page_count
        self.current_page = 0
        self.selected_rectangles = [[] for _ in range(self.total_pages)]
        self.display_pdf()

    def display_pdf(self):
        """Render the current page into a fresh viewer window with drag bindings."""
        if self.pdf_document is None:
            return

        zoom = fitz.Matrix(self.zoom_level, self.zoom_level)
        self.page_image = self.pdf_document[self.current_page].get_pixmap(matrix=zoom)
        self.img_bytes = io.BytesIO(self.page_image.tobytes())

        # Only one viewer window is kept; replace the previous one if present.
        if getattr(self, 'pdf_window', None) is not None:
            self.pdf_window.destroy()

        self.pdf_window = tk.Toplevel(self.window)
        self.pdf_window.title(f"PDF Viewer - Page {self.current_page + 1}")

        self.canvas = tk.Canvas(self.pdf_window, width=self.page_image.width, height=self.page_image.height)
        self.canvas.pack()

        # The PhotoImage is stored on self so it is not garbage collected while
        # the canvas is still displaying it.
        self.photo = tk.PhotoImage(data=self.img_bytes.getvalue())
        self.canvas.create_image(0, 0, image=self.photo, anchor='nw')

        self.page_label.config(text=f"Page {self.current_page + 1} / {self.total_pages}")

        # Mouse bindings for drawing manual redaction rectangles.
        self.canvas.bind("<ButtonPress-1>", self.start_rectangle)
        self.canvas.bind("<B1-Motion>", self.update_rectangle)
        self.canvas.bind("<ButtonRelease-1>", self.end_rectangle)

    def select_redaction_level(self):
        """Prompt for a redaction level and, if valid, redact the document.

        Cancelling the dialog or entering an unknown level leaves the
        previously selected level untouched.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        answer = simpledialog.askstring("Redaction Level", "Enter redaction level (high, medium, low):")
        if answer is None:
            return  # askstring returns None when the dialog is cancelled

        level = answer.strip().lower()
        if level not in self.VALID_LEVELS:
            messagebox.showwarning("Invalid Input", "Please select a valid redaction level: high, medium, or low.")
            return

        self.redaction_level = level
        self.auto_redact()

    def customize_colors(self):
        """Let the user pick a fill colour for each entity, one dialog per entity.

        Cancelling a dialog keeps that entity's current colour.
        """
        for entity in self.patterns.keys():
            color = colorchooser.askcolor(title=f"Select color for {entity.capitalize()}")[0]
            if color:
                # askcolor yields 0-255 components; the fill colour uses 0-1.
                self.colors[entity] = (color[0] / 255, color[1] / 255, color[2] / 255)

    def _entities_for_level(self, level):
        """Return the list of pattern names to apply for ``level``."""
        if level == 'high':
            return list(self.patterns.keys())
        return list(self.LEVEL_ENTITIES.get(level, ()))

    def _canvas_to_pdf_rect(self, x0, y0, x1, y1):
        """Convert a dragged canvas rectangle to PDF coordinates.

        The page is rendered with ``Matrix(zoom_level, zoom_level)``, so canvas
        pixels are divided by the zoom factor. Corners are ordered so the
        result is (left, top, right, bottom) regardless of drag direction.

        Returns:
            A 4-tuple of floats, or None when the rectangle has no area.
        """
        zoom = self.zoom_level or 1.0
        left, right = sorted((x0 / zoom, x1 / zoom))
        top, bottom = sorted((y0 / zoom, y1 / zoom))
        if left == right or top == bottom:
            return None
        return (left, top, right, bottom)

    def auto_redact(self):
        """Redact pattern matches and manual rectangles on every page, then save.

        Redaction annotations are collected for a whole page and applied in a
        single pass. Each distinct matched string is searched only once per
        page, so the first entity that matches it decides its fill colour.
        After saving, the user is offered password protection.
        """
        if self.pdf_document is None:
            messagebox.showwarning("No PDF", "Please open a PDF before redacting.")
            return

        entities = self._entities_for_level(self.redaction_level)

        for page_num in range(self.total_pages):
            page = self.pdf_document.load_page(page_num)
            page_text = page.get_text("text")

            handled = set()   # matched strings already located on this page
            pending = False   # True once at least one redaction was queued

            for entity in entities:
                fill_color = self.colors[entity]
                for match in re.finditer(self.patterns[entity], page_text):
                    needle = match.group()
                    if not needle or needle in handled:
                        continue
                    handled.add(needle)
                    for area in page.search_for(needle):
                        page.add_redact_annot(area, fill=fill_color)
                        pending = True

            # Rectangles the user drew on this page are always black.
            for rect_coords in self.selected_rectangles[page_num]:
                x0, y0, x1, y1 = rect_coords
                page.add_redact_annot(fitz.Rect(x0, y0, x1, y1), fill=(0, 0, 0))
                pending = True

            if pending:
                page.apply_redactions()

        output_pdf = f"redacted_output_{self.redaction_level}.pdf"
        # garbage/deflate make the saved file drop unreferenced objects and
        # compress streams rather than carrying them over from the input.
        self.pdf_document.save(output_pdf, garbage=4, deflate=True)

        self.ask_for_password_protection(output_pdf)

    def ask_for_password_protection(self, pdf_path):
        """Ask whether the saved PDF at ``pdf_path`` should get a password."""
        response = messagebox.askyesno("Password Protection", "Do you want to set a password for the PDF?")
        if response:  # the user opted for a password
            self.add_password_protection(pdf_path)
        else:
            messagebox.showinfo("PDF Saved", f"PDF saved without password as: {pdf_path}")

    def add_password_protection(self, pdf_path):
        """Write an encrypted copy of ``pdf_path`` named ``protected_<pdf_path>``.

        NOTE(review): the unencrypted file at ``pdf_path`` is left on disk next
        to the protected copy; confirm whether it should be removed.
        """
        password = simpledialog.askstring("Set Password", "Enter a password to protect the PDF:")

        if password:
            reader = PdfReader(pdf_path)
            writer = PdfWriter()

            for source_page in reader.pages:
                writer.add_page(source_page)

            writer.encrypt(password)

            # Save the password-protected copy.
            protected_pdf_path = f"protected_{pdf_path}"
            with open(protected_pdf_path, "wb") as f_out:
                writer.write(f_out)

            messagebox.showinfo("Success", f"Password-protected PDF saved as: {protected_pdf_path}")
        else:
            messagebox.showwarning("No Password", "No password set. PDF saved without password protection.")

    def jump_to_page(self):
        """Ask for a 1-based page number and display that page."""
        page_number = simpledialog.askinteger("Jump to Page", "Enter page number:")
        if page_number is None:
            return  # dialog cancelled: not an error
        if 1 <= page_number <= self.total_pages:
            self.current_page = page_number - 1
            self.display_pdf()
        else:
            messagebox.showerror("Invalid Page", "Please enter a valid page number.")

    def previous_page(self):
        """Show the preceding page, if there is one."""
        if self.current_page > 0:
            self.current_page -= 1
            self.display_pdf()

    def next_page(self):
        """Show the following page, if there is one."""
        if self.current_page < self.total_pages - 1:
            self.current_page += 1
            self.display_pdf()

    def start_rectangle(self, event):
        """Begin dragging a manual redaction rectangle at the mouse position."""
        if self.manual_selection_enabled:
            self.start_x = event.x
            self.start_y = event.y
            self.rect = self.canvas.create_rectangle(self.start_x, self.start_y, self.start_x, self.start_y, outline="red")

    def update_rectangle(self, event):
        """Stretch the rectangle being dragged to follow the mouse."""
        if self.manual_selection_enabled and self.rect is not None:
            self.canvas.coords(self.rect, self.start_x, self.start_y, event.x, event.y)

    def end_rectangle(self, event):
        """Finish the drag and record the rectangle for the current page.

        The corners are ordered before storing, so dragging right-to-left or
        bottom-to-top still yields a valid rectangle. The red drag outline
        stays on the canvas as the visual marker of the selection; a plain
        click (zero-area rectangle) is discarded and its outline removed.
        """
        if not self.manual_selection_enabled or self.rect is None:
            return

        pdf_rect = self._canvas_to_pdf_rect(self.start_x, self.start_y, event.x, event.y)
        if pdf_rect is None:
            self.canvas.delete(self.rect)
        else:
            self.selected_rectangles[self.current_page].append(pdf_rect)
            # Snap the existing outline to the final position instead of
            # stacking a second, identical rectangle on top of it.
            self.canvas.coords(self.rect, self.start_x, self.start_y, event.x, event.y)
        self.rect = None

# Start the tool only when this code is executed directly. Constructing
# PDFRedactor builds the window and enters the Tk main loop, so this call
# blocks until the window is closed.
if __name__ == "__main__":
    PDFRedactor()
