<a href="https://colab.research.google.com/github/AmolSadana012/MyPdf-Editor/blob/main/PDFEditor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install PyPDF2
!pip install pdfplumber
!pip install PyMuPDF

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)
[2K  

In [2]:
import PyPDF2
import pdfplumber
import fitz  # PyMuPDF
import os

def merge_pdfs():
    pdf_list = input("Enter PDF filenames to merge (comma-separated): ").split(',')
    pdf_list = [pdf.strip() for pdf in pdf_list]
    output_path = input("Enter output filename (e.g., merged.pdf): ")

    pdf_writer = PyPDF2.PdfWriter()
    try:
        for pdf in pdf_list:
            if not os.path.exists(pdf):
                print(f"File not found: {pdf}")
                continue

            pdf_reader = PyPDF2.PdfReader(pdf)
            for page in pdf_reader.pages:
                pdf_writer.add_page(page)

        with open(output_path, 'wb') as out:
            pdf_writer.write(out)
        print(f'Merged PDF saved as {output_path}')
    except Exception as e:
        print(f"Error merging PDFs: {e}")

def split_pdf():
    pdf_path = input("Enter PDF filename to split: ")
    output_dir = input("Enter output directory: ")

    if not os.path.exists(pdf_path):
        print(f"File not found: {pdf_path}")
        return

    try:
        doc = fitz.open(pdf_path)
        total_pages = doc.page_count
        print(f"The PDF has {total_pages} pages.")

        num_parts = int(input("Enter the number of parts you want to split the PDF into: "))
        split_points = []
        start_page = 0

        for i in range(num_parts):
            while True:
                start_page_input = int(input(f"Enter the starting page number for part {i + 1} (1-{total_pages}): "))
                if 1 <= start_page_input <= total_pages and (i == 0 or start_page_input > split_points[-1][1]):
                    start_page = start_page_input - 1
                    break
                else:
                    print(f"Invalid input. Please enter a valid starting page number.")

            while True:
                end_page_input = int(input(f"Enter the ending page number for part {i + 1} (1-{total_pages}): "))
                if start_page < end_page_input <= total_pages:
                    split_points.append((start_page, end_page_input))
                    break
                else:
                    print(f"Invalid input. Please enter a valid ending page number.")

        os.makedirs(output_dir, exist_ok=True)
        for part_num, (start, end) in enumerate(split_points):
            new_doc = fitz.open()
            for page_num in range(start, end):
                new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
            output_path = f'{output_dir}/part_{part_num + 1}.pdf'
            new_doc.save(output_path)
            print(f'Saved {output_path}')
    except Exception as e:
        print(f"Error splitting PDF: {e}")

def extract_text():
    pdf_path = input("Enter PDF filename to extract text: ")
    output_text_path = input("Enter output text filename: ")

    if not os.path.exists(pdf_path):
        print(f"File not found: {pdf_path}")
        return

    try:
        with pdfplumber.open(pdf_path) as pdf:
            full_text = [page.extract_text() for page in pdf.pages if page.extract_text()]

        with open(output_text_path, 'w', encoding='utf-8') as f:
            f.write("\n".join(full_text))

        print(f'Extracted text saved as {output_text_path}')
    except Exception as e:
        print(f"Error extracting text: {e}")

def extract_images():
    pdf_path = input("Enter PDF filename to extract images: ")
    output_dir = input("Enter output directory for images: ")

    if not os.path.exists(pdf_path):
        print(f"File not found: {pdf_path}")
        return
    os.makedirs(output_dir, exist_ok=True)

    try:
        pdf_document = fitz.open(pdf_path)

        for page_index, page in enumerate(pdf_document):
            image_list = page.get_images(full=True)

            for img_index, img in enumerate(image_list):
                xref = img[0]
                base_image = pdf_document.extract_image(xref)
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]
                image_filename = os.path.join(output_dir, f"image_{page_index + 1}_{img_index + 1}.{image_ext}")

                with open(image_filename, 'wb') as image_file:
                    image_file.write(image_bytes)
                print(f'Saved {image_filename}')

        pdf_document.close()
    except Exception as e:
        print(f"Error extracting images: {e}")

def encrypt_pdf():
    input_pdf = input("Enter PDF filename to encrypt: ")
    output_pdf = input("Enter output encrypted PDF filename: ")
    password = input("Enter encryption password: ")

    if not os.path.exists(input_pdf):
        print(f"File not found: {input_pdf}")
        return

    try:
        with open(input_pdf, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            writer = PyPDF2.PdfWriter()

            for page in reader.pages:
                writer.add_page(page)

            writer.encrypt(password)

            with open(output_pdf, "wb") as output:
                writer.write(output)
        print(f"Encrypted PDF saved as {output_pdf}")
    except Exception as e:
        print(f"Error encrypting PDF: {e}")

if __name__ == "__main__":
    while True:
        print("\nChoose an option:")
        print("1. Merge PDFs")
        print("2. Split PDF")
        print("3. Extract Text from PDF")
        print("4. Extract Images from PDF")
        print("5. Encrypt PDF")
        print("6. Exit")

        choice = input("Enter your choice: ")
        if choice == '1':
            merge_pdfs()
        elif choice == '2':
            split_pdf()
        elif choice == '3':
            extract_text()
        elif choice == '4':
            extract_images()
        elif choice == '5':
            encrypt_pdf()
        elif choice == '6':
            print("Exiting...")
            break
        else:
            print("Invalid choice. Please try again.")



Choose an option:
1. Merge PDFs
2. Split PDF
3. Extract Text from PDF
4. Extract Images from PDF
5. Encrypt PDF
6. Exit
Enter your choice: 1
Enter PDF filenames to merge (comma-separated): LQT Day 1 Sorting,images_ishan
Enter output filename (e.g., merged.pdf): merged.pdf1
File not found: LQT Day 1 Sorting
File not found: images_ishan
Merged PDF saved as merged.pdf1

Choose an option:
1. Merge PDFs
2. Split PDF
3. Extract Text from PDF
4. Extract Images from PDF
5. Encrypt PDF
6. Exit
Enter your choice: 1
Enter PDF filenames to merge (comma-separated): images_ishan.pdf,LQT Day 1 Sorting.pdf
Enter output filename (e.g., merged.pdf): merged.pdf
Merged PDF saved as merged.pdf

Choose an option:
1. Merge PDFs
2. Split PDF
3. Extract Text from PDF
4. Extract Images from PDF
5. Encrypt PDF
6. Exit
Enter your choice: 2
Enter PDF filename to split: images_ishan.pdf
Enter output directory: downloads
The PDF has 14 pages.
Enter the number of parts you want to split the PDF into: 2
Enter the sta



Extracted text saved as text_extract.pdf

Choose an option:
1. Merge PDFs
2. Split PDF
3. Extract Text from PDF
4. Extract Images from PDF
5. Encrypt PDF
6. Exit
Enter your choice: 4
Enter PDF filename to extract images: images_ishan.pdf
Enter output directory for images: downloads
Saved downloads/image_1_1.jpeg
Saved downloads/image_1_2.jpeg
Saved downloads/image_2_1.jpeg
Saved downloads/image_2_2.jpeg
Saved downloads/image_2_3.jpeg
Saved downloads/image_2_4.jpeg
Saved downloads/image_3_1.jpeg
Saved downloads/image_3_2.jpeg
Saved downloads/image_3_3.jpeg
Saved downloads/image_4_1.jpeg
Saved downloads/image_4_2.png
Saved downloads/image_4_3.jpeg
Saved downloads/image_5_1.jpeg
Saved downloads/image_5_2.png
Saved downloads/image_6_1.png
Saved downloads/image_6_2.png
Saved downloads/image_7_1.png
Saved downloads/image_7_2.png
Saved downloads/image_8_1.png
Saved downloads/image_8_2.png
Saved downloads/image_9_1.png
Saved downloads/image_9_2.jpeg
Saved downloads/image_10_1.jpeg
Saved downlo