In [7]:
# !pip install PyPDF2 reportlab

from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
import io

def paint_top_bottom_strip(input_pdf, output_pdf, top_strip=50, bottom_strip=30):
    """Cover the top and bottom bands of every text page with white rectangles.

    Each page that yields extractable text gets an overlay merged on top of it;
    pages with no extractable text are copied to the output unchanged. The
    overlay only paints over the existing content -- nothing is removed from
    the page, so the covered text is still present in the output file.

    Args:
        input_pdf: Path (or binary stream) of the PDF to read.
        output_pdf: Path of the PDF file to write.
        top_strip: Height of the white band at the top of the page, in page
            user-space units.
        bottom_strip: Height of the white band at the bottom of the page, in
            page user-space units.

    Raises:
        ValueError: If ``top_strip`` or ``bottom_strip`` is negative.
    """
    # Fail fast: a negative height would draw a band outside the page and
    # silently leave the margin uncovered.
    if top_strip < 0 or bottom_strip < 0:
        raise ValueError("top_strip and bottom_strip must be non-negative")

    reader = PdfReader(input_pdf)
    writer = PdfWriter()
    total_pages = len(reader.pages)

    for page_num, page in enumerate(reader.pages, start=1):
        text = page.extract_text()

        # No extractable text: treat the page as image-only and leave it alone.
        if not text or text.strip() == "":
            print(f"Page {page_num}/{total_pages}: image found → skipping")
            writer.add_page(page)
            continue

        # The MediaBox does not have to start at (0, 0); draw relative to its
        # real lower-left corner so the bands line up with the visible page.
        # NOTE(review): a /Rotate entry on the page is not taken into account.
        box = page.mediabox
        left = float(box.left)
        bottom = float(box.bottom)
        width = float(box.width)
        height = float(box.height)

        # Build a single-page overlay PDF in memory holding the two white bands.
        packet = io.BytesIO()
        c = canvas.Canvas(packet, pagesize=(width, height))
        c.setFillColorRGB(1, 1, 1)  # white

        # Top strip
        c.rect(left, bottom + height - top_strip, width, top_strip, fill=1, stroke=0)
        # Bottom strip
        c.rect(left, bottom, width, bottom_strip, fill=1, stroke=0)

        c.save()
        packet.seek(0)

        overlay_page = PdfReader(packet).pages[0]
        # Give the overlay the same page box as the target so the merged
        # content is not clipped away when the box origin is not (0, 0).
        overlay_page.mediabox = box

        # Merge overlay with original page
        page.merge_page(overlay_page)
        writer.add_page(page)

        print(f"Page {page_num}/{total_pages}: completed")

    # Save result
    with open(output_pdf, "wb") as f:
        writer.write(f)

# Demo run: paint 50-unit white bands at the top and bottom of each text page.
paint_top_bottom_strip(
    input_pdf="file.pdf",
    output_pdf="output.pdf",
    top_strip=50,
    bottom_strip=50,
)


Page 1/352: image found → skipping
Page 2/352: image found → skipping
Page 3/352: image found → skipping
Page 4/352: completed
Page 5/352: completed
Page 6/352: completed
Page 7/352: completed
Page 8/352: completed
Page 9/352: completed
Page 10/352: completed
Page 11/352: completed
Page 12/352: completed
Page 13/352: completed
Page 14/352: completed
Page 15/352: completed
Page 16/352: completed
Page 17/352: completed
Page 18/352: completed
Page 19/352: completed
Page 20/352: completed
Page 21/352: completed
Page 22/352: completed
Page 23/352: completed
Page 24/352: image found → skipping
Page 25/352: completed
Page 26/352: completed
Page 27/352: completed
Page 28/352: completed
Page 29/352: completed
Page 30/352: completed
Page 31/352: completed
Page 32/352: completed
Page 33/352: completed
Page 34/352: completed
Page 35/352: completed
Page 36/352: completed
Page 37/352: completed
Page 38/352: completed
Page 39/352: completed
Page 40/352: completed
Page 41/352: completed
Page 42/352: c

In [8]:
# !pip install pymupdf

# from google.colab import files
# from IPython.display import Image, display
# import fitz  # PyMuPDF


# def show_pdf(path):
#     # open pdf
#     doc = fitz.open(path)
#     for page in doc:
#         pix = page.get_pixmap()
#         display(Image(data=pix.tobytes("png")))

# show_pdf("output.pdf")


In [11]:
# !pip install pymupdf

# import fitz

# def redact_strips(input_pdf, output_pdf, top_strip=50, bottom_strip=30):
#     doc = fitz.open(input_pdf)

#     for page_num, page in enumerate(doc, start=1):
#         rect_top = fitz.Rect(0, 0, page.rect.width, top_strip)  # top strip
#         rect_bottom = fitz.Rect(0, page.rect.height - bottom_strip, page.rect.width, page.rect.height)  # bottom strip

#         # Add redaction annotations
#         page.add_redact_annot(rect_top, fill=(1,1,1))
#         page.add_redact_annot(rect_bottom, fill=(1,1,1))

#         # Apply redactions (removes text + images in those areas permanently)
#         page.apply_redactions()

#         print(f"Page {page_num}/{len(doc)} redacted")

#     doc.save(output_pdf)

# # Example usage
# redact_strips("file.pdf", "output.pdf", top_strip=30, bottom_strip=50)


In [12]:
# !pip install pymupdf

import fitz

def redact_strips(input_pdf, output_pdf, top_strip=50, bottom_strip=30):
    """Redact a band at the top and bottom of every text page and save the result.

    Pages whose extracted text is empty are treated as image-only and left
    untouched. On every other page two redaction annotations with a white fill
    are added and applied, which removes the text/images in those areas
    permanently. The document is then saved with compression and garbage
    collection to reduce file size.

    Args:
        input_pdf: Path of the PDF to open.
        output_pdf: Path of the redacted PDF to write.
        top_strip: Height of the band redacted at the top of each page.
        bottom_strip: Height of the band redacted at the bottom of each page.

    Raises:
        ValueError: If ``top_strip`` or ``bottom_strip`` is negative.
    """
    # Fail fast: a negative height would produce an invalid rectangle.
    if top_strip < 0 or bottom_strip < 0:
        raise ValueError("top_strip and bottom_strip must be non-negative")

    # The context manager closes the document (and its file handle) even if
    # redaction or saving raises.
    with fitz.open(input_pdf) as doc:
        total_pages = len(doc)

        for page_num, page in enumerate(doc, start=1):
            text = page.get_text("text")

            if not text.strip():  # Image-only page → skip
                print(f"Page {page_num}/{total_pages}: image page → skipping")
                continue

            # Define top & bottom strip rectangles
            page_rect = page.rect
            rect_top = fitz.Rect(0, 0, page_rect.width, top_strip)
            rect_bottom = fitz.Rect(
                0,
                page_rect.height - bottom_strip,
                page_rect.width,
                page_rect.height,
            )

            # Add redaction annotations
            page.add_redact_annot(rect_top, fill=(1, 1, 1))
            page.add_redact_annot(rect_bottom, fill=(1, 1, 1))

            # Apply redactions (removes text/images permanently in those areas)
            page.apply_redactions()

            print(f"Page {page_num}/{total_pages}: redacted")

        # Save with compression + garbage collection to reduce file size
        doc.save(output_pdf, deflate=True, garbage=4, clean=True)

    print(f"\n✅ Saved redacted PDF as: {output_pdf}")

# Demo run: redact a 30-unit band at the top and a 50-unit band at the bottom.
redact_strips(
    input_pdf="file.pdf",
    output_pdf="output.pdf",
    top_strip=30,
    bottom_strip=50,
)


Page 1/352: image page → skipping
Page 2/352: image page → skipping
Page 3/352: image page → skipping
Page 4/352: redacted
Page 5/352: redacted
Page 6/352: redacted
Page 7/352: redacted
Page 8/352: redacted
Page 9/352: redacted
Page 10/352: redacted
Page 11/352: redacted
Page 12/352: redacted
Page 13/352: redacted
Page 14/352: redacted
Page 15/352: redacted
Page 16/352: redacted
Page 17/352: redacted
Page 18/352: redacted
Page 19/352: redacted
Page 20/352: redacted
Page 21/352: redacted
Page 22/352: redacted
Page 23/352: redacted
Page 24/352: image page → skipping
Page 25/352: redacted
Page 26/352: redacted
Page 27/352: redacted
Page 28/352: redacted
Page 29/352: redacted
Page 30/352: redacted
Page 31/352: redacted
Page 32/352: redacted
Page 33/352: redacted
Page 34/352: redacted
Page 35/352: redacted
Page 36/352: redacted
Page 37/352: redacted
Page 38/352: redacted
Page 39/352: redacted
Page 40/352: redacted
Page 41/352: redacted
Page 42/352: redacted
Page 43/352: redacted
Page 44/352