In [1]:
import os
from io import BytesIO

from PyPDF2 import PdfReader, PdfWriter


In [7]:
def split_pdf_by_size(input_pdf, max_size_mb, output_dir="output"):
    """Split a PDF into consecutive parts whose serialized size stays under a limit.

    Pages are appended to the current part one at a time. After each append the
    part is serialized to an in-memory buffer to measure its size. As soon as
    the limit is exceeded, the part is rebuilt without the page that caused the
    overflow and written to disk; that page then starts the next part.

    A single page that exceeds the limit on its own is written as a part of its
    own (that part is therefore larger than ``max_size_mb``). Without this the
    function could never advance past such a page.

    Args:
        input_pdf: Path (or stream) accepted by ``PdfReader``.
        max_size_mb: Upper bound for each part, in MiB (1024 * 1024 bytes).
            Must be positive.
        output_dir: Directory the parts are written to as ``part_<n>.pdf``.
            Created if it does not exist. Defaults to ``"output"``.

    Returns:
        None. One line is printed per created part, reporting its size on disk
        and its zero-based page range.

    Raises:
        ValueError: If ``max_size_mb`` is not positive.
    """
    if max_size_mb <= 0:
        raise ValueError("max_size_mb must be positive")

    reader = PdfReader(input_pdf)
    total_pages = len(reader.pages)
    max_size_bytes = max_size_mb * 1024 * 1024

    os.makedirs(output_dir, exist_ok=True)

    start_page = 0
    part = 1

    while start_page < total_pages:
        writer = PdfWriter()
        end_page = start_page  # exclusive end of the current part

        # Try adding pages until the size limit is exceeded.
        while end_page < total_pages:
            writer.add_page(reader.pages[end_page])

            # Measure the part by serializing it to memory.
            temp_stream = BytesIO()
            writer.write(temp_stream)

            if temp_stream.tell() > max_size_bytes:
                if end_page == start_page:
                    # The very first page of this part is already too large.
                    # Keep it as a single-page part so the outer loop advances
                    # instead of emitting empty files forever.
                    end_page += 1
                else:
                    # Drop the page that caused the overflow by rebuilding the
                    # writer from the pages that did fit.
                    writer = PdfWriter()
                    for i in range(start_page, end_page):  # exclude end_page
                        writer.add_page(reader.pages[i])
                break

            end_page += 1

        # Write current part to disk.
        part_path = os.path.join(output_dir, f"part_{part}.pdf")
        with open(part_path, "wb") as f:
            writer.write(f)

        actual_size_mb = os.path.getsize(part_path) / (1024 * 1024)
        print(f"Created: {part_path} ({actual_size_mb:.2f} MB, pages {start_page} to {end_page - 1})")

        start_page = end_page
        part += 1

        

In [8]:
# Example usage: split the source book into parts of roughly 5 MB each.
# The path is built with os.path.join rather than a literal "input\..." string:
# a backslash followed by an Arabic letter is an unrecognised escape sequence
# (Python warns about it), and a hard-coded backslash only works on Windows.
filename = os.path.join(
    "input",
    "العربية للناشئين - كتاب التلميذ ١ - Arabic for Young Learners - Pupil Book 1.pdf",
)
split_pdf_by_size(filename, 5)  # Split into ~5MB chunks

Created: output\part_1.pdf (4.89 MB, pages 0 to 15)
Created: output\part_2.pdf (4.85 MB, pages 16 to 33)
Created: output\part_3.pdf (4.86 MB, pages 34 to 56)
Created: output\part_4.pdf (4.97 MB, pages 57 to 80)
Created: output\part_5.pdf (4.99 MB, pages 81 to 104)
Created: output\part_6.pdf (4.75 MB, pages 105 to 125)
Created: output\part_7.pdf (4.90 MB, pages 126 to 148)
Created: output\part_8.pdf (4.83 MB, pages 149 to 170)
Created: output\part_9.pdf (4.96 MB, pages 171 to 193)
Created: output\part_10.pdf (4.81 MB, pages 194 to 215)
Created: output\part_11.pdf (2.93 MB, pages 216 to 230)
