In [1]:
from PyPDF2 import PdfReader, PdfWriter
import os
from pathlib import Path

In [2]:

def split_pdf_by_size(input_pdf, max_size_mb, output_filename_prefix="part_", output_dir="output"):
    reader = PdfReader(input_pdf)
    total_pages = len(reader.pages)
    max_size_bytes = max_size_mb * 1024 * 1024

    os.makedirs(output_dir, exist_ok=True)

    start_page = 0
    part = 1

    while start_page < total_pages:
        writer = PdfWriter()
        current_size = 0
        end_page = start_page

        # Try adding pages until size exceeds
        while end_page < total_pages:
            writer.add_page(reader.pages[end_page])

            # Estimate file size in memory
            from io import BytesIO
            temp_stream = BytesIO()
            writer.write(temp_stream)
            current_size = temp_stream.tell()

            if current_size > max_size_bytes:
                # Remove last page that caused overflow
                writer = PdfWriter()
                for i in range(start_page, end_page):  # exclude end_page
                    writer.add_page(reader.pages[i])
                break

            end_page += 1

        # Write current part to disk
        part_path = os.path.join(output_dir, f"{output_filename_prefix}{part}.pdf")
        with open(part_path, "wb") as f:
            writer.write(f)

        actual_size_mb = os.path.getsize(part_path) / (1024 * 1024)
        print(f"    Created: ({actual_size_mb:.2f} MB, pages {start_page+1} to {end_page - 1 + 1}) {part_path}")

        start_page = end_page
        part += 1

        

In [3]:
filepath="input\Arabic for Young Learners - Pupil Book 1.pdf"
file_size_limit_mb = 5  # Set the size limit for each split part in MB

path = Path(filepath)
filename = path.name # e.g., "file.pdf"
extension = path.suffix  # e.g., ".pdf"
filename_without_extension = path.stem  # e.g., "file"

print(f"Splitting PDF: {filepath}")

output_dir=f"output/{filename_without_extension}"
output_filename_prefix = filename_without_extension + "_"
# Example usage:
split_pdf_by_size(filepath, file_size_limit_mb, output_filename_prefix , output_dir)  # Split into ~5MB chunks
print(f"PDF split into chunks in {output_dir}")

  filepath="input\Arabic for Young Learners - Pupil Book 1.pdf"


Splitting PDF: input\Arabic for Young Learners - Pupil Book 1.pdf
    Created: (4.89 MB, pages 1 to 16) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 1_1.pdf
    Created: (4.85 MB, pages 17 to 34) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 1_2.pdf
    Created: (4.86 MB, pages 35 to 57) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 1_3.pdf
    Created: (4.97 MB, pages 58 to 81) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 1_4.pdf
    Created: (4.99 MB, pages 82 to 105) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 1_5.pdf
    Created: (4.75 MB, pages 106 to 126) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 1_6.pdf
    Created: (4.90 MB, pages 127 to 149) output/Arabic for Young Learners - Pupil Book 1\Arabic for Young Learners - Pupil Book 