In [1]:
# Set up a <PdfFileSplitter> class

import pathlib
import PyPDF2

class PdfFileSplitter:

    # Prepare two class attributes, each foreseen to receive the resulting <PdfFileWriter> instances after specifying the breakpoint
    writer1 = PyPDF2.PdfFileWriter()
    writer2 = PyPDF2.PdfFileWriter()

    def __init__(self,filepath):
        self.filepath = pathlib.Path(filepath)  # initially create a pathlib <Path> object from the user's input
        self.filename = self.filepath.stem  # extract the stem name of the file we received
        
        self.reader_object = PyPDF2.PdfFileReader(str(self.filepath))  # turn the <Path> object into a <PdfFileReader> object


    def __str__(self):
        print(self.filepath)  # provide a possibility to show the filepath of our source file


    def split(self,breakpoint):

        for i in self.reader_object.pages[0:int(breakpoint)-1]:  # iterate over all .pages from 1 to <breakpoint> -1
            PdfFileSplitter.writer1.addPage(i) # add them to our 1st <PdfFileWriter> object

        for i in self.reader_object.pages[int(breakpoint):]:  # iterate over all .pages from <breakpoint> to the end
            PdfFileSplitter.writer2.addPage(i)  # add them to our 2nd <PdfFileWriter> object

        print(f"resulting number of pages in the 1st writer object: {PdfFileSplitter.writer1.getNumPages()}"
              f" (pages {1} - {breakpoint - 1})")
        print(f"resulting number of pages in the 2nd writer object: {PdfFileSplitter.writer2.getNumPages()}"
              f" (pages {breakpoint} - {breakpoint +  PdfFileSplitter.writer2.getNumPages()})")


    def write(self,filename):
        result_dir = self.filepath.parent / "split_results"  # prepare a directory for the resulting files
        result_dir.mkdir(parents=True,exist_ok=True)

        # Create two new file paths for the resulting files
        split_result_path1 = result_dir.joinpath(str(filename) + "_split1.pdf")
        split_result_path2 = result_dir.joinpath(str(filename) + "_split2.pdf")

        # open directly handles the '.touch()' method itself
        with open(split_result_path1, mode="wb") as result_file1:  # open the 1st new filepath
            PdfFileSplitter.writer1.write(result_file1)  # tell the 1st <PdfFileWriter> where to paste its content

        with open(split_result_path2, mode="wb") as result_file2:  # open the 2nd new filepath
            PdfFileSplitter.writer2.write(result_file2)  # tell the 2nd <PdfFileWriter> where to paste its content
    
        print("Split was done - Check your result folder!")



In [2]:

# test the class by instantiating a <PdfFileSplitter> object
mypdf = PdfFileSplitter(r"C:\Users\Joshua Albert\PycharmProjects\realPythonExercises\9_pdfHandling\practice_files\Pride_and_Prejudice.pdf")

# test the class by calling its '.split' method
mypdf.split(150)
mypdf.write("Ergebnis")


resulting number of pages in the 1st writer object: 149 (pages 1 - 149)
resulting number of pages in the 2nd writer object: 84 (pages 150 - 234)
Split was done - Check your result folder!
