In [2]:
pip install PyPDF2

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install pdfplumber pypdf2

Note: you may need to restart the kernel to use updated packages.


In [3]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters that are illegal in file names (< > : " / \\ | ? *) and the
    punctuation marks . , ( ) [ ] are removed; spaces become underscores.
    If nothing but whitespace is left, the placeholder "Unknown" is returned.
    """
    # Drop the characters that file systems commonly reject.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)

    # One translate pass: space -> underscore, listed punctuation -> removed.
    punctuation_map = str.maketrans({
        ' ': '_',
        '.': None,
        ',': None,
        '(': None,
        ')': None,
        '[': None,
        ']': None,
    })
    cleaned = cleaned.translate(punctuation_map)

    # An empty or whitespace-only result cannot serve as a file name.
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=33):
    """
    Copy one chapter out of the source PDF into its own PDF file.

    ``start_page`` and ``end_page`` are the page numbers used in the table of
    contents. ``page_offset`` is added to them to obtain the 0-based index
    into ``input_pdf_reader.pages`` (with the default of 33, page 3 is the
    37th page of the PDF file).

    Args:
        input_pdf_reader: Opened PDF reader exposing a ``pages`` sequence.
        output_path: Path of the PDF file to create.
        start_page: First page of the chapter (inclusive).
        end_page: Last page of the chapter (inclusive).
        title: Chapter title; only used in the failure message.
        page_offset: Shift from table-of-contents page number to 0-based
            PDF page index.

    Returns:
        True when the chapter file was written. False when the page range is
        invalid or any error occurred; the reason is printed, not raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the writer or the disk: a reversed range
        # would otherwise produce an empty PDF that is reported as a success,
        # and a negative index would silently wrap around to pages counted
        # from the end of the document.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF page index {pdf_start}-{pdf_end} is outside the document "
                f"({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The +1 turns the 0-based indices into the 1-based page numbers a PDF viewer shows.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort by design: the caller counts failures and keeps going.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 1, one output PDF per entry.
#   title      - used (after sanitize_filename) as the output file name
#   start_page - first page of the chapter, inclusive
#   end_page   - last page of the chapter, inclusive
# extract_chapter adds a fixed offset to these numbers to get the PDF page index.
# Several consecutive entries share a page (e.g. 521, 534, 540, 541, 547), so
# those pages end up in more than one output file.
# Pages 25-26, 377-378, 403-404 and 531-532 are not covered by any entry
# (presumably section title pages -- TODO confirm).
table_of_contents = [
    # Opening pieces (pages 3-24)
    {"title": "v1_Reflections_of_a_Young_Man_on_Choice_of_Profession", "start_page": 3, "end_page": 9},
    {"title": "v1_Letter_from_Marx_to_His_Father", "start_page": 10, "end_page": 21},
    {"title": "v1_Wild_Songs_01_The_Fiddler", "start_page": 22, "end_page": 22},
    {"title": "v1_Wild_Songs_02_Nocturnal_Love", "start_page": 23, "end_page": 24},
    
    # Democritean_Epicurean_Philosophy sections (pages 27-108)
    {"title": "v1_Democritean_Epicurean_Philosophy_01_Dedication", "start_page": 27, "end_page": 28},
    {"title": "v1_Democritean_Epicurean_Philosophy_02_Foreword", "start_page": 29, "end_page": 31},
    {"title": "v1_Democritean_Epicurean_Philosophy_03_Contents", "start_page": 32, "end_page": 33},
    {"title": "v1_Democritean_Epicurean_Philosophy_04_Part1_Subject_of_Treatise", "start_page": 34, "end_page": 35},
    {"title": "v1_Democritean_Epicurean_Philosophy_05_Part1_Opinions_on_Relationship", "start_page": 36, "end_page": 37},
    {"title": "v1_Democritean_Epicurean_Philosophy_06_Part1_Difficulties_Concerning_Identity", "start_page": 38, "end_page": 45},
    {"title": "v1_Democritean_Epicurean_Philosophy_07_Part2_Ch1_Declination_of_Atom", "start_page": 46, "end_page": 52},
    {"title": "v1_Democritean_Epicurean_Philosophy_08_Part2_Ch2_Qualities_of_Atom", "start_page": 53, "end_page": 57},
    {"title": "v1_Democritean_Epicurean_Philosophy_09_Part2_Ch3_Atomen_and_Stoicheia", "start_page": 58, "end_page": 62},
    {"title": "v1_Democritean_Epicurean_Philosophy_10_Part2_Ch4_Time", "start_page": 63, "end_page": 65},
    {"title": "v1_Democritean_Epicurean_Philosophy_11_Part2_Ch5_The_Meteors", "start_page": 66, "end_page": 73},
    {"title": "v1_Democritean_Epicurean_Philosophy_12_Fragment_Individual_Immortality", "start_page": 74, "end_page": 76},
    {"title": "v1_Democritean_Epicurean_Philosophy_13_Notes_Part_One", "start_page": 77, "end_page": 88},
    {"title": "v1_Democritean_Epicurean_Philosophy_14_Notes_Part_Two", "start_page": 89, "end_page": 101},
    {"title": "v1_Democritean_Epicurean_Philosophy_15_Appendix", "start_page": 102, "end_page": 105},
    {"title": "v1_Democritean_Epicurean_Philosophy_16_Draft_of_New_Preface", "start_page": 106, "end_page": 108},
    
    # Articles, editorial notes and announcements (pages 109-376)
    {"title": "v1_Comments_on_Latest_Prussian_Censorship_Instruction", "start_page": 109, "end_page": 131},
    {"title": "v1_Rhine_Province_Assembly_First_Article_Press_Freedom", "start_page": 132, "end_page": 181},
    {"title": "v1_Question_of_Centralisation_Rheinische_Zeitung", "start_page": 182, "end_page": 183},
    {"title": "v1_Leading_Article_No179_Kolnische_Zeitung", "start_page": 184, "end_page": 202},
    {"title": "v1_Philosophical_Manifesto_Historical_School_of_Law", "start_page": 203, "end_page": 210},
    {"title": "v1_Yet_Another_Word_on_Bruno_Bauer_Academic_Freedom", "start_page": 211, "end_page": 214},
    {"title": "v1_Communism_and_Augsburg_Allgemeine_Zeitung", "start_page": 215, "end_page": 221},
    {"title": "v1_Communism_Augsburg_Editorial_Note", "start_page": 222, "end_page": 223},
    {"title": "v1_Rhine_Province_Assembly_Third_Article_Thefts_of_Wood", "start_page": 224, "end_page": 263},
    {"title": "v1_Liberal_Opposition_Hanover_Editorial_Note", "start_page": 264, "end_page": 265},
    {"title": "v1_Communal_Reform_and_Kolnische_Zeitung", "start_page": 266, "end_page": 273},
    {"title": "v1_Divorce_Bill_Editorial_Note", "start_page": 274, "end_page": 276},
    {"title": "v1_Correspondent_Kolnische_vs_Rheinische_Zeitung", "start_page": 277, "end_page": 279},
    {"title": "v1_Cabinet_Order_on_Daily_Press", "start_page": 280, "end_page": 281},
    {"title": "v1_Renards_Letter_to_Oberpraesident_von_Schaper", "start_page": 282, "end_page": 285},
    {"title": "v1_Industrialists_Hanover_Protective_Tariffs", "start_page": 286, "end_page": 286},
    {"title": "v1_Attitude_Herwegh_Ruge_to_The_Free", "start_page": 287, "end_page": 287},
    {"title": "v1_Polemical_Tactics_Augsburg_Newspaper", "start_page": 288, "end_page": 291},
    {"title": "v1_Supplement_Nos335_336_Augsburg_Allgemeine_Zeitung", "start_page": 292, "end_page": 306},
    {"title": "v1_The_Divorce_Bill", "start_page": 307, "end_page": 310},
    {"title": "v1_Ban_on_Leipziger_Allgemeine_Zeitung", "start_page": 311, "end_page": 330},
    {"title": "v1_Announcement_Editors_Rheinische_Zeitung_Reply", "start_page": 331, "end_page": 331},
    {"title": "v1_Justification_Correspondent_from_Mosel", "start_page": 332, "end_page": 358},
    {"title": "v1_Polemical_Articles_Against_Allgemeine_Zeitung", "start_page": 359, "end_page": 360},
    {"title": "v1_Marginal_Notes_Accusations_Ministerial_Rescript", "start_page": 361, "end_page": 365},
    {"title": "v1_Local_Election_Deputies_Provincial_Assembly", "start_page": 366, "end_page": 369},
    {"title": "v1_Rhein_Mosel_Zeitung_as_Grand_Inquisitor", "start_page": 370, "end_page": 372},
    {"title": "v1_Stylistic_Exercises_Rhein_Mosel_Zeitung", "start_page": 373, "end_page": 375},
    {"title": "v1_Announcement_March_17_1843", "start_page": 376, "end_page": 376},
    
    # Letters (pages 379-402)
    {"title": "v1_Letter_01_to_Carl_Friedrich_Bachmann_Apr6_1841", "start_page": 379, "end_page": 379},
    {"title": "v1_Letter_02_to_Oscar_Ludwig_Wolff_Apr7_1841", "start_page": 380, "end_page": 380},
    {"title": "v1_Letter_03_to_Arnold_Ruge_Feb10_1842", "start_page": 381, "end_page": 381},
    {"title": "v1_Letter_04_to_Arnold_Ruge_Mar5_1842", "start_page": 382, "end_page": 382},
    {"title": "v1_Letter_05_to_Arnold_Ruge_Mar20_1842", "start_page": 383, "end_page": 386},
    {"title": "v1_Letter_06_to_Arnold_Ruge_Apr27_1842", "start_page": 387, "end_page": 388},
    {"title": "v1_Letter_07_to_Arnold_Ruge_Jul9_1842", "start_page": 389, "end_page": 390},
    {"title": "v1_Letter_08_to_Dagobert_Oppenheim_Aug25_1842", "start_page": 391, "end_page": 392},
    {"title": "v1_Letter_09_to_Arnold_Ruge_Nov30_1842", "start_page": 393, "end_page": 395},
    {"title": "v1_Letter_10_to_Arnold_Ruge_Jan25_1843", "start_page": 396, "end_page": 397},
    {"title": "v1_Letter_11_to_Arnold_Ruge_Mar13_1843", "start_page": 398, "end_page": 402},
    
    # Epicurean_Philosophy notebooks and Plan_of_Hegels_Philosophy_of_Nature (pages 405-516)
    {"title": "v1_Epicurean_Philosophy_Notebook_01_First", "start_page": 405, "end_page": 416},
    {"title": "v1_Epicurean_Philosophy_Notebook_02_Second", "start_page": 417, "end_page": 441},
    {"title": "v1_Epicurean_Philosophy_Notebook_03_Third", "start_page": 442, "end_page": 463},
    {"title": "v1_Epicurean_Philosophy_Notebook_04_Fourth", "start_page": 464, "end_page": 478},
    {"title": "v1_Epicurean_Philosophy_Notebook_05_Fifth", "start_page": 479, "end_page": 488},
    {"title": "v1_Epicurean_Philosophy_Notebook_06_Sixth", "start_page": 489, "end_page": 500},
    {"title": "v1_Epicurean_Philosophy_Notebook_07_Seventh", "start_page": 501, "end_page": 509},
    {"title": "v1_Plan_of_Hegels_Philosophy_of_Nature", "start_page": 510, "end_page": 516},
    
    # Poems_to_Jenny (pages 517-530)
    {"title": "v1_Poems_to_Jenny_01_Concluding_Sonnets", "start_page": 517, "end_page": 520},
    {"title": "v1_Poems_to_Jenny_02_To_Jenny", "start_page": 521, "end_page": 521},
    {"title": "v1_Poems_to_Jenny_03_To_Jenny_Second", "start_page": 521, "end_page": 522},
    {"title": "v1_Poems_to_Jenny_04_My_World", "start_page": 523, "end_page": 524},
    {"title": "v1_Poems_to_Jenny_05_Feelings", "start_page": 525, "end_page": 527},
    {"title": "v1_Poems_to_Jenny_06_Transformation", "start_page": 528, "end_page": 530},
    
    # Book_of_Verse_to_Father (pages 533-634); many short poems share a page
    {"title": "v1_Book_of_Verse_to_Father_01_Contents", "start_page": 533, "end_page": 533},
    {"title": "v1_Book_of_Verse_to_Father_02_To_My_Father", "start_page": 534, "end_page": 534},
    {"title": "v1_Book_of_Verse_to_Father_03_Creation", "start_page": 534, "end_page": 534},
    {"title": "v1_Book_of_Verse_to_Father_04_Poetry", "start_page": 535, "end_page": 535},
    {"title": "v1_Book_of_Verse_to_Father_05_The_Forest_Spring", "start_page": 535, "end_page": 535},
    {"title": "v1_Book_of_Verse_to_Father_06_The_Magic_Harp", "start_page": 536, "end_page": 536},
    {"title": "v1_Book_of_Verse_to_Father_07_The_Abduction", "start_page": 537, "end_page": 537},
    {"title": "v1_Book_of_Verse_to_Father_08_Yearning", "start_page": 538, "end_page": 538},
    {"title": "v1_Book_of_Verse_to_Father_09_Viennese_Ape_Theatre_Berlin", "start_page": 539, "end_page": 539},
    {"title": "v1_Book_of_Verse_to_Father_10_Sir_Glucks_Armide", "start_page": 540, "end_page": 540},
    {"title": "v1_Book_of_Verse_to_Father_11_Terms_of_Engagement", "start_page": 540, "end_page": 540},
    {"title": "v1_Book_of_Verse_to_Father_12_Sentimental_Souls", "start_page": 540, "end_page": 540},
    {"title": "v1_Book_of_Verse_to_Father_13_Romanticism_a_la_Mode", "start_page": 541, "end_page": 541},
    {"title": "v1_Book_of_Verse_to_Father_14_To_Sun_of_Truth_Quednow", "start_page": 541, "end_page": 541},
    {"title": "v1_Book_of_Verse_to_Father_15_On_Certain_Knight_Hero", "start_page": 541, "end_page": 541},
    {"title": "v1_Book_of_Verse_to_Father_16_To_My_Neighbour_Across_Street", "start_page": 541, "end_page": 541},
    {"title": "v1_Book_of_Verse_to_Father_17_Siren_Song", "start_page": 542, "end_page": 544},
    {"title": "v1_Book_of_Verse_to_Father_18_Philistine_Wonders", "start_page": 545, "end_page": 545},
    {"title": "v1_Book_of_Verse_to_Father_19_Mathematical_Wisdom", "start_page": 545, "end_page": 545},
    {"title": "v1_Book_of_Verse_to_Father_20_Little_Old_Man_of_Water", "start_page": 546, "end_page": 546},
    {"title": "v1_Book_of_Verse_to_Father_21_To_Medical_Students", "start_page": 547, "end_page": 547},
    {"title": "v1_Book_of_Verse_to_Father_22_Medical_Student_Psychology", "start_page": 547, "end_page": 547},
    {"title": "v1_Book_of_Verse_to_Father_23_Medical_Student_Metaphysics", "start_page": 547, "end_page": 547},
    {"title": "v1_Book_of_Verse_to_Father_24_Medical_Student_Anthropology", "start_page": 547, "end_page": 547},
    {"title": "v1_Book_of_Verse_to_Father_25_Medical_Student_Ethics", "start_page": 548, "end_page": 548},
    {"title": "v1_Book_of_Verse_to_Father_26_First_Elegy_Ovids_Tristia", "start_page": 548, "end_page": 556},
    {"title": "v1_Book_of_Verse_to_Father_27_Concluding_Sonnet_to_Jenny", "start_page": 557, "end_page": 557},
    {"title": "v1_Book_of_Verse_to_Father_28_The_Madwoman", "start_page": 558, "end_page": 558},
    {"title": "v1_Book_of_Verse_to_Father_29_Two_Songs_to_Jenny", "start_page": 559, "end_page": 559},
    {"title": "v1_Book_of_Verse_to_Father_30_Sought", "start_page": 559, "end_page": 559},
    {"title": "v1_Book_of_Verse_to_Father_31_Found", "start_page": 559, "end_page": 559},
    {"title": "v1_Book_of_Verse_to_Father_32_Flower_King", "start_page": 560, "end_page": 560},
    {"title": "v1_Book_of_Verse_to_Father_33_Sea_Rock", "start_page": 561, "end_page": 561},
    {"title": "v1_Book_of_Verse_to_Father_34_The_Awakening", "start_page": 562, "end_page": 562},
    {"title": "v1_Book_of_Verse_to_Father_35_Night_Thoughts", "start_page": 563, "end_page": 563},
    {"title": "v1_Book_of_Verse_to_Father_36_Invocation_One_in_Despair", "start_page": 563, "end_page": 563},
    {"title": "v1_Book_of_Verse_to_Father_37_Three_Little_Lights", "start_page": 564, "end_page": 564},
    {"title": "v1_Book_of_Verse_to_Father_38_Man_in_the_Moon", "start_page": 565, "end_page": 565},
    {"title": "v1_Book_of_Verse_to_Father_39_Lucinda", "start_page": 565, "end_page": 570},
    {"title": "v1_Book_of_Verse_to_Father_40_Dialogue_with", "start_page": 571, "end_page": 571},
    {"title": "v1_Book_of_Verse_to_Father_41_The_Last_Judgment", "start_page": 572, "end_page": 573},
    {"title": "v1_Book_of_Verse_to_Father_42_Two_Singers_Harp", "start_page": 574, "end_page": 574},
    {"title": "v1_Book_of_Verse_to_Father_43_Epigrams_I_to_VIII", "start_page": 575, "end_page": 579},
    {"title": "v1_Book_of_Verse_to_Father_44_Concluding_Epigram_Puff_Pastry_Cook", "start_page": 580, "end_page": 580},
    {"title": "v1_Book_of_Verse_to_Father_45_Harmony", "start_page": 580, "end_page": 580},
    {"title": "v1_Book_of_Verse_to_Father_46_Distraught", "start_page": 581, "end_page": 582},
    {"title": "v1_Book_of_Verse_to_Father_47_Man_and_Drum", "start_page": 583, "end_page": 583},
    {"title": "v1_Book_of_Verse_to_Father_48_Human_Pride", "start_page": 584, "end_page": 586},
    {"title": "v1_Book_of_Verse_to_Father_49_Evening_Stroll", "start_page": 587, "end_page": 587},
    {"title": "v1_Book_of_Verse_to_Father_50_Scenes_from_Oulanem_Tragedy", "start_page": 588, "end_page": 607},
    {"title": "v1_Book_of_Verse_to_Father_51_Song_to_the_Stars", "start_page": 608, "end_page": 608},
    {"title": "v1_Book_of_Verse_to_Father_52_Dream_Vision", "start_page": 609, "end_page": 609},
    {"title": "v1_Book_of_Verse_to_Father_53_Song_of_Sailor_at_Sea", "start_page": 610, "end_page": 610},
    {"title": "v1_Book_of_Verse_to_Father_54_The_Magic_Ship", "start_page": 611, "end_page": 611},
    {"title": "v1_Book_of_Verse_to_Father_55_The_Pale_Maiden", "start_page": 612, "end_page": 615},
    {"title": "v1_Book_of_Verse_to_Father_56_Scorpion_and_Felix_Humoristic_Novel", "start_page": 616, "end_page": 634}
]

    
def main():
    """
    Split the Volume 1 PDF into one PDF file per ``table_of_contents`` entry.

    Makes sure the output folder exists, opens the source PDF once, calls
    ``extract_chapter`` for every entry and prints a summary of this run
    followed by a listing of the PDF files found in the output folder.
    Errors raised while opening or processing the PDF are printed instead of
    being propagated.
    """
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 1_ Ka - Karl Marx.pdf")
    # Adjust the path above if your copy of the PDF lives somewhere else.

    output_dir = "marx_chapters_v1"
    # exist_ok=True replaces the separate os.path.exists() test and its
    # check-then-create race.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Announce success only when every chapter was written; previously the
        # success banner was printed even if chapters had failed.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print(f"\nextraction finished with {failed_count} failed file(s)")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF currently in the folder, so files left over from an
        # earlier run are included as well.
        generated_files = sorted(f for f in os.listdir(output_dir) if f.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in generated_files:
            print(f"  - {filename}")

    except Exception as e:
        # Best effort by design: report the problem and end the run quietly.
        print(f"❌ error occurs when processing: {str(e)}")

# Run the Volume 1 extraction only when this code is executed directly
# (a notebook cell or a script), not when it is imported as a module.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 1_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v1
expected_file_numbers 130

[1/130] is processing: v1_Reflections_of_a_Young_Man_on_Choice_of_Profession
✓ generated：marx_chapters_v1/v1_Reflections_of_a_Young_Man_on_Choice_of_Profession.pdf (page_num 3-9 -> PDF_page 37-43)
[2/130] is processing: v1_Letter_from_Marx_to_His_Father
✓ generated：marx_chapters_v1/v1_Letter_from_Marx_to_His_Father.pdf (page_num 10-21 -> PDF_page 44-55)
[3/130] is processing: v1_Wild_Songs_01_The_Fiddler
✓ generated：marx_chapters_v1/v1_Wild_Songs_01_The_Fiddler.pdf (page_num 22-22 -> PDF_page 56-56)
[4/130] is processing: v1_Wild_Songs_02_Nocturnal_Love
✓ generated：marx_chapters_v1/v1_Wild_Songs_02_Nocturnal_Love.pdf (page_num 23-24 -> PDF_page 57-58)
[5/130] is processing: v1_Democritean_Epicurean_Philosophy_01_Dedication
✓ generated：marx_chapters_v1/v1_Democritean_Epicurean_Philosophy_01_Dedication.pdf (page_num 27-28 -> PDF

In [1]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters that are illegal in file names (< > : " / \\ | ? *) and the
    punctuation marks . , ( ) [ ] are removed; spaces become underscores.
    If nothing but whitespace is left, the placeholder "Unknown" is returned.
    """
    # Drop the characters that file systems commonly reject.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)

    # One translate pass: space -> underscore, listed punctuation -> removed.
    punctuation_map = str.maketrans({
        ' ': '_',
        '.': None,
        ',': None,
        '(': None,
        ')': None,
        '[': None,
        ']': None,
    })
    cleaned = cleaned.translate(punctuation_map)

    # An empty or whitespace-only result cannot serve as a file name.
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=22):
    """
    Copy one chapter out of the source PDF into its own PDF file.

    ``start_page`` and ``end_page`` are the page numbers used in the table of
    contents. ``page_offset`` is added to them to obtain the 0-based index
    into ``input_pdf_reader.pages``. With the default of 22, page 3 maps to
    index 25, i.e. the 26th page of the PDF file (the 1-based difference is
    23, the 0-based index shift is 22).

    Args:
        input_pdf_reader: Opened PDF reader exposing a ``pages`` sequence.
        output_path: Path of the PDF file to create.
        start_page: First page of the chapter (inclusive).
        end_page: Last page of the chapter (inclusive).
        title: Chapter title; only used in the failure message.
        page_offset: Shift from table-of-contents page number to 0-based
            PDF page index.

    Returns:
        True when the chapter file was written. False when the page range is
        invalid or any error occurred; the reason is printed, not raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the writer or the disk: a reversed range
        # would otherwise produce an empty PDF that is reported as a success,
        # and a negative index would silently wrap around to pages counted
        # from the end of the document.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF page index {pdf_start}-{pdf_end} is outside the document "
                f"({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The +1 turns the 0-based indices into the 1-based page numbers a PDF viewer shows.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort by design: the caller counts failures and keeps going.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 3, one output PDF per entry.
#   title      - used (after sanitize_filename) as the output file name
#   start_page - first page of the chapter, inclusive
#   end_page   - last page of the chapter, inclusive
# extract_chapter adds a fixed offset to these numbers to get the PDF page index.
# Some entries are whole sections whose range also contains the sub-section
# entries that follow them (e.g. Third_Manuscript 290-325), so those pages are
# written into more than one output file.
# NOTE(review): First_Manuscript (235-245) has exactly the same range as its
# first sub-section Wages_of_Labour, and Second_Manuscript (283-289) the same
# range as Antithesis_Capital_Labour, so each pair yields two identical files.
# Confirm against the book whether the parent ranges should be wider.
table_of_contents = [
    # Karl Marx Works (March 1843-August 1844)
    {"title": "v3_Contribution_to_Critique_of_Hegels_Philosophy_of_Law", "start_page": 3, "end_page": 129},
    {"title": "v3_Passage_from_Kreuznach_Notebooks_1843", "start_page": 130, "end_page": 130},
    {"title": "v3_Draft_Programme_Deutsch_Franzosische_Jahrbucher", "start_page": 131, "end_page": 131},
    {"title": "v3_Letter_to_Editor_Democratie_pacifique", "start_page": 132, "end_page": 132},
    {"title": "v3_Letters_from_Deutsch_Franzosische_Jahrbucher", "start_page": 133, "end_page": 145},
    {"title": "v3_On_the_Jewish_Question", "start_page": 146, "end_page": 174},
    {"title": "v3_Contribution_to_Critique_Hegels_Philosophy_Law_Introduction", "start_page": 175, "end_page": 187},
    {"title": "v3_Letter_to_Editor_Allgemeine_Zeitung_Augsburg", "start_page": 188, "end_page": 188},
    {"title": "v3_Critical_Marginal_Notes_King_of_Prussia_Social_Reform", "start_page": 189, "end_page": 206},
    {"title": "v3_Illustrations_Latest_Exercise_Cabinet_Style_Frederick_William_IV", "start_page": 207, "end_page": 210},
    {"title": "v3_Comments_on_James_Mill_Elements_economie_politique", "start_page": 211, "end_page": 228},
    {"title": "v3_Economic_Philosophic_Manuscripts_1844_Preface", "start_page": 229, "end_page": 234},
    {"title": "v3_Economic_Philosophic_Manuscripts_First_Manuscript", "start_page": 235, "end_page": 245},
    {"title": "v3_Economic_Philosophic_Manuscripts_Wages_of_Labour", "start_page": 235, "end_page": 245},
    {"title": "v3_Economic_Philosophic_Manuscripts_Profit_of_Capital", "start_page": 246, "end_page": 249},
    {"title": "v3_Economic_Philosophic_Manuscripts_Capital", "start_page": 246, "end_page": 246},
    {"title": "v3_Economic_Philosophic_Manuscripts_Profit_of_Capital_Section", "start_page": 247, "end_page": 249},
    {"title": "v3_Economic_Philosophic_Manuscripts_Rule_Capital_Over_Labour", "start_page": 250, "end_page": 250},
    {"title": "v3_Economic_Philosophic_Manuscripts_Accumulation_of_Capitals", "start_page": 250, "end_page": 258},
    {"title": "v3_Economic_Philosophic_Manuscripts_Rent_of_Land", "start_page": 259, "end_page": 269},
    {"title": "v3_Economic_Philosophic_Manuscripts_Estranged_Labour", "start_page": 270, "end_page": 282},
    {"title": "v3_Economic_Philosophic_Manuscripts_Second_Manuscript", "start_page": 283, "end_page": 289},
    {"title": "v3_Economic_Philosophic_Manuscripts_Antithesis_Capital_Labour", "start_page": 283, "end_page": 289},
    {"title": "v3_Economic_Philosophic_Manuscripts_Third_Manuscript", "start_page": 290, "end_page": 325},
    {"title": "v3_Economic_Philosophic_Manuscripts_Private_Property_Labour", "start_page": 290, "end_page": 292},
    {"title": "v3_Economic_Philosophic_Manuscripts_Private_Property_Communism", "start_page": 293, "end_page": 305},
    {"title": "v3_Economic_Philosophic_Manuscripts_Human_Requirements_Division_Labour", "start_page": 306, "end_page": 321},
    {"title": "v3_Economic_Philosophic_Manuscripts_Power_of_Money", "start_page": 322, "end_page": 325},
    {"title": "v3_Economic_Philosophic_Manuscripts_Critique_Hegelian_Dialectic", "start_page": 326, "end_page": 348},
    
    # Letters (October 1843-August 1844)
    {"title": "v3_Letter_to_Ludwig_Feuerbach_Oct_3_1843", "start_page": 349, "end_page": 350},
    {"title": "v3_Letter_to_Julius_Frobel_Nov_21_1843", "start_page": 351, "end_page": 353},
    {"title": "v3_Letter_to_Ludwig_Feuerbach_Aug_11_1844", "start_page": 354, "end_page": 360},
    
    # From the Preparatory Materials
    {"title": "v3_From_Memoires_de_R_Levasseur_De_La_Sarthe_Paris_1829", "start_page": 361, "end_page": 374},
    {"title": "v3_Summary_Frederick_Engels_Article_Outlines_Critique_Political_Economy", "start_page": 375, "end_page": 378}
]

    
def main():
    """
    Split the Volume 3 PDF into one PDF file per ``table_of_contents`` entry.

    Makes sure the output folder exists, opens the source PDF once, calls
    ``extract_chapter`` for every entry and prints a summary of this run
    followed by a listing of the PDF files found in the output folder.
    Errors raised while opening or processing the PDF are printed instead of
    being propagated.
    """
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 3_ Ka - Karl Marx.pdf")
    # Adjust the path above if your copy of the PDF lives somewhere else.

    output_dir = "marx_chapters_v3"
    # exist_ok=True replaces the separate os.path.exists() test and its
    # check-then-create race.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Announce success only when every chapter was written; previously the
        # success banner was printed even if chapters had failed.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print(f"\nextraction finished with {failed_count} failed file(s)")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF currently in the folder, so files left over from an
        # earlier run are included as well.
        generated_files = sorted(f for f in os.listdir(output_dir) if f.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in generated_files:
            print(f"  - {filename}")

    except Exception as e:
        # Best effort by design: report the problem and end the run quietly.
        print(f"❌ error occurs when processing: {str(e)}")

# Run the Volume 3 extraction only when this code is executed directly
# (a notebook cell or a script), not when it is imported as a module.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 3_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v3
expected_file_numbers 34

[1/34] is processing: v3_Contribution_to_Critique_of_Hegels_Philosophy_of_Law
✓ generated：marx_chapters_v3/v3_Contribution_to_Critique_of_Hegels_Philosophy_of_Law.pdf (page_num 3-129 -> PDF_page 26-152)
[2/34] is processing: v3_Passage_from_Kreuznach_Notebooks_1843
✓ generated：marx_chapters_v3/v3_Passage_from_Kreuznach_Notebooks_1843.pdf (page_num 130-130 -> PDF_page 153-153)
[3/34] is processing: v3_Draft_Programme_Deutsch_Franzosische_Jahrbucher
✓ generated：marx_chapters_v3/v3_Draft_Programme_Deutsch_Franzosische_Jahrbucher.pdf (page_num 131-131 -> PDF_page 154-154)
[4/34] is processing: v3_Letter_to_Editor_Democratie_pacifique
✓ generated：marx_chapters_v3/v3_Letter_to_Editor_Democratie_pacifique.pdf (page_num 132-132 -> PDF_page 155-155)
[5/34] is processing: v3_Letters_from_Deutsch_Franzosische_Jahrbucher
✓ generated：marx_c

In [5]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters that are illegal in file names (< > : " / \\ | ? *) and the
    punctuation marks . , ( ) [ ] are removed; spaces become underscores.
    If nothing but whitespace is left, the placeholder "Unknown" is returned.
    """
    # Drop the characters that file systems commonly reject.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)

    # One translate pass: space -> underscore, listed punctuation -> removed.
    punctuation_map = str.maketrans({
        ' ': '_',
        '.': None,
        ',': None,
        '(': None,
        ')': None,
        '[': None,
        ']': None,
    })
    cleaned = cleaned.translate(punctuation_map)

    # An empty or whitespace-only result cannot serve as a file name.
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=25):
    """
    Copy one chapter out of the source PDF into its own PDF file.

    ``start_page`` and ``end_page`` are the page numbers used in the table of
    contents. ``page_offset`` is added to them to obtain the 0-based index
    into ``input_pdf_reader.pages`` (with the default of 25, page 5 is the
    31st page of the PDF file).

    Args:
        input_pdf_reader: Opened PDF reader exposing a ``pages`` sequence.
        output_path: Path of the PDF file to create.
        start_page: First page of the chapter (inclusive).
        end_page: Last page of the chapter (inclusive).
        title: Chapter title; only used in the failure message.
        page_offset: Shift from table-of-contents page number to 0-based
            PDF page index.

    Returns:
        True when the chapter file was written. False when the page range is
        invalid or any error occurred; the reason is printed, not raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the writer or the disk: a reversed range
        # would otherwise produce an empty PDF that is reported as a success,
        # and a negative index would silently wrap around to pages counted
        # from the end of the document.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF page index {pdf_start}-{pdf_end} is outside the document "
                f"({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The +1 turns the 0-based indices into the 1-based page numbers a PDF viewer shows.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort by design: the caller counts failures and keeps going.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 4, one output PDF per entry.
#   title      - used (after sanitize_filename) as the output file name
#   start_page - first page of the chapter, inclusive
#   end_page   - last page of the chapter, inclusive
# extract_chapter adds a fixed offset to these numbers to get the PDF page index.
# Chapter-level entries (e.g. Ch4 19-54, Ch6 78-143) cover the same pages as
# the numbered sub-section entries that follow them, so those pages are written
# into more than one output file.
# Pages 213-264, 296-596 and 614-664 are not covered by any entry
# (presumably left out on purpose -- TODO confirm).
table_of_contents = [
    # The Holy Family (pages 5-212)
    {"title": "v4_Holy_Family_Preface", "start_page": 5, "end_page": 6},
    {"title": "v4_Holy_Family_Foreword", "start_page": 7, "end_page": 8},
    {"title": "v4_Holy_Family_Ch1_Critical_Criticism_as_Master_Bookbinder", "start_page": 9, "end_page": 11},
    {"title": "v4_Holy_Family_Ch2_Critical_Criticism_as_Mill_Owner", "start_page": 12, "end_page": 16},
    {"title": "v4_Holy_Family_Ch3_Thoroughness_of_Critical_Criticism", "start_page": 17, "end_page": 18},
    {"title": "v4_Holy_Family_Ch4_Critical_Criticism_as_Tranquillity_of_Knowledge", "start_page": 19, "end_page": 54},
    {"title": "v4_Holy_Family_Ch4_1_Flora_Tristan_Union_Ouvriere", "start_page": 19, "end_page": 19},
    {"title": "v4_Holy_Family_Ch4_2_Beraud_on_Prostitutes", "start_page": 20, "end_page": 20},
    {"title": "v4_Holy_Family_Ch4_3_Love", "start_page": 20, "end_page": 22},
    {"title": "v4_Holy_Family_Ch4_4_Proudhon", "start_page": 23, "end_page": 54},
    {"title": "v4_Holy_Family_Ch5_Critical_Criticism_as_Mystery_Monger", "start_page": 55, "end_page": 77},
    {"title": "v4_Holy_Family_Ch5_1_Mystery_of_Degeneracy_in_Civilisation", "start_page": 56, "end_page": 56},
    {"title": "v4_Holy_Family_Ch5_2_Mystery_of_Speculative_Construction", "start_page": 57, "end_page": 60},
    {"title": "v4_Holy_Family_Ch5_3_Mystery_of_Educated_Society", "start_page": 61, "end_page": 68},
    {"title": "v4_Holy_Family_Ch5_4_Mystery_of_Probity_and_Piety", "start_page": 69, "end_page": 71},
    {"title": "v4_Holy_Family_Ch5_5_Mystery_a_Mockery", "start_page": 72, "end_page": 74},
    {"title": "v4_Holy_Family_Ch5_6_Turtle_Dove_Rigolette", "start_page": 75, "end_page": 75},
    {"title": "v4_Holy_Family_Ch5_7_World_System_of_Mysteries_of_Paris", "start_page": 76, "end_page": 77},
    {"title": "v4_Holy_Family_Ch6_Absolute_Critical_Criticism", "start_page": 78, "end_page": 143},
    {"title": "v4_Holy_Family_Ch6_1_Absolute_Criticisms_First_Campaign", "start_page": 78, "end_page": 91},
    {"title": "v4_Holy_Family_Ch6_2_Absolute_Criticisms_Second_Campaign", "start_page": 92, "end_page": 98},
    {"title": "v4_Holy_Family_Ch6_3_Absolute_Criticisms_Third_Campaign", "start_page": 99, "end_page": 143},
    {"title": "v4_Holy_Family_Ch7_Critical_Criticisms_Correspondence", "start_page": 144, "end_page": 161},
    {"title": "v4_Holy_Family_Ch7_1_Critical_Mass", "start_page": 144, "end_page": 147},
    {"title": "v4_Holy_Family_Ch7_2_Un_Critical_Mass_and_Critical_Criticism", "start_page": 148, "end_page": 153},
    {"title": "v4_Holy_Family_Ch7_3_Un_Critically_Critical_Mass", "start_page": 154, "end_page": 161},
    {"title": "v4_Holy_Family_Ch8_Earthly_Course_Critical_Criticism_as_Rudolph", "start_page": 162, "end_page": 209},
    {"title": "v4_Holy_Family_Ch8_1_Critical_Transformation_Butcher_into_Dog", "start_page": 163, "end_page": 165},
    {"title": "v4_Holy_Family_Ch8_2_Revelation_Mystery_Critical_Religion", "start_page": 166, "end_page": 175},
    {"title": "v4_Holy_Family_Ch8_3_Revelation_Mysteries_of_Law", "start_page": 176, "end_page": 190},
    {"title": "v4_Holy_Family_Ch8_4_Revealed_Mystery_of_Standpoint", "start_page": 191, "end_page": 192},
    {"title": "v4_Holy_Family_Ch8_5_Revelation_Mystery_Utilisation_Human_Impulses", "start_page": 193, "end_page": 194},
    {"title": "v4_Holy_Family_Ch8_6_Revelation_Mystery_Emancipation_of_Women", "start_page": 195, "end_page": 195},
    {"title": "v4_Holy_Family_Ch8_7_Revelation_Political_Economic_Mysteries", "start_page": 196, "end_page": 200},
    {"title": "v4_Holy_Family_Ch8_8_Rudolph_Revealed_Mystery_All_Mysteries", "start_page": 201, "end_page": 209},
    {"title": "v4_Holy_Family_Ch9_Critical_Last_Judgment", "start_page": 210, "end_page": 211},
    {"title": "v4_Holy_Family_Historical_Epilogue", "start_page": 211, "end_page": 212},
    
    # Marx works (pages 265-295); the last entry of this group is titled as an Engels work
    {"title": "v4_Marx_Draft_Article_Friedrich_List_Das_nationale_System", "start_page": 265, "end_page": 294},
    {"title": "v4_Marx_Draft_Article_General_Characterisation_of_List", "start_page": 265, "end_page": 276},
    {"title": "v4_Marx_Draft_Article_Theory_Productive_Forces_Exchange_Values", "start_page": 277, "end_page": 285},
    {"title": "v4_Marx_Draft_Article_Problem_of_Land_Rent", "start_page": 286, "end_page": 289},
    {"title": "v4_Marx_Draft_Article_Herr_List_and_Ferrier", "start_page": 290, "end_page": 294},
    {"title": "v4_Engels_Condition_Working_Class_England_From_Personal_Observation", "start_page": 295, "end_page": 295},
    
    # More Marx/Engels works (pages 597-613)
    {"title": "v4_Marx_Peuchet_On_Suicide", "start_page": 597, "end_page": 612},
    {"title": "v4_Engels_Fragment_Fourier_on_Trade", "start_page": 613, "end_page": 613},
    
    # From the Preparatory Materials (pages 665-679)
    {"title": "v4_Marx_Hegels_Construction_of_Phenomenology", "start_page": 665, "end_page": 665},
    {"title": "v4_Marx_Draft_Plan_Work_on_Modern_State", "start_page": 666, "end_page": 666},
    {"title": "v4_Marx_Plan_Library_Best_Foreign_Socialist_Writers", "start_page": 667, "end_page": 667},
    {"title": "v4_Marx_From_the_Notebook", "start_page": 668, "end_page": 679}
]

    
def main():
    """
    Split the Volume 4 PDF into one PDF per ``table_of_contents`` entry.

    The output folder ``marx_chapters_v4`` is created when missing. Every
    entry is handed to ``extract_chapter``; a progress line is printed per
    entry, followed by success/failure totals and the sorted list of PDF
    files found in the output folder. Any exception raised while opening or
    processing the source PDF is printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 4_ Ma - Karl Marx.pdf")
    target_dir = "marx_chapters_v4"

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {target_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # One boolean per entry, in table order: True when the chapter was written.
        outcomes = []

        for position, entry in enumerate(table_of_contents, start=1):
            title, first_page, last_page = entry["title"], entry["start_page"], entry["end_page"]
            destination = os.path.join(target_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            outcomes.append(bool(extract_chapter(reader, destination, first_page, last_page, title)))

        succeeded = outcomes.count(True)
        failed = outcomes.count(False)

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {succeeded} ")
        print(f"❌ failed files: {failed}")
        print(f"📁 folder_name: {target_dir}")

        # Lists every PDF present in the folder, not only the ones written by this run.
        pdf_names = sorted(name for name in os.listdir(target_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the Volume 4 extraction when this code is executed as the main module.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 4_ Ma - Karl Marx.pdf
folder_name: marx_chapters_v4
expected_file_numbers 49

[1/49] is processing: v4_Holy_Family_Preface
✓ generated：marx_chapters_v4/v4_Holy_Family_Preface.pdf (page_num 5-6 -> PDF_page 31-32)
[2/49] is processing: v4_Holy_Family_Foreword
✓ generated：marx_chapters_v4/v4_Holy_Family_Foreword.pdf (page_num 7-8 -> PDF_page 33-34)
[3/49] is processing: v4_Holy_Family_Ch1_Critical_Criticism_as_Master_Bookbinder
✓ generated：marx_chapters_v4/v4_Holy_Family_Ch1_Critical_Criticism_as_Master_Bookbinder.pdf (page_num 9-11 -> PDF_page 35-37)
[4/49] is processing: v4_Holy_Family_Ch2_Critical_Criticism_as_Mill_Owner
✓ generated：marx_chapters_v4/v4_Holy_Family_Ch2_Critical_Criticism_as_Mill_Owner.pdf (page_num 12-16 -> PDF_page 38-42)
[5/49] is processing: v4_Holy_Family_Ch3_Thoroughness_of_Critical_Criticism
✓ generated：marx_chapters_v4/v4_Holy_Family_Ch3_Thoroughness_of_Critical_Criticism.pdf (page

In [12]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    The characters ``< > : " / \\ | ? * . , ( ) [ ]`` are removed and every
    space becomes an underscore. If nothing but whitespace (or nothing at
    all) is left, the placeholder ``"Unknown"`` is returned.
    """
    # Drop all unwanted punctuation in one pass, then convert spaces.
    scrubbed = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)
    scrubbed = scrubbed.replace(' ', '_')

    if scrubbed.strip():
        return scrubbed
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=22):
    """
    Copy an inclusive range of book pages from the source PDF into a new PDF.

    Book page numbers are turned into 0-based indices of
    ``input_pdf_reader.pages`` by adding ``page_offset``. The range is
    validated before anything is written, so an inverted or out-of-document
    range is reported as a failure instead of producing an empty file or
    silently wrapping around to pages at the end of the document.

    Args:
        input_pdf_reader: Reader whose ``pages`` sequence holds the source pages.
        output_path: Path of the PDF file to create.
        start_page: First book page of the chapter (inclusive).
        end_page: Last book page of the chapter (inclusive).
        title: Chapter title; only used in the failure message.
        page_offset: Amount added to a book page number to obtain its index
            in ``input_pdf_reader.pages``. Defaults to 22, the value this
            cell has always used.

    Returns:
        True when the file was written, False when the range was invalid or
        any other error occurred (the error is printed, not raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would otherwise yield an empty loop and a page-less PDF.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")

        # Negative indices would wrap around; too-large ones would fail mid-copy.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise IndexError(
                f"PDF pages {pdf_start+1}-{pdf_end+1} are outside the document (1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort: report the failure and let the caller count it.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapter list for Volume 5. Each entry is a dict with:
#   "title"      - base name of the output file (".pdf" is appended in main()),
#   "start_page" - first page of the chapter, inclusive,
#   "end_page"   - last page of the chapter, inclusive.
# Page numbers are the volume's own page numbers; extract_chapter() shifts them
# by a fixed offset to reach indices in the PDF file.
# Some entries are parent sections whose range spans the sub-section entries
# that follow them, so those pages are extracted more than once.
table_of_contents = [
    # Opening works by Marx and Engels (pages 3-20)
    {"title": "v5_Marx_Theses_on_Feuerbach_Original_Version", "start_page": 3, "end_page": 5},
    {"title": "v5_Marx_Theses_on_Feuerbach_Edited_by_Engels", "start_page": 6, "end_page": 10},
    {"title": "v5_Engels_Feuerbach", "start_page": 11, "end_page": 14},
    {"title": "v5_Marx_Engels_Reply_to_Bruno_Bauer_Anti_Critique", "start_page": 15, "end_page": 18},
    {"title": "v5_Marx_Engels_German_Ideology_Critique_Modern_German_Philosophy", "start_page": 19, "end_page": 20},
    
    # Volume I: Critique of Modern German Philosophy (pages 21-451)
    {"title": "v5_German_Ideology_Vol1_Critique_Modern_German_Philosophy", "start_page": 21, "end_page": 22},
    {"title": "v5_German_Ideology_Preface", "start_page": 23, "end_page": 26},
    
    # I. Feuerbach (pages 27-93); the first entry is the whole chapter, the rest are its sections
    {"title": "v5_German_Ideology_I_Feuerbach_Opposition_Materialist_Idealist", "start_page": 27, "end_page": 93},
    {"title": "v5_German_Ideology_I_1_Ideology_in_General", "start_page": 28, "end_page": 30},
    {"title": "v5_German_Ideology_I_2_Premises_Materialist_Conception_History", "start_page": 31, "end_page": 31},
    {"title": "v5_German_Ideology_I_3_Production_Intercourse_Division_Labour", "start_page": 32, "end_page": 34},
    {"title": "v5_German_Ideology_I_4_Essence_Materialist_Conception_History", "start_page": 35, "end_page": 37},
    {"title": "v5_German_Ideology_I_II_Preconditions_Real_Liberation", "start_page": 38, "end_page": 40},
    {"title": "v5_German_Ideology_I_3_Primary_Historical_Relations", "start_page": 41, "end_page": 45},
    {"title": "v5_German_Ideology_I_4_Social_Division_Labour_Consequences", "start_page": 46, "end_page": 47},
    {"title": "v5_German_Ideology_I_5_Development_Productive_Forces", "start_page": 48, "end_page": 49},
    {"title": "v5_German_Ideology_I_6_Conclusions_Materialist_Conception_History", "start_page": 50, "end_page": 52},
    {"title": "v5_German_Ideology_I_7_Summary_Materialist_Conception_History", "start_page": 53, "end_page": 54},
    {"title": "v5_German_Ideology_I_8_Inconsistency_Idealist_Conception_History", "start_page": 55, "end_page": 56},
    {"title": "v5_German_Ideology_I_9_Idealist_Conception_History_Feuerbachs_Quasi_Communism", "start_page": 57, "end_page": 58},
    {"title": "v5_German_Ideology_I_III_Ruling_Class_Ruling_Ideas", "start_page": 59, "end_page": 62},
    {"title": "v5_German_Ideology_I_IV_Instruments_Production_Forms_Property", "start_page": 63, "end_page": 93},
    
    # The Leipzig Council (pages 94-96)
    {"title": "v5_German_Ideology_Leipzig_Council", "start_page": 94, "end_page": 96},
    
    # II. Saint Bruno (pages 97-116); whole chapter first, then its sections
    {"title": "v5_German_Ideology_II_Saint_Bruno", "start_page": 97, "end_page": 116},
    {"title": "v5_German_Ideology_II_1_Campaign_against_Feuerbach", "start_page": 97, "end_page": 104},
    {"title": "v5_German_Ideology_II_2_Saint_Bruno_Views_Struggle_Feuerbach_Stirner", "start_page": 105, "end_page": 106},
    {"title": "v5_German_Ideology_II_3_Saint_Bruno_versus_Authors_Die_Heilige_Familie", "start_page": 107, "end_page": 113},
    {"title": "v5_German_Ideology_II_4_Obituary_for_M_Hess", "start_page": 114, "end_page": 116},
    
    # III. Saint Max (pages 117-451)
    {"title": "v5_German_Ideology_III_Saint_Max", "start_page": 117, "end_page": 451},
    {"title": "v5_German_Ideology_III_1_The_Unique_and_His_Property", "start_page": 119, "end_page": 120},
    
    # The Old Testament: Man (pages 121-239)
    {"title": "v5_German_Ideology_Old_Testament_Man", "start_page": 121, "end_page": 239},
    {"title": "v5_German_Ideology_1_Book_of_Genesis", "start_page": 121, "end_page": 129},
    {"title": "v5_German_Ideology_2_Economy_Old_Testament", "start_page": 130, "end_page": 135},
    {"title": "v5_German_Ideology_3_The_Ancients", "start_page": 136, "end_page": 143},
    {"title": "v5_German_Ideology_4_The_Moderns", "start_page": 144, "end_page": 184},
    {"title": "v5_German_Ideology_5_Stirner_Delighted_in_His_Construction", "start_page": 185, "end_page": 192},
    {"title": "v5_German_Ideology_6_The_Free_Ones", "start_page": 193, "end_page": 239},
    
    # The New Testament: Ego (pages 240-426)
    # NOTE(review): the entries listed under this heading run up to page 451,
    # beyond the 240-426 range of the parent entry - confirm the parent range.
    {"title": "v5_German_Ideology_New_Testament_Ego", "start_page": 240, "end_page": 426},
    {"title": "v5_German_Ideology_1_Economy_New_Testament", "start_page": 240, "end_page": 241},
    {"title": "v5_German_Ideology_2_Phenomenology_Egoist_Agreement_with_Himself", "start_page": 242, "end_page": 271},
    {"title": "v5_German_Ideology_3_Revelation_John_the_Divine", "start_page": 272, "end_page": 300},
    {"title": "v5_German_Ideology_4_Peculiarity", "start_page": 301, "end_page": 314},
    {"title": "v5_German_Ideology_5_The_Owner", "start_page": 315, "end_page": 426},
    {"title": "v5_German_Ideology_6_Solomons_Song_of_Songs", "start_page": 427, "end_page": 443},
    {"title": "v5_German_Ideology_2_Apologetical_Commentary", "start_page": 444, "end_page": 450},
    {"title": "v5_German_Ideology_Close_Leipzig_Council", "start_page": 451, "end_page": 451},
    
    # Volume II: Critique of German Socialism (pages 453-583)
    {"title": "v5_German_Ideology_Vol2_Critique_German_Socialism", "start_page": 453, "end_page": 454},
    {"title": "v5_German_Ideology_True_Socialism", "start_page": 455, "end_page": 457},
    {"title": "v5_German_Ideology_I_Die_Rheinischen_Jahrbucher_Philosophy_True_Socialism", "start_page": 458, "end_page": 483},
    {"title": "v5_German_Ideology_IV_Karl_Grun_Die_Soziale_Bewegung", "start_page": 484, "end_page": 492},
    {"title": "v5_German_Ideology_Saint_Simonism", "start_page": 493, "end_page": 510},
    # NOTE(review): page 510 is also the last page of the previous entry -
    # presumably the section break falls mid-page; confirm.
    {"title": "v5_German_Ideology_Fourierism", "start_page": 510, "end_page": 528},
    {"title": "v5_German_Ideology_Proudhon", "start_page": 529, "end_page": 530},
    {"title": "v5_German_Ideology_V_Doctor_Georg_Kuhlmann_Prophecies_True_Socialism", "start_page": 531, "end_page": 539},
    {"title": "v5_Engels_The_True_Socialists", "start_page": 540, "end_page": 582}
]

    
def main():
    """
    Split the Volume 5 PDF into one PDF per ``table_of_contents`` entry.

    The source file is read from the user's ``~/Downloads`` folder and the
    output folder ``marx_chapters_v5`` is created when missing. Every entry
    is handed to ``extract_chapter``; a progress line is printed per entry,
    followed by success/failure totals and the sorted list of PDF files
    found in the output folder. Any exception raised while opening or
    processing the source PDF is printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 5_ Ma - Karl Marx.pdf")
    target_dir = "marx_chapters_v5"

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {target_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # One boolean per entry, in table order: True when the chapter was written.
        outcomes = []

        for position, entry in enumerate(table_of_contents, start=1):
            title, first_page, last_page = entry["title"], entry["start_page"], entry["end_page"]
            destination = os.path.join(target_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            outcomes.append(bool(extract_chapter(reader, destination, first_page, last_page, title)))

        succeeded = outcomes.count(True)
        failed = outcomes.count(False)

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {succeeded} ")
        print(f"❌ failed files: {failed}")
        print(f"📁 folder_name: {target_dir}")

        # Lists every PDF present in the folder, not only the ones written by this run.
        pdf_names = sorted(name for name in os.listdir(target_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the Volume 5 extraction when this code is executed as the main module.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 5_ Ma - Karl Marx.pdf
folder_name: marx_chapters_v5
expected_file_numbers 55

[1/55] is processing: v5_Marx_Theses_on_Feuerbach_Original_Version
✓ generated：marx_chapters_v5/v5_Marx_Theses_on_Feuerbach_Original_Version.pdf (page_num 3-5 -> PDF_page 26-28)
[2/55] is processing: v5_Marx_Theses_on_Feuerbach_Edited_by_Engels
✓ generated：marx_chapters_v5/v5_Marx_Theses_on_Feuerbach_Edited_by_Engels.pdf (page_num 6-10 -> PDF_page 29-33)
[3/55] is processing: v5_Engels_Feuerbach
✓ generated：marx_chapters_v5/v5_Engels_Feuerbach.pdf (page_num 11-14 -> PDF_page 34-37)
[4/55] is processing: v5_Marx_Engels_Reply_to_Bruno_Bauer_Anti_Critique
✓ generated：marx_chapters_v5/v5_Marx_Engels_Reply_to_Bruno_Bauer_Anti_Critique.pdf (page_num 15-18 -> PDF_page 38-41)
[5/55] is processing: v5_Marx_Engels_German_Ideology_Critique_Modern_German_Philosophy
✓ generated：marx_chapters_v5/v5_Marx_Engels_German_Ideology_Critique_Modern

In [3]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Build a file-system friendly name from a chapter title.

    Reserved characters (``< > : " / \\ | ? *``) and the punctuation
    ``. , ( ) [ ]`` are stripped, spaces are replaced by underscores, and
    ``"Unknown"`` is returned when the result is empty or only whitespace.
    """
    without_reserved = re.sub(r'[<>:"/\\|?*]', '', filename)

    # Single translation step: space -> underscore, listed punctuation -> removed.
    punctuation_map = str.maketrans(' ', '_', '.,()[]')
    result = without_reserved.translate(punctuation_map)

    return "Unknown" if result.strip() == '' else result

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=28):
    """
    Copy an inclusive range of book pages from the source PDF into a new PDF.

    Book page numbers are turned into 0-based indices of
    ``input_pdf_reader.pages`` by adding ``page_offset``. The range is
    validated before anything is written, so an inverted or out-of-document
    range is reported as a failure instead of producing an empty file or
    silently wrapping around to pages at the end of the document.

    Args:
        input_pdf_reader: Reader whose ``pages`` sequence holds the source pages.
        output_path: Path of the PDF file to create.
        start_page: First book page of the chapter (inclusive).
        end_page: Last book page of the chapter (inclusive).
        title: Chapter title; only used in the failure message.
        page_offset: Amount added to a book page number to obtain its index
            in ``input_pdf_reader.pages``. Defaults to 28, the value this
            cell has always used.

    Returns:
        True when the file was written, False when the range was invalid or
        any other error occurred (the error is printed, not raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would otherwise yield an empty loop and a page-less PDF.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")

        # Negative indices would wrap around; too-large ones would fail mid-copy.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise IndexError(
                f"PDF pages {pdf_start+1}-{pdf_end+1} are outside the document (1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort: report the failure and let the caller count it.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapter list for Volume 6. Each entry is a dict with:
#   "title"      - base name of the output file (".pdf" is appended in main()),
#   "start_page" - first page of the chapter, inclusive,
#   "end_page"   - last page of the chapter, inclusive.
# Page numbers are the volume's own page numbers; extract_chapter() shifts them
# by a fixed offset to reach indices in the PDF file.
# Several entries share a start page or even an identical range (for example
# the headings on pages 111, 144, 161, 388, 415 and 507), so the same page is
# written to more than one output file.
table_of_contents = [
    # Volume 6: Marx & Engels Collected Works 1845-1848
    {"title": "v6_Marx_Statement", "start_page": 34, "end_page": 34},
    {"title": "v6_Marx_Engels_Circular_Against_Kriege", "start_page": 35, "end_page": 35},
    {"title": "v6_Marx_Engels_Circular_Section_One_How_Communism_Became_Love_Sick", "start_page": 36, "end_page": 40},
    {"title": "v6_Marx_Engels_Circular_Section_Two_Volks_Tribun_Political_Economy_Attitude_Young_America", "start_page": 41, "end_page": 43},
    {"title": "v6_Marx_Engels_Circular_Section_Three_Metaphysical_Trumpetings", "start_page": 44, "end_page": 45},
    {"title": "v6_Marx_Engels_Circular_Section_Four_Flirtations_with_Religion", "start_page": 46, "end_page": 49},
    {"title": "v6_Marx_Engels_Circular_Section_Five_Kriege_Personal_Stand", "start_page": 50, "end_page": 51},
    {"title": "v6_Marx_Engels_Letter_from_Brussels_Communist_Correspondence_Committee_to_Kottgen", "start_page": 54, "end_page": 56},
    {"title": "v6_Engels_The_Prussian_Bank_Question", "start_page": 57, "end_page": 57},
    {"title": "v6_Marx_Engels_Address_German_Democratic_Communists_Brussels_to_Feargus_OConnor", "start_page": 58, "end_page": 60},
    {"title": "v6_Marx_Declaration_Against_Karl_Grun", "start_page": 72, "end_page": 74},
    {"title": "v6_Marx_The_Poverty_of_Philosophy_Answer_to_Philosophy_of_Poverty_by_Proudhon", "start_page": 105, "end_page": 108},
    {"title": "v6_Marx_Poverty_of_Philosophy_Foreword", "start_page": 109, "end_page": 110},
    {"title": "v6_Marx_Poverty_of_Philosophy_Chapter_I_Scientific_Discovery", "start_page": 111, "end_page": 111},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_1_Opposition_Between_Use_Value_and_Exchange_Value", "start_page": 111, "end_page": 119},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_2_Constituted_Value_or_Synthetic_Value", "start_page": 120, "end_page": 143},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_3_Application_Law_of_Proportionality_of_Value", "start_page": 144, "end_page": 144},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_3A_Money", "start_page": 144, "end_page": 151},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_3B_Surplus_Left_by_Labour", "start_page": 152, "end_page": 160},
    {"title": "v6_Marx_Poverty_of_Philosophy_Chapter_II_Metaphysics_of_Political_Economy", "start_page": 161, "end_page": 161},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_1_The_Method", "start_page": 161, "end_page": 161},
    {"title": "v6_Marx_Poverty_of_Philosophy_First_Observation", "start_page": 162, "end_page": 164},
    {"title": "v6_Marx_Poverty_of_Philosophy_Second_Observation", "start_page": 165, "end_page": 165},
    {"title": "v6_Marx_Poverty_of_Philosophy_Third_Observation", "start_page": 166, "end_page": 166},
    {"title": "v6_Marx_Poverty_of_Philosophy_Fourth_Observation", "start_page": 167, "end_page": 168},
    {"title": "v6_Marx_Poverty_of_Philosophy_Fifth_Observation", "start_page": 169, "end_page": 169},
    {"title": "v6_Marx_Poverty_of_Philosophy_Sixth_Observation", "start_page": 170, "end_page": 173},
    {"title": "v6_Marx_Poverty_of_Philosophy_Seventh_and_Last_Observation", "start_page": 174, "end_page": 177},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_2_Division_of_Labour_and_Machinery", "start_page": 178, "end_page": 189},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_3_Competition_and_Monopoly", "start_page": 190, "end_page": 196},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_4_Property_or_Rent", "start_page": 197, "end_page": 205},
    {"title": "v6_Marx_Poverty_of_Philosophy_Section_5_Strikes_and_Combinations_of_Workers", "start_page": 206, "end_page": 212},
    {"title": "v6_Engels_The_Decline_and_Approaching_Fall_of_Guizot_Position_of_French_Bourgeoisie", "start_page": 213, "end_page": 219},
    {"title": "v6_Marx_The_Communism_of_the_Rheinischer_Beobachter", "start_page": 220, "end_page": 234},
    {"title": "v6_Marx_The_Protectionists_the_Free_Traders_and_the_Working_Class", "start_page": 279, "end_page": 281},
    {"title": "v6_Marx_Moralising_Criticism_and_Critical_Morality_Contribution_to_German_Cultural_History_Contra_Karl_Heinzen", "start_page": 312, "end_page": 340},
    {"title": "v6_Marx_Engels_On_Poland_Speeches_at_International_Meeting_in_London_November_29_1847_Mark_17th_Anniversary_Polish_Uprising_1830", "start_page": 388, "end_page": 388},
    {"title": "v6_Marx_Speech_On_Poland", "start_page": 388, "end_page": 388},
    {"title": "v6_Engels_Speech_On_Poland", "start_page": 389, "end_page": 390},
    {"title": "v6_Marx_Remarks_on_the_Article_by_M_Adolphe_Bartels", "start_page": 402, "end_page": 403},
    {"title": "v6_Marx_Lamartine_and_Communism", "start_page": 404, "end_page": 405},
    {"title": "v6_Marx_Wages", "start_page": 415, "end_page": 415},
    {"title": "v6_Marx_Wages_A", "start_page": 415, "end_page": 415},
    {"title": "v6_Marx_Wages_B_Additions", "start_page": 415, "end_page": 415},
    {"title": "v6_Marx_Wages_I_Atkinson", "start_page": 415, "end_page": 415},
    {"title": "v6_Marx_Wages_II_Carlyle", "start_page": 416, "end_page": 416},
    {"title": "v6_Marx_Wages_III_McCulloch", "start_page": 416, "end_page": 418},
    {"title": "v6_Marx_Wages_IV_John_Wade", "start_page": 419, "end_page": 419},
    {"title": "v6_Marx_Wages_V_Babbage", "start_page": 420, "end_page": 420},
    {"title": "v6_Marx_Wages_VI_Andrew_Ure", "start_page": 420, "end_page": 420},
    {"title": "v6_Marx_Wages_VII_Rossi", "start_page": 421, "end_page": 421},
    {"title": "v6_Marx_Wages_VIII_Cherbuliez", "start_page": 421, "end_page": 421},
    {"title": "v6_Marx_Wages_IX_Bray_Saving_Banks", "start_page": 421, "end_page": 421},
    {"title": "v6_Marx_Wages_C", "start_page": 422, "end_page": 422},
    {"title": "v6_Marx_Wages_I_How_Does_Growth_Productive_Forces_Affect_Wages", "start_page": 422, "end_page": 422},
    {"title": "v6_Marx_Wages_II_Competition_Between_Workers_and_Employers", "start_page": 423, "end_page": 423},
    {"title": "v6_Marx_Wages_III_Competition_Among_Workers_Themselves", "start_page": 424, "end_page": 424},
    {"title": "v6_Marx_Wages_IV_Fluctuations_of_Wages", "start_page": 424, "end_page": 424},
    {"title": "v6_Marx_Wages_V_Minimum_Wage", "start_page": 425, "end_page": 425},
    {"title": "v6_Marx_Wages_VI_Suggestions_for_Remedies", "start_page": 426, "end_page": 434},
    {"title": "v6_Marx_Wages_VII_Workers_Associations", "start_page": 435, "end_page": 435},
    {"title": "v6_Marx_Wages_VIII_Positive_Aspect_of_Wage_Labour", "start_page": 436, "end_page": 437},
    {"title": "v6_Marx_Speech_on_Question_of_Free_Trade_Delivered_to_Democratic_Association_Brussels_Public_Meeting_January_9_1848", "start_page": 450, "end_page": 465},
    {"title": "v6_Marx_The_Situation_in_France", "start_page": 468, "end_page": 468},
    {"title": "v6_Marx_Engels_Manifesto_of_the_Communist_Party", "start_page": 477, "end_page": 477},
    {"title": "v6_Marx_Engels_Manifesto_I_Bourgeois_and_Proletarians", "start_page": 482, "end_page": 496},
    {"title": "v6_Marx_Engels_Manifesto_II_Proletarians_and_Communists", "start_page": 497, "end_page": 506},
    {"title": "v6_Marx_Engels_Manifesto_III_Socialist_and_Communist_Literature", "start_page": 507, "end_page": 507},
    {"title": "v6_Marx_Engels_Manifesto_III_1_Reactionary_Socialism", "start_page": 507, "end_page": 507},
    {"title": "v6_Marx_Engels_Manifesto_III_1a_Feudal_Socialism", "start_page": 507, "end_page": 508},
    {"title": "v6_Marx_Engels_Manifesto_III_1b_Petty_Bourgeois_Socialism", "start_page": 509, "end_page": 509},
    {"title": "v6_Marx_Engels_Manifesto_III_1c_German_or_True_Socialism", "start_page": 510, "end_page": 512},
    {"title": "v6_Marx_Engels_Manifesto_III_2_Conservative_or_Bourgeois_Socialism", "start_page": 513, "end_page": 513},
    {"title": "v6_Marx_Engels_Manifesto_III_3_Critical_Utopian_Socialism_and_Communism", "start_page": 514, "end_page": 517},
    {"title": "v6_Marx_Engels_Manifesto_IV_Position_of_Communists_in_Relation_to_Various_Existing_Opposition_Parties", "start_page": 518, "end_page": 519},
    {"title": "v6_Marx_The_Debat_social_of_February_6_on_the_Democratic_Association", "start_page": 537, "end_page": 539},
    {"title": "v6_Marx_Engels_On_the_Polish_Question", "start_page": 545, "end_page": 545},
    {"title": "v6_Marx_Speech_on_Polish_Question", "start_page": 545, "end_page": 548},
    {"title": "v6_Engels_Speech_on_Polish_Question", "start_page": 549, "end_page": 552},
    {"title": "v6_Marx_To_the_Editor_of_La_Reforme", "start_page": 564, "end_page": 566},
    {"title": "v6_Marx_Persecution_of_Foreigners_in_Brussels", "start_page": 567, "end_page": 568},
    
    # From the Preparatory Materials
    {"title": "v6_Marx_Protectionists", "start_page": 573, "end_page": 573},
    {"title": "v6_Marx_Demand", "start_page": 574, "end_page": 575},
    {"title": "v6_Marx_Draft_Plan_for_Section_III_of_Manifesto_of_Communist_Party", "start_page": 576, "end_page": 576},
    {"title": "v6_Marx_Page_from_Rough_Draft_of_Manifesto_of_Communist_Party", "start_page": 577, "end_page": 580},
    {"title": "v6_Marx_Notes_on_the_Arrest_Maltreatment_and_Expulsion_of_Wilhelm_Wolff_by_the_Brussels_Police", "start_page": 581, "end_page": 584}
]

def main():
    """
    Split the Volume 6 PDF into one PDF per ``table_of_contents`` entry.

    The source file is read from the user's ``~/Downloads`` folder and the
    output folder ``marx_chapters_v6`` is created when missing. Every entry
    is handed to ``extract_chapter``; a progress line is printed per entry,
    followed by success/failure totals and the sorted list of PDF files
    found in the output folder. Any exception raised while opening or
    processing the source PDF is printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 6_ Ma - Karl Marx.pdf")
    target_dir = "marx_chapters_v6"

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {target_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # One boolean per entry, in table order: True when the chapter was written.
        outcomes = []

        for position, entry in enumerate(table_of_contents, start=1):
            title, first_page, last_page = entry["title"], entry["start_page"], entry["end_page"]
            destination = os.path.join(target_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            outcomes.append(bool(extract_chapter(reader, destination, first_page, last_page, title)))

        succeeded = outcomes.count(True)
        failed = outcomes.count(False)

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {succeeded} ")
        print(f"❌ failed files: {failed}")
        print(f"📁 folder_name: {target_dir}")

        # Lists every PDF present in the folder, not only the ones written by this run.
        pdf_names = sorted(name for name in os.listdir(target_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the Volume 6 extraction when this code is executed as the main module.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 6_ Ma - Karl Marx.pdf
folder_name: marx_chapters_v6
expected_file_numbers 86

[1/86] is processing: v6_Marx_Statement
✓ generated：marx_chapters_v6/v6_Marx_Statement.pdf (page_num 34-34 -> PDF_page 63-63)
[2/86] is processing: v6_Marx_Engels_Circular_Against_Kriege
✓ generated：marx_chapters_v6/v6_Marx_Engels_Circular_Against_Kriege.pdf (page_num 35-35 -> PDF_page 64-64)
[3/86] is processing: v6_Marx_Engels_Circular_Section_One_How_Communism_Became_Love_Sick
✓ generated：marx_chapters_v6/v6_Marx_Engels_Circular_Section_One_How_Communism_Became_Love_Sick.pdf (page_num 36-40 -> PDF_page 65-69)
[4/86] is processing: v6_Marx_Engels_Circular_Section_Two_Volks_Tribun_Political_Economy_Attitude_Young_America
✓ generated：marx_chapters_v6/v6_Marx_Engels_Circular_Section_Two_Volks_Tribun_Political_Economy_Attitude_Young_America.pdf (page_num 41-43 -> PDF_page 70-72)
[5/86] is processing: v6_Marx_Engels_Circular_Secti

In [8]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Convert a chapter title into a name usable on common file systems.

    Removes ``< > : " / \\ | ? *`` as well as ``. , ( ) [ ]``, swaps spaces
    for underscores, and falls back to ``"Unknown"`` if the cleaned name is
    empty or consists only of whitespace.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    # Strip the remaining punctuation marks one at a time.
    for mark in '.,()[]':
        cleaned = cleaned.replace(mark, '')

    if cleaned.strip():
        return cleaned
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Copy one chapter out of the source PDF into its own file.

    ``start_page`` and ``end_page`` are the chapter's page numbers (both
    inclusive). ``page_offset`` is added to each of them to obtain the
    zero-based index into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: Reader whose ``pages`` sequence supplies the pages.
        output_path: Destination path of the chapter PDF.
        start_page: First page of the chapter.
        end_page: Last page of the chapter (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Difference between a page number and its index in the
            PDF. Defaults to 30, the value that used to be hard-coded here.

    Returns:
        True when the file was written, False otherwise. Failures are
        printed and never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range used to yield an empty PDF that was still
        # reported as a success.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")

        # A negative index would silently wrap around to the end of the
        # document, so reject it together with indices past the last page.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document "
                f"(1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        # The output file is only opened once every page has been collected,
        # so a failure above never leaves a file behind.
        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort by design: report the chapter and let the caller go on.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 7 to extract, listed in page order. Each entry holds:
#   title      - base name of the output file (main() appends ".pdf")
#   start_page - first page of the chapter
#   end_page   - last page of the chapter, inclusive
# The page numbers are not PDF indices: extract_chapter() adds a fixed offset
# to them (presumably they are the printed page numbers of the volume).
table_of_contents = [
    # Opening Documents (Items 1-4)
    {"title": "v7_Demands_of_the_Communist_Party_in_Germany", "start_page": 3, "end_page": 7},
    {"title": "v7_Letter_to_Etienne_Cabet_Editor_of_the_Populaire", "start_page": 8, "end_page": 9},
    {"title": "v7_To_the_Committee_of_the_German_Democratic_Society_in_Paris", "start_page": 10, "end_page": 10},
    {"title": "v7_To_the_Editor_of_the_Newspaper_L_Alba", "start_page": 11, "end_page": 14},
    
    # June 1848 Articles (Items 5-51)
    {"title": "v7_Statement_of_the_Editorial_Board_of_the_Neue_Rheinische_Zeitung", "start_page": 15, "end_page": 15},
    {"title": "v7_The_Assembly_at_Frankfurt", "start_page": 16, "end_page": 19},
    {"title": "v7_Huser", "start_page": 20, "end_page": 23},
    {"title": "v7_The_Latest_Heroic_Deed_of_the_House_of_Bourbon", "start_page": 24, "end_page": 26},
    {"title": "v7_The_Democratic_Party", "start_page": 27, "end_page": 29},
    {"title": "v7_Camphausen_Statement_at_the_Session_of_May_30", "start_page": 30, "end_page": 33},
    {"title": "v7_Defeat_of_the_German_Troops_at_Sundewitt", "start_page": 34, "end_page": 35},
    {"title": "v7_Questions_of_Life_and_Death", "start_page": 36, "end_page": 38},
    {"title": "v7_The_Camphausen_Government", "start_page": 39, "end_page": 40},
    {"title": "v7_The_Question_of_Union", "start_page": 41, "end_page": 41},
    {"title": "v7_The_War_Comedy", "start_page": 42, "end_page": 44},
    {"title": "v7_The_Reaction", "start_page": 45, "end_page": 45},
    {"title": "v7_Comite_de_surete_generale_in_Berlin", "start_page": 46, "end_page": 47},
    {"title": "v7_The_Programmes_of_the_Radical_Democratic_Party_and_of_the_Left_at_Frankfurt", "start_page": 48, "end_page": 52},
    {"title": "v7_The_Agreement_Debates_in_Berlin", "start_page": 53, "end_page": 56},
    {"title": "v7_The_Agreement_Debates", "start_page": 57, "end_page": 61},
    {"title": "v7_The_Question_of_the_Address", "start_page": 62, "end_page": 63},
    {"title": "v7_A_New_Partition_of_Poland", "start_page": 64, "end_page": 65},
    {"title": "v7_The_Shield_of_the_Dynasty", "start_page": 66, "end_page": 67},
    {"title": "v7_Cologne_in_Danger", "start_page": 68, "end_page": 71},
    {"title": "v7_An_Admission_of_Incompetence_by_the_Assemblies_of_Frankfurt_and_Berlin", "start_page": 72, "end_page": 72},
    {"title": "v7_The_Berlin_Debate_on_the_Revolution", "start_page": 73, "end_page": 86},
    {"title": "v7_The_Position_of_the_Parties_in_Cologne", "start_page": 87, "end_page": 88},
    {"title": "v7_The_Agreement_Assembly_of_June_15", "start_page": 89, "end_page": 90},
    {"title": "v7_The_Prague_Uprising", "start_page": 91, "end_page": 93},
    {"title": "v7_Valdenaire_Arrest_Sebaldt", "start_page": 94, "end_page": 95},
    {"title": "v7_The_Agreement_Assembly_Session_of_June_17", "start_page": 96, "end_page": 100},
    {"title": "v7_The_Stupp_Amendment", "start_page": 101, "end_page": 103},
    {"title": "v7_A_New_Policy_in_Posen", "start_page": 104, "end_page": 105},
    {"title": "v7_The_Downfall_of_the_Camphausen_Government", "start_page": 106, "end_page": 106},
    {"title": "v7_The_Downfall_of_the_Camphausen_Government_2", "start_page": 107, "end_page": 108},
    {"title": "v7_The_First_Deed_of_the_German_National_Assembly_in_Frankfurt", "start_page": 109, "end_page": 110},
    {"title": "v7_The_Hansemann_Government", "start_page": 111, "end_page": 112},
    {"title": "v7_The_Neue_Berliner_Zeitung_on_the_Chartists", "start_page": 113, "end_page": 114},
    {"title": "v7_Threat_of_the_Gervinus_Zeitung", "start_page": 115, "end_page": 116},
    {"title": "v7_Patow_Redemption_Memorandum", "start_page": 117, "end_page": 118},
    {"title": "v7_The_Democratic_Character_of_the_Uprising", "start_page": 119, "end_page": 120},
    {"title": "v7_News_from_Paris", "start_page": 121, "end_page": 121},
    {"title": "v7_Reichensperger", "start_page": 122, "end_page": 122},
    {"title": "v7_News_from_Paris_2", "start_page": 123, "end_page": 123},
    {"title": "v7_Details_about_the_23rd_of_June", "start_page": 124, "end_page": 127},
    {"title": "v7_News_from_Paris_3", "start_page": 128, "end_page": 128},
    {"title": "v7_The_Northern_Star_about_the_Neue_Rheinische_Zeitung", "start_page": 129, "end_page": 129},
    {"title": "v7_The_23rd_of_June", "start_page": 130, "end_page": 133},
    {"title": "v7_The_24th_of_June", "start_page": 134, "end_page": 138},
    {"title": "v7_The_25th_of_June", "start_page": 139, "end_page": 143},
    {"title": "v7_The_June_Revolution", "start_page": 144, "end_page": 149},
    
    # July 1848 Articles (Items 52-83)
    {"title": "v7_The_Kolnische_Zeitung_on_the_June_Revolution", "start_page": 150, "end_page": 156},
    {"title": "v7_The_June_Revolution_The_Course_of_the_Paris_Uprising", "start_page": 157, "end_page": 164},
    {"title": "v7_Germany_Foreign_Policy", "start_page": 165, "end_page": 167},
    {"title": "v7_Marrast_and_Thiers", "start_page": 168, "end_page": 169},
    {"title": "v7_The_Agreement_Debates_July", "start_page": 170, "end_page": 175},
    {"title": "v7_Arrests", "start_page": 176, "end_page": 176},
    {"title": "v7_Arrests_2", "start_page": 177, "end_page": 179},
    {"title": "v7_The_Agreement_Debates_July_2", "start_page": 180, "end_page": 185},
    {"title": "v7_Legal_Proceedings_against_the_Neue_Rheinische_Zeitung", "start_page": 186, "end_page": 188},
    {"title": "v7_The_Berlin_Agreement_Debates", "start_page": 189, "end_page": 193},
    {"title": "v7_The_Government_of_Action", "start_page": 194, "end_page": 194},
    {"title": "v7_The_Agreement_Debate_July", "start_page": 195, "end_page": 198},
    {"title": "v7_The_Ministerial_Crisis", "start_page": 199, "end_page": 199},
    {"title": "v7_The_Agreement_Session_of_July_4_Second_Article", "start_page": 200, "end_page": 207},
    {"title": "v7_Legal_Proceedings_against_the_Neue_Rheinische_Zeitung_2", "start_page": 208, "end_page": 211},
    {"title": "v7_German_Foreign_Policy_and_the_Latest_Events_in_Prague", "start_page": 212, "end_page": 215},
    {"title": "v7_The_Agreement_Debates_of_July_7", "start_page": 216, "end_page": 222},
    {"title": "v7_Herr_Forstmann_on_the_State_Credit", "start_page": 223, "end_page": 225},
    {"title": "v7_The_Agreement_Debates_July_3", "start_page": 226, "end_page": 231},
    {"title": "v7_The_Debate_on_Jacoby_Motion", "start_page": 232, "end_page": 247},
    {"title": "v7_The_Suppression_of_the_Clubs_in_Stuttgart_and_Heidelberg", "start_page": 248, "end_page": 249},
    {"title": "v7_The_Prussian_Press_Bill", "start_page": 250, "end_page": 252},
    {"title": "v7_The_Faedrelandet_on_the_Armistice_with_Denmark", "start_page": 253, "end_page": 255},
    {"title": "v7_The_Civic_Militia_Bill", "start_page": 256, "end_page": 265},
    {"title": "v7_The_Armistice_with_Denmark", "start_page": 266, "end_page": 269},
    {"title": "v7_The_Armistice_Negotiations", "start_page": 270, "end_page": 270},
    {"title": "v7_The_Concordia_of_Turin", "start_page": 271, "end_page": 272},
    {"title": "v7_The_Agreement_Debates_on_the_District_Estates_Agreement_Session_of_July_18", "start_page": 273, "end_page": 277},
    {"title": "v7_The_Bill_on_the_Compulsory_Loan_and_Its_Motivation", "start_page": 278, "end_page": 286},
    {"title": "v7_Armistice_Negotiations_with_Denmark_Broken_Off", "start_page": 287, "end_page": 287},
    {"title": "v7_The_Dissolution_of_the_Democratic_Associations_in_Baden", "start_page": 288, "end_page": 289},
    {"title": "v7_The_Bill_Proposing_the_Abolition_of_Feudal_Obligations", "start_page": 290, "end_page": 295},
    
    # August 1848 Articles (Items 84-104)
    {"title": "v7_The_Kolnische_Zeitung_on_the_State_of_Affairs_in_England", "start_page": 296, "end_page": 300},
    {"title": "v7_The_Agreement_Debate_about_the_Valdenaire_Affair", "start_page": 301, "end_page": 304},
    {"title": "v7_The_Milan_Bulletin", "start_page": 305, "end_page": 306},
    {"title": "v7_The_Russian_Note", "start_page": 307, "end_page": 313},
    {"title": "v7_Miscellaneous", "start_page": 314, "end_page": 314},
    {"title": "v7_Bakunin", "start_page": 315, "end_page": 316},
    {"title": "v7_The_Hansemann_Government_and_the_Old_Prussian_Criminal_Bill", "start_page": 317, "end_page": 318},
    {"title": "v7_The_Kolnische_Zeitung_on_the_Compulsory_Loan", "start_page": 319, "end_page": 320},
    {"title": "v7_Proudhon_Speech_against_Thiers", "start_page": 321, "end_page": 324},
    {"title": "v7_Dr_Gottschalk", "start_page": 325, "end_page": 326},
    {"title": "v7_Debate_about_the_Existing_Redemption_Legislation", "start_page": 327, "end_page": 332},
    {"title": "v7_The_Model_State_of_Belgium", "start_page": 333, "end_page": 336},
    {"title": "v7_The_Frankfurt_Assembly_Debates_the_Polish_Question", "start_page": 337, "end_page": 381},
    {"title": "v7_The_Danish_Armistice_and_Hansemann", "start_page": 382, "end_page": 382},
    {"title": "v7_The_German_Citizenship_and_the_Prussian_Police", "start_page": 383, "end_page": 384},
    {"title": "v7_The_Italian_Liberation_Struggle_and_the_Cause_of_Its_Present_Failure", "start_page": 385, "end_page": 387},
    {"title": "v7_Charles_Albert_Betrayal", "start_page": 388, "end_page": 389},
    {"title": "v7_The_Attempt_to_Expel_Schapper", "start_page": 390, "end_page": 392},
    {"title": "v7_Geiger_and_Schapper", "start_page": 393, "end_page": 394},
    {"title": "v7_The_Kolnische_Zeitung_about_Italy", "start_page": 395, "end_page": 398},
    {"title": "v7_The_Zeitungs_Halle_on_the_Rhine_Province", "start_page": 399, "end_page": 401},
    
    # September 1848 Articles (Items 105-125)
    {"title": "v7_Mediation_and_Intervention_Radetzky_and_Cavaignac", "start_page": 402, "end_page": 403},
    {"title": "v7_The_Antwerp_Death_Sentences", "start_page": 404, "end_page": 406},
    {"title": "v7_The_Conflict_between_Marx_and_Prussian_Citizenship", "start_page": 407, "end_page": 410},
    {"title": "v7_The_Danish_Armistice_September", "start_page": 411, "end_page": 415},
    {"title": "v7_Editorial_Note_Accompanying_the_Article_The_Financial_Project_of_the_Left", "start_page": 416, "end_page": 416},
    {"title": "v7_The_Fall_of_the_Government_of_Action", "start_page": 417, "end_page": 419},
    {"title": "v7_His_Successors", "start_page": 420, "end_page": 420},
    {"title": "v7_The_Danish_Prussian_Armistice", "start_page": 421, "end_page": 425},
    {"title": "v7_Arrests_September", "start_page": 426, "end_page": 426},
    {"title": "v7_The_Crisis_and_the_Counter_Revolution", "start_page": 427, "end_page": 433},
    {"title": "v7_Army_Order_Election_Candidates_Semi_Official_Comments_on_Prussian_Ambiguity", "start_page": 434, "end_page": 435},
    {"title": "v7_Freedom_of_Debate_in_Berlin", "start_page": 436, "end_page": 438},
    {"title": "v7_Ratification_of_the_Armistice", "start_page": 439, "end_page": 441},
    {"title": "v7_The_Uprising_in_Frankfurt", "start_page": 442, "end_page": 445},
    {"title": "v7_The_Faedrelandet_on_the_Armistice", "start_page": 446, "end_page": 447},
    {"title": "v7_The_Government_of_the_Counter_Revolution", "start_page": 448, "end_page": 449},
    {"title": "v7_The_Cologne_Committee_of_Public_Safety", "start_page": 450, "end_page": 450},
    {"title": "v7_Public_Prosecutor_Hecker_Questions_People_Who_Had_Attended_the_Worringen_Meeting", "start_page": 451, "end_page": 451},
    {"title": "v7_Counter_Revolution_in_Cologne", "start_page": 452, "end_page": 453},
    {"title": "v7_An_Attempt_to_Arrest_Moll", "start_page": 454, "end_page": 454},
    {"title": "v7_State_of_Siege_in_Cologne", "start_page": 455, "end_page": 455},
    
    # October 1848 Articles (Items 126-138)
    {"title": "v7_Editorial_Statement_Concerning_the_Reappearance_of_the_Neue_Rheinische_Zeitung", "start_page": 456, "end_page": 456},
    {"title": "v7_Revolution_in_Vienna", "start_page": 457, "end_page": 458},
    {"title": "v7_The_Latest_News_from_the_Model_State", "start_page": 459, "end_page": 461},
    {"title": "v7_The_Revolution_of_Cologne", "start_page": 462, "end_page": 465},
    {"title": "v7_The_Pfuel_Government", "start_page": 466, "end_page": 466},
    {"title": "v7_Thiers_Speech_Concerning_a_General_Mortgage_Bank_with_a_Legal_Rate", "start_page": 467, "end_page": 471},
    {"title": "v7_The_Frankfurter_Oberpostamts_Zeitung_and_the_Viennese_Revolution", "start_page": 472, "end_page": 473},
    {"title": "v7_Reply_of_the_King_of_Prussia_to_the_Delegation_of_the_National_Assembly", "start_page": 474, "end_page": 475},
    {"title": "v7_Reply_of_Frederick_William_IV_to_the_Delegation_of_the_Civic_Militia", "start_page": 476, "end_page": 477},
    {"title": "v7_The_Reforme_on_the_June_Insurrection", "start_page": 478, "end_page": 479},
    {"title": "v7_English_French_Mediation_in_Italy", "start_page": 480, "end_page": 481},
    {"title": "v7_The_Model_Constitutional_State", "start_page": 482, "end_page": 484},
    {"title": "v7_Public_Prosecutor_Hecker_and_the_Neue_Rheinische_Zeitung", "start_page": 485, "end_page": 489},
    
    # November 1848 Articles (Items 139-145)
    {"title": "v7_Appeal_of_the_Democratic_Congress_to_the_German_People", "start_page": 490, "end_page": 492},
    {"title": "v7_The_Paris_Reforme_on_the_Situation_in_France", "start_page": 493, "end_page": 495},
    {"title": "v7_The_Viennese_Revolution_and_the_Kolnische_Zeitung", "start_page": 496, "end_page": 497},
    {"title": "v7_The_Latest_News_from_Vienna_Berlin_and_Paris", "start_page": 498, "end_page": 499},
    {"title": "v7_Our_Bourgeoisie_and_Dr_Nuckel", "start_page": 500, "end_page": 500},
    {"title": "v7_News_from_Vienna", "start_page": 501, "end_page": 502},
    {"title": "v7_The_Victory_of_the_Counter_Revolution_in_Vienna", "start_page": 503, "end_page": 506},
]

def main(input_pdf_path=None, output_dir="marx_chapters_v7"):
    """
    Split the Volume 7 PDF into one file per ``table_of_contents`` entry.

    Args:
        input_pdf_path: Path of the source PDF. When omitted, the Volume 7
            file in the current user's Downloads folder is used.
        output_dir: Folder that receives the chapter PDFs; created if missing.

    Returns:
        None. Progress and a final summary are printed. A chapter that cannot
        be extracted is counted as failed; any other error (for example an
        unreadable input file) is printed and not re-raised.
    """
    if input_pdf_path is None:
        # Default location: the source PDF sits in the user's Downloads folder.
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 7_ Ma - Karl Marx.pdf")

    # exist_ok replaces the separate "does it exist?" check.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every chapter was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print(f"\nextraction finished with failures")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF now in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        # Best effort by design: report the problem instead of raising.
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run main() when this code is executed directly (as it is in a
# notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 7_ Ma - Karl Marx.pdf
folder_name: marx_chapters_v7
expected_file_numbers 145

[1/145] is processing: v7_Demands_of_the_Communist_Party_in_Germany
✓ generated：marx_chapters_v7/v7_Demands_of_the_Communist_Party_in_Germany.pdf (page_num 3-7 -> PDF_page 34-38)
[2/145] is processing: v7_Letter_to_Etienne_Cabet_Editor_of_the_Populaire
✓ generated：marx_chapters_v7/v7_Letter_to_Etienne_Cabet_Editor_of_the_Populaire.pdf (page_num 8-9 -> PDF_page 39-40)
[3/145] is processing: v7_To_the_Committee_of_the_German_Democratic_Society_in_Paris
✓ generated：marx_chapters_v7/v7_To_the_Committee_of_the_German_Democratic_Society_in_Paris.pdf (page_num 10-10 -> PDF_page 41-41)
[4/145] is processing: v7_To_the_Editor_of_the_Newspaper_L_Alba
✓ generated：marx_chapters_v7/v7_To_the_Editor_of_the_Newspaper_L_Alba.pdf (page_num 11-14 -> PDF_page 42-45)
[5/145] is processing: v7_Statement_of_the_Editorial_Board_of_the_Neue_Rheinisch

In [11]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Characters that are illegal or awkward in file names (angle brackets,
    colon, double quote, slash, backslash, pipe, question mark, asterisk,
    period, comma, parentheses and square brackets) are removed, and every
    space is replaced by an underscore.

    Args:
        filename: Raw title text.

    Returns:
        The cleaned name, or "Unknown" when nothing but whitespace is left
        after the special characters have been removed.
    """
    # A single pass removes everything the former re.sub + str.replace chain
    # stripped one character class at a time.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # The blank check has to happen before spaces become underscores;
    # otherwise a title such as "   " or "( )" slips through as "___" / "_".
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=28):
    """
    Copy one chapter out of the source PDF into its own file.

    ``start_page`` and ``end_page`` are the chapter's page numbers (both
    inclusive). ``page_offset`` is added to each of them to obtain the
    zero-based index into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: Reader whose ``pages`` sequence supplies the pages.
        output_path: Destination path of the chapter PDF.
        start_page: First page of the chapter.
        end_page: Last page of the chapter (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Difference between a page number and its index in the
            PDF. Defaults to 28, the value that used to be hard-coded here.

    Returns:
        True when the file was written, False otherwise. Failures are
        printed and never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range used to yield an empty PDF that was still
        # reported as a success.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")

        # A negative index would silently wrap around to the end of the
        # document, so reject it together with indices past the last page.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document "
                f"(1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        # The output file is only opened once every page has been collected,
        # so a failure above never leaves a file behind.
        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort by design: report the chapter and let the caller go on.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 8 to extract, listed in page order. Each entry holds:
#   title      - base name of the output file (main() appends ".pdf")
#   start_page - first page of the chapter
#   end_page   - last page of the chapter, inclusive
# The page numbers are not PDF indices: extract_chapter() adds a fixed offset
# to them (presumably they are the printed page numbers of the volume).
table_of_contents = [
    # November 1848 Articles (Items 1-48)
    {"title": "v8_The_Crisis_in_Berlin", "start_page": 3, "end_page": 6},
    {"title": "v8_The_Ex_Principality", "start_page": 7, "end_page": 8},
    {"title": "v8_The_New_Institutions_Progress_in_Switzerland", "start_page": 9, "end_page": 13},
    {"title": "v8_Counter_Revolution_in_Berlin", "start_page": 14, "end_page": 19},
    {"title": "v8_Decision_of_the_Berlin_National_Assembly", "start_page": 20, "end_page": 21},
    {"title": "v8_Sitting_of_the_Swiss_Chambers", "start_page": 22, "end_page": 22},
    {"title": "v8_Cavaignac_and_the_June_Revolution", "start_page": 23, "end_page": 23},
    {"title": "v8_Appeal_of_the_Democratic_District_Committee_of_the_Rhine_Province", "start_page": 24, "end_page": 24},
    {"title": "v8_Impeachment_of_the_Government", "start_page": 25, "end_page": 28},
    {"title": "v8_Statement", "start_page": 29, "end_page": 29},
    {"title": "v8_Confessions_of_a_Noble_Soul", "start_page": 30, "end_page": 34},
    {"title": "v8_The_Kolnische_Zeitung", "start_page": 35, "end_page": 35},
    {"title": "v8_No_More_Taxes", "start_page": 36, "end_page": 36},
    {"title": "v8_A_Decree_of_Eichmann", "start_page": 37, "end_page": 38},
    {"title": "v8_Tax_Refusal_and_the_Countryside", "start_page": 39, "end_page": 40},
    {"title": "v8_Appeal", "start_page": 41, "end_page": 41},
    {"title": "v8_Elections_to_the_Federal_Court_Miscellaneous", "start_page": 42, "end_page": 44},
    {"title": "v8_The_City_Council", "start_page": 45, "end_page": 45},
    {"title": "v8_Appeal_2", "start_page": 46, "end_page": 46},
    {"title": "v8_On_the_Proclamation_of_the_Brandenburg_Manteuffel_Ministry_about_Tax_Refusal", "start_page": 47, "end_page": 47},
    {"title": "v8_The_Chief_Public_Prosecutor_and_the_Neue_Rheinische_Zeitung", "start_page": 48, "end_page": 49},
    {"title": "v8_The_Public_Prosecutor_Office_in_Berlin_and_Cologne", "start_page": 50, "end_page": 50},
    {"title": "v8_The_Frankfurt_Assembly", "start_page": 51, "end_page": 52},
    {"title": "v8_State_of_Siege_Everywhere", "start_page": 53, "end_page": 53},
    {"title": "v8_Position_of_the_Left_in_the_National_Assembly", "start_page": 54, "end_page": 54},
    {"title": "v8_News_from_Switzerland", "start_page": 55, "end_page": 55},
    {"title": "v8_Result_of_the_Elections_to_the_National_Council", "start_page": 56, "end_page": 56},
    {"title": "v8_Elections_Sydow", "start_page": 57, "end_page": 60},
    {"title": "v8_Debate_in_the_National_Council", "start_page": 61, "end_page": 62},
    {"title": "v8_Raveaux_Resignation_Violation_of_the_Swiss_Frontier", "start_page": 63, "end_page": 64},
    {"title": "v8_Manteuffel_and_the_Central_Authority", "start_page": 65, "end_page": 65},
    {"title": "v8_The_German_Central_Authority_and_Switzerland", "start_page": 66, "end_page": 74},
    {"title": "v8_Drigalski_Legislator_Citizen_and_Communist", "start_page": 75, "end_page": 81},
    {"title": "v8_Three_State_Trials_against_the_Neue_Rheinische_Zeitung", "start_page": 82, "end_page": 82},
    {"title": "v8_Personalities_of_the_Federal_Council", "start_page": 83, "end_page": 87},
    {"title": "v8_Report_of_the_Frankfurt_Committee_on_Austrian_Affairs", "start_page": 88, "end_page": 93},
    {"title": "v8_News", "start_page": 94, "end_page": 94},
    {"title": "v8_Sittings_of_the_Federal_Council_and_the_Council_of_States", "start_page": 95, "end_page": 95},
    {"title": "v8_Letters_Opened", "start_page": 96, "end_page": 96},
    {"title": "v8_Joint_Sitting_of_the_Councils_The_Federal_Council", "start_page": 97, "end_page": 98},
    {"title": "v8_The_Organ_of_Manteuffel_and_Johann_The_Rhine_Province_and_the_King_of_Prussia", "start_page": 99, "end_page": 100},
    {"title": "v8_The_Revolutionary_Movement_in_Italy", "start_page": 101, "end_page": 105},
    {"title": "v8_German_Professorial_Babblers", "start_page": 106, "end_page": 107},
    {"title": "v8_Sitting_of_the_National_Council_The_Council_of_States_Protest_of_the_Pope_Imperial_Grain_Embargo_The_Valaisan_Great_Council", "start_page": 108, "end_page": 110},
    {"title": "v8_Sitting_of_the_National_Council", "start_page": 111, "end_page": 111},
    {"title": "v8_Berne_Declared_Federal_Capital_Franscini", "start_page": 112, "end_page": 112},
    {"title": "v8_News_from_Switzerland_2", "start_page": 113, "end_page": 114},
    {"title": "v8_Duel_between_Berg_and_Luvini", "start_page": 115, "end_page": 115},
    
    # December 1848 Articles (Items 49-72)
    {"title": "v8_The_Closing_of_the_German_Frontier_The_Empire_The_Council_of_War", "start_page": 116, "end_page": 118},
    {"title": "v8_The_Federal_Council_and_the_Foreign_Ambassadors_The_Federal_Council_in_Tessin_Centralisation_of_Posts_German_Army_Commander_Apology", "start_page": 119, "end_page": 119},
    {"title": "v8_Swiss_Evidence_of_the_Austrian_Army_Heroic_Deeds_in_Vienna", "start_page": 120, "end_page": 122},
    {"title": "v8_The_French_Working_Class_and_the_Presidential_Elections", "start_page": 123, "end_page": 128},
    {"title": "v8_Proudhon", "start_page": 129, "end_page": 132},
    {"title": "v8_Herr_Raumer_Is_Still_Alive", "start_page": 133, "end_page": 133},
    {"title": "v8_Second_Stage_of_the_Counter_Revolution", "start_page": 134, "end_page": 134},
    {"title": "v8_The_Coup_d_Etat_of_the_Counter_Revolution", "start_page": 135, "end_page": 135},
    {"title": "v8_Measures_Concerning_the_German_Refugees", "start_page": 136, "end_page": 137},
    {"title": "v8_The_National_Council", "start_page": 138, "end_page": 153},
    {"title": "v8_The_Bourgeoisie_and_the_Counter_Revolution", "start_page": 154, "end_page": 178},
    {"title": "v8_A_New_Ally_of_the_Counter_Revolution", "start_page": 179, "end_page": 181},
    {"title": "v8_The_Calumnies_of_the_Neue_Rheinische_Zeitung", "start_page": 182, "end_page": 182},
    {"title": "v8_Ursuline_Convent_Recruiting_for_the_Grape_Shot_King_The_Burghers_Commune_Commission_on_a_General_Customs_Tariff", "start_page": 183, "end_page": 184},
    {"title": "v8_Address_of_the_Central_Commission_of_the_Workers_Associations_of_Switzerland_to_the_Executive_of_the_March_Association_in_Frankfurt_am_Main", "start_page": 185, "end_page": 186},
    {"title": "v8_Dismissal_of_Drigalski", "start_page": 187, "end_page": 187},
    {"title": "v8_The_Trial_of_Gottschalk_and_His_Comrades", "start_page": 188, "end_page": 196},
    {"title": "v8_The_Prussian_Counter_Revolution_and_the_Prussian_Judiciary", "start_page": 197, "end_page": 203},
    {"title": "v8_Measures_against_German_Refugees_Return_of_Troops_from_Tessin_The_Patricians_Commune", "start_page": 204, "end_page": 206},
    {"title": "v8_Letter_of_the_Central_Commission_of_the_Workers_Associations_in_Switzerland_to_the_Association_in_Vivis", "start_page": 207, "end_page": 209},
    {"title": "v8_Refutation", "start_page": 210, "end_page": 210},
    {"title": "v8_The_New_Holy_Alliance", "start_page": 211, "end_page": 212},
    {"title": "v8_The_Revolutionary_Movement", "start_page": 213, "end_page": 215},
    {"title": "v8_Swiss_Italian_Affairs", "start_page": 216, "end_page": 217},
    
    # January 1849 Articles (Items 73-88)
    {"title": "v8_A_Bourgeois_Document", "start_page": 218, "end_page": 221},
    {"title": "v8_A_New_Year_Greeting", "start_page": 222, "end_page": 226},
    {"title": "v8_The_Magyar_Struggle", "start_page": 227, "end_page": 238},
    {"title": "v8_Herr_Miller_Radetzky_Chicanery_towards_Tessin_The_Federal_Council_Lohbauer", "start_page": 239, "end_page": 241},
    {"title": "v8_The_Last_Volunteer_Insurgents", "start_page": 242, "end_page": 242},
    {"title": "v8_Budget", "start_page": 243, "end_page": 243},
    {"title": "v8_Priests_Rebellion", "start_page": 244, "end_page": 245},
    {"title": "v8_The_Swiss_Press", "start_page": 246, "end_page": 250},
    {"title": "v8_Protectionist_Agitation_Recruiting_into_the_Neapolitan_Army", "start_page": 251, "end_page": 251},
    {"title": "v8_Miller_The_Freiburg_Government_Ochsenbein", "start_page": 252, "end_page": 253},
    {"title": "v8_Montesquieu_LVI", "start_page": 254, "end_page": 267},
    {"title": "v8_Answer_from_Colonel_Engels", "start_page": 268, "end_page": 268},
    {"title": "v8_The_Prussian_Warrant_for_the_Arrest_of_Kossuth", "start_page": 269, "end_page": 270},
    {"title": "v8_The_Berlin_National_Zeitung_to_the_Primary_Electors", "start_page": 271, "end_page": 280},
    {"title": "v8_The_Situation_in_Paris", "start_page": 281, "end_page": 283},
    {"title": "v8_The_Situation_in_Paris_2", "start_page": 284, "end_page": 285},
    
    # February 1849 Articles (Items 89-121)
    {"title": "v8_The_Kolnische_Zeitung_on_the_Elections", "start_page": 286, "end_page": 289},
    {"title": "v8_The_Struggle_in_Hungary", "start_page": 290, "end_page": 294},
    {"title": "v8_Camphausen", "start_page": 295, "end_page": 297},
    {"title": "v8_From_the_Banat", "start_page": 298, "end_page": 299},
    {"title": "v8_The_19th_Army_Bulletin_and_Commentaries_on_It", "start_page": 300, "end_page": 303},
    {"title": "v8_The_First_Trial_of_the_Neue_Rheinische_Zeitung", "start_page": 304, "end_page": 322},
    {"title": "v8_The_Trial_of_the_Rhenish_District_Committee_of_Democrats", "start_page": 323, "end_page": 339},
    {"title": "v8_The_Tax_Refusal_Trial", "start_page": 340, "end_page": 341},
    {"title": "v8_Political_Trial", "start_page": 342, "end_page": 343},
    {"title": "v8_Lassalle", "start_page": 344, "end_page": 346},
    {"title": "v8_War_Discord_between_the_Government_and_the_Southern_Slavs", "start_page": 347, "end_page": 349},
    {"title": "v8_The_War_in_Hungary", "start_page": 350, "end_page": 353},
    {"title": "v8_The_Division_of_Labour_in_the_Kolnische_Zeitung", "start_page": 354, "end_page": 359},
    {"title": "v8_From_the_Theatre_of_War", "start_page": 360, "end_page": 361},
    {"title": "v8_Democratic_Pan_Slavism", "start_page": 362, "end_page": 378},
    {"title": "v8_Prussian_Financial_Administration_under_Bodelschwingh_and_Co", "start_page": 379, "end_page": 389},
    {"title": "v8_Stein", "start_page": 390, "end_page": 391},
    {"title": "v8_Three_Stars_versus_Triangle", "start_page": 392, "end_page": 393},
    {"title": "v8_The_Vienna_Correspondent_of_the_Kolnische_Zeitung", "start_page": 394, "end_page": 395},
    {"title": "v8_Saedt", "start_page": 396, "end_page": 397},
    {"title": "v8_The_Kolnische_Zeitung_on_the_Magyar_Struggle", "start_page": 398, "end_page": 403},
    {"title": "v8_Bulletin_No_22", "start_page": 404, "end_page": 408},
    {"title": "v8_Croats_and_Slovaks_in_Hungary", "start_page": 409, "end_page": 411},
    {"title": "v8_Military_Art_of_the_Royal_Imperial_Army", "start_page": 412, "end_page": 413},
    {"title": "v8_Proclamation_of_a_Republic_in_Rome", "start_page": 414, "end_page": 414},
    {"title": "v8_Windischgratz_Jews_and_Southern_Slavs", "start_page": 415, "end_page": 417},
    {"title": "v8_Further_Contribution_on_the_Old_Prussian_Financial_Administration", "start_page": 418, "end_page": 420},
    {"title": "v8_A_Denunciation", "start_page": 421, "end_page": 422},
    {"title": "v8_Bulletin_No_23_From_the_Theatre_of_War", "start_page": 423, "end_page": 426},
    {"title": "v8_Latest_News_of_the_Magyars_Victory_on_the_Theiss_Brutality_of_the_Austrians_State_of_the_War_in_General", "start_page": 427, "end_page": 429},
    {"title": "v8_More_News_of_the_Magyars", "start_page": 430, "end_page": 431},
    {"title": "v8_The_Russians_in_Transylvania", "start_page": 432, "end_page": 439},
    {"title": "v8_Russian_Invasion_Serbs_Prospects_for_the_Austrians_From_the_Theatre_of_War", "start_page": 440, "end_page": 444},
    
    # March 1849 Articles (Items 122-130)
    {"title": "v8_Speech_from_the_Throne", "start_page": 445, "end_page": 450},
    {"title": "v8_From_the_Theatre_of_War_in_Transylvania_and_Hungary", "start_page": 451, "end_page": 455},
    {"title": "v8_European_War_Inevitable", "start_page": 456, "end_page": 457},
    {"title": "v8_From_the_Theatre_of_War_2", "start_page": 458, "end_page": 462},
    {"title": "v8_Lassalle_2", "start_page": 463, "end_page": 465},
    {"title": "v8_The_War_in_Hungary_2", "start_page": 466, "end_page": 470},
    {"title": "v8_From_the_Hungarian_Theatre_of_War", "start_page": 471, "end_page": 473},
    {"title": "v8_The_Proceedings_against_Lassalle", "start_page": 474, "end_page": 476},
    {"title": "v8_Magyar_Victory", "start_page": 477, "end_page": 482},
    
    # Preparatory Materials (Items 1-3)
    {"title": "v8_Karl_Marx_Prohibition_of_a_Torchlight_Procession_for_Gottschalk_Note", "start_page": 483, "end_page": 483},
    {"title": "v8_Karl_Marx_Fragment_of_the_Draft_of_The_Bourgeoisie_and_the_Counter_Revolution", "start_page": 484, "end_page": 484},
    {"title": "v8_Karl_Marx_Draft_of_a_Speech_at_the_Trial_of_the_Neue_Rheinische_Zeitung", "start_page": 485, "end_page": 494}

]

def main():
    """
    Split the Volume 8 PDF into one PDF per ``table_of_contents`` entry.

    The source volume is read from the user's Downloads folder and every
    chapter is written into ``marx_chapters_v8`` through ``extract_chapter``.
    A progress line is printed per entry, followed by a summary and a listing
    of the PDFs found in the output folder. Nothing is returned and errors are
    reported on stdout rather than raised.
    """
    # The source PDF is read directly from ~/Downloads.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 8_ Ma - Karl Marx.pdf")

    output_dir = "marx_chapters_v8"
    # exist_ok makes the call safe when the folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        used_paths = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_path = os.path.join(output_dir, f"{clean_title}.pdf")

            # Entries that map to the same file name would silently overwrite
            # each other; later ones get a numeric suffix instead.
            suffix = 2
            while output_path in used_paths:
                output_path = os.path.join(output_dir, f"{clean_title}_{suffix}.pdf")
                suffix += 1
            used_paths.add(output_path)

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every chapter was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print(f"\nextraction finished with {failed_count} failure(s)")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including ones left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        # Best-effort script: report the problem (e.g. unreadable source PDF)
        # instead of raising a traceback.
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run the extraction when this code is executed directly
# (as a script or as a notebook cell) rather than imported.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 8_ Ma - Karl Marx.pdf
folder_name: marx_chapters_v8
expected_file_numbers 133

[1/133] is processing: v8_The_Crisis_in_Berlin
✓ generated：marx_chapters_v8/v8_The_Crisis_in_Berlin.pdf (page_num 3-6 -> PDF_page 32-35)
[2/133] is processing: v8_The_Ex_Principality
✓ generated：marx_chapters_v8/v8_The_Ex_Principality.pdf (page_num 7-8 -> PDF_page 36-37)
[3/133] is processing: v8_The_New_Institutions_Progress_in_Switzerland
✓ generated：marx_chapters_v8/v8_The_New_Institutions_Progress_in_Switzerland.pdf (page_num 9-13 -> PDF_page 38-42)
[4/133] is processing: v8_Counter_Revolution_in_Berlin
✓ generated：marx_chapters_v8/v8_Counter_Revolution_in_Berlin.pdf (page_num 14-19 -> PDF_page 43-48)
[5/133] is processing: v8_Decision_of_the_Berlin_National_Assembly
✓ generated：marx_chapters_v8/v8_Decision_of_the_Berlin_National_Assembly.pdf (page_num 20-21 -> PDF_page 49-50)
[6/133] is processing: v8_Sitting_of_the_Swiss

In [15]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Characters that are reserved in file names are dropped, spaces become
    underscores, and dots, commas, round and square brackets are removed.
    When nothing but whitespace is left, "Unknown" is returned instead.
    """
    # Reserved characters go first, then spaces are mapped to underscores.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    # Remaining punctuation is removed one character at a time.
    for unwanted in ('.', ',', '(', ')', '[', ']'):
        cleaned = cleaned.replace(unwanted, '')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Copy one chapter of the source PDF into its own PDF file.

    Args:
        input_pdf_reader: open reader whose ``pages`` sequence is indexed.
        output_path: path of the PDF file to create.
        start_page: first printed page number of the chapter (inclusive).
        end_page: last printed page number of the chapter (inclusive).
        title: chapter title, used only in the failure message.
        page_offset: value added to a printed page number to obtain the
            zero-based index into ``input_pdf_reader.pages``.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed, never raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before writing: a reversed range would otherwise yield a
        # page-less file reported as success, and a negative index could be
        # taken as counting from the end of the document.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document (1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Deliberately broad: one bad chapter must not stop the whole batch.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 9 to extract. Each entry gives the output file stem
# ("title") and the inclusive printed page range ("start_page"/"end_page");
# extract_chapter shifts these numbers by its fixed offset to index the PDF.
table_of_contents = [
    # March 1849 Articles (Items 1-47)
    {"title": "v9_The_State_of_Trade", "start_page": 3, "end_page": 8},
    {"title": "v9_From_the_Theatre_of_War", "start_page": 9, "end_page": 12},
    {"title": "v9_The_Military_Reports_of_the_Kolnische_Zeitung", "start_page": 13, "end_page": 18},
    {"title": "v9_The_26th_Austrian_Army_Bulletin", "start_page": 19, "end_page": 21},
    {"title": "v9_The_English_Soldier_Oath_of_Allegiance", "start_page": 22, "end_page": 23},
    {"title": "v9_From_the_Theatre_of_War_2", "start_page": 24, "end_page": 26},
    {"title": "v9_The_27th_Bulletin_Military_Reports", "start_page": 27, "end_page": 32},
    {"title": "v9_Ruge", "start_page": 33, "end_page": 35},
    {"title": "v9_The_March_Association", "start_page": 36, "end_page": 37},
    {"title": "v9_From_the_Theatre_of_War_3", "start_page": 38, "end_page": 41},
    {"title": "v9_The_Model_Republic", "start_page": 42, "end_page": 46},
    {"title": "v9_Vienna_and_Frankfurt", "start_page": 47, "end_page": 49},
    {"title": "v9_Three_New_Bills", "start_page": 50, "end_page": 54},
    {"title": "v9_Government_Provocations", "start_page": 55, "end_page": 57},
    {"title": "v9_An_Austrian_Report_Published_in_the_Augsburg_Allgemeine_Zeitung", "start_page": 58, "end_page": 60},
    {"title": "v9_From_the_Theatre_of_War_4", "start_page": 61, "end_page": 62},
    {"title": "v9_Austrian_Defeats", "start_page": 63, "end_page": 64},
    {"title": "v9_The_Hohenzollern_General_Plan_of_Reform", "start_page": 65, "end_page": 69},
    {"title": "v9_The_Censorship", "start_page": 70, "end_page": 71},
    {"title": "v9_From_the_Theatre_of_War_5", "start_page": 72, "end_page": 74},
    {"title": "v9_From_the_Theatre_of_War_6", "start_page": 75, "end_page": 75},
    {"title": "v9_From_the_Theatre_of_War_7", "start_page": 76, "end_page": 78},
    {"title": "v9_The_Milliard", "start_page": 79, "end_page": 83},
    {"title": "v9_The_Frankfurt_March_Association_and_the_Neue_Rheinische_Zeitung", "start_page": 84, "end_page": 85},
    {"title": "v9_Draft_Address_of_the_Second_Chamber", "start_page": 86, "end_page": 89},
    {"title": "v9_From_the_Theatre_of_War_8", "start_page": 90, "end_page": 90},
    {"title": "v9_From_the_Theatre_of_War_9", "start_page": 91, "end_page": 94},
    {"title": "v9_From_the_Theatre_of_War_10", "start_page": 95, "end_page": 101},
    {"title": "v9_Military_Dictatorship_in_Austria", "start_page": 102, "end_page": 107},
    {"title": "v9_The_18th_of_March", "start_page": 108, "end_page": 108},
    {"title": "v9_The_Neue_Preussische_Zeitung_on_the_Occasion_of_the_18th_of_March", "start_page": 109, "end_page": 109},
    {"title": "v9_From_the_Theatre_of_War_11", "start_page": 110, "end_page": 112},
    {"title": "v9_Military_Reports_from_Hungary", "start_page": 113, "end_page": 120},
    {"title": "v9_From_the_Theatre_of_War_12", "start_page": 121, "end_page": 122},
    {"title": "v9_From_the_Theatre_of_War_13", "start_page": 123, "end_page": 124},
    {"title": "v9_The_Hohenzollern_Press_Bill", "start_page": 125, "end_page": 132},
    {"title": "v9_From_the_Theatre_of_War_14", "start_page": 133, "end_page": 133},
    {"title": "v9_From_the_Theatre_of_War_15", "start_page": 134, "end_page": 134},
    {"title": "v9_The_Debate_on_the_Address_in_Berlin", "start_page": 135, "end_page": 143},
    {"title": "v9_From_the_Theatre_of_War_The_Confused_Situation_in_Serbia", "start_page": 144, "end_page": 147},
    {"title": "v9_The_War_in_Italy_and_Hungary", "start_page": 148, "end_page": 151},
    {"title": "v9_From_the_Theatre_of_War_16", "start_page": 152, "end_page": 155},
    {"title": "v9_From_the_Theatre_of_War_Italy", "start_page": 156, "end_page": 157},
    {"title": "v9_From_the_Theatre_of_War_17", "start_page": 158, "end_page": 163},
    {"title": "v9_From_the_Theatre_of_War_Italy_2", "start_page": 164, "end_page": 166},
    {"title": "v9_Latest_News_from_Hungary", "start_page": 167, "end_page": 167},
    {"title": "v9_From_the_Theatre_of_War_More_Russian_Troops", "start_page": 168, "end_page": 168},
    
    # March-April 1849 Articles (Items 48-50)
    {"title": "v9_The_Defeat_of_the_Piedmontese", "start_page": 169, "end_page": 177},
    {"title": "v9_From_the_Theatre_of_War_18", "start_page": 178, "end_page": 180},
    {"title": "v9_From_the_Theatre_of_War_19", "start_page": 181, "end_page": 184},
    
    # April 1849 Articles (Items 51-100)
    {"title": "v9_From_the_Theatre_of_War_20", "start_page": 185, "end_page": 187},
    {"title": "v9_From_the_Theatre_of_War_21", "start_page": 188, "end_page": 190},
    {"title": "v9_French_Foreign_Policy", "start_page": 191, "end_page": 192},
    {"title": "v9_The_Comedy_with_the_Imperial_Crown", "start_page": 193, "end_page": 194},
    {"title": "v9_The_Call_Up_of_the_Army_Reserve_in_Prussia", "start_page": 195, "end_page": 196},
    {"title": "v9_Wage_Labour_and_Capital", "start_page": 197, "end_page": 228},
    {"title": "v9_The_Southern_Slavs_and_the_Austrian_Monarchy", "start_page": 229, "end_page": 230},
    {"title": "v9_The_War_in_Hungary", "start_page": 231, "end_page": 236},
    {"title": "v9_From_the_Theatre_of_War_22", "start_page": 237, "end_page": 241},
    {"title": "v9_From_the_Theatre_of_War_23", "start_page": 242, "end_page": 247},
    {"title": "v9_From_the_Theatre_of_War_24", "start_page": 248, "end_page": 249},
    {"title": "v9_Austrian_Lamentations", "start_page": 250, "end_page": 251},
    {"title": "v9_From_the_Theatre_of_War_25", "start_page": 252, "end_page": 254},
    {"title": "v9_From_the_Theatre_of_War_26", "start_page": 255, "end_page": 256},
    {"title": "v9_The_Extradition_of_Political_Refugees", "start_page": 257, "end_page": 258},
    {"title": "v9_From_the_Theatre_of_War_The_German_Navy", "start_page": 259, "end_page": 260},
    {"title": "v9_From_the_Theatre_of_War_Windischgratz_Comments_on_the_Imposed_Constitution", "start_page": 261, "end_page": 264},
    {"title": "v9_Rumours_of_the_Extermination_of_the_Rebels", "start_page": 265, "end_page": 266},
    {"title": "v9_A_Magyar_Victory", "start_page": 267, "end_page": 267},
    {"title": "v9_A_Magyar_Victory_2", "start_page": 268, "end_page": 270},
    {"title": "v9_An_Austrian_Defeat", "start_page": 271, "end_page": 276},
    {"title": "v9_From_the_Theatre_of_War_27", "start_page": 277, "end_page": 280},
    {"title": "v9_Parliamentary_Decisions_Are_Disregarded_Manteuffel_Spies", "start_page": 281, "end_page": 281},
    {"title": "v9_Statement", "start_page": 282, "end_page": 282},
    {"title": "v9_From_the_Theatre_of_War_28", "start_page": 283, "end_page": 285},
    {"title": "v9_From_the_Theatre_of_War_29", "start_page": 286, "end_page": 287},
    {"title": "v9_From_the_Theatre_of_War_Peasant_War_in_the_Bukovina", "start_page": 288, "end_page": 290},
    {"title": "v9_Elberfeld", "start_page": 291, "end_page": 291},
    {"title": "v9_From_the_Theatre_of_War_30", "start_page": 292, "end_page": 294},
    {"title": "v9_Sitting_of_the_Second_Chamber_in_Berlin_April_13", "start_page": 295, "end_page": 298},
    {"title": "v9_The_Slovaks_The_So_Called_Dembinski_Bulletin", "start_page": 299, "end_page": 301},
    {"title": "v9_From_the_Theatre_of_War_31", "start_page": 302, "end_page": 305},
    {"title": "v9_Magyar_Rodomontade_of_the_Kolnische_Zeitung", "start_page": 306, "end_page": 306},
    {"title": "v9_The_New_Croatian_Slavonian_Dalmatian_Robber_State", "start_page": 307, "end_page": 310},
    {"title": "v9_The_Russians", "start_page": 311, "end_page": 313},
    {"title": "v9_From_the_Theatre_of_War_32", "start_page": 314, "end_page": 317},
    {"title": "v9_From_the_Theatre_of_War_33", "start_page": 318, "end_page": 319},
    {"title": "v9_The_Debate_on_the_Law_on_Posters", "start_page": 320, "end_page": 329},
    {"title": "v9_From_the_Theatre_of_War_34", "start_page": 330, "end_page": 332},
    {"title": "v9_From_the_Theatre_of_War_35", "start_page": 333, "end_page": 334},
    {"title": "v9_The_Hungarian_War", "start_page": 335, "end_page": 338},
    {"title": "v9_Lassalle", "start_page": 339, "end_page": 341},
    {"title": "v9_From_the_Theatre_of_War_36", "start_page": 342, "end_page": 345},
    {"title": "v9_Hungarian_Victories", "start_page": 346, "end_page": 349},
    {"title": "v9_From_the_Theatre_of_War_37", "start_page": 350, "end_page": 351},
    {"title": "v9_Hungarian_Advances_Excitement_in_Vienna", "start_page": 352, "end_page": 352},
    {"title": "v9_Magyar_Advances", "start_page": 353, "end_page": 356},
    {"title": "v9_Dissolution_of_the_Second_Chamber", "start_page": 357, "end_page": 358},
    {"title": "v9_Posen", "start_page": 359, "end_page": 362},
    {"title": "v9_From_the_Theatre_of_War_38", "start_page": 363, "end_page": 367},
    
    # May 1849 Articles (Items 101-140)
    # NOTE(review): this heading previously read "Items 101-141", but only 40
    # entries follow (140 in total) - confirm that no article is missing.
    {"title": "v9_From_the_Theatre_of_War_39", "start_page": 368, "end_page": 369},
    {"title": "v9_The_Counter_Revolutionary_Plans_in_Berlin", "start_page": 370, "end_page": 371},
    {"title": "v9_Lassalle_2", "start_page": 372, "end_page": 376},
    {"title": "v9_A_Prussian_Kick_for_the_Frankfurt_Assembly", "start_page": 377, "end_page": 379},
    {"title": "v9_The_Dissolution", "start_page": 380, "end_page": 380},
    {"title": "v9_From_the_Theatre_of_War_40", "start_page": 381, "end_page": 382},
    {"title": "v9_Lassalle_3", "start_page": 383, "end_page": 388},
    {"title": "v9_Prohibition_of_the_Meeting_of_the_Rhenish_Municipal_Councils", "start_page": 389, "end_page": 389},
    {"title": "v9_From_the_Theatre_of_War_41", "start_page": 390, "end_page": 391},
    {"title": "v9_The_Congress_of_Rhenish_Towns", "start_page": 392, "end_page": 393},
    {"title": "v9_The_Third_Party_in_the_Alliance", "start_page": 394, "end_page": 395},
    {"title": "v9_From_the_Theatre_of_War_42", "start_page": 396, "end_page": 398},
    {"title": "v9_News_from_Southern_Germany", "start_page": 399, "end_page": 399},
    {"title": "v9_News_from_Hungary", "start_page": 400, "end_page": 401},
    {"title": "v9_Longing_for_a_State_of_Siege", "start_page": 402, "end_page": 403},
    {"title": "v9_The_Situation_in_Hungary", "start_page": 404, "end_page": 405},
    {"title": "v9_From_the_Theatre_of_War_43", "start_page": 406, "end_page": 406},
    {"title": "v9_Kossuth_Proclamation", "start_page": 407, "end_page": 408},
    {"title": "v9_From_the_Theatre_of_War_44", "start_page": 409, "end_page": 410},
    {"title": "v9_The_Prussian_Army_and_the_Revolutionary_Uprising_of_the_People", "start_page": 411, "end_page": 412},
    {"title": "v9_A_Question_to_the_Workers", "start_page": 413, "end_page": 413},
    {"title": "v9_The_Tsar_and_His_Subordinate_Knyazes", "start_page": 414, "end_page": 415},
    {"title": "v9_The_Approaching_Revolution", "start_page": 416, "end_page": 417},
    {"title": "v9_The_Deeds_of_the_Hohenzollern_Dynasty", "start_page": 418, "end_page": 422},
    {"title": "v9_The_Situation_in_Elberfeld", "start_page": 423, "end_page": 423},
    {"title": "v9_From_the_Theatre_of_War_45", "start_page": 424, "end_page": 425},
    {"title": "v9_Counter_Revolutionary_Offensive_and_Victory_of_the_Revolution", "start_page": 426, "end_page": 427},
    {"title": "v9_The_Uprising_in_Elberfeld_and_Dusseldorf", "start_page": 428, "end_page": 429},
    {"title": "v9_The_New_Prussian_Constitution", "start_page": 430, "end_page": 431},
    {"title": "v9_The_Sanguinary_Law_in_Dusseldorf", "start_page": 432, "end_page": 433},
    {"title": "v9_The_Uprising_in_the_Berg_Country", "start_page": 434, "end_page": 435},
    {"title": "v9_The_Venal_Baseness_of_the_Kolnische_Zeitung", "start_page": 436, "end_page": 436},
    {"title": "v9_The_Kreuz_Zeitung", "start_page": 437, "end_page": 437},
    {"title": "v9_A_New_Prussian_Kick_for_the_Frankfurt_Assembly", "start_page": 438, "end_page": 439},
    {"title": "v9_The_New_Martial_Law_Charter", "start_page": 440, "end_page": 446},
    {"title": "v9_Elberfeld_2", "start_page": 447, "end_page": 449},
    {"title": "v9_The_Worthy_Schwanbeck", "start_page": 450, "end_page": 450},
    {"title": "v9_The_Summary_Suppression_of_the_Neue_Rheinische_Zeitung", "start_page": 451, "end_page": 454},
    {"title": "v9_Hungary", "start_page": 455, "end_page": 463},
    {"title": "v9_To_My_People", "start_page": 464, "end_page": 466}
    
]

def main():
    """
    Split the Volume 9 PDF into one PDF per ``table_of_contents`` entry.

    The source volume is read from the user's Downloads folder and every
    chapter is written into ``marx_chapters_v9`` through ``extract_chapter``.
    A progress line is printed per entry, followed by a summary and a listing
    of the PDFs found in the output folder. Nothing is returned and errors are
    reported on stdout rather than raised.
    """
    # The source PDF is read directly from ~/Downloads.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 9_ Ka - Karl Marx.pdf")

    output_dir = "marx_chapters_v9"
    # exist_ok makes the call safe when the folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        used_paths = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_path = os.path.join(output_dir, f"{clean_title}.pdf")

            # Entries that map to the same file name would silently overwrite
            # each other; later ones get a numeric suffix instead.
            suffix = 2
            while output_path in used_paths:
                output_path = os.path.join(output_dir, f"{clean_title}_{suffix}.pdf")
                suffix += 1
            used_paths.add(output_path)

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every chapter was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print(f"\nextraction finished with {failed_count} failure(s)")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including ones left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        # Best-effort script: report the problem (e.g. unreadable source PDF)
        # instead of raising a traceback.
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run the extraction when this code is executed directly
# (as a script or as a notebook cell) rather than imported.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 9_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v9
expected_file_numbers 140

[1/140] is processing: v9_The_State_of_Trade
✓ generated：marx_chapters_v9/v9_The_State_of_Trade.pdf (page_num 3-8 -> PDF_page 34-39)
[2/140] is processing: v9_From_the_Theatre_of_War
✓ generated：marx_chapters_v9/v9_From_the_Theatre_of_War.pdf (page_num 9-12 -> PDF_page 40-43)
[3/140] is processing: v9_The_Military_Reports_of_the_Kolnische_Zeitung
✓ generated：marx_chapters_v9/v9_The_Military_Reports_of_the_Kolnische_Zeitung.pdf (page_num 13-18 -> PDF_page 44-49)
[4/140] is processing: v9_The_26th_Austrian_Army_Bulletin
✓ generated：marx_chapters_v9/v9_The_26th_Austrian_Army_Bulletin.pdf (page_num 19-21 -> PDF_page 50-52)
[5/140] is processing: v9_The_English_Soldier_Oath_of_Allegiance
✓ generated：marx_chapters_v9/v9_The_English_Soldier_Oath_of_Allegiance.pdf (page_num 22-23 -> PDF_page 53-54)
[6/140] is processing: v9_From_the_T

In [17]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Characters that are reserved in file names are dropped, spaces become
    underscores, and dots, commas, round and square brackets are removed.
    When nothing but whitespace is left, "Unknown" is returned instead.
    """
    # Reserved characters go first, then spaces are mapped to underscores.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    # Remaining punctuation is removed one character at a time.
    for unwanted in ('.', ',', '(', ')', '[', ']'):
        cleaned = cleaned.replace(unwanted, '')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=23):
    """
    Copy one chapter of the source PDF into its own PDF file.

    Args:
        input_pdf_reader: open reader whose ``pages`` sequence is indexed.
        output_path: path of the PDF file to create.
        start_page: first printed page number of the chapter (inclusive).
        end_page: last printed page number of the chapter (inclusive).
        title: chapter title, used only in the failure message.
        page_offset: value added to a printed page number to obtain the
            zero-based index into ``input_pdf_reader.pages``.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed, never raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before writing: a reversed range would otherwise yield a
        # page-less file reported as success, and a negative index could be
        # taken as counting from the end of the document.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document (1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Deliberately broad: one bad chapter must not stop the whole batch.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 10 to extract. Each entry gives the output file stem
# ("title") and the inclusive printed page range ("start_page"/"end_page");
# extract_chapter shifts these numbers by its fixed offset to index the PDF.
# Titles must be unique: main() derives the output file name from the title,
# so a repeated title makes a later chapter overwrite an earlier one.
table_of_contents = [
    # Main Works (Items 1-43)
    # NOTE(review): 31 entries are listed under this heading and the page
    # ranges have gaps, so some works appear to be left out - confirm intent.
    {"title": "v10_F_Engels_The_German_Social_Democrats_and_The_Times", "start_page": 3, "end_page": 4},
    {"title": "v10_K_Marx_and_F_Engels_Announcement_of_the_Neue_Rheinische_Zeitung_Politisch_okonomische_Revue", "start_page": 5, "end_page": 6},
    {"title": "v10_K_Marx_and_F_Engels_Announcement", "start_page": 41, "end_page": 44},
    {"title": "v10_K_Marx_The_Class_Struggles_in_France_1848_to_1850", "start_page": 45, "end_page": 146},
    {"title": "v10_K_Marx_and_F_Engels_Reviews_from_the_Neue_Rheinische_Zeitung_Politisch_okonomische_Revue_No_2", "start_page": 241, "end_page": 256},
    {"title": "v10_K_Marx_and_F_Engels_Review_January_February_1850", "start_page": 257, "end_page": 270},
    {"title": "v10_K_Marx_and_F_Engels_Address_of_the_Central_Authority_to_the_League_March_1850", "start_page": 277, "end_page": 287},
    {"title": "v10_K_Marx_and_F_Engels_Reviews_from_the_Neue_Rheinische_Zeitung_Politisch_okonomische_Revue_No_4", "start_page": 301, "end_page": 337},
    {"title": "v10_K_Marx_and_F_Engels_Review_March_April_1850", "start_page": 338, "end_page": 341},
    {"title": "v10_K_Marx_Louis_Napoleon_and_Fould", "start_page": 342, "end_page": 344},
    {"title": "v10_K_Marx_and_F_Engels_Gottfried_Kinkel", "start_page": 345, "end_page": 347},
    {"title": "v10_K_Marx_and_F_Engels_Editorial_Note", "start_page": 348, "end_page": 348},
    {"title": "v10_K_Marx_and_F_Engels_Statement", "start_page": 349, "end_page": 351},
    {"title": "v10_K_Marx_and_F_Engels_To_the_Editor_of_The_Times", "start_page": 352, "end_page": 352},
    {"title": "v10_K_Marx_and_F_Engels_A_Letter_to_the_Prussian_Ambassador_in_London_Baron_Bunsen", "start_page": 370, "end_page": 370},
    {"title": "v10_K_Marx_and_F_Engels_Address_of_the_Central_Authority_to_the_League_June_1850", "start_page": 371, "end_page": 377},
    {"title": "v10_K_Marx_and_F_Engels_The_Prussian_Refugees", "start_page": 378, "end_page": 379},
    {"title": "v10_K_Marx_and_F_Engels_To_the_Editor_of_The_Spectator", "start_page": 380, "end_page": 380},
    {"title": "v10_K_Marx_and_F_Engels_Prussian_Spies_in_London", "start_page": 381, "end_page": 384},
    {"title": "v10_K_Marx_To_the_Editor_of_The_Globe", "start_page": 385, "end_page": 386},
    {"title": "v10_K_Marx_and_F_Engels_Statement_to_the_Editor_of_the_Neue_Deutsche_Zeitung", "start_page": 387, "end_page": 388},
    {"title": "v10_K_Marx_To_the_Chairman_of_a_Refugee_Meeting_in_London", "start_page": 389, "end_page": 389},
    {"title": "v10_K_Marx_and_F_Engels_To_the_Editors_of_the_Weser_Zeitung", "start_page": 390, "end_page": 391},
    {"title": "v10_K_Marx_and_F_Engels_Statement_on_Resignation_from_the_German_Workers_Educational_Society_in_London", "start_page": 483, "end_page": 483},
    {"title": "v10_K_Marx_and_F_Engels_A_Letter_to_Adam_Barthelemy_and_Vidil", "start_page": 484, "end_page": 484},
    {"title": "v10_K_Marx_and_F_Engels_Editorial_Comment_on_the_Article_Tailoring_in_London_or_the_Struggle_between_Big_and_Small_Capital_by_J_G_Eccarius", "start_page": 485, "end_page": 485},
    {"title": "v10_K_Marx_and_F_Engels_Review_May_to_October_1850", "start_page": 490, "end_page": 532},
    {"title": "v10_K_Marx_and_F_Engels_Draft_Statement_by_Heinrich_Bauer_and_Karl_Pfander_on_the_Funds_of_the_German_Workers_Educational_Society_in_London", "start_page": 533, "end_page": 534},
    # Second work titled "Statement"; the _2 suffix keeps its file from
    # replacing the one extracted for pages 349-351.
    {"title": "v10_K_Marx_and_F_Engels_Statement_2", "start_page": 535, "end_page": 536},
    {"title": "v10_K_Marx_and_F_Engels_Introduction_to_the_Leaflet_of_L_A_Blanqui_Toast_Sent_to_the_Refugee_Committee", "start_page": 537, "end_page": 539},
    {"title": "v10_K_Marx_The_Constitution_of_the_French_Republic_Adopted_November_4_1848", "start_page": 567, "end_page": 582},

    # Preparatory Materials (Items 1-2)
    {"title": "v10_K_Marx_On_Germany_Note_for_the_Review_May_October_1850", "start_page": 583, "end_page": 583},
    {"title": "v10_K_Marx_Reflections", "start_page": 584, "end_page": 594},

    # Appendices (Item 1)
    {"title": "v10_Permit_to_Leave_Switzerland_Issued_to_Frederick_Engels", "start_page": 595, "end_page": 595}

]

def main():
    """
    Split the Volume 10 PDF into one PDF per ``table_of_contents`` entry.

    The source volume is read from the user's Downloads folder and every
    chapter is written into ``marx_chapters_v10`` through ``extract_chapter``.
    A progress line is printed per entry, followed by a summary and a listing
    of the PDFs found in the output folder. Nothing is returned and errors are
    reported on stdout rather than raised.
    """
    # The source PDF is read directly from ~/Downloads.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 10_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v10"
    # exist_ok makes the call safe when the folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        used_paths = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_path = os.path.join(output_dir, f"{clean_title}.pdf")

            # Entries that map to the same file name would silently overwrite
            # each other; later ones get a numeric suffix instead.
            suffix = 2
            while output_path in used_paths:
                output_path = os.path.join(output_dir, f"{clean_title}_{suffix}.pdf")
                suffix += 1
            used_paths.add(output_path)

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every chapter was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print(f"\nextraction finished with {failed_count} failure(s)")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including ones left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        # Best-effort script: report the problem (e.g. unreadable source PDF)
        # instead of raising a traceback.
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run the extraction when this code is executed directly
# (as a script or as a notebook cell) rather than imported.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 10_ M - Karl Marx.pdf
folder_name: marx_chapters_v10
expected_file_numbers 34

[1/34] is processing: v10_F_Engels_The_German_Social_Democrats_and_The_Times
✓ generated：marx_chapters_v10/v10_F_Engels_The_German_Social_Democrats_and_The_Times.pdf (page_num 3-4 -> PDF_page 27-28)
[2/34] is processing: v10_K_Marx_and_F_Engels_Announcement_of_the_Neue_Rheinische_Zeitung_Politisch_okonomische_Revue
✓ generated：marx_chapters_v10/v10_K_Marx_and_F_Engels_Announcement_of_the_Neue_Rheinische_Zeitung_Politisch_okonomische_Revue.pdf (page_num 5-6 -> PDF_page 29-30)
[3/34] is processing: v10_K_Marx_and_F_Engels_Announcement
✓ generated：marx_chapters_v10/v10_K_Marx_and_F_Engels_Announcement.pdf (page_num 41-44 -> PDF_page 65-68)
[4/34] is processing: v10_K_Marx_The_Class_Struggles_in_France_1848_to_1850
✓ generated：marx_chapters_v10/v10_K_Marx_The_Class_Struggles_in_France_1848_to_1850.pdf (page_num 45-146 -> PDF_page 

In [18]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Characters that are reserved in file names are dropped, spaces become
    underscores, and dots, commas, round and square brackets are removed.
    When nothing but whitespace is left, "Unknown" is returned instead.
    """
    # Reserved characters go first, then spaces are mapped to underscores.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    # Remaining punctuation is removed one character at a time.
    for unwanted in ('.', ',', '(', ')', '[', ']'):
        cleaned = cleaned.replace(unwanted, '')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=26):
    """
    Copy one chapter of the source PDF into its own PDF file.

    Args:
        input_pdf_reader: open reader whose ``pages`` sequence is indexed.
        output_path: path of the PDF file to create.
        start_page: first printed page number of the chapter (inclusive).
        end_page: last printed page number of the chapter (inclusive).
        title: chapter title, used only in the failure message.
        page_offset: value added to a printed page number to obtain the
            zero-based index into ``input_pdf_reader.pages``.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed, never raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before writing: a reversed range would otherwise yield a
        # page-less file reported as success, and a negative index could be
        # taken as counting from the end of the document.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document (1-{total_pages})"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Deliberately broad: one bad chapter must not stop the whole batch.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 11 to extract. Each entry gives the output file stem
# ("title") and the inclusive printed page range ("start_page"/"end_page");
# extract_chapter shifts these numbers by its fixed offset to index the PDF.
table_of_contents = [
    # Main Works (Items 1-37)
    # NOTE(review): 32 entries are listed under this heading and the page
    # ranges have gaps, so some works appear to be left out - confirm intent.
    {"title": "v11_K_Marx_Statement_and_Accompanying_Letter_to_the_Editorial_Board_of_the_Augsburg_Allgemeine_Zeitung_October_4_1851", "start_page": 97, "end_page": 98},
    {"title": "v11_K_Marx_The_Eighteenth_Brumaire_of_Louis_Bonaparte", "start_page": 99, "end_page": 197},
    {"title": "v11_K_Marx_and_F_Engels_To_the_Editor_of_The_Times", "start_page": 210, "end_page": 211},
    {"title": "v11_K_Marx_Statement_Sent_to_the_Editorial_Board_of_the_Kolnische_Zeitung", "start_page": 223, "end_page": 223},
    {"title": "v11_K_Marx_General_Klapka", "start_page": 224, "end_page": 226},
    {"title": "v11_K_Marx_and_F_Engels_The_Great_Men_of_the_Exile", "start_page": 227, "end_page": 326},
    {"title": "v11_K_Marx_The_Elections_in_England_Tories_and_Whigs", "start_page": 327, "end_page": 332},
    {"title": "v11_K_Marx_The_Chartists", "start_page": 333, "end_page": 341},
    {"title": "v11_K_Marx_Corruption_at_Elections", "start_page": 342, "end_page": 347},
    {"title": "v11_K_Marx_Result_of_the_Elections", "start_page": 348, "end_page": 353},
    {"title": "v11_K_Marx_Movements_of_Mazzini_and_Kossuth_League_with_Louis_Napoleon_Palmerston", "start_page": 354, "end_page": 356},
    {"title": "v11_K_Marx_Pauperism_and_Free_Trade_The_Approaching_Commercial_Crisis", "start_page": 357, "end_page": 363},
    {"title": "v11_K_Marx_Political_Consequences_of_the_Commercial_Excitement", "start_page": 364, "end_page": 368},
    {"title": "v11_K_Marx_Political_Parties_and_Prospects", "start_page": 369, "end_page": 372},
    {"title": "v11_K_Marx_Attempts_to_Form_a_New_Opposition_Party", "start_page": 373, "end_page": 377},
    {"title": "v11_K_Marx_and_F_Engels_Public_Statement_to_the_Editors_of_the_English_Press", "start_page": 378, "end_page": 379},
    {"title": "v11_K_Marx_The_Trials_at_Cologne_To_the_Editor_of_The_Morning_Advertiser", "start_page": 380, "end_page": 381},
    {"title": "v11_K_Marx_Kossuth_Mazzini_and_Louis_Napoleon", "start_page": 382, "end_page": 383},
    {"title": "v11_K_Marx_and_F_Engels_A_Final_Declaration_on_the_Late_Cologne_Trials", "start_page": 384, "end_page": 387},
    {"title": "v11_K_Marx_Revelations_Concerning_the_Communist_Trial_in_Cologne", "start_page": 395, "end_page": 457},
    {"title": "v11_K_Marx_Parliament_Vote_of_November_26_Disraeli_Budget", "start_page": 458, "end_page": 464},
    {"title": "v11_K_Marx_A_Reply_to_Kossuth_Secretary", "start_page": 465, "end_page": 465},
    {"title": "v11_K_Marx_The_Defeat_of_the_Ministry", "start_page": 466, "end_page": 470},
    {"title": "v11_K_Marx_A_Superannuated_Administration_Prospects_of_the_Coalition_Ministry", "start_page": 471, "end_page": 476},
    {"title": "v11_K_Marx_Political_Prospects_Commercial_Prosperity_Case_of_Starvation", "start_page": 477, "end_page": 485},
    {"title": "v11_K_Marx_Elections_Financial_Clouds_The_Duchess_of_Sutherland_and_Slavery", "start_page": 486, "end_page": 494},
    {"title": "v11_K_Marx_Capital_Punishment_Mr_Cobden_Pamphlet_Regulations_of_the_Bank_of_England", "start_page": 495, "end_page": 501},
    {"title": "v11_K_Marx_Defense_Finances_Decrease_of_the_Aristocracy_Politics", "start_page": 502, "end_page": 507},
    {"title": "v11_K_Marx_The_Italian_Insurrection_British_Politics", "start_page": 508, "end_page": 512},
    {"title": "v11_K_Marx_The_Attack_on_Francis_Joseph_The_Milan_Riot_British_Politics_Disraeli_Speech_Napoleon_Will", "start_page": 513, "end_page": 521},
    {"title": "v11_K_Marx_Parliamentary_Debates_The_Clergy_Against_Socialism_Starvation", "start_page": 522, "end_page": 527},
    {"title": "v11_K_Marx_Forced_Emigration_Kossuth_and_Mazzini_The_Refugee_Question_Election_Bribery_in_England_Mr_Cobden", "start_page": 528, "end_page": 534}
    
]

def main():
    """
    Split the Volume 11 PDF into one PDF per ``table_of_contents`` entry.

    Reads the source PDF from the user's Downloads folder, creates the
    ``marx_chapters_v11`` folder (relative to the current working directory)
    if it is missing, and calls ``extract_chapter`` for every entry.
    Progress, success/failure counts and a listing of the PDFs found in the
    output folder are printed. Any exception raised while opening or
    processing the source PDF is printed, not re-raised.
    """
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 11_ Ka - Karl Marx.pdf")

    output_dir = "marx_chapters_v11"
    # exist_ok makes the call safe to repeat when the folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run. Compared lower-cased
        # because some file systems treat names that differ only in case as
        # the same file.
        used_names = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            print(f"[{i}/{total}] is processing: {title}")

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            if output_filename.lower() in used_names:
                # An earlier entry already produced this name; reusing it
                # would overwrite that chapter, so add the page range.
                output_filename = f"{clean_title}_p{start_page}-{end_page}.pdf"
                print(f"  duplicate title, saving as: {output_filename}")
            used_names.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF present in the folder, which includes files left
        # behind by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction only when this code is executed as the top-level
# module; nothing runs if it is imported from elsewhere.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 11_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v11
expected_file_numbers 32

[1/32] is processing: v11_K_Marx_Statement_and_Accompanying_Letter_to_the_Editorial_Board_of_the_Augsburg_Allgemeine_Zeitung_October_4_1851
✓ generated：marx_chapters_v11/v11_K_Marx_Statement_and_Accompanying_Letter_to_the_Editorial_Board_of_the_Augsburg_Allgemeine_Zeitung_October_4_1851.pdf (page_num 97-98 -> PDF_page 124-125)
[2/32] is processing: v11_K_Marx_The_Eighteenth_Brumaire_of_Louis_Bonaparte
✓ generated：marx_chapters_v11/v11_K_Marx_The_Eighteenth_Brumaire_of_Louis_Bonaparte.pdf (page_num 99-197 -> PDF_page 126-224)
[3/32] is processing: v11_K_Marx_and_F_Engels_To_the_Editor_of_The_Times
✓ generated：marx_chapters_v11/v11_K_Marx_and_F_Engels_To_the_Editor_of_The_Times.pdf (page_num 210-211 -> PDF_page 237-238)
[4/32] is processing: v11_K_Marx_Statement_Sent_to_the_Editorial_Board_of_the_Kolnische_Zeitung
✓ generated：m

In [21]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string usable as a file name.

    Spaces become underscores. The characters < > : " / | ? * . , ( ) [ ]
    and the backslash are removed. If the result is empty or consists only
    of whitespace, "Unknown" is returned instead.
    """
    # A single pass drops every unwanted character. None of them is a space,
    # so the space-to-underscore step can follow without changing the result.
    without_specials = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)
    cleaned = without_specials.replace(' ', '_')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Write the pages of one chapter from the source PDF to ``output_path``.

    ``start_page`` and ``end_page`` are the inclusive page numbers given in
    ``table_of_contents``. Adding ``page_offset`` converts each of them to a
    zero-based index into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: reader for the source PDF; only ``.pages`` is used.
        output_path: path of the PDF file to create.
        start_page: first page of the chapter.
        end_page: last page of the chapter (inclusive).
        title: chapter title, used only in failure messages.
        page_offset: value added to a page number to obtain its index in
            ``input_pdf_reader.pages``. Defaults to 30, the value that was
            hard-coded here before it became a parameter.

    Returns:
        True if the file was written. False if the page range was rejected
        or any step raised; the reason is printed and nothing is re-raised.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        total_pages = len(input_pdf_reader.pages)

        # With end < start the copy loop would run zero times, and a PDF
        # without pages would be written and reported as a success.
        if end_page < start_page:
            print(f"❌ fail to extract：{title} - invalid page range {start_page}-{end_page}")
            return False

        # Reject indexes outside the document before anything is created.
        if pdf_start < 0 or pdf_end >= total_pages:
            print(
                f"❌ fail to extract：{title} - PDF_page {pdf_start+1}-{pdf_end+1} "
                f"is outside the document ({total_pages} pages)"
            )
            return False

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The indexes are zero-based; +1 shows them as 1-based PDF pages.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from the Volume 12 PDF, one dict per output file.
#   title      - main() passes it through sanitize_filename() and uses the
#                result as the output file name.
#   start_page - first page of the chapter; extract_chapter() adds a fixed
#                offset to it to get the index of the page in the PDF.
#   end_page   - last page of the chapter, inclusive.
# NOTE(review): the "Items N-M" ranges in the section comments cover more
# numbers than there are entries listed under them; presumably they follow
# the numbering of the book's own contents - confirm.
table_of_contents = [
    # 1853 Articles
    # March 1853 (Items 1-4)
    {"title": "v12_K_Marx_and_F_Engels_British_Politics_Disraeli_The_Refugees_Mazzini_in_London_Turkey", "start_page": 3, "end_page": 12},
    {"title": "v12_K_Marx_The_London_Press_Policy_of_Napoleon_on_the_Turkish_Question", "start_page": 18, "end_page": 21},
    
    # April 1853 (Items 5-16)
    {"title": "v12_K_Marx_The_Berlin_Conspiracy", "start_page": 28, "end_page": 31},
    {"title": "v12_K_Marx_The_Berlin_Conspiracy_London_Police_Mazzini_Radetzky", "start_page": 37, "end_page": 39},
    {"title": "v12_K_Marx_Hirsch_Confessions", "start_page": 40, "end_page": 43},
    {"title": "v12_K_Marx_The_New_Financial_Juggle_or_Gladstone_and_the_Pennies", "start_page": 44, "end_page": 49},
    {"title": "v12_K_Marx_Achievements_of_the_Ministry", "start_page": 50, "end_page": 56},
    {"title": "v12_K_Marx_Feargus_O_Connor_Ministerial_Defeats_The_Budget", "start_page": 57, "end_page": 62},
    {"title": "v12_K_Marx_L_S_D_or_Class_Budgets_and_Who_Relieved_by_Them", "start_page": 63, "end_page": 66},
    {"title": "v12_K_Marx_Riot_at_Constantinople_German_Table_Moving_The_Budget", "start_page": 67, "end_page": 74},
    {"title": "v12_K_Marx_Soap_for_the_People_a_Sop_for_The_Times_The_Coalition_Budget", "start_page": 75, "end_page": 81},
    {"title": "v12_K_Marx_and_F_Engels_The_Rocket_Affair_The_Swiss_Insurrection", "start_page": 82, "end_page": 85},
    
    # May 1853 (Items 17-20)
    {"title": "v12_K_Marx_Revolution_in_China_and_in_Europe", "start_page": 93, "end_page": 100},
    {"title": "v12_K_Marx_Affairs_in_Holland_Denmark_Conversion_of_the_British_Debt_India_Turkey_and_Russia", "start_page": 101, "end_page": 106},
    {"title": "v12_K_Marx_Mazzini_Switzerland_and_Austria_The_Turkish_Question", "start_page": 107, "end_page": 111},
    {"title": "v12_K_Marx_The_Turkish_Question_The_Times_Russian_Aggrandizement", "start_page": 112, "end_page": 114},
    
    # June 1853 (Items 21-26)
    {"title": "v12_K_Marx_The_Russian_Humbug_Gladstone_Failure_Sir_Charles_Wood_East_Indian_Reforms", "start_page": 115, "end_page": 124},
    {"title": "v12_K_Marx_The_British_Rule_in_India", "start_page": 125, "end_page": 133},
    {"title": "v12_K_Marx_English_Prosperity_Strikes_The_Turkish_Question_India", "start_page": 134, "end_page": 141},
    {"title": "v12_K_Marx_Turkey_and_Russia_Connivance_of_the_Aberdeen_Ministry_with_Russia_The_Budget_Tax_on_Newspaper_Supplements_Parliamentary_Corruption", "start_page": 142, "end_page": 147},
    {"title": "v12_K_Marx_The_East_India_Company_Its_History_and_Results", "start_page": 148, "end_page": 156},
    {"title": "v12_K_Marx_The_Indian_Question_Irish_Tenant_Right", "start_page": 157, "end_page": 162},
    
    # July 1853 (Items 27-34)
    {"title": "v12_K_Marx_Russian_Policy_Against_Turkey_Chartism", "start_page": 163, "end_page": 173},
    {"title": "v12_K_Marx_The_Turkish_War_Question_The_New_York_Tribune_in_the_House_of_Commons_The_Government_of_India", "start_page": 174, "end_page": 184},
    {"title": "v12_K_Marx_Layard_Motion_Struggle_Over_the_Ten_Hours_Bill", "start_page": 185, "end_page": 191},
    {"title": "v12_K_Marx_The_Russo_Turkish_Difficulty_Ducking_and_Dodging_of_the_British_Cabinet_Nesselrode_Last_Note_The_East_India_Question", "start_page": 192, "end_page": 200},
    {"title": "v12_K_Marx_War_in_Burma_The_Russian_Question_Curious_Diplomatic_Correspondence", "start_page": 201, "end_page": 208},
    {"title": "v12_K_Marx_The_War_Question_Doings_of_Parliament_India", "start_page": 209, "end_page": 216},
    {"title": "v12_K_Marx_The_Future_Results_of_British_Rule_in_India", "start_page": 217, "end_page": 222},
    {"title": "v12_K_Marx_Financial_Failure_of_Government_Cabs_Ireland_The_Russian_Question", "start_page": 223, "end_page": 232},
    
    # August 1853 (Items 35-42)
    {"title": "v12_K_Marx_In_the_House_of_Commons_The_Press_on_the_Eastern_Question_The_Czar_Manifesto_Denmark", "start_page": 233, "end_page": 238},
    {"title": "v12_K_Marx_Advertisement_Duty_Russian_Movements_Denmark_The_United_States_in_Europe", "start_page": 239, "end_page": 244},
    {"title": "v12_K_Marx_The_War_Question_British_Population_and_Trade_Returns_Doings_of_Parliament", "start_page": 245, "end_page": 256},
    {"title": "v12_K_Marx_Urquhart_Bem_The_Turkish_Question_in_the_House_of_Lords", "start_page": 257, "end_page": 264},
    {"title": "v12_K_Marx_The_Turkish_Question_in_the_Commons", "start_page": 265, "end_page": 276},
    {"title": "v12_K_Marx_Affairs_Continental_and_English", "start_page": 277, "end_page": 283},
    {"title": "v12_K_Marx_Michael_Bakunin", "start_page": 284, "end_page": 286},
    {"title": "v12_K_Marx_Rise_in_the_Price_of_Corn_Cholera_Strikes_Sailors_Movement", "start_page": 287, "end_page": 289},
    
    # September 1853 (Items 43-49)
    {"title": "v12_K_Marx_To_the_Editor_of_The_People_Paper", "start_page": 290, "end_page": 291},
    {"title": "v12_K_Marx_The_Vienna_Note_The_United_States_and_Europe_Letters_from_Shumla_Peel_Bank_Act", "start_page": 292, "end_page": 300},
    {"title": "v12_K_Marx_Political_Movements_Scarcity_of_Bread_in_Europe", "start_page": 301, "end_page": 308},
    {"title": "v12_K_Marx_The_Western_Powers_and_Turkey_Imminent_Economic_Crisis_Railway_Construction_in_India", "start_page": 309, "end_page": 317},
    {"title": "v12_K_Marx_The_Western_Powers_and_Turkey_Symptoms_of_Economic_Crisis", "start_page": 318, "end_page": 328},
    {"title": "v12_K_Marx_Panic_on_the_London_Stock_Exchange_Strikes", "start_page": 329, "end_page": 334},
    
    # October-December 1853 (Items 50-71)
    {"title": "v12_K_Marx_Lord_Palmerston", "start_page": 341, "end_page": 406},
    {"title": "v12_K_Marx_The_War_Question_Financial_Matters_Strikes", "start_page": 407, "end_page": 415},
    {"title": "v12_K_Marx_The_Turkish_Manifesto_France_Economic_Position", "start_page": 416, "end_page": 420},
    {"title": "v12_K_Marx_Arrest_of_Delescluze_Denmark_Austria_The_Times_on_the_Prospects_of_War_Against_Russia", "start_page": 421, "end_page": 423},
    {"title": "v12_K_Marx_War_Strikes_Dearth", "start_page": 435, "end_page": 443},
    {"title": "v12_K_Marx_Persian_Expedition_in_Afghanistan_and_Russian_Expedition_in_Central_Asia_Denmark_The_Fighting_on_the_Danube_and_in_Asia_Wigan_Colliers", "start_page": 444, "end_page": 449},
    {"title": "v12_K_Marx_The_Labor_Question", "start_page": 460, "end_page": 463},
    {"title": "v12_K_Marx_Prosperity_The_Labor_Question", "start_page": 464, "end_page": 470},
    {"title": "v12_K_Marx_David_Urquhart", "start_page": 477, "end_page": 478},
    {"title": "v12_K_Marx_The_Knight_of_the_Noble_Consciousness", "start_page": 479, "end_page": 508},
    {"title": "v12_K_Marx_Manteuffel_Speech_Religious_Movement_in_Prussia_Mazzini_Address_London_Corporation_Russell_Reform_Labor_Parliament", "start_page": 509, "end_page": 515},
    {"title": "v12_K_Marx_The_Turkish_War_Industrial_Distress", "start_page": 523, "end_page": 526},
    {"title": "v12_K_Marx_The_Quadruple_Convention_England_and_the_War", "start_page": 527, "end_page": 535},
    {"title": "v12_K_Marx_The_Russian_Victory_Position_of_England_and_France", "start_page": 536, "end_page": 542},
    {"title": "v12_K_Marx_Palmerston_Resignation", "start_page": 543, "end_page": 546},
    
    # 1854 Articles
    # January-February 1854 (Items 72-81)
    {"title": "v12_K_Marx_The_Western_Powers_and_Turkey", "start_page": 559, "end_page": 567},
    {"title": "v12_K_Marx_The_War_in_the_East", "start_page": 568, "end_page": 578},
    {"title": "v12_K_Marx_The_Fighting_in_the_East_Finances_of_Austria_and_France_Fortification_of_Constantinople", "start_page": 583, "end_page": 588},
    {"title": "v12_K_Marx_The_Czar_Views_Prince_Albert", "start_page": 589, "end_page": 592},
    {"title": "v12_K_Marx_and_F_Engels_Fortification_of_Constantinople_Denmark_Neutrality_Composition_of_British_Parliament_Crop_Failure_in_Europe", "start_page": 593, "end_page": 600},
    {"title": "v12_K_Marx_Count_Orlov_Mission_Russian_Finances_During_the_War", "start_page": 601, "end_page": 605},
    {"title": "v12_K_Marx_Blue_Books_Parliamentary_Debates_on_February_6_Count_Orlov_Mission_Operations_of_the_Allied_Fleet_The_Irish_Brigade_Concerning_the_Convocation_of_the_Labor_Parliament", "start_page": 606, "end_page": 612},
    {"title": "v12_K_Marx_Russian_Diplomacy_The_Blue_Book_on_the_Eastern_Question_Montenegro", "start_page": 613, "end_page": 622},
    
    # Appendix (Item 82)
    {"title": "v12_K_Marx_Apropos_Carey_Cluss_Article_The_Best_Paper_in_the_Union_and_Its_Best_Men_and_Political_Economists_with_Extracts_from_Marx_Letters", "start_page": 623, "end_page": 634}
]

def main():
    """
    Split the Volume 12 PDF into one PDF per ``table_of_contents`` entry.

    Reads the source PDF from the user's Downloads folder, creates the
    ``marx_chapters_v12`` folder (relative to the current working directory)
    if it is missing, and calls ``extract_chapter`` for every entry.
    Progress, success/failure counts and a listing of the PDFs found in the
    output folder are printed. Any exception raised while opening or
    processing the source PDF is printed, not re-raised.
    """
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 12_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v12"
    # exist_ok makes the call safe to repeat when the folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run. Compared lower-cased
        # because some file systems treat names that differ only in case as
        # the same file.
        used_names = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            print(f"[{i}/{total}] is processing: {title}")

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            if output_filename.lower() in used_names:
                # An earlier entry already produced this name; reusing it
                # would overwrite that chapter, so add the page range.
                output_filename = f"{clean_title}_p{start_page}-{end_page}.pdf"
                print(f"  duplicate title, saving as: {output_filename}")
            used_names.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF present in the folder, which includes files left
        # behind by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction only when this code is executed as the top-level
# module; nothing runs if it is imported from elsewhere.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 12_ M - Karl Marx.pdf
folder_name: marx_chapters_v12
expected_file_numbers 68

[1/68] is processing: v12_K_Marx_and_F_Engels_British_Politics_Disraeli_The_Refugees_Mazzini_in_London_Turkey
✓ generated：marx_chapters_v12/v12_K_Marx_and_F_Engels_British_Politics_Disraeli_The_Refugees_Mazzini_in_London_Turkey.pdf (page_num 3-12 -> PDF_page 34-43)
[2/68] is processing: v12_K_Marx_The_London_Press_Policy_of_Napoleon_on_the_Turkish_Question
✓ generated：marx_chapters_v12/v12_K_Marx_The_London_Press_Policy_of_Napoleon_on_the_Turkish_Question.pdf (page_num 18-21 -> PDF_page 49-52)
[3/68] is processing: v12_K_Marx_The_Berlin_Conspiracy
✓ generated：marx_chapters_v12/v12_K_Marx_The_Berlin_Conspiracy.pdf (page_num 28-31 -> PDF_page 59-62)
[4/68] is processing: v12_K_Marx_The_Berlin_Conspiracy_London_Police_Mazzini_Radetzky
✓ generated：marx_chapters_v12/v12_K_Marx_The_Berlin_Conspiracy_London_Police_Mazzini_Radetzky.pd

In [25]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Build a file-name-safe version of a chapter title.

    Every space is replaced by an underscore, and each of the characters
    < > : " / | ? * . , ( ) [ ] and the backslash is deleted. "Unknown" is
    returned when the outcome is empty or whitespace only.
    """
    # Three-argument maketrans: map ' ' to '_' and delete every character
    # listed in the third argument.
    table = str.maketrans(' ', '_', '<>:"/\\|?*.,()[]')
    result = filename.translate(table)

    if result.strip() == '':
        return "Unknown"
    return result

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=28):
    """
    Write the pages of one chapter from the source PDF to ``output_path``.

    ``start_page`` and ``end_page`` are the inclusive page numbers given in
    ``table_of_contents``. Adding ``page_offset`` converts each of them to a
    zero-based index into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: reader for the source PDF; only ``.pages`` is used.
        output_path: path of the PDF file to create.
        start_page: first page of the chapter.
        end_page: last page of the chapter (inclusive).
        title: chapter title, used only in failure messages.
        page_offset: value added to a page number to obtain its index in
            ``input_pdf_reader.pages``. Defaults to 28, the value that was
            hard-coded here before it became a parameter.

    Returns:
        True if the file was written. False if the page range was rejected
        or any step raised; the reason is printed and nothing is re-raised.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        total_pages = len(input_pdf_reader.pages)

        # With end < start the copy loop would run zero times, and a PDF
        # without pages would be written and reported as a success.
        if end_page < start_page:
            print(f"❌ fail to extract：{title} - invalid page range {start_page}-{end_page}")
            return False

        # Reject indexes outside the document before anything is created.
        if pdf_start < 0 or pdf_end >= total_pages:
            print(
                f"❌ fail to extract：{title} - PDF_page {pdf_start+1}-{pdf_end+1} "
                f"is outside the document ({total_pages} pages)"
            )
            return False

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The indexes are zero-based; +1 shows them as 1-based PDF pages.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from the Volume 13 PDF, one dict per output file.
#   title      - main() passes it through sanitize_filename() and uses the
#                result as the output file name.
#   start_page - first page of the chapter; extract_chapter() adds a fixed
#                offset to it to get the index of the page in the PDF.
#   end_page   - last page of the chapter, inclusive.
# NOTE(review): the "Items N-M" ranges in the section comments cover more
# numbers than there are entries listed under them; presumably they follow
# the numbering of the book's own contents - confirm.
table_of_contents = [
    # Main Works (Items 1-93)
    {"title": "v13_K_Marx_and_F_Engels_The_War_Question_in_Europe", "start_page": 3, "end_page": 7},
    {"title": "v13_K_Marx_Declaration_of_the_Prussian_Cabinet_Napoleon_Plans_Prussia_Policy", "start_page": 8, "end_page": 10},
    {"title": "v13_K_Marx_Debates_in_Parliament", "start_page": 11, "end_page": 25},
    {"title": "v13_K_Marx_Parliamentary_Debates_of_February_22_Pozzo_di_Borgo_Dispatch_The_Policy_of_the_Western_Powers", "start_page": 26, "end_page": 34},
    {"title": "v13_K_Marx_English_and_French_War_Plans_Greek_Insurrection_Spain_China", "start_page": 35, "end_page": 42},
    {"title": "v13_K_Marx_Austrian_Bankruptcy", "start_page": 43, "end_page": 49},
    {"title": "v13_K_Marx_Opening_of_the_Labour_Parliament_English_War_Budget", "start_page": 50, "end_page": 56},
    {"title": "v13_K_Marx_Letter_to_the_Labour_Parliament", "start_page": 57, "end_page": 60},
    {"title": "v13_K_Marx_The_Labor_Parliament", "start_page": 61, "end_page": 64},
    {"title": "v13_K_Marx_The_Greek_Insurrection", "start_page": 70, "end_page": 72},
    {"title": "v13_K_Marx_The_Documents_on_the_Partition_of_Turkey", "start_page": 73, "end_page": 83},
    {"title": "v13_K_Marx_The_Secret_Diplomatic_Correspondence", "start_page": 84, "end_page": 99},
    {"title": "v13_K_Marx_Declaration_of_War_On_the_History_of_the_Eastern_Question", "start_page": 100, "end_page": 108},
    {"title": "v13_K_Marx_British_Finances_The_Troubles_at_Preston", "start_page": 117, "end_page": 122},
    {"title": "v13_K_Marx_and_F_Engels_The_European_War", "start_page": 129, "end_page": 131},
    {"title": "v13_K_Marx_The_War_Debate_in_Parliament", "start_page": 132, "end_page": 142},
    {"title": "v13_K_Marx_Reshid_Pasha_Note_An_Italian_Newspaper_on_the_Eastern_Question", "start_page": 154, "end_page": 158},
    {"title": "v13_K_Marx_Greece_and_Turkey_Turkey_and_the_Western_Powers_Falling_Off_in_Wheat_Sales_in_England", "start_page": 159, "end_page": 162},
    {"title": "v13_K_Marx_The_Greek_Insurrection_The_Polish_Emigration_The_Austro_Prussian_Treaty_Russian_Documents", "start_page": 166, "end_page": 172},
    {"title": "v13_K_Marx_The_Bombardment_of_Odessa_Greece_Proclamation_of_Prince_Daniel_of_Montenegro_Manteuffel_Speech", "start_page": 173, "end_page": 180},
    {"title": "v13_K_Marx_British_Finances", "start_page": 184, "end_page": 191},
    {"title": "v13_K_Marx_Attack_Upon_Sevastopol_Clearing_of_Estates_in_Scotland", "start_page": 196, "end_page": 200},
    {"title": "v13_K_Marx_The_Treaty_Between_Austria_and_Prussia_Parliamentary_Debates_of_May_29", "start_page": 215, "end_page": 219},
    {"title": "v13_K_Marx_The_Formation_of_a_Special_Ministry_of_War_in_Britain_The_War_on_the_Danube_The_Economic_Situation", "start_page": 220, "end_page": 226},
    {"title": "v13_K_Marx_Reorganisation_of_the_British_War_Administration_The_Austrian_Summons_Britain_Economic_Situation_St_Arnaud", "start_page": 227, "end_page": 233},
    {"title": "v13_K_Marx_and_F_Engels_State_of_the_Russian_War", "start_page": 246, "end_page": 252},
    {"title": "v13_K_Marx_and_F_Engels_The_Russian_Retreat", "start_page": 253, "end_page": 257},
    # NOTE(review): this title is identical to the entry for pages 132-142
    # above. main() derives the output file name from the title, so both
    # entries are written to the same path and the earlier file is
    # overwritten. Give the two entries distinct titles.
    {"title": "v13_K_Marx_The_War_Debate_in_Parliament", "start_page": 258, "end_page": 266},
    {"title": "v13_K_Marx_The_Insurrection_at_Madrid_The_Austro_Turkish_Treaty_Moldavia_and_Wallachia", "start_page": 267, "end_page": 275},
    {"title": "v13_K_Marx_The_Details_of_the_Insurrection_at_Madrid_The_Austro_Prussian_Summons_The_New_Austrian_Loan_Wallachia", "start_page": 282, "end_page": 290},
    {"title": "v13_K_Marx_Excitement_in_Italy_The_Events_in_Spain_The_Position_of_the_German_States_British_Magistrates", "start_page": 291, "end_page": 300},
    {"title": "v13_K_Marx_A_Congress_at_Vienna_The_Austrian_Loan_Proclamations_of_Dulce_and_O_Donnell_The_Ministerial_Crisis_in_Britain", "start_page": 301, "end_page": 308},
    {"title": "v13_K_Marx_The_Spanish_Revolution_Greece_and_Turkey", "start_page": 309, "end_page": 315},
    {"title": "v13_K_Marx_The_War_Debates_in_Parliament", "start_page": 316, "end_page": 322},
    {"title": "v13_K_Marx_The_Policy_of_Austria_The_War_Debates_in_the_House_of_Commons", "start_page": 323, "end_page": 333},
    {"title": "v13_K_Marx_and_F_Engels_That_Bore_of_a_War", "start_page": 334, "end_page": 339},
    {"title": "v13_K_Marx_Espartero", "start_page": 340, "end_page": 346},
    {"title": "v13_K_Marx_Evacuation_of_the_Danubian_Principalities_The_Events_in_Spain_A_New_Danish_Constitution_The_Chartists", "start_page": 350, "end_page": 356},
    {"title": "v13_K_Marx_Evacuation_of_Moldavia_and_Wallachia_Poland_Demands_of_the_Spanish_People", "start_page": 357, "end_page": 363},
    {"title": "v13_K_Marx_The_Eastern_Question_The_Revolution_in_Spain_The_Madrid_Press", "start_page": 364, "end_page": 371},
    {"title": "v13_K_Marx_Revolution_in_Spain_Bomarsund", "start_page": 372, "end_page": 378},
    {"title": "v13_K_Marx_Revolutionary_Spain", "start_page": 389, "end_page": 446},
    {"title": "v13_K_Marx_The_Reaction_in_Spain", "start_page": 447, "end_page": 454},
    {"title": "v13_K_Marx_The_Rumours_about_Mazzini_Arrest_The_Austrian_Compulsory_Loan_Spain_The_Situation_in_Wallachia", "start_page": 455, "end_page": 460},
    {"title": "v13_K_Marx_The_Actions_of_the_Allied_Fleet_The_Situation_in_the_Danubian_Principalities_Spain_British_Foreign_Trade", "start_page": 461, "end_page": 469},
    {"title": "v13_K_Marx_and_F_Engels_The_Sevastopol_Hoax", "start_page": 483, "end_page": 487},
    {"title": "v13_K_Marx_and_F_Engels_The_Sevastopol_Hoax_General_News", "start_page": 488, "end_page": 491},
    {"title": "v13_K_Marx_and_F_Engels_Progress_of_the_War", "start_page": 543, "end_page": 549},
    {"title": "v13_K_Marx_In_Retrospect", "start_page": 554, "end_page": 559},
    {"title": "v13_K_Marx_The_Press_and_the_Military_System", "start_page": 560, "end_page": 563},
    {"title": "v13_K_Marx_The_Crisis_in_Trade_and_Industry", "start_page": 571, "end_page": 578},
    {"title": "v13_K_Marx_The_Four_Points", "start_page": 579, "end_page": 584},
    {"title": "v13_K_Marx_The_Commercial_Crisis_in_Britain", "start_page": 585, "end_page": 589},
    {"title": "v13_K_Marx_Sunday_Observance_and_the_Publicans_Clanricarde", "start_page": 590, "end_page": 592},
    {"title": "v13_K_Marx_The_Aims_of_the_Negotiations_Polemic_Against_Prussia_A_Snowball_Riot", "start_page": 598, "end_page": 599},
    # NOTE(review): the page numbers of the next three entries are
    # inconsistent. "Comments on the Cabinet Crisis" has end_page (604) lower
    # than start_page (608), so extract_chapter()'s page loop runs zero times
    # for it, and the ranges 600-607 and 605-608 of its neighbours overlap.
    # Verify all three against the book's contents.
    {"title": "v13_K_Marx_The_Opening_of_Parliament", "start_page": 600, "end_page": 607},
    {"title": "v13_K_Marx_Comments_on_the_Cabinet_Crisis", "start_page": 608, "end_page": 604},
    {"title": "v13_K_Marx_Parliamentary_News", "start_page": 605, "end_page": 608},
    {"title": "v13_K_Marx_and_F_Engels_From_Parliament_From_the_Theatre_of_War", "start_page": 615, "end_page": 619},
    {"title": "v13_K_Marx_and_F_Engels_The_Late_British_Government", "start_page": 620, "end_page": 626},
    {"title": "v13_K_Marx_On_the_Ministerial_Crisis", "start_page": 627, "end_page": 630},
    {"title": "v13_K_Marx_Fall_of_the_Aberdeen_Ministry", "start_page": 631, "end_page": 637},
    {"title": "v13_K_Marx_The_Defeated_Government", "start_page": 638, "end_page": 641},
    {"title": "v13_K_Marx_The_Parties_and_Cliques", "start_page": 642, "end_page": 644},
    {"title": "v13_K_Marx_Two_Crises", "start_page": 645, "end_page": 650},
    
    # Preparatory Materials (Items 1-2)
    {"title": "v13_K_Marx_A_Central_Junta", "start_page": 651, "end_page": 653},
    {"title": "v13_K_Marx_Unpublished_Extract_from_a_Series_of_Articles_Revolutionary_Spain", "start_page": 654, "end_page": 662},
    
    # Appendix (Item 1)
    {"title": "v13_The_English_Middle_Class", "start_page": 663, "end_page": 668}
]
    

def main():
    """
    Split the Volume 13 PDF into one PDF per ``table_of_contents`` entry.

    Reads the source PDF from the user's Downloads folder, creates the
    ``marx_chapters_v13`` folder (relative to the current working directory)
    if it is missing, and calls ``extract_chapter`` for every entry.
    Progress, success/failure counts and a listing of the PDFs found in the
    output folder are printed. Any exception raised while opening or
    processing the source PDF is printed, not re-raised.
    """
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 13_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v13"
    # exist_ok makes the call safe to repeat when the folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    total = len(table_of_contents)
    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {total}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run. Compared lower-cased
        # because some file systems treat names that differ only in case as
        # the same file.
        used_names = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            print(f"[{i}/{total}] is processing: {title}")

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            if output_filename.lower() in used_names:
                # An earlier entry already produced this name; reusing it
                # would overwrite that chapter, so add the page range.
                output_filename = f"{clean_title}_p{start_page}-{end_page}.pdf"
                print(f"  duplicate title, saving as: {output_filename}")
            used_names.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF present in the folder, which includes files left
        # behind by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction only when this code is executed as the top-level
# module; nothing runs if it is imported from elsewhere.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 13_ M - Karl Marx.pdf
folder_name: marx_chapters_v13
expected_file_numbers 68

[1/68] is processing: v13_K_Marx_and_F_Engels_The_War_Question_in_Europe
✓ generated：marx_chapters_v13/v13_K_Marx_and_F_Engels_The_War_Question_in_Europe.pdf (page_num 3-7 -> PDF_page 32-36)
[2/68] is processing: v13_K_Marx_Declaration_of_the_Prussian_Cabinet_Napoleon_Plans_Prussia_Policy
✓ generated：marx_chapters_v13/v13_K_Marx_Declaration_of_the_Prussian_Cabinet_Napoleon_Plans_Prussia_Policy.pdf (page_num 8-10 -> PDF_page 37-39)
[3/68] is processing: v13_K_Marx_Debates_in_Parliament
✓ generated：marx_chapters_v13/v13_K_Marx_Debates_in_Parliament.pdf (page_num 11-25 -> PDF_page 40-54)
[4/68] is processing: v13_K_Marx_Parliamentary_Debates_of_February_22_Pozzo_di_Borgo_Dispatch_The_Policy_of_the_Western_Powers
✓ generated：marx_chapters_v13/v13_K_Marx_Parliamentary_Debates_of_February_22_Pozzo_di_Borgo_Dispatch_The_Policy_of_the

In [28]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Reduce a chapter title to characters that are safe in a file name.

    Walks the title once: the characters < > : " / | ? * . , ( ) [ ] and the
    backslash are skipped, a space is written as an underscore, and
    everything else is kept. Returns "Unknown" when what remains is empty or
    whitespace only.
    """
    dropped = set('<>:"/\\|?*.,()[]')

    kept = []
    for ch in filename:
        if ch in dropped:
            continue
        kept.append('_' if ch == ' ' else ch)
    name = ''.join(kept)

    if not name.strip():
        return "Unknown"
    return name

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Copy one chapter's pages from the source PDF into a new PDF file.

    Printed (book) page numbers are converted to 0-based PDF page indexes by
    adding ``page_offset``.

    Args:
        input_pdf_reader: reader object whose ``pages`` attribute supports
            ``len()`` and integer indexing (a PyPDF2 ``PdfReader`` here).
        output_path: path of the PDF file to create.
        start_page: first printed page of the chapter (inclusive).
        end_page: last printed page of the chapter (inclusive).
        title: chapter title, used only in the failure message.
        page_offset: value added to a printed page number to obtain the
            0-based index into ``input_pdf_reader.pages``.

    Returns:
        True if the chapter file was written, False otherwise. Failures are
        reported on stdout and never raised to the caller.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        # Validate before writing anything: a reversed range would otherwise
        # produce an empty PDF reported as a success, and a negative index
        # would silently select pages counted from the end of the document.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0:
            raise ValueError(f"page {start_page} maps to negative PDF index {pdf_start}")
        total_pages = len(input_pdf_reader.pages)
        if pdf_end >= total_pages:
            raise ValueError(f"PDF page {pdf_end + 1} is beyond the last page ({total_pages})")

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The +1 turns 0-based indexes into the 1-based numbers a PDF viewer shows.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapter list for Volume 14: one entry per output PDF, with the printed
# (book) page range of the chapter, both ends inclusive.
# Titles double as output file names, so they must be unique. Where the volume
# contains several articles with the same title, the later ones carry a
# "_p<start_page>" suffix so they no longer overwrite the earlier file.
table_of_contents = [
    # Main Works (Items 1-132)
    {"title": "v14_K_Marx_and_F_Engels_Palmerston_The_Army", "start_page": 8, "end_page": 11},
    {"title": "v14_K_Marx_From_Parliament_Gladstone_at_the_Dispatch_Box", "start_page": 12, "end_page": 13},
    {"title": "v14_K_Marx_Lord_Palmerston", "start_page": 14, "end_page": 20},
    {"title": "v14_K_Marx_Herbert_Re_election_The_First_Measures_of_the_New_Ministry_News_from_India", "start_page": 21, "end_page": 23},
    {"title": "v14_K_Marx_Parliament", "start_page": 24, "end_page": 28},
    {"title": "v14_K_Marx_The_Coalition_between_Tories_and_Radicals", "start_page": 29, "end_page": 31},
    {"title": "v14_F_Engels_The_War_That_Looms_on_Europe", "start_page": 32, "end_page": 39},
    {"title": "v14_K_Marx_and_F_Engels_Parliamentary_and_Military_Affairs", "start_page": 40, "end_page": 42},
    {"title": "v14_K_Marx_On_the_New_Ministerial_Crisis", "start_page": 43, "end_page": 46},
    {"title": "v14_K_Marx_Joseph_Hume", "start_page": 47, "end_page": 48},
    {"title": "v14_K_Marx_Palmerston", "start_page": 49, "end_page": 52},
    {"title": "v14_K_Marx_The_British_Constitution", "start_page": 53, "end_page": 56},
    {"title": "v14_K_Marx_Layard", "start_page": 57, "end_page": 58},
    {"title": "v14_K_Marx_The_Crisis_in_England", "start_page": 59, "end_page": 62},
    {"title": "v14_K_Marx_The_Buying_of_Commissions_News_from_Australia", "start_page": 63, "end_page": 66},
    {"title": "v14_K_Marx_The_English_Press_on_the_Late_Tsar", "start_page": 67, "end_page": 68},
    {"title": "v14_K_Marx_On_the_History_of_the_French_Alliance", "start_page": 69, "end_page": 72},
    {"title": "v14_K_Marx_The_Committee_of_Inquiry", "start_page": 73, "end_page": 75},
    {"title": "v14_K_Marx_The_Brussels_Memoire", "start_page": 76, "end_page": 77},
    {"title": "v14_K_Marx_Ireland_Revenge", "start_page": 78, "end_page": 80},
    {"title": "v14_K_Marx_and_F_Engels_Criticism_of_the_French_Conduct_of_the_War", "start_page": 90, "end_page": 93},
    {"title": "v14_K_Marx_Agitation_against_Prussia_A_day_of_Fasting", "start_page": 94, "end_page": 97},
    {"title": "v14_K_Marx_A_Meeting", "start_page": 98, "end_page": 101},
    {"title": "v14_K_Marx_Reports_from_the_English_Press", "start_page": 102, "end_page": 103},
    {"title": "v14_K_Marx_From_Parliament", "start_page": 104, "end_page": 108},
    {"title": "v14_K_Marx_Some_Observations_on_the_History_of_the_French_Alliance", "start_page": 118, "end_page": 120},
    {"title": "v14_K_Marx_Napoleon_and_Barbes_The_Newspaper_Stamp", "start_page": 121, "end_page": 123},
    # Second article with this title (first one starts on page 73).
    {"title": "v14_K_Marx_The_Committee_of_Inquiry_p124", "start_page": 124, "end_page": 127},
    {"title": "v14_K_Marx_The_British_Army", "start_page": 128, "end_page": 131},
    {"title": "v14_K_Marx_A_Scandal_in_the_French_Legislature_Drouyn_de_Lhuys_Influence_The_State_of_the_Militia", "start_page": 139, "end_page": 140},
    {"title": "v14_K_Marx_Prospect_in_France_and_England", "start_page": 141, "end_page": 145},
    {"title": "v14_K_Marx_On_the_History_of_Political_Agitation", "start_page": 166, "end_page": 169},
    {"title": "v14_K_Marx_Pianori_Dissatisfaction_with_Austria", "start_page": 177, "end_page": 179},
    {"title": "v14_K_Marx_The_Morning_Post_versus_Prussia_The_Character_of_the_Whigs_and_Tories", "start_page": 186, "end_page": 188},
    {"title": "v14_K_Marx_A_Sitting_of_the_House_of_Lords", "start_page": 189, "end_page": 193},
    {"title": "v14_K_Marx_The_Agitation_Outside_Parliament", "start_page": 194, "end_page": 197},
    {"title": "v14_K_Marx_Questions_of_Finance", "start_page": 198, "end_page": 200},
    {"title": "v14_K_Marx_On_the_Reform_Movement", "start_page": 208, "end_page": 210},
    {"title": "v14_K_Marx_A_Critique_of_the_Crimean_Affair_From_Parliament", "start_page": 211, "end_page": 214},
    {"title": "v14_K_Marx_and_F_Engels_Prologue_at_Lord_Palmerston_Course_of_the_Latest_Events_in_the_Crimea", "start_page": 218, "end_page": 221},
    {"title": "v14_K_Marx_Parliamentary_Reform_The_Break_off_and_Continuation_of_the_Vienna_Conference_The_So_Called_War_of_Annihilation", "start_page": 222, "end_page": 226},
    {"title": "v14_K_Marx_Disraeli_Motion", "start_page": 227, "end_page": 230},
    # Second article with this title (first one starts on page 104).
    {"title": "v14_K_Marx_From_Parliament_p231", "start_page": 231, "end_page": 236},
    {"title": "v14_K_Marx_A_Critique_of_Palmerston_Latest_Speech", "start_page": 237, "end_page": 239},
    {"title": "v14_K_Marx_The_Association_for_Administrative_Reform_People_Charter", "start_page": 240, "end_page": 244},
    {"title": "v14_K_Marx_Parliamentary", "start_page": 245, "end_page": 248},
    {"title": "v14_K_Marx_The_Great_Parliamentary_Debate", "start_page": 257, "end_page": 259},
    {"title": "v14_K_Marx_Napier_Letters_Roebuck_Committee", "start_page": 273, "end_page": 276},
    {"title": "v14_K_Marx_and_F_Engels_The_Debate_on_Layard_Motion_The_War_in_the_Crimea", "start_page": 277, "end_page": 279},
    {"title": "v14_K_Marx_Prince_Albert_Toast_The_Stamp_Duty_on_Newspapers", "start_page": 280, "end_page": 282},
    {"title": "v14_K_Marx_Eccentricities_of_Politics", "start_page": 283, "end_page": 286},
    {"title": "v14_K_Marx_and_F_Engels_The_Local_War_Debate_on_Administrative_Reform_Report_of_the_Roebuck_Committee", "start_page": 287, "end_page": 291},
    {"title": "v14_K_Marx_Announcement_Concerning_the_Taking_of_Sevastopol_From_the_Paris_Bourse_On_the_Massacre_at_Tangis_in_the_House_of_Lords", "start_page": 292, "end_page": 296},
    {"title": "v14_K_Marx_The_Mishap_of_June_18_Reinforcements", "start_page": 297, "end_page": 301},
    {"title": "v14_K_Marx_Anti_Church_Movement_Demonstration_in_Hyde_Park", "start_page": 302, "end_page": 307},
    {"title": "v14_K_Marx_Miscellaneous_Reports", "start_page": 308, "end_page": 312},
    # Second article with this title (first one starts on page 308).
    {"title": "v14_K_Marx_Miscellaneous_Reports_p320", "start_page": 320, "end_page": 322},
    {"title": "v14_K_Marx_Agitation_over_the_Tightening_up_of_Sunday_Observance", "start_page": 323, "end_page": 327},
    {"title": "v14_K_Marx_and_F_Engels_Clashes_between_the_Police_and_the_People_The_Events_in_the_Crimea", "start_page": 333, "end_page": 336},
    {"title": "v14_K_Marx_From_Parliament_Roebuck_and_Bulwer_Motions", "start_page": 337, "end_page": 339},
    {"title": "v14_K_Marx_From_the_Houses_of_Parliament_Bulwer_Motion_The_Irish_Question", "start_page": 340, "end_page": 343},
    {"title": "v14_K_Marx_and_F_Engels_Russell_Resignation_The_Events_in_the_Crimea", "start_page": 348, "end_page": 351},
    {"title": "v14_K_Marx_Russell_Dismissal", "start_page": 352, "end_page": 354},
    # Third article with this title (earlier ones start on pages 104 and 231).
    {"title": "v14_K_Marx_From_Parliament_p355", "start_page": 355, "end_page": 357},
    {"title": "v14_K_Marx_and_F_Engels_From_Parliament_From_the_Theatre_of_War", "start_page": 363, "end_page": 366},
    {"title": "v14_K_Marx_Palmerston_The_Physiology_of_the_Ruling_Class_of_Great_Britain", "start_page": 367, "end_page": 370},
    {"title": "v14_K_Marx_Lord_John_Russell", "start_page": 371, "end_page": 393},
    {"title": "v14_K_Marx_The_Late_Birmingham_Conference", "start_page": 394, "end_page": 400},
    {"title": "v14_K_Marx_General_Simpson_Resignation_From_Parliament", "start_page": 470, "end_page": 471},
    {"title": "v14_K_Marx_Commentary_on_the_Parliamentary_Proceedings", "start_page": 472, "end_page": 475},
    {"title": "v14_K_Marx_The_Military_Forces_against_Russia", "start_page": 476, "end_page": 476},
    {"title": "v14_K_Marx_The_Poland_Meeting", "start_page": 477, "end_page": 480},
    {"title": "v14_K_Marx_On_the_Critique_of_Austrian_Policy_in_the_Crimean_Campaign", "start_page": 481, "end_page": 483},
    {"title": "v14_K_Marx_and_F_Engels_The_Anglo_French_War_against_Russia", "start_page": 484, "end_page": 489},
    {"title": "v14_K_Marx_Events_at_the_Theatres_of_War", "start_page": 490, "end_page": 492},
    {"title": "v14_K_Marx_Napier_Letter", "start_page": 493, "end_page": 494},
    {"title": "v14_K_Marx_Austria_and_the_War", "start_page": 495, "end_page": 500},
    {"title": "v14_K_Marx_and_F_Engels_The_Punishment_of_the_Ranks", "start_page": 501, "end_page": 503},
    {"title": "v14_F_Engels_The_Battle_of_the_Tchernaya", "start_page": 504, "end_page": 512},
    {"title": "v14_K_Marx_Another_British_Revelation", "start_page": 513, "end_page": 518},
    {"title": "v14_K_Marx_O_Connor_Funeral", "start_page": 524, "end_page": 524},
    {"title": "v14_F_Engels_Crimean_Prospects", "start_page": 525, "end_page": 530},
    {"title": "v14_K_Marx_and_F_Engels_Events_in_the_Crimea", "start_page": 531, "end_page": 533},
    {"title": "v14_K_Marx_The_Commercial_and_Financial_Situation", "start_page": 534, "end_page": 536},
    {"title": "v14_F_Engels_The_State_of_the_War", "start_page": 537, "end_page": 541},
    {"title": "v14_K_Marx_and_F_Engels_The_Reports_of_Generals_Simpson_Pelissier_and_Niel", "start_page": 542, "end_page": 545},
    {"title": "v14_K_Marx_A_Diplomatic_Impropriety", "start_page": 553, "end_page": 553},
    {"title": "v14_K_Marx_The_Official_Financial_Report", "start_page": 554, "end_page": 556},
    {"title": "v14_K_Marx_The_Bank_of_France_Reinforcements_to_the_Crimea_The_New_Field_Marshals", "start_page": 557, "end_page": 559},
    {"title": "v14_K_Marx_The_Committee_at_Newcastle_upon_Tyne", "start_page": 560, "end_page": 562},
    {"title": "v14_K_Marx_Big_Meeting_in_Support_of_Political_Refugees", "start_page": 581, "end_page": 583},
    {"title": "v14_K_Marx_Traditional_English_Policy", "start_page": 584, "end_page": 587},
    {"title": "v14_K_Marx_The_American_Difficulty_Affairs_of_France", "start_page": 599, "end_page": 604},
    {"title": "v14_K_Marx_The_Fall_of_Kars", "start_page": 605, "end_page": 614},
    {"title": "v14_K_Marx_The_France_of_Bonaparte_the_Little", "start_page": 615, "end_page": 620},
    # Second work with this title (first one starts on page 605).
    {"title": "v14_K_Marx_The_Fall_of_Kars_p621", "start_page": 621, "end_page": 654},
    {"title": "v14_K_Marx_Speech_at_the_Anniversary_of_The_People_Paper_Delivered_in_London_April_14_1856", "start_page": 655, "end_page": 656},
    {"title": "v14_K_Marx_Prussia", "start_page": 657, "end_page": 661},
    {"title": "v14_K_Marx_The_House_of_Lords_and_the_Duke_of_York_Monument", "start_page": 662, "end_page": 671},
    {"title": "v14_K_Marx_To_the_Editor_of_The_Free_Press", "start_page": 672, "end_page": 672},
    {"title": "v14_K_Marx_Kars_Papers_Curiosities", "start_page": 673, "end_page": 684},
]

def main():
    """
    Split the Volume 14 PDF into one PDF file per table-of-contents entry.

    For every entry of ``table_of_contents`` the title is turned into a file
    name with ``sanitize_filename`` and the page range is written to
    ``marx_chapters_v14/<name>.pdf`` via ``extract_chapter``. Progress, a
    success/failure count and the list of PDF files found in the output
    folder are printed. Errors while opening or processing the source PDF are
    reported on stdout instead of being raised.
    """
    # The source PDF is expected in the current user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 14_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v14"
    os.makedirs(output_dir, exist_ok=True)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already produced in this run, compared case-insensitively
        # because the target file system may not distinguish case.
        used_names = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)

            # Several articles share a title; without disambiguation the later
            # chapter would overwrite the earlier chapter's file.
            output_stem = clean_title
            if output_stem.casefold() in used_names:
                output_stem = f"{clean_title}_p{start_page}"
                copy_number = 2
                while output_stem.casefold() in used_names:
                    output_stem = f"{clean_title}_p{start_page}_{copy_number}"
                    copy_number += 1
                print(f"⚠ duplicate title, saving as: {output_stem}.pdf")
            used_names.add(output_stem.casefold())

            output_filename = f"{output_stem}.pdf"
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF present in the folder, including files left over
        # from earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 14_ M - Karl Marx.pdf
folder_name: marx_chapters_v14
expected_file_numbers 101

[1/101] is processing: v14_K_Marx_and_F_Engels_Palmerston_The_Army
✓ generated：marx_chapters_v14/v14_K_Marx_and_F_Engels_Palmerston_The_Army.pdf (page_num 8-11 -> PDF_page 39-42)
[2/101] is processing: v14_K_Marx_From_Parliament_Gladstone_at_the_Dispatch_Box
✓ generated：marx_chapters_v14/v14_K_Marx_From_Parliament_Gladstone_at_the_Dispatch_Box.pdf (page_num 12-13 -> PDF_page 43-44)
[3/101] is processing: v14_K_Marx_Lord_Palmerston
✓ generated：marx_chapters_v14/v14_K_Marx_Lord_Palmerston.pdf (page_num 14-20 -> PDF_page 45-51)
[4/101] is processing: v14_K_Marx_Herbert_Re_election_The_First_Measures_of_the_New_Ministry_News_from_India
✓ generated：marx_chapters_v14/v14_K_Marx_Herbert_Re_election_The_First_Measures_of_the_New_Ministry_News_from_India.pdf (page_num 21-23 -> PDF_page 52-54)
[5/101] is processing: v14_K_Marx_Parliame

In [29]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Build a file-system friendly name from a chapter title.

    Removes the characters < > : " / \\ | ? * . , ( ) [ ] and replaces each
    space with an underscore. Returns "Unknown" when the result is empty or
    consists only of whitespace.
    """
    # A single character class covers both the reserved path characters and
    # the punctuation that the title should not carry into a file name.
    stripped = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)
    result = stripped.replace(' ', '_')

    if result.strip():
        return result
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=28):
    """
    Copy one chapter's pages from the source PDF into a new PDF file.

    Printed (book) page numbers are converted to 0-based PDF page indexes by
    adding ``page_offset``.

    Args:
        input_pdf_reader: reader object whose ``pages`` attribute supports
            ``len()`` and integer indexing (a PyPDF2 ``PdfReader`` here).
        output_path: path of the PDF file to create.
        start_page: first printed page of the chapter (inclusive).
        end_page: last printed page of the chapter (inclusive).
        title: chapter title, used only in the failure message.
        page_offset: value added to a printed page number to obtain the
            0-based index into ``input_pdf_reader.pages``.

    Returns:
        True if the chapter file was written, False otherwise. Failures are
        reported on stdout and never raised to the caller.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        # Validate before writing anything: a reversed range would otherwise
        # produce an empty PDF reported as a success, and a negative index
        # would silently select pages counted from the end of the document.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0:
            raise ValueError(f"page {start_page} maps to negative PDF index {pdf_start}")
        total_pages = len(input_pdf_reader.pages)
        if pdf_end >= total_pages:
            raise ValueError(f"PDF page {pdf_end + 1} is beyond the last page ({total_pages})")

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The +1 turns 0-based indexes into the 1-based numbers a PDF viewer shows.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapter list for Volume 15: one entry per output PDF, with the printed
# (book) page range of the chapter, both ends inclusive.
# Titles double as output file names, so they must be unique. A later article
# that repeats an earlier title carries a "_p<start_page>" suffix so it no
# longer overwrites the earlier file.
table_of_contents = [
    # 1856 Articles
    # May 1856 (Item 1)
    {"title": "v15_K_Marx_Sardinia", "start_page": 3, "end_page": 7},
    
    # June 1856 (Items 2-4)
    {"title": "v15_K_Marx_The_French_Credit_Mobilier_I", "start_page": 8, "end_page": 13},
    {"title": "v15_K_Marx_The_French_Credit_Mobilier_II", "start_page": 14, "end_page": 18},
    {"title": "v15_K_Marx_The_French_Credit_Mobilier_III", "start_page": 19, "end_page": 24},
    
    # June 1856 - March 1857 (Items 5-9)
    {"title": "v15_K_Marx_Revelations_of_the_Diplomatic_History_of_the_18th_Century", "start_page": 25, "end_page": 96},
    
    # July 1856 (Item 10)
    {"title": "v15_K_Marx_Revolution_in_Spain", "start_page": 97, "end_page": 102},
    
    # August 1856 (Item 11)
    {"title": "v15_K_Marx_Revolution_in_Spain_2", "start_page": 103, "end_page": 108},
    
    # September 1856 (Item 12)
    # NOTE(review): this entry and the next were 109-117 and 118-116. The
    # second range was reversed (producing an empty PDF) and the first one
    # overlapped the article starting on page 117. Corrected to the contiguous
    # ranges 109-112 / 113-116 - confirm against the volume's printed contents.
    {"title": "v15_K_Marx_The_Economic_Crisis_in_Europe", "start_page": 109, "end_page": 112},
    
    # October 1856 (Items 13-16)
    {"title": "v15_K_Marx_The_Monetary_Crisis_in_Europe", "start_page": 113, "end_page": 116},
    {"title": "v15_K_Marx_The_Causes_of_the_Monetary_Crisis_in_Europe", "start_page": 117, "end_page": 122},
    {"title": "v15_K_Marx_The_Monetary_Crisis_in_Europe_From_the_History_of_Money_Circulation", "start_page": 123, "end_page": 129},
    
    # November 1856 (Items 17-20)
    {"title": "v15_K_Marx_The_Economic_Crisis_in_France", "start_page": 130, "end_page": 135},
    {"title": "v15_K_Marx_The_European_Crisis", "start_page": 136, "end_page": 138},
    {"title": "v15_K_Marx_The_Maritime_Commerce_of_Austria", "start_page": 139, "end_page": 143},
    {"title": "v15_K_Marx_The_Maritime_Commerce_of_Austria_Second_article", "start_page": 144, "end_page": 150},
    
    # December 1856 (Item 21)
    {"title": "v15_K_Marx_The_Right_Divine_of_the_Hohenzollerns", "start_page": 151, "end_page": 157},
    
    # 1857 Articles
    # January 1857 (Items 22-28)
    {"title": "v15_K_Marx_The_Anglo_Chinese_Conflict", "start_page": 158, "end_page": 163},
    {"title": "v15_K_Marx_The_War_Against_Persia", "start_page": 177, "end_page": 180},
    {"title": "v15_K_Marx_B_Bauer_Pamphlets_on_the_Collision_with_Russia", "start_page": 181, "end_page": 193},
    
    # February 1857 (Items 29-30)
    {"title": "v15_K_Marx_The_New_English_Budget", "start_page": 200, "end_page": 206},
    {"title": "v15_K_Marx_Parliamentary_Debates_on_the_Chinese_Hostilities", "start_page": 207, "end_page": 212},
    
    # March 1857 (Items 31-37)
    {"title": "v15_K_Marx_Defeat_of_the_Palmerston_Ministry", "start_page": 213, "end_page": 218},
    {"title": "v15_K_Marx_The_Coming_Election_in_England", "start_page": 219, "end_page": 222},
    {"title": "v15_K_Marx_Russian_Trade_with_China", "start_page": 223, "end_page": 225},
    {"title": "v15_K_Marx_The_English_Election", "start_page": 226, "end_page": 231},
    {"title": "v15_K_Marx_English_Atrocities_in_China", "start_page": 232, "end_page": 235},
    {"title": "v15_K_Marx_A_Traitor_in_Circassia", "start_page": 236, "end_page": 237},
    {"title": "v15_K_Marx_The_Defeat_of_Cobden_Bright_and_Gibson", "start_page": 238, "end_page": 242},
    
    # April 1857 (Items 38-42)
    {"title": "v15_K_Marx_Result_of_the_Election", "start_page": 247, "end_page": 250},
    {"title": "v15_K_Marx_Condition_of_Factory_Laborers", "start_page": 251, "end_page": 254},
    {"title": "v15_K_Marx_The_English_Factory_System", "start_page": 255, "end_page": 261},
    
    # May 1857 (Items 43-47)
    {"title": "v15_K_Marx_The_British_Wild_Cats", "start_page": 266, "end_page": 269},
    {"title": "v15_K_Marx_Credit_Mobilier", "start_page": 270, "end_page": 277},
    {"title": "v15_K_Marx_Interesting_Revelations", "start_page": 284, "end_page": 288},
    
    # June 1857 (Items 48-50)
    {"title": "v15_K_Marx_The_New_French_Bank_Act", "start_page": 289, "end_page": 292},
    {"title": "v15_K_Marx_The_Persian_Treaty", "start_page": 293, "end_page": 296},
    {"title": "v15_K_Marx_The_Revolt_in_the_Indian_Army", "start_page": 297, "end_page": 300},
    
    # July 1857 (Items 51-54)
    {"title": "v15_K_Marx_State_of_Europe_Financial_State_of_France", "start_page": 301, "end_page": 304},
    {"title": "v15_K_Marx_The_Revolt_in_India", "start_page": 305, "end_page": 308},
    {"title": "v15_K_Marx_The_Indian_Question", "start_page": 309, "end_page": 313},
    {"title": "v15_K_Marx_Indian_News", "start_page": 314, "end_page": 317},
    
    # August 1857 (Items 55-59)
    {"title": "v15_K_Marx_State_of_the_Indian_Insurrection", "start_page": 318, "end_page": 321},
    {"title": "v15_K_Marx_The_Oriental_Question", "start_page": 322, "end_page": 326},
    {"title": "v15_K_Marx_The_Indian_Insurrection", "start_page": 327, "end_page": 330},
    {"title": "v15_K_Marx_Political_Situation_in_Europe", "start_page": 331, "end_page": 335},
    {"title": "v15_K_Marx_Investigation_of_Tortures_in_India", "start_page": 336, "end_page": 341},
    
    # September 1857 (Items 60-65)
    # Same title as the July 1857 article above (page 305), hence the suffix.
    {"title": "v15_K_Marx_The_Revolt_in_India_p342", "start_page": 342, "end_page": 348},
    {"title": "v15_K_Marx_British_Incomes_in_India", "start_page": 349, "end_page": 352},
    {"title": "v15_K_Marx_The_Indian_Revolt", "start_page": 353, "end_page": 356},
    {"title": "v15_K_Marx_The_French_Credit_Mobilier", "start_page": 357, "end_page": 360},
    {"title": "v15_K_Marx_The_Revolt_in_India_2", "start_page": 361, "end_page": 364},
    {"title": "v15_K_Marx_The_Revolt_in_India_3", "start_page": 365, "end_page": 368},
    
    # October 1857 (Items 66-67)
    {"title": "v15_K_Marx_The_Revolt_in_India_4", "start_page": 369, "end_page": 373},
    {"title": "v15_K_Marx_The_Revolt_in_India_5", "start_page": 374, "end_page": 378},
    
    # November 1857 (Items 68-71)
    {"title": "v15_K_Marx_The_Bank_Act_of_1844_and_the_Monetary_Crisis_in_England", "start_page": 379, "end_page": 384},
    {"title": "v15_K_Marx_The_British_Revulsion", "start_page": 385, "end_page": 391},
    {"title": "v15_K_Marx_The_Trade_Crisis_in_England", "start_page": 400, "end_page": 403},
    
    # December 1857 (Items 72-74)
    {"title": "v15_K_Marx_The_Financial_Crisis_in_Europe", "start_page": 404, "end_page": 409},
    {"title": "v15_K_Marx_The_Crisis_in_Europe", "start_page": 410, "end_page": 412},
    {"title": "v15_K_Marx_The_French_Crisis", "start_page": 413, "end_page": 418},
    
    # 1858 Articles
    # January 1858 (Items 75-78)
    {"title": "v15_K_Marx_British_Commerce", "start_page": 425, "end_page": 434},
    {"title": "v15_K_Marx_The_Approaching_Indian_Loan", "start_page": 443, "end_page": 446},
    
    # February 1858 (Items 79-83)
    {"title": "v15_K_Marx_The_Attempt_upon_the_Life_of_Bonaparte", "start_page": 453, "end_page": 458},
    # Same title as the November 1856 article above (page 130), hence the suffix.
    {"title": "v15_K_Marx_The_Economic_Crisis_in_France_p459", "start_page": 459, "end_page": 463},
    {"title": "v15_K_Marx_The_Rule_of_the_Pretorians", "start_page": 464, "end_page": 467},
    {"title": "v15_K_Marx_The_Derby_Ministry_Palmerston_Sham_Resignation", "start_page": 468, "end_page": 471},
    
    # March 1858 (Items 84-87)
    {"title": "v15_K_Marx_Portents_of_the_Day", "start_page": 472, "end_page": 476},
    {"title": "v15_K_Marx_Bonaparte_Present_Position", "start_page": 477, "end_page": 481},
    {"title": "v15_K_Marx_Pelissier_Mission_to_England", "start_page": 482, "end_page": 484},
    
    # April 1858 (Items 88-93)
    {"title": "v15_K_Marx_The_French_Trials_in_London", "start_page": 490, "end_page": 498},
    {"title": "v15_K_Marx_The_Financial_State_of_France", "start_page": 499, "end_page": 503},
    {"title": "v15_K_Marx_Mr_Disraeli_Budget", "start_page": 510, "end_page": 514},
    {"title": "v15_K_Marx_The_English_Alliance", "start_page": 515, "end_page": 520},
    {"title": "v15_K_Marx_Important_British_Documents", "start_page": 521, "end_page": 526},
    
    # May 1858 (Items 94-99)
    {"title": "v15_K_Marx_The_Annexation_of_Oude", "start_page": 533, "end_page": 538},
    {"title": "v15_K_Marx_A_Curious_Piece_of_History", "start_page": 539, "end_page": 545},
    {"title": "v15_K_Marx_Lord_Canning_Proclamation_and_Land_Tenure_in_India", "start_page": 546, "end_page": 549},
    {"title": "v15_K_Marx_Bonaparte_Financial_Maneuvers_Military_Despotism", "start_page": 550, "end_page": 552},
    
    # June 1858 (Items 100-104)
    {"title": "v15_K_Marx_The_State_of_British_Commerce", "start_page": 560, "end_page": 565},
    {"title": "v15_K_Marx_Political_Parties_in_England_Situation_in_Europe", "start_page": 566, "end_page": 569},
    {"title": "v15_K_Marx_The_British_Government_and_the_Slave_Trade", "start_page": 570, "end_page": 574},
    {"title": "v15_K_Marx_Taxation_in_India", "start_page": 575, "end_page": 579},
    
    # July 1858 (Items 105-110)
    {"title": "v15_K_Marx_The_Indian_Bill", "start_page": 585, "end_page": 588},
    {"title": "v15_K_Marx_To_the_Editor_of_the_Neue_Zeit", "start_page": 589, "end_page": 589},
    {"title": "v15_K_Marx_Imprisonment_of_Lady_Bulwer_Lytton", "start_page": 596, "end_page": 601},
    {"title": "v15_K_Marx_The_Increase_of_Lunacy_in_Great_Britain", "start_page": 602, "end_page": 606},
    
    # Preparatory Materials (Item 112)
    {"title": "v15_K_Marx_Venice", "start_page": 615, "end_page": 620},
    
    # Supplement (Item 113)
    {"title": "v15_K_Marx_Revolutionary_Spain", "start_page": 621, "end_page": 632}
]

def main():
    """
    Split the Volume 15 PDF into one PDF file per table-of-contents entry.

    For every entry of ``table_of_contents`` the title is turned into a file
    name with ``sanitize_filename`` and the page range is written to
    ``marx_chapters_v15/<name>.pdf`` via ``extract_chapter``. Progress, a
    success/failure count and the list of PDF files found in the output
    folder are printed. Errors while opening or processing the source PDF are
    reported on stdout instead of being raised.
    """
    # The source PDF is expected in the current user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 15_ Ka - Karl Marx.pdf")

    output_dir = "marx_chapters_v15"
    os.makedirs(output_dir, exist_ok=True)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already produced in this run, compared case-insensitively
        # because the target file system may not distinguish case.
        used_names = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)

            # Several articles share a title; without disambiguation the later
            # chapter would overwrite the earlier chapter's file.
            output_stem = clean_title
            if output_stem.casefold() in used_names:
                output_stem = f"{clean_title}_p{start_page}"
                copy_number = 2
                while output_stem.casefold() in used_names:
                    output_stem = f"{clean_title}_p{start_page}_{copy_number}"
                    copy_number += 1
                print(f"⚠ duplicate title, saving as: {output_stem}.pdf")
            used_names.add(output_stem.casefold())

            output_filename = f"{output_stem}.pdf"
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF present in the folder, including files left over
        # from earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 15_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v15
expected_file_numbers 88

[1/88] is processing: v15_K_Marx_Sardinia
✓ generated：marx_chapters_v15/v15_K_Marx_Sardinia.pdf (page_num 3-7 -> PDF_page 32-36)
[2/88] is processing: v15_K_Marx_The_French_Credit_Mobilier_I
✓ generated：marx_chapters_v15/v15_K_Marx_The_French_Credit_Mobilier_I.pdf (page_num 8-13 -> PDF_page 37-42)
[3/88] is processing: v15_K_Marx_The_French_Credit_Mobilier_II
✓ generated：marx_chapters_v15/v15_K_Marx_The_French_Credit_Mobilier_II.pdf (page_num 14-18 -> PDF_page 43-47)
[4/88] is processing: v15_K_Marx_The_French_Credit_Mobilier_III
✓ generated：marx_chapters_v15/v15_K_Marx_The_French_Credit_Mobilier_III.pdf (page_num 19-24 -> PDF_page 48-53)
[5/88] is processing: v15_K_Marx_Revelations_of_the_Diplomatic_History_of_the_18th_Century
✓ generated：marx_chapters_v15/v15_K_Marx_Revelations_of_the_Diplomatic_History_of_the_18th_Century.

In [31]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Reduce a chapter title to characters that are safe in a file name.

    Each space is replaced by an underscore and the characters
    < > : " / \\ | ? * . , ( ) [ ] are dropped. "Unknown" is returned when
    the cleaned title is empty or whitespace-only.
    """
    unwanted = '<>:"/\\|?*.,()[]'

    # Walk the title once: skip unwanted characters, swap spaces for "_".
    kept = ('_' if ch == ' ' else ch for ch in filename if ch not in unwanted)
    safe_name = ''.join(kept)

    if safe_name.strip() == '':
        return "Unknown"
    return safe_name

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Copy one chapter out of an already opened PDF into its own file.

    ``start_page`` and ``end_page`` are the page numbers listed in the table
    of contents (both included). Adding ``page_offset`` turns them into
    0-based indexes into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: Object exposing the source pages as ``.pages``
            (a ``PdfReader`` in this notebook).
        output_path (str): Path of the PDF to create.
        start_page (int): First page of the chapter.
        end_page (int): Last page of the chapter (included).
        title (str): Chapter title; only used in the failure message.
        page_offset (int): Amount added to a page number to get its index in
            the PDF. Defaults to 30, the value this cell has always used.

    Returns:
        bool: True when the file was written, False on any failure. The
        reason is printed; nothing is raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an inverted range used
        # to produce an empty PDF that was reported as a success, and a
        # negative index could wrap around to pages at the end of the file.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # Indexes are 0-based; the message shows them 1-based, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort on purpose: report the problem and let the caller count
        # the failure so one bad entry does not stop the remaining chapters.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 16, listed in ascending page order.
# Each entry has three keys:
#   "title"      - sanitized by main() into the name of the output PDF
#   "start_page" - first page of the piece
#   "end_page"   - last page of the piece (included)
# extract_chapter() shifts both page numbers by a fixed offset to locate the
# pages inside the PDF file.
# NOTE(review): the "Items a-b" ranges in the section comments do not always
# match the number of entries listed below them, and some page ranges are
# skipped (e.g. 59-64). Presumably only part of the volume is extracted and
# the item numbers refer to the volume's own contents list - confirm.
table_of_contents = [
    # 1858 Articles
    # August 1858 (Items 1-3)
    {"title": "v16_K_Marx_The_English_Bank_Act_of_1844", "start_page": 3, "end_page": 7},
    {"title": "v16_K_Marx_Commercial_Crises_and_Currency_in_Britain", "start_page": 8, "end_page": 12},
    {"title": "v16_K_Marx_History_of_the_Opium_Trade", "start_page": 13, "end_page": 16},
    
    # September 1858 (Items 4-10)
    {"title": "v16_K_Marx_History_of_the_Opium_Trade_2", "start_page": 17, "end_page": 20},
    {"title": "v16_K_Marx_Another_Strange_Chapter_of_Modern_History", "start_page": 21, "end_page": 27},
    {"title": "v16_K_Marx_The_Anglo_Chinese_Treaty", "start_page": 28, "end_page": 32},
    {"title": "v16_K_Marx_British_Commerce_and_Finance", "start_page": 33, "end_page": 36},
    {"title": "v16_K_Marx_Mazzini_New_Manifesto", "start_page": 37, "end_page": 40},
    {"title": "v16_K_Marx_A_New_French_Revolutionary_Manifesto", "start_page": 41, "end_page": 45},
    {"title": "v16_K_Marx_The_British_and_Chinese_Treaty", "start_page": 46, "end_page": 50},
    
    # October 1858 (Items 11-19)
    {"title": "v16_K_Marx_The_Question_of_the_Abolition_of_Serfdom_in_Russia", "start_page": 51, "end_page": 53},
    {"title": "v16_K_Marx_The_King_of_Prussia_Insanity", "start_page": 54, "end_page": 58},
    {"title": "v16_K_Marx_The_King_of_Prussia_Insanity_2", "start_page": 65, "end_page": 69},
    {"title": "v16_K_Marx_The_Prussian_Regency", "start_page": 70, "end_page": 73},
    {"title": "v16_K_Marx_Affairs_in_Prussia", "start_page": 74, "end_page": 77},
    {"title": "v16_K_Marx_Affairs_in_Prussia_2", "start_page": 78, "end_page": 81},
    {"title": "v16_K_Marx_Mr_John_Bright", "start_page": 87, "end_page": 90},
    
    # November 1858 (Items 20-26)
    {"title": "v16_K_Marx_The_New_Ministry", "start_page": 96, "end_page": 100},
    {"title": "v16_K_Marx_The_New_Ministry_2", "start_page": 101, "end_page": 105},
    {"title": "v16_K_Marx_Affairs_in_Prussia_3", "start_page": 106, "end_page": 109},
    {"title": "v16_K_Marx_Project_for_the_Regulation_of_the_Price_of_Bread_in_France", "start_page": 110, "end_page": 114},
    {"title": "v16_K_Marx_Affairs_in_Prussia_4", "start_page": 115, "end_page": 119},
    
    # December 1858 (Items 27-30)
    {"title": "v16_K_Marx_Affairs_in_Prussia_5", "start_page": 125, "end_page": 128},
    {"title": "v16_K_Marx_Question_of_the_Ionian_Islands", "start_page": 129, "end_page": 133},
    {"title": "v16_K_Marx_The_Excitement_in_Ireland", "start_page": 134, "end_page": 138},
    {"title": "v16_K_Marx_The_Emancipation_Question", "start_page": 139, "end_page": 147},
    
    # 1859 Articles
    # January 1859 (Items 31-36)
    {"title": "v16_K_Marx_On_Italian_Unity", "start_page": 148, "end_page": 153},
    {"title": "v16_K_Marx_The_War_Prospect_in_Europe", "start_page": 154, "end_page": 157},
    {"title": "v16_K_Marx_Affairs_in_Prussia_6", "start_page": 158, "end_page": 161},
    {"title": "v16_K_Marx_and_F_Engels_The_Money_Panic_in_Europe", "start_page": 162, "end_page": 166},
    {"title": "v16_K_Marx_Louis_Napoleon_Position", "start_page": 167, "end_page": 170},
    
    # February 1859 (Items 37-40)
    {"title": "v16_K_Marx_The_State_of_British_Manufactures", "start_page": 190, "end_page": 196},
    
    # March 1859 (Items 41-48)
    {"title": "v16_K_Marx_The_New_British_Reform_Bill", "start_page": 202, "end_page": 205},
    {"title": "v16_K_Marx_The_State_of_British_Manufactures_2", "start_page": 206, "end_page": 210},
    {"title": "v16_K_Marx_Peace_or_War", "start_page": 256, "end_page": 257},
    {"title": "v16_K_Marx_A_Sigh_from_the_Tuileries", "start_page": 258, "end_page": 260},
    {"title": "v16_K_Marx_The_War_Prospect_in_France", "start_page": 261, "end_page": 266},
    {"title": "v16_K_Marx_The_War_Prospect_in_Prussia", "start_page": 267, "end_page": 270},
    {"title": "v16_K_Marx_A_Historic_Parallel", "start_page": 271, "end_page": 273},
    
    # April 1859 (Items 49-55)
    {"title": "v16_K_Marx_Great_Trouble_in_Indian_Finances", "start_page": 279, "end_page": 286},
    {"title": "v16_K_Marx_The_Proposed_Peace_Congress", "start_page": 290, "end_page": 294},
    {"title": "v16_K_Marx_and_F_Engels_The_State_of_the_Question_Germany_Arming", "start_page": 295, "end_page": 298},
    {"title": "v16_K_Marx_The_Financial_Panic", "start_page": 303, "end_page": 306},
    
    # May 1859 (Items 56-66)
    {"title": "v16_K_Marx_Fair_Professions", "start_page": 307, "end_page": 309},
    {"title": "v16_K_Marx_Austria_Prussia_and_Germany_in_the_War", "start_page": 310, "end_page": 314},
    {"title": "v16_K_Marx_Highly_Important_from_Vienna", "start_page": 320, "end_page": 326},
    {"title": "v16_K_Marx_A_Prussian_View_of_the_War", "start_page": 341, "end_page": 345},
    {"title": "v16_K_Marx_Mazzini_Manifesto", "start_page": 354, "end_page": 359},
    
    # June 1859 (Items 67-74)
    {"title": "v16_K_Marx_Spree_and_Mincio", "start_page": 380, "end_page": 383},
    
    # July 1859 (Items 75-87)

    {"title": "v16_K_Marx_Erfurtery_in_the_Year_1859", "start_page": 404, "end_page": 406},
    {"title": "v16_K_Marx_What_Has_Italy_Gained", "start_page": 407, "end_page": 409},
    {"title": "v16_K_Marx_On_Ernest_Jones_from_the_Political_Review_of_Das_Volk", "start_page": 410, "end_page": 411},
    {"title": "v16_K_Marx_The_Peace", "start_page": 412, "end_page": 414},
    {"title": "v16_K_Marx_Introductory_Note_to_the_Memoir_on_Russia_for_the_Instruction_of_the_Present_Emperor", "start_page": 415, "end_page": 415},
    {"title": "v16_K_Marx_The_Treaty_of_Villafranca", "start_page": 416, "end_page": 420},
    {"title": "v16_K_Marx_Truth_Testified", "start_page": 435, "end_page": 438},
    {"title": "v16_K_Marx_Invasion", "start_page": 439, "end_page": 441},
    {"title": "v16_K_Marx_The_French_Disarmament", "start_page": 442, "end_page": 444},
    {"title": "v16_K_Marx_Quid_pro_Quo", "start_page": 445, "end_page": 462},
    
    # August 1859 (Items 88-91)
    {"title": "v16_K_Marx_British_Commerce", "start_page": 478, "end_page": 481},
    {"title": "v16_K_Marx_Louis_Napoleon_and_Italy", "start_page": 482, "end_page": 486},
    {"title": "v16_K_Marx_Population_Crime_and_Pauperism", "start_page": 487, "end_page": 491},
    
    # September 1859 (Items 92-96)
    {"title": "v16_K_Marx_Manufactures_and_Commerce", "start_page": 492, "end_page": 496},
    {"title": "v16_K_Marx_Kossuth_and_Louis_Napoleon", "start_page": 497, "end_page": 503},
    {"title": "v16_K_Marx_The_Future_of_Italy", "start_page": 504, "end_page": 507},
    {"title": "v16_K_Marx_The_New_Chinese_War", "start_page": 508, "end_page": 524},
    {"title": "v16_K_Marx_To_the_Editor_of_The_Free_Press", "start_page": 525, "end_page": 525},
    
    # October 1859 (Items 97-98)
    {"title": "v16_K_Marx_Electoral_Corruption_in_England", "start_page": 526, "end_page": 531},
    {"title": "v16_K_Marx_A_Radical_View_of_the_Peace", "start_page": 532, "end_page": 535},
    
    # November 1859 (Items 99-101)
    {"title": "v16_K_Marx_Trade_with_China", "start_page": 536, "end_page": 539},
    {"title": "v16_K_Marx_Trouble_in_Germany", "start_page": 540, "end_page": 544},
    {"title": "v16_K_Marx_The_Invasion_Panic_in_England", "start_page": 545, "end_page": 547},
    

    # Preparatory Materials (Items 107-109)
    {"title": "v16_K_Marx_Symptoms_of_the_Revival_of_France_Internal_Life", "start_page": 613, "end_page": 614},
    {"title": "v16_K_Marx_On_the_Division_of_Labour", "start_page": 617, "end_page": 620}
    
]

def main(input_pdf_path=None, output_dir="marx_chapters_v16", chapters=None):
    """
    Split the Volume 16 PDF into one file per table-of-contents entry.

    Args:
        input_pdf_path (str | None): Source PDF. When omitted, the Volume 16
            file in the current user's ``~/Downloads`` folder is used.
        output_dir (str): Folder that receives the chapter PDFs; it is
            created when missing.
        chapters (list[dict] | None): Entries with "title", "start_page" and
            "end_page" keys. When omitted, the module-level
            ``table_of_contents`` is used.

    Returns:
        None. Progress, a success/failure tally and the PDFs found in
        ``output_dir`` are printed. Errors are printed, not raised.
    """
    if input_pdf_path is None:
        # The volume is read from the Downloads folder of the current user.
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 16_Ma - Karl Marx.pdf")
    if chapters is None:
        chapters = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(chapters)}\n")

    try:
        # Created inside the try so that a failure here is reported through
        # the same handler as an unreadable PDF instead of a raw traceback.
        os.makedirs(output_dir, exist_ok=True)

        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(chapters, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(chapters)}] is processing: {title}")

            # extract_chapter reports its own errors and returns False, so a
            # single bad entry only increments the failure tally.
            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, so files left by earlier runs are
        # included in the total as well.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 16_Ma - Karl Marx.pdf
folder_name: marx_chapters_v16
expected_file_numbers 74

[1/74] is processing: v16_K_Marx_The_English_Bank_Act_of_1844
✓ generated：marx_chapters_v16/v16_K_Marx_The_English_Bank_Act_of_1844.pdf (page_num 3-7 -> PDF_page 34-38)
[2/74] is processing: v16_K_Marx_Commercial_Crises_and_Currency_in_Britain
✓ generated：marx_chapters_v16/v16_K_Marx_Commercial_Crises_and_Currency_in_Britain.pdf (page_num 8-12 -> PDF_page 39-43)
[3/74] is processing: v16_K_Marx_History_of_the_Opium_Trade
✓ generated：marx_chapters_v16/v16_K_Marx_History_of_the_Opium_Trade.pdf (page_num 13-16 -> PDF_page 44-47)
[4/74] is processing: v16_K_Marx_History_of_the_Opium_Trade_2
✓ generated：marx_chapters_v16/v16_K_Marx_History_of_the_Opium_Trade_2.pdf (page_num 17-20 -> PDF_page 48-51)
[5/74] is processing: v16_K_Marx_Another_Strange_Chapter_of_Modern_History
✓ generated：marx_chapters_v16/v16_K_Marx_Another_Strange_Cha

In [33]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    The characters < > : " / backslash | ? * . , ( ) [ ] are removed and
    every space is replaced by an underscore.

    Args:
        filename (str): Raw title.

    Returns:
        str: The cleaned title, or "Unknown" when nothing but whitespace is
        left once the unwanted characters have been removed.
    """
    # A single character class covers everything the title must not contain.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # Check for blankness *before* spaces become underscores; otherwise a
    # whitespace-only title would come back as "___" instead of "Unknown".
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=24):
    """
    Copy one chapter out of an already opened PDF into its own file.

    ``start_page`` and ``end_page`` are the page numbers listed in the table
    of contents (both included). Adding ``page_offset`` turns them into
    0-based indexes into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: Object exposing the source pages as ``.pages``
            (a ``PdfReader`` in this notebook).
        output_path (str): Path of the PDF to create.
        start_page (int): First page of the chapter.
        end_page (int): Last page of the chapter (included).
        title (str): Chapter title; only used in the failure message.
        page_offset (int): Amount added to a page number to get its index in
            the PDF. Defaults to 24, the value this cell has always used.

    Returns:
        bool: True when the file was written, False on any failure. The
        reason is printed; nothing is raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an inverted range used
        # to produce an empty PDF that was reported as a success, and a
        # negative index could wrap around to pages at the end of the file.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # Indexes are 0-based; the message shows them 1-based, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort on purpose: report the problem and let the caller count
        # the failure so one bad entry does not stop the remaining chapters.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 17, listed in ascending page order.
# Each entry has three keys:
#   "title"      - sanitized by main() into the name of the output PDF
#   "start_page" - first page of the piece
#   "end_page"   - last page of the piece (included)
# extract_chapter() shifts both page numbers by a fixed offset to locate the
# pages inside the PDF file.
# NOTE(review): the "Items a-b" ranges in the section comments do not match
# the number of entries listed below them, and some page ranges are skipped
# (e.g. 345-349). Presumably only part of the volume is extracted and the
# item numbers refer to the volume's own contents list - confirm.
table_of_contents = [
    # Volume 17 (October 1859 - December 1860)
    # Opening Letters and Statements (Items 1-10)
    {"title": "v17_K_Marx_Letter_to_the_Editor_of_the_Allgemeine_Zeitung", "start_page": 3, "end_page": 3},
    {"title": "v17_K_Marx_Statement_to_the_Editors_of_Die_Reform_the_Volks_Zeitung_and_the_Allgemeine_Zeitung", "start_page": 4, "end_page": 7},
    {"title": "v17_K_Marx_Declaration", "start_page": 8, "end_page": 9},
    {"title": "v17_K_Marx_Prosecution_of_the_Augsburg_Gazette", "start_page": 10, "end_page": 11},
    {"title": "v17_K_Marx_To_the_Editors_of_the_Volks_Zeitung_Declaration", "start_page": 12, "end_page": 13},
    {"title": "v17_K_Marx_To_the_Editor_of_the_Daily_Telegraph", "start_page": 14, "end_page": 15},
    {"title": "v17_K_Marx_To_the_Editors_of_the_Augsburg_Allgemeine_Zeitung", "start_page": 16, "end_page": 17},
    {"title": "v17_K_Marx_To_the_Editors_of_Die_Reform", "start_page": 18, "end_page": 18},
    {"title": "v17_K_Marx_Declaration_2", "start_page": 19, "end_page": 20},
    {"title": "v17_K_Marx_Herr_Vogt", "start_page": 21, "end_page": 329},
    
    # Regular Articles (Items 11-49)
    {"title": "v17_K_Marx_Affairs_in_France", "start_page": 330, "end_page": 334},
    {"title": "v17_K_Marx_English_Politics", "start_page": 335, "end_page": 340},
    {"title": "v17_K_Marx_The_New_Treaty_between_France_and_England", "start_page": 341, "end_page": 344},
    {"title": "v17_K_Marx_The_English_Budget", "start_page": 350, "end_page": 353},
    {"title": "v17_F_Engels_On_Rifled_Cannon", "start_page": 354, "end_page": 366},
    {"title": "v17_K_Marx_Public_Feeling_in_Berlin", "start_page": 367, "end_page": 369},
    {"title": "v17_K_Marx_Sicily_and_the_Sicilians", "start_page": 370, "end_page": 372},
    {"title": "v17_K_Marx_Preparations_for_Napoleon_Coming_War_on_the_Rhine", "start_page": 373, "end_page": 380},
    {"title": "v17_K_Marx_Garibaldi_in_Sicily_Affairs_in_Prussia", "start_page": 381, "end_page": 385},
    {"title": "v17_K_Marx_The_Emperor_Napoleon_III_and_Prussia", "start_page": 391, "end_page": 396},
    {"title": "v17_K_Marx_Interesting_from_Prussia", "start_page": 397, "end_page": 402},
    {"title": "v17_K_Marx_British_Commerce", "start_page": 406, "end_page": 409},
    {"title": "v17_K_Marx_The_State_of_British_Manufacturing_Industry", "start_page": 410, "end_page": 420},
    {"title": "v17_K_Marx_Interesting_from_Sicily_Garibaldi_Quarrel_with_La_Farina_A_Letter_from_Garibaldi", "start_page": 421, "end_page": 424},
    {"title": "v17_K_Marx_Events_in_Syria_Session_of_the_British_Parliament_The_State_of_British_Commerce", "start_page": 429, "end_page": 433},
    {"title": "v17_K_Marx_The_Russo_French_Alliance", "start_page": 439, "end_page": 443},
    {"title": "v17_K_Marx_The_Paper_Tax_The_Emperor_Letter", "start_page": 444, "end_page": 448},
    {"title": "v17_K_Marx_The_New_Sardinian_Loan_The_Impending_French_and_Indian_Loans", "start_page": 453, "end_page": 456},
    {"title": "v17_K_Marx_The_Crops_in_Europe", "start_page": 461, "end_page": 464},
    {"title": "v17_K_Marx_Corn_Prices_European_Finances_and_War_Preparations_The_Oriental_Question", "start_page": 465, "end_page": 470},
    {"title": "v17_K_Marx_British_Commerce_2", "start_page": 479, "end_page": 483},
    {"title": "v17_K_Marx_Russia_Using_Austria_The_Meeting_at_Warsaw", "start_page": 484, "end_page": 487},
    {"title": "v17_K_Marx_Affairs_in_Prussia_Prussia_France_and_Italy", "start_page": 488, "end_page": 492},
    {"title": "v17_K_Marx_Great_Britain_A_Money_Stringency", "start_page": 497, "end_page": 498},
    
    # Preparatory Materials (Item 50)
    {"title": "v17_K_Marx_Extracts_from_Imre_Szabo_Work_The_State_Policy_of_Modern_Europe_from_the_Beginning_of_the_Sixteenth_Century_to_the_Present_Time_In_Two_Volumes_London_1857", "start_page": 505, "end_page": 530}
    
]

def main(input_pdf_path=None, output_dir="marx_chapters_v17", chapters=None):
    """
    Split the Volume 17 PDF into one file per table-of-contents entry.

    Args:
        input_pdf_path (str | None): Source PDF. When omitted, the Volume 17
            file in the current user's ``~/Downloads`` folder is used.
        output_dir (str): Folder that receives the chapter PDFs; it is
            created when missing.
        chapters (list[dict] | None): Entries with "title", "start_page" and
            "end_page" keys. When omitted, the module-level
            ``table_of_contents`` is used.

    Returns:
        None. Progress, a success/failure tally and the PDFs found in
        ``output_dir`` are printed. Errors are printed, not raised.
    """
    if input_pdf_path is None:
        # The volume is read from the Downloads folder of the current user.
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 17_ M - Karl Marx.pdf")
    if chapters is None:
        chapters = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(chapters)}\n")

    try:
        # Created inside the try so that a failure here is reported through
        # the same handler as an unreadable PDF instead of a raw traceback.
        os.makedirs(output_dir, exist_ok=True)

        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(chapters, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(chapters)}] is processing: {title}")

            # extract_chapter reports its own errors and returns False, so a
            # single bad entry only increments the failure tally.
            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, so files left by earlier runs are
        # included in the total as well.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 17_ M - Karl Marx.pdf
folder_name: marx_chapters_v17
expected_file_numbers 35

[1/35] is processing: v17_K_Marx_Letter_to_the_Editor_of_the_Allgemeine_Zeitung
✓ generated：marx_chapters_v17/v17_K_Marx_Letter_to_the_Editor_of_the_Allgemeine_Zeitung.pdf (page_num 3-3 -> PDF_page 28-28)
[2/35] is processing: v17_K_Marx_Statement_to_the_Editors_of_Die_Reform_the_Volks_Zeitung_and_the_Allgemeine_Zeitung
✓ generated：marx_chapters_v17/v17_K_Marx_Statement_to_the_Editors_of_Die_Reform_the_Volks_Zeitung_and_the_Allgemeine_Zeitung.pdf (page_num 4-7 -> PDF_page 29-32)
[3/35] is processing: v17_K_Marx_Declaration
✓ generated：marx_chapters_v17/v17_K_Marx_Declaration.pdf (page_num 8-9 -> PDF_page 33-34)
[4/35] is processing: v17_K_Marx_Prosecution_of_the_Augsburg_Gazette
✓ generated：marx_chapters_v17/v17_K_Marx_Prosecution_of_the_Augsburg_Gazette.pdf (page_num 10-11 -> PDF_page 35-36)
[5/35] is processing: v17_K_Marx_T

In [34]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    The characters < > : " / backslash | ? * . , ( ) [ ] are removed and
    every space is replaced by an underscore.

    Args:
        filename (str): Raw title.

    Returns:
        str: The cleaned title, or "Unknown" when nothing but whitespace is
        left once the unwanted characters have been removed.
    """
    # A single character class covers everything the title must not contain.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # Check for blankness *before* spaces become underscores; otherwise a
    # whitespace-only title would come back as "___" instead of "Unknown".
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=26):
    """
    Copy one chapter out of an already opened PDF into its own file.

    ``start_page`` and ``end_page`` are the page numbers listed in the table
    of contents (both included). Adding ``page_offset`` turns them into
    0-based indexes into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: Object exposing the source pages as ``.pages``
            (a ``PdfReader`` in this notebook).
        output_path (str): Path of the PDF to create.
        start_page (int): First page of the chapter.
        end_page (int): Last page of the chapter (included).
        title (str): Chapter title; only used in the failure message.
        page_offset (int): Amount added to a page number to get its index in
            the PDF. Defaults to 26, the value this cell has always used.

    Returns:
        bool: True when the file was written, False on any failure. The
        reason is printed; nothing is raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an inverted range used
        # to produce an empty PDF that was reported as a success, and a
        # negative index could wrap around to pages at the end of the file.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # Indexes are 0-based; the message shows them 1-based, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort on purpose: report the problem and let the caller count
        # the failure so one bad entry does not stop the remaining chapters.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 18, listed in ascending page order.
# Each entry has three keys:
#   "title"      - sanitized by main() into the name of the output PDF
#   "start_page" - first page of the piece
#   "end_page"   - last page of the piece (included)
# extract_chapter() shifts both page numbers by a fixed offset to locate the
# pages inside the PDF file.
# NOTE(review): the "Items a-b" ranges in the section comments do not match
# the number of entries listed below them, and many page ranges are skipped
# (e.g. 60-75). Presumably only part of the volume is extracted and the item
# numbers refer to the volume's own contents list - confirm.
table_of_contents = [
    # Articles for The New American Cyclopaedia (Items 1-80+)
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Barclay_de_Tolly", "start_page": 50, "end_page": 52},
    {"title": "v18_Karl_Marx_Berthier", "start_page": 56, "end_page": 59},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Bennigsen", "start_page": 76, "end_page": 79},
    {"title": "v18_Karl_Marx_Blum", "start_page": 80, "end_page": 82},
    {"title": "v18_Karl_Marx_Bourrienne", "start_page": 83, "end_page": 84},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Bem", "start_page": 130, "end_page": 133},
    {"title": "v18_Karl_Marx_Bessieres", "start_page": 134, "end_page": 135},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Bosquet", "start_page": 139, "end_page": 140},
    {"title": "v18_Karl_Marx_Bernadotte", "start_page": 149, "end_page": 158},
    {"title": "v18_Karl_Marx_Brown", "start_page": 164, "end_page": 165},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Armada", "start_page": 166, "end_page": 169},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Ayacucho", "start_page": 170, "end_page": 171},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Blucher", "start_page": 172, "end_page": 187},
    {"title": "v18_Karl_Marx_Bugeaud", "start_page": 211, "end_page": 214},
    {"title": "v18_Karl_Marx_Brune", "start_page": 215, "end_page": 218},
    {"title": "v18_Karl_Marx_Bolivar_y_Ponte", "start_page": 219, "end_page": 233},
    {"title": "v18_Karl_Marx_Bulow", "start_page": 288, "end_page": 288},
    {"title": "v18_Karl_Marx_and_Frederick_Engels_Beresford", "start_page": 289, "end_page": 290},
    
    # Preparatory Materials for The New American Cyclopaedia (Items 81-85)
    {"title": "v18_Karl_Marx_Excerpts_from_the_Article_Blum_Published_in_Meyer_Conversations_Lexicon", "start_page": 391, "end_page": 393},
    {"title": "v18_Karl_Marx_Excerpts_Made_for_the_Article_Bourrienne", "start_page": 394, "end_page": 396},
    {"title": "v18_Karl_Marx_Rough_Draft_of_the_Article_Brune", "start_page": 397, "end_page": 401},
    {"title": "v18_Karl_Marx_Excerpts_from_the_Article_Bulow_Published_in_Meyer_Conversations_Lexicon", "start_page": 402, "end_page": 406},
    
    
]

def main(input_pdf_path=None, output_dir="marx_chapters_v18", chapters=None):
    """
    Split the Volume 18 PDF into one file per table-of-contents entry.

    Args:
        input_pdf_path (str | None): Source PDF. When omitted, the Volume 18
            file in the current user's ``~/Downloads`` folder is used.
        output_dir (str): Folder that receives the chapter PDFs; it is
            created when missing.
        chapters (list[dict] | None): Entries with "title", "start_page" and
            "end_page" keys. When omitted, the module-level
            ``table_of_contents`` is used.

    Returns:
        None. Progress, a success/failure tally and the PDFs found in
        ``output_dir`` are printed. Errors are printed, not raised.
    """
    if input_pdf_path is None:
        # The volume is read from the Downloads folder of the current user.
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 18_ Ka - Karl Marx.pdf")
    if chapters is None:
        chapters = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(chapters)}\n")

    try:
        # Created inside the try so that a failure here is reported through
        # the same handler as an unreadable PDF instead of a raw traceback.
        os.makedirs(output_dir, exist_ok=True)

        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(chapters, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(chapters)}] is processing: {title}")

            # extract_chapter reports its own errors and returns False, so a
            # single bad entry only increments the failure tally.
            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print("\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, so files left by earlier runs are
        # included in the total as well.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 18_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v18
expected_file_numbers 22

[1/22] is processing: v18_Karl_Marx_and_Frederick_Engels_Barclay_de_Tolly
✓ generated：marx_chapters_v18/v18_Karl_Marx_and_Frederick_Engels_Barclay_de_Tolly.pdf (page_num 50-52 -> PDF_page 77-79)
[2/22] is processing: v18_Karl_Marx_Berthier
✓ generated：marx_chapters_v18/v18_Karl_Marx_Berthier.pdf (page_num 56-59 -> PDF_page 83-86)
[3/22] is processing: v18_Karl_Marx_and_Frederick_Engels_Bennigsen
✓ generated：marx_chapters_v18/v18_Karl_Marx_and_Frederick_Engels_Bennigsen.pdf (page_num 76-79 -> PDF_page 103-106)
[4/22] is processing: v18_Karl_Marx_Blum
✓ generated：marx_chapters_v18/v18_Karl_Marx_Blum.pdf (page_num 80-82 -> PDF_page 107-109)
[5/22] is processing: v18_Karl_Marx_Bourrienne
✓ generated：marx_chapters_v18/v18_Karl_Marx_Bourrienne.pdf (page_num 83-84 -> PDF_page 110-111)
[6/22] is processing: v18_Karl_Marx_and_Frederic

In [35]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    The characters < > : " / backslash | ? * . , ( ) [ ] are removed and
    every space is replaced by an underscore.

    Args:
        filename (str): Raw title.

    Returns:
        str: The cleaned title, or "Unknown" when nothing but whitespace is
        left once the unwanted characters have been removed.
    """
    # A single character class covers everything the title must not contain.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # Check for blankness *before* spaces become underscores; otherwise a
    # whitespace-only title would come back as "___" instead of "Unknown".
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=26):
    """
    Copy one chapter out of an already opened PDF into its own file.

    ``start_page`` and ``end_page`` are the page numbers listed in the table
    of contents (both included). Adding ``page_offset`` turns them into
    0-based indexes into ``input_pdf_reader.pages``.

    Args:
        input_pdf_reader: Object exposing the source pages as ``.pages``
            (a ``PdfReader`` in this notebook).
        output_path (str): Path of the PDF to create.
        start_page (int): First page of the chapter.
        end_page (int): Last page of the chapter (included).
        title (str): Chapter title; only used in the failure message.
        page_offset (int): Amount added to a page number to get its index in
            the PDF. Defaults to 26, the value this cell has always used.

    Returns:
        bool: True when the file was written, False on any failure. The
        reason is printed; nothing is raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an inverted range used
        # to produce an empty PDF that was reported as a success, and a
        # negative index could wrap around to pages at the end of the file.
        if end_page < start_page:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # Indexes are 0-based; the message shows them 1-based, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best effort on purpose: report the problem and let the caller count
        # the failure so one bad entry does not stop the remaining chapters.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to extract from Volume 19, listed in ascending page order.
# Each entry has three keys:
#   "title"      - sanitized by main() into the name of the output PDF
#   "start_page" - first page of the piece
#   "end_page"   - last page of the piece (included)
# extract_chapter() shifts both page numbers by a fixed offset to locate the
# pages inside the PDF file.
# NOTE(review): the "Items a-b" ranges in the section comments do not match
# the number of entries listed below them, and some page ranges are skipped
# (e.g. 274-295). Presumably only part of the volume is extracted and the
# item numbers refer to the volume's own contents list - confirm.
table_of_contents = [
    # Main Works (Items 1-72)
    {"title": "v19_K_Marx_The_American_Question_in_England", "start_page": 7, "end_page": 16},
    {"title": "v19_K_Marx_The_British_Cotton_Trade", "start_page": 17, "end_page": 20},
    {"title": "v19_K_Marx_The_London_Times_and_Lord_Palmerston", "start_page": 21, "end_page": 26},
    {"title": "v19_K_Marx_The_London_Times_on_the_Orleans_Princes_in_America", "start_page": 27, "end_page": 31},
    {"title": "v19_K_Marx_The_North_American_Civil_War", "start_page": 32, "end_page": 42},
    {"title": "v19_K_Marx_The_Civil_War_in_the_United_States", "start_page": 43, "end_page": 52},
    {"title": "v19_K_Marx_The_Crisis_in_England", "start_page": 53, "end_page": 56},
    {"title": "v19_K_Marx_British_Commerce", "start_page": 57, "end_page": 61},
    {"title": "v19_K_Marx_Economic_Notes", "start_page": 62, "end_page": 65},
    {"title": "v19_K_Marx_Intervention_in_Mexico", "start_page": 66, "end_page": 70},
    {"title": "v19_K_Marx_The_Intervention_in_Mexico", "start_page": 71, "end_page": 78},
    {"title": "v19_K_Marx_Monsieur_Fould", "start_page": 79, "end_page": 81},
    {"title": "v19_K_Marx_France_Financial_Situation", "start_page": 82, "end_page": 85},
    {"title": "v19_K_Marx_The_Dismissal_of_Fremont", "start_page": 86, "end_page": 88},
    {"title": "v19_K_Marx_The_Trent_Case", "start_page": 89, "end_page": 91},
    {"title": "v19_K_Marx_The_Anglo_American_Conflict", "start_page": 92, "end_page": 94},
    {"title": "v19_K_Marx_The_News_and_Its_Effect_in_London", "start_page": 95, "end_page": 100},
    {"title": "v19_K_Marx_The_Principal_Actors_in_the_Trent_Drama", "start_page": 101, "end_page": 104},
    {"title": "v19_K_Marx_Controversy_over_the_Trent_Case", "start_page": 105, "end_page": 109},
    {"title": "v19_K_Marx_Progress_of_Feeling_in_England", "start_page": 110, "end_page": 114},
    {"title": "v19_K_Marx_The_Crisis_over_the_Slavery_Issue", "start_page": 115, "end_page": 116},
    {"title": "v19_K_Marx_American_Matters", "start_page": 117, "end_page": 119},
    {"title": "v19_K_Marx_A_Slander_Trial", "start_page": 120, "end_page": 123},
    {"title": "v19_K_Marx_The_Washington_Cabinet_and_the_Western_Powers", "start_page": 124, "end_page": 126},
    {"title": "v19_K_Marx_The_Opinion_of_the_Newspapers_and_the_Opinion_of_the_People", "start_page": 127, "end_page": 130},
    {"title": "v19_K_Marx_French_News_Humbug_Economic_Consequences_of_War", "start_page": 131, "end_page": 133},
    {"title": "v19_K_Marx_A_Pro_America_Meeting", "start_page": 134, "end_page": 136},
    {"title": "v19_K_Marx_English_Public_Opinion", "start_page": 137, "end_page": 142},
    {"title": "v19_K_Marx_More_on_Seward_Suppressed_Dispatch", "start_page": 143, "end_page": 144},
    {"title": "v19_K_Marx_A_Coup_d_Etat_by_Lord_John_Russell", "start_page": 145, "end_page": 148},
    {"title": "v19_K_Marx_Statistical_Observations_on_the_Railway_System", "start_page": 149, "end_page": 152},
    {"title": "v19_K_Marx_A_London_Workers_Meeting", "start_page": 153, "end_page": 156},
    {"title": "v19_K_Marx_Anti_Intervention_Feeling", "start_page": 157, "end_page": 159},
    {"title": "v19_K_Marx_On_the_Cotton_Crisis", "start_page": 160, "end_page": 162},
    {"title": "v19_K_Marx_English", "start_page": 163, "end_page": 166},
    {"title": "v19_K_Marx_The_Parliamentary_Debate_on_the_Address", "start_page": 167, "end_page": 171},
    {"title": "v19_K_Marx_The_Mexican_Imbroglio", "start_page": 172, "end_page": 177},
    {"title": "v19_K_Marx_American_Affairs", "start_page": 178, "end_page": 181},
    {"title": "v19_K_Marx_The_Secessionists_Friends_in_the_Lower_House_Recognition_of_the_American_Blockade", "start_page": 182, "end_page": 185},
    {"title": "v19_K_Marx_and_F_Engels_The_American_Civil_War", "start_page": 186, "end_page": 195},
    {"title": "v19_K_Marx_An_International_Affaire_Mires", "start_page": 196, "end_page": 198},
    {"title": "v19_K_Marx_The_English_Press_and_the_Fall_of_New_Orleans", "start_page": 199, "end_page": 201},
    {"title": "v19_K_Marx_A_Treaty_Against_the_Slave_Trade", "start_page": 202, "end_page": 203},
    {"title": "v19_K_Marx_and_F_Engels_The_Situation_in_the_American_Theatre_of_War", "start_page": 204, "end_page": 208},
    {"title": "v19_K_Marx_English_Humanity_and_America", "start_page": 209, "end_page": 212},
    {"title": "v19_K_Marx_Chinese_Affairs", "start_page": 216, "end_page": 218},
    {"title": "v19_K_Marx_A_Scandal", "start_page": 219, "end_page": 222},
    {"title": "v19_K_Marx_A_Suppressed_Debate_on_Mexico_and_the_Alliance_with_France", "start_page": 223, "end_page": 225},
    {"title": "v19_K_Marx_A_Criticism_of_American_Affairs", "start_page": 226, "end_page": 229},
    {"title": "v19_K_Marx_Russell_Protest_Against_American_Rudeness_The_Rise_in_the_Price_of_Grain_On_the_Situation_in_Italy", "start_page": 230, "end_page": 232},
    {"title": "v19_K_Marx_Abolitionist_Demonstrations_in_America", "start_page": 233, "end_page": 235},
    {"title": "v19_K_Marx_A_Meeting_for_Garibaldi", "start_page": 236, "end_page": 238},
    {"title": "v19_K_Marx_Workers_Distress_in_England", "start_page": 239, "end_page": 242},
    {"title": "v19_K_Marx_A_Note_on_the_Amnesty", "start_page": 243, "end_page": 244},
    {"title": "v19_K_Marx_Garibaldi_Meetings_The_Distressed_Condition_of_Cotton_Workers", "start_page": 245, "end_page": 247},
    {"title": "v19_K_Marx_Comments_on_the_North_American_Events", "start_page": 248, "end_page": 251},
    {"title": "v19_K_Marx_Bread_Manufacture", "start_page": 252, "end_page": 255},
    {"title": "v19_K_Marx_The_Situation_in_North_America", "start_page": 256, "end_page": 259},
    {"title": "v19_K_Marx_Symptoms_of_Disintegration_in_the_Southern_Confederacy", "start_page": 260, "end_page": 262},
    {"title": "v19_K_Marx_The_Election_Results_in_the_Northern_States", "start_page": 263, "end_page": 265},
    {"title": "v19_K_Marx_The_Dismissal_of_McClellan", "start_page": 266, "end_page": 269},
    {"title": "v19_K_Marx_English_Neutrality_The_Situation_in_the_Southern_States", "start_page": 270, "end_page": 272},
    {"title": "v19_K_Marx_Letter_to_the_Editors_of_the_Berliner_Reform", "start_page": 273, "end_page": 273},
    {"title": "v19_K_Marx_Proclamation_on_Poland_by_the_German_Workers_Educational_Society_in_London", "start_page": 296, "end_page": 297},
    {"title": "v19_K_Marx_and_F_Engels_Obituary", "start_page": 320, "end_page": 320},
    
    # Preparatory Materials (Items 73-74)
    {"title": "v19_K_Marx_Ground_Rent", "start_page": 329, "end_page": 334},
    {"title": "v19_K_Marx_Biographical_Notes_on_Wilhelm_Wolff", "start_page": 335, "end_page": 338},
    
    # Appendices (Item 75)
    {"title": "v19_Application_by_Marx_for_Restoration_of_His_Prussian_Citizenship", "start_page": 339, "end_page": 339}
    
]

def main():
    """
    Split the Volume 19 PDF into one PDF per table_of_contents entry.

    Reads the source PDF from ~/Downloads, writes every entry to
    marx_chapters_v19/<sanitized title>.pdf through extract_chapter, and
    prints per-item progress followed by a summary. An exception raised
    while opening or processing the PDF is caught and printed rather than
    propagated.
    """
    # The source PDF is read from the user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 19_ Ka - Karl Marx.pdf")

    output_dir = "marx_chapters_v19"
    # exist_ok makes re-running the cell safe without a separate existence check.
    os.makedirs(output_dir, exist_ok=True)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run (lower-cased so names that
        # differ only by case are treated as clashes too). Without this, entries
        # sharing a title would silently overwrite each other's output.
        used_filenames = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            # The first use of a title keeps the plain name; repeats get _2, _3, ...
            suffix = 2
            while output_filename.lower() in used_filenames:
                output_filename = f"{clean_title}_{suffix}.pdf"
                suffix += 1
            used_filenames.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF currently in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction when this code is executed as the main module (as a notebook cell is).
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 19_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v19
expected_file_numbers 68

[1/68] is processing: v19_K_Marx_The_American_Question_in_England
✓ generated：marx_chapters_v19/v19_K_Marx_The_American_Question_in_England.pdf (page_num 7-16 -> PDF_page 34-43)
[2/68] is processing: v19_K_Marx_The_British_Cotton_Trade
✓ generated：marx_chapters_v19/v19_K_Marx_The_British_Cotton_Trade.pdf (page_num 17-20 -> PDF_page 44-47)
[3/68] is processing: v19_K_Marx_The_London_Times_and_Lord_Palmerston
✓ generated：marx_chapters_v19/v19_K_Marx_The_London_Times_and_Lord_Palmerston.pdf (page_num 21-26 -> PDF_page 48-53)
[4/68] is processing: v19_K_Marx_The_London_Times_on_the_Orleans_Princes_in_America
✓ generated：marx_chapters_v19/v19_K_Marx_The_London_Times_on_the_Orleans_Princes_in_America.pdf (page_num 27-31 -> PDF_page 54-58)
[5/68] is processing: v19_K_Marx_The_North_American_Civil_War
✓ generated：marx_chapters_v19/v1

In [36]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Removes the characters < > : " / \\ | ? * . , ( ) [ ] and replaces each
    space with an underscore.

    Args:
        filename: raw title text.

    Returns:
        The cleaned name, or "Unknown" when nothing but whitespace is left
        after the unwanted characters are removed.
    """
    # One pass removes all unwanted punctuation.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # Check for blank text *before* spaces become underscores; otherwise a
    # whitespace-only title would turn into "___" and slip past this fallback.
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=32):
    """
    Copy one chapter's pages from the source PDF into a new PDF file.

    start_page and end_page use the page numbering of table_of_contents and
    are inclusive. They are shifted by page_offset to obtain 0-based indices
    into input_pdf_reader.pages.

    Args:
        input_pdf_reader: opened PdfReader for the source PDF.
        output_path: path of the PDF file to create.
        start_page: first page of the chapter (table_of_contents numbering).
        end_page: last page of the chapter, inclusive.
        title: chapter title, used only in the failure message.
        page_offset: amount added to a table_of_contents page number to get
            the 0-based PDF index; defaults to 32, the shift used in this cell.

    Returns:
        True if the file was written, False if anything failed (the error
        is printed, not raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset

        # A reversed range would otherwise write an empty PDF and still report
        # success; a negative index may be interpreted as counting from the end.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0:
            raise ValueError(f"page {start_page} maps to negative PDF index {pdf_start}")

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Works to extract from Volume 20. Each entry gives the title (used as the output
# file name) and an inclusive start_page/end_page range; extract_chapter adds its
# page offset to these numbers to get indices into the PDF.
table_of_contents = [
    {"title": "v20_K_Marx_Inaugural_Address_of_the_Working_Mens_International_Association", "start_page": 5, "end_page": 13},
    {"title": "v20_K_Marx_Provisional_Rules_of_the_Association", "start_page": 14, "end_page": 16},
    {"title": "v20_K_Marx_Resolutions_on_the_Composition_of_the_Provisional_Central_Council", "start_page": 17, "end_page": 17},
    {"title": "v20_K_Marx_Resolutions_on_the_Terms_of_the_Admission_of_Workers_Organisations_to_the_International_Working_Mens_Association", "start_page": 18, "end_page": 18},
    {"title": "v20_K_Marx_To_Abraham_Lincoln_President_of_the_United_States_of_America", "start_page": 19, "end_page": 21},
    {"title": "v20_K_Marx_To_the_Editor_of_the_Beobachter", "start_page": 22, "end_page": 22},
    {"title": "v20_K_Marx_To_the_Editor_of_the_Stuttgart_Beobachter", "start_page": 23, "end_page": 25},
    {"title": "v20_K_Marx_On_Proudhon_Letter_to_J_B_Schweitzer", "start_page": 26, "end_page": 33},
    {"title": "v20_K_Marx_and_F_Engels_To_the_Editor_of_the_Social_Demokrat", "start_page": 36, "end_page": 36},
    # NOTE: same title as the previous entry (page 36), so both map to the same output file name.
    {"title": "v20_K_Marx_and_F_Engels_To_the_Editor_of_the_Social_Demokrat", "start_page": 80, "end_page": 80},
    {"title": "v20_K_Marx_Resolutions_of_the_Central_Council_on_the_Conflict_in_the_Paris_Section", "start_page": 82, "end_page": 83},
    {"title": "v20_K_Marx_Synopsis_of_Engels_Pamphlet_The_Prussian_Military_Question_and_the_German_Workers_Party", "start_page": 84, "end_page": 84},
    {"title": "v20_K_Marx_Review_of_Engels_Pamphlet_The_Prussian_Military_Question_and_the_German_Workers_Party", "start_page": 85, "end_page": 86},
    {"title": "v20_K_Marx_Statement_Regarding_the_Causes_of_the_Breach_with_the_Social_Demokrat", "start_page": 87, "end_page": 90},
    {"title": "v20_K_Marx_To_the_Editor_of_the_Berliner_Reform", "start_page": 91, "end_page": 91},
    {"title": "v20_K_Marx_The_President_of_Mankind", "start_page": 92, "end_page": 96},
    {"title": "v20_K_Marx_A_Correction", "start_page": 97, "end_page": 98},
    {"title": "v20_K_Marx_Address_from_the_Working_Mens_International_Association_to_President_Johnson", "start_page": 99, "end_page": 100},
    
    # Value, Price and Profit - Major Economic Work
    {"title": "v20_K_Marx_Value_Price_and_Profit", "start_page": 101, "end_page": 149},
    
    # Geneva Congress Materials
    {"title": "v20_K_Marx_Resolution_on_the_Convocation_of_a_General_Congress_of_the_International_in_1866", "start_page": 150, "end_page": 150}, 
    {"title": "v20_K_Marx_Resolution_on_the_Procedure_of_Discussing_the_Programme_of_the_Congress", "start_page": 151, "end_page": 151},
    {"title": "v20_K_Marx_A_Warning", "start_page": 162, "end_page": 163},
    {"title": "v20_K_Marx_Proposals_of_the_Standing_Committee_on_the_Programme_of_the_Geneva_Congress", "start_page": 183, "end_page": 184},
    {"title": "v20_K_Marx_Instructions_for_the_Delegates_of_the_Provisional_General_Council_The_Different_Questions", "start_page": 185, "end_page": 194},
    {"title": "v20_K_Marx_Resolution_of_Gratitude_to_the_Delegates_of_the_Central_General_Council_to_the_Geneva_Congress", "start_page": 195, "end_page": 195},
    {"title": "v20_K_Marx_Speech_at_the_Polish_Meeting_in_London_January_22_1867", "start_page": 196, "end_page": 201},
    # NOTE: same title as the "A_Correction" entry at pages 97-98, so both map to the same output file name.
    {"title": "v20_K_Marx_A_Correction", "start_page": 202, "end_page": 202},
    {"title": "v20_K_Marx_Resolutions_Concerning_the_Agenda_of_the_Lausanne_Congress", "start_page": 203, "end_page": 203},
    {"title": "v20_K_Marx_Resolution_on_the_Attitude_of_the_International_Working_Mens_Association_to_the_Congress_of_the_League_of_Peace_and_Freedom", "start_page": 204, "end_page": 206},
    
    # Articles and Reviews on Capital Volume One
    {"title": "v20_K_Marx_My_Plagiarism_of_F_Bastiat", "start_page": 260, "end_page": 262},
    {"title": "v20_F_Engels_Synopsis_of_Volume_One_of_Capital_by_Karl_Marx", "start_page": 263, "end_page": 310},
    
    # Preparatory Materials
    {"title": "v20_K_Marx_Draft_for_a_Speech_on_Frances_Attitude_to_Poland_Polemics_against_Peter_Fox", "start_page": 311, "end_page": 327},
    {"title": "v20_K_Marx_Draft_for_a_Report_to_the_Central_Council", "start_page": 328, "end_page": 328},
    {"title": "v20_K_Marx_Notes_Concerning_the_Conflict_in_the_Paris_Section", "start_page": 329, "end_page": 329},
    {"title": "v20_K_Marx_Original_Draft_Resolution_on_the_Conflict_in_the_Paris_Section", "start_page": 330, "end_page": 330},
    {"title": "v20_K_Marx_Memorandum_to_Hermann_Jung_About_the_Conflict_in_the_Paris_Section", "start_page": 331, "end_page": 336},
    {"title": "v20_K_Marx_Note_to_Hermann_Jung_About_Ernest_Jones_Letter_to_the_Central_Council_Meeting_March_21_1865", "start_page": 337, "end_page": 337},
    {"title": "v20_K_Marx_Notes_for_the_Report_on_Value_Price_and_Profit", "start_page": 338, "end_page": 338},
    {"title": "v20_K_Marx_Record_of_the_Central_Council_Meeting_January_16_1866", "start_page": 339, "end_page": 342},
    
]

def main():
    """
    Split the Volume 20 PDF into one PDF per table_of_contents entry.

    Reads the source PDF from ~/Downloads, writes every entry to
    marx_chapters_v20/<sanitized title>.pdf through extract_chapter, and
    prints per-item progress followed by a summary. An exception raised
    while opening or processing the PDF is caught and printed rather than
    propagated.
    """
    # The source PDF is read from the user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 20_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v20"
    # exist_ok makes re-running the cell safe without a separate existence check.
    os.makedirs(output_dir, exist_ok=True)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run (lower-cased so names that
        # differ only by case are treated as clashes too). Without this, entries
        # sharing a title would silently overwrite each other's output.
        used_filenames = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            # The first use of a title keeps the plain name; repeats get _2, _3, ...
            suffix = 2
            while output_filename.lower() in used_filenames:
                output_filename = f"{clean_title}_{suffix}.pdf"
                suffix += 1
            used_filenames.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF currently in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction when this code is executed as the main module (as a notebook cell is).
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 20_ M - Karl Marx.pdf
folder_name: marx_chapters_v20
expected_file_numbers 39

[1/39] is processing: v20_K_Marx_Inaugural_Address_of_the_Working_Mens_International_Association
✓ generated：marx_chapters_v20/v20_K_Marx_Inaugural_Address_of_the_Working_Mens_International_Association.pdf (page_num 5-13 -> PDF_page 38-46)
[2/39] is processing: v20_K_Marx_Provisional_Rules_of_the_Association
✓ generated：marx_chapters_v20/v20_K_Marx_Provisional_Rules_of_the_Association.pdf (page_num 14-16 -> PDF_page 47-49)
[3/39] is processing: v20_K_Marx_Resolutions_on_the_Composition_of_the_Provisional_Central_Council
✓ generated：marx_chapters_v20/v20_K_Marx_Resolutions_on_the_Composition_of_the_Provisional_Central_Council.pdf (page_num 17-17 -> PDF_page 50-50)
[4/39] is processing: v20_K_Marx_Resolutions_on_the_Terms_of_the_Admission_of_Workers_Organisations_to_the_International_Working_Mens_Association
✓ generated：marx_cha

In [37]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Removes the characters < > : " / \\ | ? * . , ( ) [ ] and replaces each
    space with an underscore.

    Args:
        filename: raw title text.

    Returns:
        The cleaned name, or "Unknown" when nothing but whitespace is left
        after the unwanted characters are removed.
    """
    # One pass removes all unwanted punctuation.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # Check for blank text *before* spaces become underscores; otherwise a
    # whitespace-only title would turn into "___" and slip past this fallback.
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=32):
    """
    Copy one chapter's pages from the source PDF into a new PDF file.

    start_page and end_page use the page numbering of table_of_contents and
    are inclusive. They are shifted by page_offset to obtain 0-based indices
    into input_pdf_reader.pages.

    Args:
        input_pdf_reader: opened PdfReader for the source PDF.
        output_path: path of the PDF file to create.
        start_page: first page of the chapter (table_of_contents numbering).
        end_page: last page of the chapter, inclusive.
        title: chapter title, used only in the failure message.
        page_offset: amount added to a table_of_contents page number to get
            the 0-based PDF index; defaults to 32, the shift used in this cell.

    Returns:
        True if the file was written, False if anything failed (the error
        is printed, not raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset

        # A reversed range would otherwise write an empty PDF and still report
        # success; a negative index may be interpreted as counting from the end.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0:
            raise ValueError(f"page {start_page} maps to negative PDF index {pdf_start}")

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Works to extract from Volume 21. Each entry gives the title (used as the output
# file name) and an inclusive start_page/end_page range; extract_chapter adds its
# page offset to these numbers to get indices into the PDF.
table_of_contents = [
   # Main Works
   {"title": "v21_K_Marx_The_Fenian_Prisoners_at_Manchester_and_the_International_Working_Mens_Association", "start_page": 3, "end_page": 4},
   {"title": "v21_K_Marx_The_Position_of_the_International_on_Prussian_Protectionist_Tariffs", "start_page": 5, "end_page": 5},
   {"title": "v21_K_Marx_Resolution_on_Changing_the_Place_of_the_Internationals_Congress_in_1868", "start_page": 6, "end_page": 6},
   {"title": "v21_K_Marx_Resolution_of_the_General_Council_on_Felix_Pyats_Provocative_Behaviour", "start_page": 7, "end_page": 7},
   {"title": "v21_K_Marx_Declaration_of_the_General_Council_Concerning_the_British_Governments_Attitude_Towards_Tsarist_Russia", "start_page": 8, "end_page": 8},
   {"title": "v21_K_Marx_Draft_Resolution_on_the_Consequences_of_Using_Machinery_under_Capitalism_Proposed_by_the_General_Council_to_the_Brussels_Congress", "start_page": 9, "end_page": 9},
   {"title": "v21_K_Marx_To_the_President_and_Executive_Committee_of_the_General_Association_of_German_Workers", "start_page": 10, "end_page": 10},
   {"title": "v21_K_Marx_Draft_Resolution_on_the_Reduction_of_the_Working_Day_Proposed_by_the_General_Council_to_the_Brussels_Congress", "start_page": 11, "end_page": 11},
   {"title": "v21_K_Marx_The_Fourth_Annual_Report_of_the_General_Council_of_the_International_Working_Mens_Association", "start_page": 12, "end_page": 17},
   {"title": "v21_K_Marx_Connections_Between_the_International_Working_Mens_Association_and_English_Working_Mens_Organisations", "start_page": 25, "end_page": 27},
   {"title": "v21_K_Marx_How_Mr_Gladstones_Bank_Letter_of_1866_Procured_a_Loan_of_Six_Millions_for_Russia", "start_page": 28, "end_page": 30},
   {"title": "v21_K_Marx_Preamble_to_the_Resolutions_of_the_Geneva_1866_and_Brussels_1868_Congresses_of_the_International", "start_page": 31, "end_page": 31},
   {"title": "v21_K_Marx_Statement_to_the_German_Workers_Educational_Society_in_London", "start_page": 32, "end_page": 33},
   {"title": "v21_K_Marx_The_International_Working_Mens_Association_and_the_International_Alliance_of_Socialist_Democracy", "start_page": 34, "end_page": 36},
   {"title": "v21_K_Marx_Resume_of_the_Meetings_of_the_General_Council_International_Working_Mens_Association_to_the_Editor_of_The_Bee_Hive", "start_page": 37, "end_page": 38},
   {"title": "v21_K_Marx_The_General_Council_of_the_International_Working_Mens_Association_to_the_Central_Bureau_of_the_International_Alliance_of_Socialist_Democracy", "start_page": 45, "end_page": 46},
   {"title": "v21_K_Marx_The_Belgian_Massacres_To_the_Workmen_of_Europe_and_the_United_States", "start_page": 47, "end_page": 52},
   {"title": "v21_K_Marx_Address_to_the_National_Labour_Union_of_the_United_States", "start_page": 53, "end_page": 55},
   {"title": "v21_K_Marx_Preface_to_the_Second_Edition_of_The_Eighteenth_Brumaire_of_Louis_Bonaparte", "start_page": 56, "end_page": 58},
   {"title": "v21_K_Marx_Report_of_the_General_Council_on_the_Right_of_Inheritance", "start_page": 65, "end_page": 67},
   {"title": "v21_K_Marx_Report_of_the_General_Council_to_the_Fourth_Annual_Congress_of_the_International_Working_Mens_Association", "start_page": 68, "end_page": 82},
   {"title": "v21_K_Marx_Draft_Resolution_of_the_General_Council_on_the_Policy_of_the_British_Government_Towards_the_Irish_Prisoners", "start_page": 83, "end_page": 83},
   {"title": "v21_K_Marx_The_General_Council_to_the_Federal_Council_of_Romance_Switzerland", "start_page": 84, "end_page": 91},
   {"title": "v21_K_Marx_Obituary", "start_page": 92, "end_page": 92},
   {"title": "v21_K_Marx_The_English_Government_and_the_Fenian_Prisoners", "start_page": 101, "end_page": 107},
   {"title": "v21_K_Marx_Concerning_the_Conflict_in_the_Lyons_Section", "start_page": 108, "end_page": 109},
   {"title": "v21_K_Marx_The_General_Council_of_the_International_Working_Mens_Association_to_Committee_Members_of_the_Russian_Section_in_Geneva", "start_page": 110, "end_page": 111},
   {"title": "v21_K_Marx_Confidential_Communication", "start_page": 112, "end_page": 124},
   {"title": "v21_K_Marx_To_the_International_Metalworkers_Society", "start_page": 125, "end_page": 125},
   {"title": "v21_K_Marx_Resolution_of_the_General_Council_on_The_Bee_Hive", "start_page": 126, "end_page": 126},
   {"title": "v21_K_Marx_Concerning_the_Persecution_of_the_Members_of_the_French_Sections_Declaration_of_the_General_Council_of_the_International_Working_Mens_Association", "start_page": 127, "end_page": 130},
   {"title": "v21_K_Marx_Draft_Resolution_of_the_General_Council_on_the_French_Federal_Section_in_London", "start_page": 131, "end_page": 131},
   {"title": "v21_K_Marx_Resolution_of_the_General_Council_on_the_Convocation_of_the_Congress_in_Mainz", "start_page": 132, "end_page": 132},
   {"title": "v21_K_Marx_and_F_Engels_To_the_Committee_of_the_German_Social_Democratic_Workers_Party", "start_page": 133, "end_page": 135},
   {"title": "v21_K_Marx_General_Council_Resolution_on_the_Federal_Committee_of_Romance_Switzerland_The_General_Council_to_the_Romance_Federal_Committee", "start_page": 136, "end_page": 136},
   {"title": "v21_K_Marx_The_Lock_out_of_the_Building_Trades_at_Geneva_The_General_Council_of_the_International_Working_Mens_Association_to_the_Working_Men_and_Women_of_Europe_and_the_United_States", "start_page": 137, "end_page": 139},
   {"title": "v21_K_Marx_Confidential_Communication_to_All_Sections", "start_page": 142, "end_page": 142},
   {"title": "v21_K_Marx_Programme_for_the_Mainz_Congress_of_the_International", "start_page": 143, "end_page": 144},
   
   # Preparatory Materials
   {"title": "v21_K_Marx_Notes_for_an_Undelivered_Speech_on_Ireland", "start_page": 189, "end_page": 193},
   {"title": "v21_K_Marx_Outline_of_a_Report_on_the_Irish_Question_Delivered_to_the_German_Workers_Educational_Society_in_London_on_December_16_1867", "start_page": 194, "end_page": 206},
   {"title": "v21_K_Marx_Remarks_on_the_Programme_and_Rules_of_the_International_Alliance_of_Socialist_Democracy", "start_page": 207, "end_page": 211},
   {"title": "v21_K_Marx_Ireland_from_the_American_Revolution_to_the_Union_of_1801_Extracts_and_Notes", "start_page": 212, "end_page": 282}
   
]

def main():
    """
    Split the Volume 21 PDF into one PDF per table_of_contents entry.

    Reads the source PDF from ~/Downloads, writes every entry to
    marx_chapters_v21/<sanitized title>.pdf through extract_chapter, and
    prints per-item progress followed by a summary. An exception raised
    while opening or processing the PDF is caught and printed rather than
    propagated.
    """
    # The source PDF is read from the user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 21_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v21"
    # exist_ok makes re-running the cell safe without a separate existence check.
    os.makedirs(output_dir, exist_ok=True)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run (lower-cased so names that
        # differ only by case are treated as clashes too). Without this, entries
        # sharing a title would silently overwrite each other's output.
        used_filenames = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            # The first use of a title keeps the plain name; repeats get _2, _3, ...
            suffix = 2
            while output_filename.lower() in used_filenames:
                output_filename = f"{clean_title}_{suffix}.pdf"
                suffix += 1
            used_filenames.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF currently in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction when this code is executed as the main module (as a notebook cell is).
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 21_ M - Karl Marx.pdf
folder_name: marx_chapters_v21
expected_file_numbers 42

[1/42] is processing: v21_K_Marx_The_Fenian_Prisoners_at_Manchester_and_the_International_Working_Mens_Association
✓ generated：marx_chapters_v21/v21_K_Marx_The_Fenian_Prisoners_at_Manchester_and_the_International_Working_Mens_Association.pdf (page_num 3-4 -> PDF_page 36-37)
[2/42] is processing: v21_K_Marx_The_Position_of_the_International_on_Prussian_Protectionist_Tariffs
✓ generated：marx_chapters_v21/v21_K_Marx_The_Position_of_the_International_on_Prussian_Protectionist_Tariffs.pdf (page_num 5-5 -> PDF_page 38-38)
[3/42] is processing: v21_K_Marx_Resolution_on_Changing_the_Place_of_the_Internationals_Congress_in_1868
✓ generated：marx_chapters_v21/v21_K_Marx_Resolution_on_Changing_the_Place_of_the_Internationals_Congress_in_1868.pdf (page_num 6-6 -> PDF_page 39-39)
[4/42] is processing: v21_K_Marx_Resolution_of_the_General_Co

In [40]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that is safe to use as a file name.

    Removes the characters < > : " / \\ | ? * . , ( ) [ ] and replaces each
    space with an underscore.

    Args:
        filename: raw title text.

    Returns:
        The cleaned name, or "Unknown" when nothing but whitespace is left
        after the unwanted characters are removed.
    """
    # One pass removes all unwanted punctuation.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)

    # Check for blank text *before* spaces become underscores; otherwise a
    # whitespace-only title would turn into "___" and slip past this fallback.
    if not cleaned.strip():
        return "Unknown"

    return cleaned.replace(' ', '_')

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=32):
    """
    Copy one chapter's pages from the source PDF into a new PDF file.

    start_page and end_page use the page numbering of table_of_contents and
    are inclusive. They are shifted by page_offset to obtain 0-based indices
    into input_pdf_reader.pages.

    Args:
        input_pdf_reader: opened PdfReader for the source PDF.
        output_path: path of the PDF file to create.
        start_page: first page of the chapter (table_of_contents numbering).
        end_page: last page of the chapter, inclusive.
        title: chapter title, used only in the failure message.
        page_offset: amount added to a table_of_contents page number to get
            the 0-based PDF index; defaults to 32, the shift used in this cell.

    Returns:
        True if the file was written, False if anything failed (the error
        is printed, not raised).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset

        # A reversed range would otherwise write an empty PDF and still report
        # success; a negative index may be interpreted as counting from the end.
        if pdf_end < pdf_start:
            raise ValueError(f"end_page {end_page} is before start_page {start_page}")
        if pdf_start < 0:
            raise ValueError(f"page {start_page} maps to negative PDF index {pdf_start}")

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Works to extract from Volume 22. Each entry gives the title (used as the output
# file name) and an inclusive start_page/end_page range; extract_chapter adds its
# page offset to these numbers to get indices into the PDF.
table_of_contents = [
   # Main Works
   {"title": "v22_K_Marx_First_Address_of_the_General_Council_of_the_International_Working_Mens_Association_on_the_Franco_Prussian_War", "start_page": 3, "end_page": 8},
   {"title": "v22_K_Marx_To_the_Committee_of_the_Social_Democratic_Workers_Party", "start_page": 259, "end_page": 259},
   {"title": "v22_K_Marx_and_F_Engels_Letter_to_the_Committee_of_the_Social_Democratic_Workers_Party", "start_page": 260, "end_page": 262},
   {"title": "v22_K_Marx_Second_Address_of_the_General_Council_of_the_International_Working_Mens_Association_on_the_Franco_Prussian_War_against_the_Empire", "start_page": 263, "end_page": 270},
   {"title": "v22_K_Marx_Concerning_the_Arrest_of_the_Members_of_the_Central_Committee_of_the_Social_Democratic_Workers_Party", "start_page": 271, "end_page": 271},
   {"title": "v22_K_Marx_On_the_Freedom_of_the_Press_and_Meetings_in_Germany", "start_page": 274, "end_page": 276},
   {"title": "v22_K_Marx_and_F_Engels_To_the_Editor_of_The_Times", "start_page": 285, "end_page": 285},
   {"title": "v22_K_Marx_Statement_by_the_General_Council_to_the_Editor_of_The_Times_and_Other_Papers", "start_page": 286, "end_page": 287},
   {"title": "v22_K_Marx_To_the_Editorial_Boards_of_the_Volksstaat_and_the_Zukunft", "start_page": 288, "end_page": 290},
   {"title": "v22_K_Marx_To_the_Editor_of_De_Werker", "start_page": 291, "end_page": 291},
   {"title": "v22_K_Marx_To_the_Editor_of_The_Times", "start_page": 292, "end_page": 294},
   {"title": "v22_K_Marx_and_F_Engels_Resolution_of_the_General_Council_Expelling_Henri_Louis_Tolain_from_the_International_Working_Mens_Association", "start_page": 297, "end_page": 297},
   {"title": "v22_K_Marx_The_Civil_War_in_France_Address_of_the_General_Council_of_the_International_Working_Mens_Association", "start_page": 307, "end_page": 355},
   {"title": "v22_K_Marx_To_the_Editor_of_The_Pall_Mall_Gazette", "start_page": 360, "end_page": 360},
   {"title": "v22_K_Marx_and_F_Engels_Statement_by_the_General_Council_on_Jules_Favres_Circular", "start_page": 361, "end_page": 363},
   {"title": "v22_K_Marx_and_F_Engels_Statement_by_the_General_Council_to_the_Editor_of_The_Times", "start_page": 364, "end_page": 365},
   {"title": "v22_K_Marx_and_F_Engels_Statement_by_the_General_Council_to_the_Editor_of_The_Standard", "start_page": 366, "end_page": 366},
   {"title": "v22_K_Marx_To_the_Editor_of_The_Daily_News", "start_page": 370, "end_page": 371},
   {"title": "v22_K_Marx_Letter_to_Max_Friedländer_the_Editor_of_the_Neue_Freie_Presse", "start_page": 374, "end_page": 374},
   {"title": "v22_K_Marx_Letter_to_Frederick_Greenwood_the_Editor_of_The_Pall_Mall_Gazette", "start_page": 378, "end_page": 378},
   {"title": "v22_K_Marx_Mr_Washburne_the_American_Ambassador_in_Paris", "start_page": 379, "end_page": 382},
   {"title": "v22_K_Marx_To_the_Editor_of_The_Morning_Advertiser", "start_page": 383, "end_page": 383},
   {"title": "v22_K_Marx_To_the_Editor_of_The_Standard", "start_page": 384, "end_page": 384},
   {"title": "v22_K_Marx_Covering_Letter_to_the_Editor_of_The_Times", "start_page": 388, "end_page": 388},
   {"title": "v22_K_Marx_To_the_Editor_of_LInternational", "start_page": 391, "end_page": 391},
   {"title": "v22_K_Marx_To_the_Editor_of_Public_Opinion", "start_page": 392, "end_page": 392},
   # NOTE: same title as the previous entry (page 392), so both map to the same output file name.
   {"title": "v22_K_Marx_To_the_Editor_of_Public_Opinion", "start_page": 393, "end_page": 394},
   {"title": "v22_K_Marx_To_the_Editor_of_the_Gaulois", "start_page": 395, "end_page": 395},
   {"title": "v22_K_Marx_Letter_to_the_Editor_of_The_Sun_Charles_Dana", "start_page": 396, "end_page": 399},
   {"title": "v22_K_Marx_The_Commune_and_Archbishop_Darboy", "start_page": 400, "end_page": 402},
   {"title": "v22_K_Marx_To_the_Editor_of_La_Vérité", "start_page": 403, "end_page": 404},
   {"title": "v22_K_Marx_To_the_Editor_of_The_Evening_Standard", "start_page": 405, "end_page": 405},
   {"title": "v22_K_Marx_and_F_Engels_Propositions_to_the_General_Council_Concerning_Preparations_for_the_London_Conference", "start_page": 406, "end_page": 406},
   {"title": "v22_K_Marx_and_F_Engels_Propositions_to_Be_Submitted_to_the_Conference_by_the_General_Council", "start_page": 407, "end_page": 408},
   {"title": "v22_K_Marx_On_the_Activity_of_the_Alliance_of_Socialist_Democracy_Record_of_the_Speech_at_the_Sitting_of_the_Conference_Commission_of_18_September_1871", "start_page": 411, "end_page": 412},
   {"title": "v22_K_Marx_and_F_Engels_Motions_of_the_General_Council_Adopted_by_the_Conference", "start_page": 413, "end_page": 414},
   {"title": "v22_K_Marx_Resolution_of_the_London_Conference_relating_to_the_Split_in_Romance_Switzerland", "start_page": 419, "end_page": 422},
   {"title": "v22_K_Marx_and_F_Engels_Resolutions_of_the_Conference_of_Delegates_of_the_International_Working_Mens_Association_Assembled_at_London_from_17th_to_23rd_September_1871", "start_page": 423, "end_page": 431},
   {"title": "v22_K_Marx_To_the_Editors_of_Woodhull_and_Claflins_Weekly", "start_page": 432, "end_page": 432},
   
   # Preparatory Materials
   {"title": "v22_K_Marx_Drafts_of_The_Civil_War_in_France", "start_page": 435, "end_page": 551},
   {"title": "v22_K_Marx_and_F_Engels_Notes_from_the_Minutes_of_the_General_Council_1869_1871", "start_page": 554, "end_page": 564},
]

def main():
    """
    Split the Volume 22 PDF into one PDF per table_of_contents entry.

    Reads the source PDF from ~/Downloads, writes every entry to
    marx_chapters_v22/<sanitized title>.pdf through extract_chapter, and
    prints per-item progress followed by a summary. An exception raised
    while opening or processing the PDF is caught and printed rather than
    propagated.
    """
    # The source PDF is read from the user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 22_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v22"
    # exist_ok makes re-running the cell safe without a separate existence check.
    os.makedirs(output_dir, exist_ok=True)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        # File names already handed out in this run (lower-cased so names that
        # differ only by case are treated as clashes too). Without this, entries
        # sharing a title would silently overwrite each other's output.
        used_filenames = set()

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            # The first use of a title keeps the plain name; repeats get _2, _3, ...
            suffix = 2
            while output_filename.lower() in used_filenames:
                output_filename = f"{clean_title}_{suffix}.pdf"
                suffix += 1
            used_filenames.add(output_filename.lower())
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF currently in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Run the extraction when this code is executed as the main module (as a notebook cell is).
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 22_ M - Karl Marx.pdf
folder_name: marx_chapters_v22
expected_file_numbers 41

[1/41] is processing: v22_K_Marx_First_Address_of_the_General_Council_of_the_International_Working_Mens_Association_on_the_Franco_Prussian_War
✓ generated：marx_chapters_v22/v22_K_Marx_First_Address_of_the_General_Council_of_the_International_Working_Mens_Association_on_the_Franco_Prussian_War.pdf (page_num 3-8 -> PDF_page 36-41)
[2/41] is processing: v22_K_Marx_To_the_Committee_of_the_Social_Democratic_Workers_Party
✓ generated：marx_chapters_v22/v22_K_Marx_To_the_Committee_of_the_Social_Democratic_Workers_Party.pdf (page_num 259-259 -> PDF_page 292-292)
[3/41] is processing: v22_K_Marx_and_F_Engels_Letter_to_the_Committee_of_the_Social_Democratic_Workers_Party
✓ generated：marx_chapters_v22/v22_K_Marx_and_F_Engels_Letter_to_the_Committee_of_the_Social_Democratic_Workers_Party.pdf (page_num 260-262 -> PDF_page 293-295)
[4/41] is

In [41]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    The characters < > : " / | ? * and the backslash are dropped, every
    space becomes an underscore, and dots, commas, round brackets and square
    brackets are removed.  If nothing but whitespace is left afterwards the
    placeholder "Unknown" is returned instead.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    cleaned = cleaned.replace(' ', '_')

    # Punctuation that is legal in file names but unwanted in the output.
    for unwanted in ('.', ',', '(', ')', '[', ']'):
        cleaned = cleaned.replace(unwanted, '')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=34):
    """
    Copy one chapter out of the source PDF into its own file.

    The table of contents stores page numbers as printed in the book, while
    the reader is indexed by 0-based position inside the PDF file.
    ``page_offset`` is added to a printed page number to obtain that index.

    Args:
        input_pdf_reader: PyPDF2 ``PdfReader`` opened on the whole volume.
        output_path: Path of the PDF file to create.
        start_page: Printed number of the chapter's first page.
        end_page: Printed number of the chapter's last page (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Value added to a printed page number to get its 0-based
            index in the PDF.  Defaults to 34, the offset used for this volume.

    Returns:
        True if the chapter file was written, False otherwise.  Errors are
        printed, never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the disk: an inverted range would otherwise
        # write an empty PDF and report success, and a negative index does not
        # refer to any page of the requested range.
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            raise ValueError(
                f"invalid page range {start_page}-{end_page} "
                f"(PDF_page {pdf_start+1}-{pdf_end+1}, document has {total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # PDF_page is shown 1-based, hence the +1 on the 0-based indices.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 23 to extract.  Page numbers are the ones printed in the
# book.  Titles double as output file names, so every title must be unique:
# works that share a name carry a running _01/_02/... suffix, otherwise the
# later file would overwrite the earlier one.
table_of_contents = [
    # 1871
    {"title": "v23_K_Marx_General_Rules_and_Administrative_Regulations_of_the_International_Working_Mens_Association", "start_page": 3, "end_page": 20},
    {"title": "v23_K_Marx_Declaration_of_the_General_Council_on_Nechayevs_Misuse_of_the_Name_of_the_International_Working_Mens_Association", "start_page": 23, "end_page": 23},
    {"title": "v23_K_Marx_Resolution_of_the_General_Council_on_the_Rules_of_the_French_Section_of_1871", "start_page": 24, "end_page": 27},
    {"title": "v23_K_Marx_Resolution_of_the_General_Council_on_the_French_Section_of_1871", "start_page": 37, "end_page": 42},
    {"title": "v23_K_Marx_Declaration", "start_page": 50, "end_page": 51},
    {"title": "v23_K_Marx_Statement_Sent_by_the_General_Council_to_the_Editors_of_the_Frankfurter_Zeitung_und_Handelsblatt", "start_page": 52, "end_page": 52},
    {"title": "v23_F_Engels_To_the_Federal_Council_of_the_Spanish_Region_in_Madrid", "start_page": 53, "end_page": 53},
    {"title": "v23_K_Marx_To_the_Editor_of_The_Eastern_Post_01", "start_page": 62, "end_page": 63},

    # 1872
    {"title": "v23_K_Marx_To_the_Editor_of_The_Eastern_Post_02", "start_page": 71, "end_page": 71},
    {"title": "v23_K_Marx_To_the_Editor_of_The_Eastern_Post_03", "start_page": 72, "end_page": 73},
    {"title": "v23_K_Marx_and_F_Engels_Declaration_of_the_General_Council_of_the_International_Working_Mens_Association", "start_page": 77, "end_page": 78},
    {"title": "v23_K_Marx_and_F_Engels_Fictitious_Splits_in_the_International_Private_Circular_from_the_General_Council_of_the_International_Working_Mens_Association", "start_page": 79, "end_page": 123},
    {"title": "v23_K_Marx_Resolutions_on_the_Split_in_the_United_States_Federation_Passed_by_the_General_Council_of_the_I_W_A_in_Its_Sittings_of_5th_and_12th_March_1872", "start_page": 124, "end_page": 126},
    {"title": "v23_K_Marx_To_the_Editor_of_La_Liberté", "start_page": 127, "end_page": 127},
    {"title": "v23_K_Marx_Resolutions_of_the_Meeting_Held_to_Celebrate_the_Anniversary_of_the_Paris_Commune", "start_page": 128, "end_page": 128},
    {"title": "v23_K_Marx_The_Nationalisation_of_the_Land", "start_page": 131, "end_page": 136},
    {"title": "v23_K_Marx_Declaration_of_the_General_Council_of_the_International_Working_Mens_Association_Concerning_Cochranes_Speech_in_the_House_of_Commons", "start_page": 140, "end_page": 145},
    {"title": "v23_K_Marx_Declaration_of_the_General_Council_Concerning_the_Universal_Federalist_Council", "start_page": 157, "end_page": 163},
    # NOTE(review): the next two entries have the identical page range
    # 164-167, which looks like a copy-paste slip - verify both ranges (and
    # the 157-163 range above) against the volume's printed contents.
    {"title": "v23_K_Marx_Stefanoni_and_the_International_Again", "start_page": 164, "end_page": 167},
    {"title": "v23_K_Marx_Reply_to_Brentanos_Article", "start_page": 164, "end_page": 167},
    {"title": "v23_K_Marx_and_F_Engels_Preface_to_the_1872_German_Edition_of_the_Manifesto_of_the_Communist_Party", "start_page": 174, "end_page": 175},
    {"title": "v23_K_Marx_To_the_Striking_Miners_of_the_Ruhr_Valley", "start_page": 185, "end_page": 187},
    {"title": "v23_K_Marx_The_General_Councils_Reply_to_the_Protest_of_the_Jura_Federation_Against_the_Convening_of_a_Congress_at_The_Hague", "start_page": 188, "end_page": 189},
    {"title": "v23_K_Marx_Reply_to_Brentanos_Second_Article", "start_page": 190, "end_page": 197},
    {"title": "v23_K_Marx_Amendments_to_the_General_Rules_and_Administrative_Regulations_of_the_International_Working_Mens_Association_Adopted_by_the_General_Council_in_the_Summer_of_1872", "start_page": 198, "end_page": 204},
    {"title": "v23_K_Marx_and_F_Engels_To_the_Spanish_Sections_of_the_International_Working_Mens_Association", "start_page": 211, "end_page": 213},
    {"title": "v23_K_Marx_To_the_Editor_of_The_Times_01", "start_page": 214, "end_page": 214},
    {"title": "v23_K_Marx_Resolution_on_the_Behaviour_of_Members_of_the_General_Council_at_the_Congress", "start_page": 218, "end_page": 218},
    {"title": "v23_K_Marx_Report_of_the_General_Council_to_the_Fifth_Annual_Congress_of_the_International_Working_Mens_Association_Held_at_The_Hague_from_the_2nd_to_the_7th_September_1872", "start_page": 219, "end_page": 227},
    {"title": "v23_K_Marx_and_F_Engels_Proposal_on_the_Transfer_of_the_Seat_and_on_the_Composition_of_the_General_Council_for_1872_1873", "start_page": 240, "end_page": 242},
    {"title": "v23_K_Marx_and_F_Engels_Resolutions_of_the_General_Congress_Held_at_The_Hague_from_the_2nd_to_the_7th_September_1872", "start_page": 243, "end_page": 253},
    {"title": "v23_K_Marx_On_the_Hague_Congress_A_Correspondents_Report_of_a_Speech_Made_at_a_Meeting_in_Amsterdam_on_September_8_1872", "start_page": 254, "end_page": 256},
    {"title": "v23_K_Marx_To_the_Editor_of_Le_Corsaire", "start_page": 257, "end_page": 258},
    {"title": "v23_K_Marx_To_the_Editor_of_The_Daily_News", "start_page": 259, "end_page": 259},
    {"title": "v23_K_Marx_To_the_Editors_of_Der_Volksstaat", "start_page": 286, "end_page": 287},
    {"title": "v23_K_Marx_and_F_Engels_To_the_Editor_of_The_International_Herald", "start_page": 301, "end_page": 303},
    {"title": "v23_K_Marx_Address_of_the_British_Federal_Council_to_the_Sections_Branches_Affiliated_Societies_and_Members_of_the_International_Working_Mens_Association", "start_page": 309, "end_page": 316},

    # 1873
    {"title": "v23_K_Marx_Political_Indifferentism", "start_page": 392, "end_page": 397},
    {"title": "v23_K_Marx_To_the_Editor_of_The_Times_02", "start_page": 398, "end_page": 399},
    {"title": "v23_K_Marx_Reply_to_the_Second_Circular_of_the_Self_styled_Majority_of_the_British_Federal_Council", "start_page": 406, "end_page": 408},
    {"title": "v23_K_Marx_and_F_Engels_The_Alliance_of_Socialist_Democracy_and_the_International_Working_Mens_Association_Report_and_Documents_Published_by_Decision_of_the_Hague_Congress_of_the_International", "start_page": 454, "end_page": 580},

    # Preparatory Materials
    {"title": "v23_K_Marx_Notes_on_the_Condition_of_the_Refugees_from_the_Commune", "start_page": 635, "end_page": 635},
    {"title": "v23_K_Marx_American_Split", "start_page": 636, "end_page": 643},
    {"title": "v23_K_Marx_Extracts_from_the_Minutes_of_the_General_Council_for_June_1870_April_1872", "start_page": 644, "end_page": 654}
]

def main():
    """
    Split the Volume 23 PDF into one file per ``table_of_contents`` entry.

    The volume is read from the user's Downloads folder and every chapter is
    written into the "marx_chapters_v23" directory (created if missing).  A
    progress line is printed per chapter, followed by a summary and a listing
    of the PDF files found in the output directory.  Exceptions raised while
    processing are printed instead of being propagated.

    Entries whose sanitized titles are identical get a numeric suffix
    (``_2``, ``_3``, ...) so that a later chapter never overwrites the file of
    an earlier one.
    """
    # The source volume is expected in the user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 23_ M - Karl Marx.pdf")

    output_dir = "marx_chapters_v23"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        success_count = 0
        failed_count = 0
        used_names = set()  # file stems already handed out in this run

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)

            # Several entries can share one title (e.g. repeated letters to
            # the same editor); without a suffix they would all be written to
            # the same path and only the last one would survive.
            unique_title = clean_title
            duplicate_no = 1
            while unique_title in used_names:
                duplicate_no += 1
                unique_title = f"{clean_title}_{duplicate_no}"
            used_names.add(unique_title)

            output_filename = f"{unique_title}.pdf"
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{len(table_of_contents)}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files from earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 23_ M - Karl Marx.pdf
folder_name: marx_chapters_v23
expected_file_numbers 44

[1/44] is processing: v23_K_Marx_General_Rules_and_Administrative_Regulations_of_the_International_Working_Mens_Association
✓ generated：marx_chapters_v23/v23_K_Marx_General_Rules_and_Administrative_Regulations_of_the_International_Working_Mens_Association.pdf (page_num 3-20 -> PDF_page 38-55)
[2/44] is processing: v23_K_Marx_Declaration_of_the_General_Council_on_Nechayevs_Misuse_of_the_Name_of_the_International_Working_Mens_Association
✓ generated：marx_chapters_v23/v23_K_Marx_Declaration_of_the_General_Council_on_Nechayevs_Misuse_of_the_Name_of_the_International_Working_Mens_Association.pdf (page_num 23-23 -> PDF_page 58-58)
[3/44] is processing: v23_K_Marx_Resolution_of_the_General_Council_on_the_Rules_of_the_French_Section_of_1871
✓ generated：marx_chapters_v23/v23_K_Marx_Resolution_of_the_General_Council_on_the_Rules_of_the_

In [42]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Make a chapter title usable as a file name.

    Removes the characters < > : " / | ? * and the backslash, replaces each
    space with an underscore, and strips dots, commas, parentheses and square
    brackets.  Falls back to "Unknown" when the result is empty or consists of
    whitespace only.
    """
    safe_name = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    for symbol in ('.', ',', '(', ')', '[', ']'):
        safe_name = safe_name.replace(symbol, '')

    if safe_name.strip():
        return safe_name
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Copy one chapter out of the source PDF into its own file.

    The table of contents stores page numbers as printed in the book, while
    the reader is indexed by 0-based position inside the PDF file.
    ``page_offset`` is added to a printed page number to obtain that index.

    Args:
        input_pdf_reader: PyPDF2 ``PdfReader`` opened on the whole volume.
        output_path: Path of the PDF file to create.
        start_page: Printed number of the chapter's first page.
        end_page: Printed number of the chapter's last page (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Value added to a printed page number to get its 0-based
            index in the PDF.  Defaults to 30, the offset used for this volume.

    Returns:
        True if the chapter file was written, False otherwise.  Errors are
        printed, never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the disk: an inverted range would otherwise
        # write an empty PDF and report success, and a negative index does not
        # refer to any page of the requested range.
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            raise ValueError(
                f"invalid page range {start_page}-{end_page} "
                f"(PDF_page {pdf_start+1}-{pdf_end+1}, document has {total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # PDF_page is shown 1-based, hence the +1 on the 0-based indices.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 24 to extract, written as (title, first page, last page)
# and expanded into the dict records the rest of the script expects.  Page
# numbers are the ones printed in the book.
table_of_contents = [
    {"title": chapter_title, "start_page": first_page, "end_page": last_page}
    for chapter_title, first_page, last_page in (
        # Main Works
        ("v24_K_Marx_Epilogue_to_Revelations_Concerning_the_Communist_Trial_at_Cologne", 51, 54),
        ("v24_K_Marx_and_F_Engels_For_Poland", 55, 58),
        ("v24_K_Marx_Critique_of_the_Gotha_Programme", 75, 99),
        ("v24_K_Marx_Letter_to_Otechestvennije_Zapiski", 196, 201),
        ("v24_K_Marx_To_an_Editorial_Board_in_London", 202, 202),
        ("v24_K_Marx_and_F_Engels_Herr_Bucher", 230, 231),
        ("v24_K_Marx_Reply_to_Buchers_Declaration", 232, 233),
        ("v24_K_Marx_Mr_George_Howells_History_of_the_International_Working_Mens_Association", 234, 239),
        ("v24_K_Marx_The_Parliamentary_Debate_on_the_Anti_Socialist_Law_Outline_of_an_Article", 240, 250),
        ("v24_K_Marx_and_F_Engels_Circular_Letter_to_August_Bebel_Wilhelm_Liebknecht_Wilhelm_Bracke_and_Others", 253, 269),
        ("v24_K_Marx_and_F_Engels_About_Karl_Blind", 270, 271),
        ("v24_K_Marx_Note_on_The_Poverty_of_Philosophy", 326, 327),
        ("v24_K_Marx_Workers_Questionnaire", 328, 334),
        ("v24_K_Marx_Introduction_to_the_French_Edition_of_Engels_Socialism_Utopian_and_Scientific", 335, 339),
        ("v24_K_Marx_Preamble_to_the_Programme_of_the_French_Workers_Party", 340, 342),
        ("v24_K_Marx_and_F_Engels_To_the_Meeting_in_Geneva_Held_to_Commemorate_the_50th_Anniversary_of_the_Polish_Revolution_of_1830", 343, 345),
        ("v24_K_Marx_Drafts_of_the_Letter_to_Vera_Zasulich", 346, 369),
        ("v24_K_Marx_Letter_to_Vera_Zasulich", 370, 371),
        ("v24_K_Marx_and_F_Engels_To_the_Chairman_of_the_Slavonic_Meeting_March_21st_1881_in_Celebration_of_the_Anniversary_of_the_Paris_Commune", 372, 373),
        ("v24_K_Marx_and_F_Engels_To_the_Editor_of_The_Daily_News", 374, 375),
        ("v24_K_Marx_and_F_Engels_Preface_to_the_Second_Russian_Edition_of_the_Manifesto_of_the_Communist_Party", 425, 426),
        # Preparatory Materials
        ("v24_K_Marx_Notes_on_Bakunins_Book_Statehood_and_Anarchy", 485, 526),
        ("v24_K_Marx_Marginal_Notes_on_Adolph_Wagners_Lehrbuch_der_politischen_Oekonomie", 531, 562),
    )
]

def main():
    """
    Split the Volume 24 PDF into one file per ``table_of_contents`` entry.

    The volume is read from the user's Downloads folder and every chapter is
    written into the "marx_chapters_v24" directory (created if missing).  A
    progress line is printed per chapter, followed by a summary and a listing
    of the PDF files found in the output directory.  Exceptions raised while
    processing are printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 24_ M - Karl Marx.pdf")
    output_dir = "marx_chapters_v24"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # tally[True] counts chapters written, tally[False] counts failures.
        tally = {True: 0, False: 0}

        for position, entry in enumerate(table_of_contents, 1):
            title = entry["title"]
            first_page = entry["start_page"]
            last_page = entry["end_page"]
            target = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            extracted = extract_chapter(reader, target, first_page, last_page, title)
            tally[bool(extracted)] += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {tally[True]} ")
        print(f"❌ failed files: {tally[False]}")
        print(f"📁 folder_name: {output_dir}")

        # Every PDF currently in the folder is listed, in sorted order.
        pdf_names = sorted(name for name in os.listdir(output_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 24_ M - Karl Marx.pdf
folder_name: marx_chapters_v24
expected_file_numbers 23

[1/23] is processing: v24_K_Marx_Epilogue_to_Revelations_Concerning_the_Communist_Trial_at_Cologne
✓ generated：marx_chapters_v24/v24_K_Marx_Epilogue_to_Revelations_Concerning_the_Communist_Trial_at_Cologne.pdf (page_num 51-54 -> PDF_page 82-85)
[2/23] is processing: v24_K_Marx_and_F_Engels_For_Poland
✓ generated：marx_chapters_v24/v24_K_Marx_and_F_Engels_For_Poland.pdf (page_num 55-58 -> PDF_page 86-89)
[3/23] is processing: v24_K_Marx_Critique_of_the_Gotha_Programme
✓ generated：marx_chapters_v24/v24_K_Marx_Critique_of_the_Gotha_Programme.pdf (page_num 75-99 -> PDF_page 106-130)
[4/23] is processing: v24_K_Marx_Letter_to_Otechestvennije_Zapiski
✓ generated：marx_chapters_v24/v24_K_Marx_Letter_to_Otechestvennije_Zapiski.pdf (page_num 196-201 -> PDF_page 227-232)
[5/23] is processing: v24_K_Marx_To_an_Editorial_Board_in_London
✓ g

In [44]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    The characters < > : " / | ? * and the backslash are dropped, every
    space becomes an underscore, and dots, commas, round brackets and square
    brackets are removed.  If nothing but whitespace is left afterwards the
    placeholder "Unknown" is returned instead.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    cleaned = cleaned.replace(' ', '_')

    # Punctuation that is legal in file names but unwanted in the output.
    for unwanted in ('.', ',', '(', ')', '[', ']'):
        cleaned = cleaned.replace(unwanted, '')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=24):
    """
    Copy one chapter out of the source PDF into its own file.

    The table of contents stores page numbers as printed in the book, while
    the reader is indexed by 0-based position inside the PDF file.
    ``page_offset`` is added to a printed page number to obtain that index.

    Args:
        input_pdf_reader: PyPDF2 ``PdfReader`` opened on the whole volume.
        output_path: Path of the PDF file to create.
        start_page: Printed number of the chapter's first page.
        end_page: Printed number of the chapter's last page (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Value added to a printed page number to get its 0-based
            index in the PDF.  Defaults to 24, the offset used for this volume.

    Returns:
        True if the chapter file was written, False otherwise.  Errors are
        printed, never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the disk: an inverted range would otherwise
        # write an empty PDF and report success, and a negative index does not
        # refer to any page of the requested range.
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            raise ValueError(
                f"invalid page range {start_page}-{end_page} "
                f"(PDF_page {pdf_start+1}-{pdf_end+1}, document has {total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # PDF_page is shown 1-based, hence the +1 on the 0-based indices.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 28 to extract; page numbers are the ones printed in the
# book.  All three belong to the Economic Manuscripts of 1857-58
# (First Version of Capital).
table_of_contents = [
    dict(
        title="v28_K_Marx_Bastiat_and_Carey",
        start_page=5,
        end_page=16,
    ),
    dict(
        title="v28_K_Marx_Introduction",
        start_page=17,
        end_page=48,
    ),
    dict(
        title="v28_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_of_1857_58_First_Instalment",
        start_page=49,
        end_page=540,
    ),
]

def main():
    """
    Split the Volume 28 PDF into one file per ``table_of_contents`` entry.

    The volume is read from the user's Downloads folder and every chapter is
    written into the "marx_chapters_v28" directory (created if missing).  A
    progress line is printed per chapter, followed by a summary and a listing
    of the PDF files found in the output directory.  Exceptions raised while
    processing are printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 28_ Ka - Karl Marx.pdf")
    output_dir = "marx_chapters_v28"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # tally[True] counts chapters written, tally[False] counts failures.
        tally = {True: 0, False: 0}

        for position, entry in enumerate(table_of_contents, 1):
            title = entry["title"]
            first_page = entry["start_page"]
            last_page = entry["end_page"]
            target = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            extracted = extract_chapter(reader, target, first_page, last_page, title)
            tally[bool(extracted)] += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {tally[True]} ")
        print(f"❌ failed files: {tally[False]}")
        print(f"📁 folder_name: {output_dir}")

        # Every PDF currently in the folder is listed, in sorted order.
        pdf_names = sorted(name for name in os.listdir(output_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 28_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v28
expected_file_numbers 3

[1/3] is processing: v28_K_Marx_Bastiat_and_Carey
✓ generated：marx_chapters_v28/v28_K_Marx_Bastiat_and_Carey.pdf (page_num 5-16 -> PDF_page 30-41)
[2/3] is processing: v28_K_Marx_Introduction
✓ generated：marx_chapters_v28/v28_K_Marx_Introduction.pdf (page_num 17-48 -> PDF_page 42-73)
[3/3] is processing: v28_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_of_1857_58_First_Instalment
✓ generated：marx_chapters_v28/v28_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_of_1857_58_First_Instalment.pdf (page_num 49-540 -> PDF_page 74-565)

sucessful extraction！
✓ sucessful files: 3 
❌ failed files: 0
📁 folder_name: marx_chapters_v28

total number of files（ 3）:
  - v28_K_Marx_Bastiat_and_Carey.pdf
  - v28_K_Marx_Introduction.pdf
  - v28_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_o

In [46]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Make a chapter title usable as a file name.

    Removes the characters < > : " / | ? * and the backslash, replaces each
    space with an underscore, and strips dots, commas, parentheses and square
    brackets.  Falls back to "Unknown" when the result is empty or consists of
    whitespace only.
    """
    safe_name = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    for symbol in ('.', ',', '(', ')', '[', ']'):
        safe_name = safe_name.replace(symbol, '')

    if safe_name.strip():
        return safe_name
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=22):
    """
    Copy one chapter out of the source PDF into its own file.

    The table of contents stores page numbers as printed in the book, while
    the reader is indexed by 0-based position inside the PDF file.
    ``page_offset`` is added to a printed page number to obtain that index.

    Args:
        input_pdf_reader: PyPDF2 ``PdfReader`` opened on the whole volume.
        output_path: Path of the PDF file to create.
        start_page: Printed number of the chapter's first page.
        end_page: Printed number of the chapter's last page (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Value added to a printed page number to get its 0-based
            index in the PDF.  Defaults to 22, the offset used for this volume.

    Returns:
        True if the chapter file was written, False otherwise.  Errors are
        printed, never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the disk: an inverted range would otherwise
        # write an empty PDF and report success, and a negative index does not
        # refer to any page of the requested range.
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            raise ValueError(
                f"invalid page range {start_page}-{end_page} "
                f"(PDF_page {pdf_start+1}-{pdf_end+1}, document has {total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # PDF_page is shown 1-based, hence the +1 on the 0-based indices.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters of Volume 29 to extract, written as (title, first page, last page)
# rows and zipped into the dict records the rest of the script expects.  Page
# numbers are the ones printed in the book.
table_of_contents = [
    dict(zip(("title", "start_page", "end_page"), row))
    for row in (
        # Economic Manuscripts of 1857-58 (First Version of Capital) - Second Instalment
        ("v29_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_of_1857_58_Second_Instalment", 5, 251),
        # A Contribution to the Critique of Political Economy Part One
        ("v29_K_Marx_Value", 252, 253),
        ("v29_K_Marx_Gold_Weighing_Machines", 254, 256),
        ("v29_K_Marx_A_Contribution_to_the_Critique_of_Political_Economy_Part_One", 257, 420),
        # Preparatory Materials
        ("v29_K_Marx_Index_to_the_7_Notebooks", 421, 429),
        ("v29_K_Marx_The_Original_Text_of_the_Second_and_the_Beginning_of_the_Third_Chapter_of_A_Contribution_to_the_Critique_of_Political_Economy", 430, 517),
        ("v29_K_Marx_References_to_My_Own_Notebooks", 518, 534),
    )
]

def main():
    """
    Split the Volume 29 PDF into one file per ``table_of_contents`` entry.

    The volume is read from the user's Downloads folder and every chapter is
    written into the "marx_chapters_v29" directory (created if missing).  A
    progress line is printed per chapter, followed by a summary and a listing
    of the PDF files found in the output directory.  Exceptions raised while
    processing are printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 29_ M - Karl Marx.pdf")
    output_dir = "marx_chapters_v29"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # tally[True] counts chapters written, tally[False] counts failures.
        tally = {True: 0, False: 0}

        for position, entry in enumerate(table_of_contents, 1):
            title = entry["title"]
            first_page = entry["start_page"]
            last_page = entry["end_page"]
            target = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            extracted = extract_chapter(reader, target, first_page, last_page, title)
            tally[bool(extracted)] += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {tally[True]} ")
        print(f"❌ failed files: {tally[False]}")
        print(f"📁 folder_name: {output_dir}")

        # Every PDF currently in the folder is listed, in sorted order.
        pdf_names = sorted(name for name in os.listdir(output_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 29_ M - Karl Marx.pdf
folder_name: marx_chapters_v29
expected_file_numbers 7

[1/7] is processing: v29_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_of_1857_58_Second_Instalment
✓ generated：marx_chapters_v29/v29_K_Marx_Outlines_of_the_Critique_of_Political_Economy_Rough_Draft_of_1857_58_Second_Instalment.pdf (page_num 5-251 -> PDF_page 28-274)
[2/7] is processing: v29_K_Marx_Value
✓ generated：marx_chapters_v29/v29_K_Marx_Value.pdf (page_num 252-253 -> PDF_page 275-276)
[3/7] is processing: v29_K_Marx_Gold_Weighing_Machines
✓ generated：marx_chapters_v29/v29_K_Marx_Gold_Weighing_Machines.pdf (page_num 254-256 -> PDF_page 277-279)
[4/7] is processing: v29_K_Marx_A_Contribution_to_the_Critique_of_Political_Economy_Part_One
✓ generated：marx_chapters_v29/v29_K_Marx_A_Contribution_to_the_Critique_of_Political_Economy_Part_One.pdf (page_num 257-420 -> PDF_page 280-443)
[5/7] is processing: v29

In [48]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    The characters < > : " / | ? * and the backslash are dropped, every
    space becomes an underscore, and dots, commas, round brackets and square
    brackets are removed.  If nothing but whitespace is left afterwards the
    placeholder "Unknown" is returned instead.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    cleaned = cleaned.replace(' ', '_')

    # Punctuation that is legal in file names but unwanted in the output.
    for unwanted in ('.', ',', '(', ')', '[', ']'):
        cleaned = cleaned.replace(unwanted, '')

    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=36):
    """
    Copy one chapter out of the source PDF into its own file.

    The table of contents stores page numbers as printed in the book, while
    the reader is indexed by 0-based position inside the PDF file.
    ``page_offset`` is added to a printed page number to obtain that index.

    Args:
        input_pdf_reader: PyPDF2 ``PdfReader`` opened on the whole volume.
        output_path: Path of the PDF file to create.
        start_page: Printed number of the chapter's first page.
        end_page: Printed number of the chapter's last page (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Value added to a printed page number to get its 0-based
            index in the PDF.  Defaults to 36, the offset used for this volume.

    Returns:
        True if the chapter file was written, False otherwise.  Errors are
        printed, never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the disk: an inverted range would otherwise
        # write an empty PDF and report success, and a negative index does not
        # refer to any page of the requested range.
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            raise ValueError(
                f"invalid page range {start_page}-{end_page} "
                f"(PDF_page {pdf_start+1}-{pdf_end+1}, document has {total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # PDF_page is shown 1-based, hence the +1 on the 0-based indices.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Single chapter of Volume 30 to extract (Economic Manuscript of 1861-63,
# A Contribution to the Critique of Political Economy, Third Chapter).
# Page numbers are the ones printed in the book.
table_of_contents = [
    dict(
        title="v30_K_Marx_The_Production_Process_of_Capital",
        start_page=9,
        end_page=454,
    ),
]

def main():
    """
    Split the Volume 30 PDF into one file per ``table_of_contents`` entry.

    The volume is read from the user's Downloads folder and every chapter is
    written into the "marx_chapters_v30" directory (created if missing).  A
    progress line is printed per chapter, followed by a summary and a listing
    of the PDF files found in the output directory.  Exceptions raised while
    processing are printed instead of being propagated.
    """
    source_pdf = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 30_ M - Karl Marx.pdf")
    output_dir = "marx_chapters_v30"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"start processing file: {source_pdf}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(source_pdf)

        # tally[True] counts chapters written, tally[False] counts failures.
        tally = {True: 0, False: 0}

        for position, entry in enumerate(table_of_contents, 1):
            title = entry["title"]
            first_page = entry["start_page"]
            last_page = entry["end_page"]
            target = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{position}/{len(table_of_contents)}] is processing: {title}")

            extracted = extract_chapter(reader, target, first_page, last_page, title)
            tally[bool(extracted)] += 1

        print(f"\nsucessful extraction！")
        print(f"✓ sucessful files: {tally[True]} ")
        print(f"❌ failed files: {tally[False]}")
        print(f"📁 folder_name: {output_dir}")

        # Every PDF currently in the folder is listed, in sorted order.
        pdf_names = sorted(name for name in os.listdir(output_dir) if name.endswith('.pdf'))
        print(f"\ntotal number of files（ {len(pdf_names)}）:")
        for name in pdf_names:
            print(f"  - {name}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 30_ M - Karl Marx.pdf
folder_name: marx_chapters_v30
expected_file_numbers 1

[1/1] is processing: v30_K_Marx_The_Production_Process_of_Capital
✓ generated：marx_chapters_v30/v30_K_Marx_The_Production_Process_of_Capital.pdf (page_num 9-454 -> PDF_page 46-491)

sucessful extraction！
✓ sucessful files: 1 
❌ failed files: 0
📁 folder_name: marx_chapters_v30

total number of files（ 1）:
  - v30_K_Marx_The_Production_Process_of_Capital.pdf


In [49]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Make a chapter title usable as a file name.

    Removes the characters < > : " / | ? * and the backslash, replaces each
    space with an underscore, and strips dots, commas, parentheses and square
    brackets.  Falls back to "Unknown" when the result is empty or consists of
    whitespace only.
    """
    safe_name = re.sub(r'[<>:"/\\|?*]', '', filename).replace(' ', '_')

    for symbol in ('.', ',', '(', ')', '[', ']'):
        safe_name = safe_name.replace(symbol, '')

    if safe_name.strip():
        return safe_name
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=5):
    """
    Copy one chapter out of the source PDF into its own file.

    The table of contents stores page numbers as printed in the book, while
    the reader is indexed by 0-based position inside the PDF file.
    ``page_offset`` is added to a printed page number to obtain that index.

    Args:
        input_pdf_reader: PyPDF2 ``PdfReader`` opened on the whole volume.
        output_path: Path of the PDF file to create.
        start_page: Printed number of the chapter's first page.
        end_page: Printed number of the chapter's last page (inclusive).
        title: Chapter title, used only in the failure message.
        page_offset: Value added to a printed page number to get its 0-based
            index in the PDF.  Defaults to 5, the offset used for this volume.

    Returns:
        True if the chapter file was written, False otherwise.  Errors are
        printed, never raised.
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # Validate before touching the disk: an inverted range would otherwise
        # write an empty PDF and report success, and a negative index does not
        # refer to any page of the requested range.
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            raise ValueError(
                f"invalid page range {start_page}-{end_page} "
                f"(PDF_page {pdf_start+1}-{pdf_end+1}, document has {total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # PDF_page is shown 1-based, hence the +1 on the 0-based indices.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Single chapter of Volume 31 to extract (Economic Manuscript of 1861-63,
# continuation of A Contribution to the Critique of Political Economy).
# Page numbers are the ones printed in the book.
table_of_contents = [
    dict(
        title="v31_K_Marx_The_Production_Process_of_Capital_Continuation",
        start_page=6,
        end_page=582,
    ),
]

def main(input_pdf_path=None, output_dir="marx_chapters_v31", toc=None):
    """
    Split the Volume 31 PDF into one file per table-of-contents entry and print a report.

    Args:
        input_pdf_path: Path of the source PDF. Defaults to the Volume 31 file
            in the current user's Downloads folder.
        output_dir: Directory that receives the chapter PDFs; created when missing.
        toc: List of dicts with "title", "start_page" and "end_page" keys.
            Defaults to the module-level table_of_contents.

    Nothing is returned. Failures during processing are printed instead of
    being raised.
    """
    if input_pdf_path is None:
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 31_ Ka - Karl Marx.pdf")
    if toc is None:
        toc = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(toc)}\n")

    try:
        # Open the input before creating the folder so that an unreadable
        # source does not leave an empty output directory behind.
        reader = PdfReader(input_pdf_path)
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(toc, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(toc)}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was written.
        if failed_count == 0:
            print("\nsucessful extraction！")
        else:
            print("\nextraction finished with failures！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 31_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v31
expected_file_numbers 1

[1/1] is processing: v31_K_Marx_The_Production_Process_of_Capital_Continuation
✓ generated：marx_chapters_v31/v31_K_Marx_The_Production_Process_of_Capital_Continuation.pdf (page_num 6-582 -> PDF_page 12-588)

sucessful extraction！
✓ sucessful files: 1 
❌ failed files: 0
📁 folder_name: marx_chapters_v31

total number of files（ 1）:
  - v31_K_Marx_The_Production_Process_of_Capital_Continuation.pdf


In [50]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters reserved by common file systems (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and the
    punctuation dot, comma, round and square brackets are removed; every
    space becomes an underscore. When nothing but whitespace is left,
    "Unknown" is returned instead.
    """
    # One pass drops every unwanted character; spaces are mapped afterwards.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename).replace(' ', '_')
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=8):
    """
    Write the pages of one chapter to a separate PDF file.

    Args:
        input_pdf_reader: Opened PdfReader for the whole volume.
        output_path: Where the chapter PDF is written.
        start_page: First page of the chapter (inclusive), in the numbering
            used by table_of_contents.
        end_page: Last page of the chapter (inclusive), same numbering.
        title: Chapter title; only used in the failure message.
        page_offset: Added to start_page/end_page to get the 0-based index
            into input_pdf_reader.pages. Defaults to 8, the shift this cell
            uses for its volume.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed; no exception is propagated).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would produce an empty PDF that is still reported
        # as a success, so refuse it explicitly.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")
        # Check both ends before writing anything: a negative index could wrap
        # around to the end of the document instead of failing.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best-effort batch step: report the problem and let the caller continue.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to cut out of Volume 32. start_page/end_page are inclusive and are
# shifted by extract_chapter before the PDF is indexed.
table_of_contents = [
    # Economic Manuscript of 1861-63 (Continuation) - A Contribution to the Critique of Political Economy
    dict(
        title="v32_K_Marx_The_Production_Process_of_Capital_Continuation",
        start_page=7,
        end_page=546,
    ),
]

def main(input_pdf_path=None, output_dir="marx_chapters_v32", toc=None):
    """
    Split the Volume 32 PDF into one file per table-of-contents entry and print a report.

    Args:
        input_pdf_path: Path of the source PDF. Defaults to the Volume 32 file
            in the current user's Downloads folder.
        output_dir: Directory that receives the chapter PDFs; created when missing.
        toc: List of dicts with "title", "start_page" and "end_page" keys.
            Defaults to the module-level table_of_contents.

    Nothing is returned. Failures during processing are printed instead of
    being raised.
    """
    if input_pdf_path is None:
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 32_ Ka - Karl Marx.pdf")
    if toc is None:
        toc = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(toc)}\n")

    try:
        # Open the input before creating the folder so that an unreadable
        # source does not leave an empty output directory behind.
        reader = PdfReader(input_pdf_path)
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(toc, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(toc)}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was written.
        if failed_count == 0:
            print("\nsucessful extraction！")
        else:
            print("\nextraction finished with failures！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 32_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v32
expected_file_numbers 1

[1/1] is processing: v32_K_Marx_The_Production_Process_of_Capital_Continuation
✓ generated：marx_chapters_v32/v32_K_Marx_The_Production_Process_of_Capital_Continuation.pdf (page_num 7-546 -> PDF_page 16-555)

sucessful extraction！
✓ sucessful files: 1 
❌ failed files: 0
📁 folder_name: marx_chapters_v32

total number of files（ 1）:
  - v32_K_Marx_The_Production_Process_of_Capital_Continuation.pdf


In [51]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters reserved by common file systems (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and the
    punctuation dot, comma, round and square brackets are removed; every
    space becomes an underscore. When nothing but whitespace is left,
    "Unknown" is returned instead.
    """
    # One pass drops every unwanted character; spaces are mapped afterwards.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename).replace(' ', '_')
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=10):
    """
    Write the pages of one chapter to a separate PDF file.

    Args:
        input_pdf_reader: Opened PdfReader for the whole volume.
        output_path: Where the chapter PDF is written.
        start_page: First page of the chapter (inclusive), in the numbering
            used by table_of_contents.
        end_page: Last page of the chapter (inclusive), same numbering.
        title: Chapter title; only used in the failure message.
        page_offset: Added to start_page/end_page to get the 0-based index
            into input_pdf_reader.pages. Defaults to 10, the shift this cell
            uses for its volume.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed; no exception is propagated).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would produce an empty PDF that is still reported
        # as a success, so refuse it explicitly.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")
        # Check both ends before writing anything: a negative index could wrap
        # around to the end of the document instead of failing.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best-effort batch step: report the problem and let the caller continue.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to cut out of Volume 33, written as (title, start_page, end_page).
# Page ranges are inclusive and are shifted by extract_chapter before the PDF
# is indexed.
table_of_contents = [
    {"title": name, "start_page": first, "end_page": last}
    for name, first, last in (
        # Economic Manuscript of 1861-63 (Continuation) - A Contribution to the Critique of Political Economy
        ("v33_K_Marx_Mercantile_Capital_Money_dealing_Capital", 9, 68),
        ("v33_K_Marx_Third_Chapter_Capital_and_Profit", 69, 145),
        ("v33_K_Marx_Miscellanea", 146, 170),
        ("v33_K_Marx_Episode_Reflux_Movements_of_Money_in_Capitalist_Reproduction", 171, 238),
        ("v33_K_Marx_Mercantile_Capital_Continued", 239, 504),
    )
]

def main(input_pdf_path=None, output_dir="marx_chapters_v33", toc=None):
    """
    Split the Volume 33 PDF into one file per table-of-contents entry and print a report.

    Args:
        input_pdf_path: Path of the source PDF. Defaults to the Volume 33 file
            in the current user's Downloads folder.
        output_dir: Directory that receives the chapter PDFs; created when missing.
        toc: List of dicts with "title", "start_page" and "end_page" keys.
            Defaults to the module-level table_of_contents.

    Nothing is returned. Failures during processing are printed instead of
    being raised.
    """
    if input_pdf_path is None:
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 33_ Ka - Karl Marx.pdf")
    if toc is None:
        toc = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(toc)}\n")

    try:
        # Open the input before creating the folder so that an unreadable
        # source does not leave an empty output directory behind.
        reader = PdfReader(input_pdf_path)
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(toc, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(toc)}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was written.
        if failed_count == 0:
            print("\nsucessful extraction！")
        else:
            print("\nextraction finished with failures！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 33_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v33
expected_file_numbers 5

[1/5] is processing: v33_K_Marx_Mercantile_Capital_Money_dealing_Capital
✓ generated：marx_chapters_v33/v33_K_Marx_Mercantile_Capital_Money_dealing_Capital.pdf (page_num 9-68 -> PDF_page 20-79)
[2/5] is processing: v33_K_Marx_Third_Chapter_Capital_and_Profit
✓ generated：marx_chapters_v33/v33_K_Marx_Third_Chapter_Capital_and_Profit.pdf (page_num 69-145 -> PDF_page 80-156)
[3/5] is processing: v33_K_Marx_Miscellanea
✓ generated：marx_chapters_v33/v33_K_Marx_Miscellanea.pdf (page_num 146-170 -> PDF_page 157-181)
[4/5] is processing: v33_K_Marx_Episode_Reflux_Movements_of_Money_in_Capitalist_Reproduction
✓ generated：marx_chapters_v33/v33_K_Marx_Episode_Reflux_Movements_of_Money_in_Capitalist_Reproduction.pdf (page_num 171-238 -> PDF_page 182-249)
[5/5] is processing: v33_K_Marx_Mercantile_Capital_Continued
✓ generated：marx_chapters_

In [52]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters reserved by common file systems (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and the
    punctuation dot, comma, round and square brackets are removed; every
    space becomes an underscore. When nothing but whitespace is left,
    "Unknown" is returned instead.
    """
    # One pass drops every unwanted character; spaces are mapped afterwards.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename).replace(' ', '_')
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=8):
    """
    Write the pages of one chapter to a separate PDF file.

    Args:
        input_pdf_reader: Opened PdfReader for the whole volume.
        output_path: Where the chapter PDF is written.
        start_page: First page of the chapter (inclusive), in the numbering
            used by table_of_contents.
        end_page: Last page of the chapter (inclusive), same numbering.
        title: Chapter title; only used in the failure message.
        page_offset: Added to start_page/end_page to get the 0-based index
            into input_pdf_reader.pages. Defaults to 8, the shift this cell
            uses for its volume.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed; no exception is propagated).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would produce an empty PDF that is still reported
        # as a success, so refuse it explicitly.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")
        # Check both ends before writing anything: a negative index could wrap
        # around to the end of the document instead of failing.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best-effort batch step: report the problem and let the caller continue.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to cut out of Volume 34, written as (title, start_page, end_page).
# Page ranges are inclusive and are shifted by extract_chapter before the PDF
# is indexed.
table_of_contents = [
    {"title": name, "start_page": first, "end_page": last}
    for name, first, last in (
        # Economic Manuscript of 1861-63 (Conclusion) - A Contribution to the Critique of Political Economy
        ("v34_K_Marx_The_Production_Process_of_Capital_Conclusion", 7, 338),

        # Capital Book I - The Process of Production of Capital
        ("v34_K_Marx_Volume_I_of_Capital_Separate_Pages_of_the_Manuscript", 339, 466),
        ("v34_K_Marx_Unplaced_Footnotes", 467, 474),
    )
]

def main(input_pdf_path=None, output_dir="marx_chapters_v34", toc=None):
    """
    Split the Volume 34 PDF into one file per table-of-contents entry and print a report.

    Args:
        input_pdf_path: Path of the source PDF. Defaults to the Volume 34 file
            in the current user's Downloads folder.
        output_dir: Directory that receives the chapter PDFs; created when missing.
        toc: List of dicts with "title", "start_page" and "end_page" keys.
            Defaults to the module-level table_of_contents.

    Nothing is returned. Failures during processing are printed instead of
    being raised.
    """
    if input_pdf_path is None:
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 34_ Ka - Karl Marx.pdf")
    if toc is None:
        toc = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(toc)}\n")

    try:
        # Open the input before creating the folder so that an unreadable
        # source does not leave an empty output directory behind.
        reader = PdfReader(input_pdf_path)
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(toc, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(toc)}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was written.
        if failed_count == 0:
            print("\nsucessful extraction！")
        else:
            print("\nextraction finished with failures！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 34_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v34
expected_file_numbers 3

[1/3] is processing: v34_K_Marx_The_Production_Process_of_Capital_Conclusion
✓ generated：marx_chapters_v34/v34_K_Marx_The_Production_Process_of_Capital_Conclusion.pdf (page_num 7-338 -> PDF_page 16-347)
[2/3] is processing: v34_K_Marx_Volume_I_of_Capital_Separate_Pages_of_the_Manuscript
✓ generated：marx_chapters_v34/v34_K_Marx_Volume_I_of_Capital_Separate_Pages_of_the_Manuscript.pdf (page_num 339-466 -> PDF_page 348-475)
[3/3] is processing: v34_K_Marx_Unplaced_Footnotes
✓ generated：marx_chapters_v34/v34_K_Marx_Unplaced_Footnotes.pdf (page_num 467-474 -> PDF_page 476-483)

sucessful extraction！
✓ sucessful files: 3 
❌ failed files: 0
📁 folder_name: marx_chapters_v34

total number of files（ 3）:
  - v34_K_Marx_The_Production_Process_of_Capital_Conclusion.pdf
  - v34_K_Marx_Unplaced_Footnotes.pdf
  - v34_K_Marx_Volume_I_of_Capita

In [54]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters reserved by common file systems (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and the
    punctuation dot, comma, round and square brackets are removed; every
    space becomes an underscore. When nothing but whitespace is left,
    "Unknown" is returned instead.
    """
    # One pass drops every unwanted character; spaces are mapped afterwards.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename).replace(' ', '_')
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=11):
    """
    Write the pages of one chapter to a separate PDF file.

    Args:
        input_pdf_reader: Opened PdfReader for the whole volume.
        output_path: Where the chapter PDF is written.
        start_page: First page of the chapter (inclusive), in the numbering
            used by table_of_contents.
        end_page: Last page of the chapter (inclusive), same numbering.
        title: Chapter title; only used in the failure message.
        page_offset: Added to start_page/end_page to get the 0-based index
            into input_pdf_reader.pages. Defaults to 11, the shift this cell
            uses for its volume.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed; no exception is propagated).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would produce an empty PDF that is still reported
        # as a success, so refuse it explicitly.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")
        # Check both ends before writing anything: a negative index could wrap
        # around to the end of the document instead of failing.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best-effort batch step: report the problem and let the caller continue.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to cut out of Volume 35, written as (title, start_page, end_page).
# Page ranges are inclusive and are shifted by extract_chapter before the PDF
# is indexed.
table_of_contents = [
    {"title": name, "start_page": first, "end_page": last}
    for name, first, last in (
        # Prefaces and Editions
        ("v35_K_Marx_Preface_to_the_First_German_Edition", 7, 11),
        ("v35_K_Marx_Afterword_to_the_Second_German_Edition", 12, 22),
        ("v35_K_Marx_Preface_to_the_French_Edition", 23, 23),
        ("v35_K_Marx_Afterword_to_the_French_Edition", 24, 26),
        # Book I - The Process of Production of Capital
        # Part I - Commodities and Money
        ("v35_K_Marx_Chapter_1_Commodities", 45, 102),
        ("v35_K_Marx_Chapter_2_Exchange", 103, 112),
        ("v35_K_Marx_Chapter_3_Money_or_the_Circulation_of_Commodities", 113, 156),

        # Part II - The Transformation of Money into Capital
        ("v35_K_Marx_Chapter_4_The_General_Formula_for_Capital", 157, 165),
        ("v35_K_Marx_Chapter_5_Contradictions_in_the_General_Formula_of_Capital", 166, 176),
        ("v35_K_Marx_Chapter_6_The_Buying_and_Selling_of_Labour_Power", 177, 186),

        # Part III - The Production of Absolute Surplus Value
        ("v35_K_Marx_Chapter_7_The_Labour_Process_and_the_Process_of_Producing_Surplus_Value", 187, 208),
        ("v35_K_Marx_Chapter_8_Constant_Capital_and_Variable_Capital", 209, 220),
        ("v35_K_Marx_Chapter_9_The_Rate_of_Surplus_Value", 221, 238),
        ("v35_K_Marx_Chapter_10_The_Working_Day", 239, 306),
        ("v35_K_Marx_Chapter_11_Rate_and_Mass_of_Surplus_Value", 307, 316),

        # Part IV - Production of Relative Surplus Value
        ("v35_K_Marx_Chapter_12_The_Concept_of_Relative_Surplus_Value", 317, 325),
        ("v35_K_Marx_Chapter_13_Co_operation", 326, 340),
        ("v35_K_Marx_Chapter_14_Division_of_Labour_and_Manufacture", 341, 373),
        ("v35_K_Marx_Chapter_15_Machinery_and_Modern_Industry", 374, 508),

        # Part V - The Production of Absolute and of Relative Surplus Value
        ("v35_K_Marx_Chapter_16_Absolute_and_Relative_Surplus_Value", 509, 518),
        ("v35_K_Marx_Chapter_17_Changes_of_Magnitude_in_the_Price_of_Labour_Power_and_in_Surplus_Value", 519, 530),
        ("v35_K_Marx_Chapter_18_Various_Formulae_for_the_Rate_of_Surplus_Value", 531, 534),

        # Part VI - Wages
        ("v35_K_Marx_Chapter_19_The_Transformation_of_the_Value_and_Respectively_the_Price_of_Labour_Power_into_Wages", 535, 541),
        ("v35_K_Marx_Chapter_20_Time_Wages", 542, 549),
        ("v35_K_Marx_Chapter_21_Piece_Wages", 550, 557),
        ("v35_K_Marx_Chapter_22_National_Differences_of_Wages", 558, 564),

        # Part VII - The Accumulation of Capital
        ("v35_K_Marx_Chapter_23_Simple_Reproduction", 565, 577),
        ("v35_K_Marx_Chapter_24_Conversion_of_Surplus_Value_into_Capital", 578, 606),
        ("v35_K_Marx_Chapter_25_The_General_Law_of_Capitalist_Accumulation", 607, 703),

        # Part VIII - The So-Called Primitive Accumulation
        ("v35_K_Marx_Chapter_26_The_Secret_of_Primitive_Accumulation", 704, 706),
        ("v35_K_Marx_Chapter_27_Expropriation_of_the_Agricultural_Population_from_the_Land", 707, 722),
        ("v35_K_Marx_Chapter_28_Bloody_Legislation_Against_the_Expropriated_from_the_End_of_the_15th_Century_Forcing_down_of_Wages_by_Acts_of_Parliament", 723, 730),
        ("v35_K_Marx_Chapter_29_Genesis_of_the_Capitalist_Farmer", 731, 732),
        ("v35_K_Marx_Chapter_30_Reaction_of_the_Agricultural_Revolution_on_Industry_Creation_of_the_Home_Market_for_Industrial_Capital", 733, 737),
        ("v35_K_Marx_Chapter_31_Genesis_of_the_Industrial_Capitalist", 738, 747),
        ("v35_K_Marx_Chapter_32_Historical_Tendency_of_Capitalist_Accumulation", 748, 750),
        ("v35_K_Marx_Chapter_33_The_Modern_Theory_of_Colonisation", 751, 764),
    )
]

def main(input_pdf_path=None, output_dir="marx_chapters_v35", toc=None):
    """
    Split the Volume 35 PDF into one file per table-of-contents entry and print a report.

    Args:
        input_pdf_path: Path of the source PDF. Defaults to the Volume 35 file
            in the current user's Downloads folder.
        output_dir: Directory that receives the chapter PDFs; created when missing.
        toc: List of dicts with "title", "start_page" and "end_page" keys.
            Defaults to the module-level table_of_contents.

    Nothing is returned. Failures during processing are printed instead of
    being raised.
    """
    if input_pdf_path is None:
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 35_ K - Karl Marx.pdf")
    if toc is None:
        toc = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(toc)}\n")

    try:
        # Open the input before creating the folder so that an unreadable
        # source does not leave an empty output directory behind.
        reader = PdfReader(input_pdf_path)
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(toc, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(toc)}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was written.
        if failed_count == 0:
            print("\nsucessful extraction！")
        else:
            print("\nextraction finished with failures！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 35_ K - Karl Marx.pdf
folder_name: marx_chapters_v35
expected_file_numbers 37

[1/37] is processing: v35_K_Marx_Preface_to_the_First_German_Edition
✓ generated：marx_chapters_v35/v35_K_Marx_Preface_to_the_First_German_Edition.pdf (page_num 7-11 -> PDF_page 19-23)
[2/37] is processing: v35_K_Marx_Afterword_to_the_Second_German_Edition
✓ generated：marx_chapters_v35/v35_K_Marx_Afterword_to_the_Second_German_Edition.pdf (page_num 12-22 -> PDF_page 24-34)
[3/37] is processing: v35_K_Marx_Preface_to_the_French_Edition
✓ generated：marx_chapters_v35/v35_K_Marx_Preface_to_the_French_Edition.pdf (page_num 23-23 -> PDF_page 35-35)
[4/37] is processing: v35_K_Marx_Afterword_to_the_French_Edition
✓ generated：marx_chapters_v35/v35_K_Marx_Afterword_to_the_French_Edition.pdf (page_num 24-26 -> PDF_page 36-38)
[5/37] is processing: v35_K_Marx_Chapter_1_Commodities
✓ generated：marx_chapters_v35/v35_K_Marx_Chapter_1_Commodi

In [57]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters reserved by common file systems (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and the
    punctuation dot, comma, round and square brackets are removed; every
    space becomes an underscore. When nothing but whitespace is left,
    "Unknown" is returned instead.
    """
    # One pass drops every unwanted character; spaces are mapped afterwards.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename).replace(' ', '_')
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=7):
    """
    Write the pages of one chapter to a separate PDF file.

    Args:
        input_pdf_reader: Opened PdfReader for the whole volume.
        output_path: Where the chapter PDF is written.
        start_page: First page of the chapter (inclusive), in the numbering
            used by table_of_contents.
        end_page: Last page of the chapter (inclusive), same numbering.
        title: Chapter title; only used in the failure message.
        page_offset: Added to start_page/end_page to get the 0-based index
            into input_pdf_reader.pages. Defaults to 7, the shift this cell
            uses for its volume.

    Returns:
        True when the file was written, False when anything went wrong
        (the reason is printed; no exception is propagated).
    """
    try:
        pdf_start = start_page + page_offset
        pdf_end = end_page + page_offset
        total_pages = len(input_pdf_reader.pages)

        # An inverted range would produce an empty PDF that is still reported
        # as a success, so refuse it explicitly.
        if start_page > end_page:
            raise ValueError(f"start_page {start_page} is after end_page {end_page}")
        # Check both ends before writing anything: a negative index could wrap
        # around to the end of the document instead of failing.
        if pdf_start < 0 or pdf_end >= total_pages:
            raise ValueError(
                f"PDF pages {pdf_start + 1}-{pdf_end + 1} are outside the document ({total_pages} pages)"
            )

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        # Best-effort batch step: report the problem and let the caller continue.
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapters to cut out of Volume 36, written as (title, start_page, end_page).
# Page ranges are inclusive and are shifted by extract_chapter before the PDF
# is indexed.
table_of_contents = [
    {"title": name, "start_page": first, "end_page": last}
    for name, first, last in (
        # Book II - The Process of Circulation of Capital
        # Part I - The Metamorphoses of Capital and Their Circuits
        ("v36_K_Marx_Chapter_1_The_Circuit_of_Money_Capital", 31, 69),
        ("v36_K_Marx_Chapter_2_The_Circuit_of_Productive_Capital", 70, 91),
        ("v36_K_Marx_Chapter_3_The_Circuit_of_Commodity_Capital", 92, 104),
        ("v36_K_Marx_Chapter_4_The_Three_Formulas_of_the_Circuit", 105, 124),
        ("v36_K_Marx_Chapter_5_The_Time_of_Circulation", 125, 132),
        ("v36_K_Marx_Chapter_6_The_Costs_of_Circulation", 133, 155),

        # Part II - The Turnover of Capital
        ("v36_K_Marx_Chapter_7_The_Turnover_Time_and_the_Number_of_Turnovers", 156, 158),
        ("v36_K_Marx_Chapter_8_Fixed_Capital_and_Circulating_Capital", 159, 184),
        ("v36_K_Marx_Chapter_9_The_Aggregate_Turnover_of_Advanced_Capital_Cycles_of_Turnover", 185, 189),
        ("v36_K_Marx_Chapter_10_Theories_of_Fixed_and_Circulating_Capital_The_Physiocrats_and_Adam_Smith", 190, 216),
        ("v36_K_Marx_Chapter_11_Theories_of_Fixed_and_Circulating_Capital_Ricardo", 217, 229),
        ("v36_K_Marx_Chapter_12_The_Working_Period", 230, 238),
        ("v36_K_Marx_Chapter_13_The_Time_of_Production", 239, 248),
        ("v36_K_Marx_Chapter_14_The_Time_of_Circulation", 249, 256),
        ("v36_K_Marx_Chapter_15_Effect_of_the_Time_of_Turnover_on_the_Magnitude_of_Advanced_Capital", 257, 292),
        ("v36_K_Marx_Chapter_16_The_Turnover_of_Variable_Capital", 293, 317),
        ("v36_K_Marx_Chapter_17_The_Circulation_of_Surplus_Value", 318, 348),

        # Part III - The Reproduction and Circulation of the Aggregate Social Capital
        ("v36_K_Marx_Chapter_18_Introduction", 349, 356),
        ("v36_K_Marx_Chapter_19_Former_Presentations_of_the_Subject", 357, 389),
        ("v36_K_Marx_Chapter_20_Simple_Reproduction", 390, 487),
        ("v36_K_Marx_Chapter_21_Accumulation_and_Reproduction_on_an_Extended_Scale", 488, 526),
    )
]

def main(input_pdf_path=None, output_dir="marx_chapters_v36", toc=None):
    """
    Split the Volume 36 PDF into one file per table-of-contents entry and print a report.

    Args:
        input_pdf_path: Path of the source PDF. Defaults to the Volume 36 file
            in the current user's Downloads folder.
        output_dir: Directory that receives the chapter PDFs; created when missing.
        toc: List of dicts with "title", "start_page" and "end_page" keys.
            Defaults to the module-level table_of_contents.

    Nothing is returned. Failures during processing are printed instead of
    being raised.
    """
    if input_pdf_path is None:
        input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 36_ K - Karl Marx.pdf")
    if toc is None:
        toc = table_of_contents

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(toc)}\n")

    try:
        # Open the input before creating the folder so that an unreadable
        # source does not leave an empty output directory behind.
        reader = PdfReader(input_pdf_path)
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0

        for i, item in enumerate(toc, 1):
            title = item["title"]
            output_path = os.path.join(output_dir, f"{sanitize_filename(title)}.pdf")

            print(f"[{i}/{len(toc)}] is processing: {title}")

            if extract_chapter(reader, output_path, item["start_page"], item["end_page"], title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was written.
        if failed_count == 0:
            print("\nsucessful extraction！")
        else:
            print("\nextraction finished with failures！")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, including files left by earlier runs.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 36_ K - Karl Marx.pdf
folder_name: marx_chapters_v36
expected_file_numbers 21

[1/21] is processing: v36_K_Marx_Chapter_1_The_Circuit_of_Money_Capital
✓ generated：marx_chapters_v36/v36_K_Marx_Chapter_1_The_Circuit_of_Money_Capital.pdf (page_num 31-69 -> PDF_page 39-77)
[2/21] is processing: v36_K_Marx_Chapter_2_The_Circuit_of_Productive_Capital
✓ generated：marx_chapters_v36/v36_K_Marx_Chapter_2_The_Circuit_of_Productive_Capital.pdf (page_num 70-91 -> PDF_page 78-99)
[3/21] is processing: v36_K_Marx_Chapter_3_The_Circuit_of_Commodity_Capital
✓ generated：marx_chapters_v36/v36_K_Marx_Chapter_3_The_Circuit_of_Commodity_Capital.pdf (page_num 92-104 -> PDF_page 100-112)
[4/21] is processing: v36_K_Marx_Chapter_4_The_Three_Formulas_of_the_Circuit
✓ generated：marx_chapters_v36/v36_K_Marx_Chapter_4_The_Three_Formulas_of_the_Circuit.pdf (page_num 105-124 -> PDF_page 113-132)
[5/21] is processing: v36_K_Marx_Chapte

In [59]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a chapter title into a string that can be used as a file name.

    Characters reserved by common file systems (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and the
    punctuation dot, comma, round and square brackets are removed; every
    space becomes an underscore. When nothing but whitespace is left,
    "Unknown" is returned instead.
    """
    # One pass drops every unwanted character; spaces are mapped afterwards.
    cleaned = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename).replace(' ', '_')
    return cleaned if cleaned.strip() else "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=9):
    """
    Copy one page range of the source PDF into its own file.

    start_page and end_page are the page numbers used in table_of_contents
    (inclusive). Adding page_offset turns them into 0-based indexes into
    input_pdf_reader.pages.

    Args:
        input_pdf_reader: reader object exposing the source pages as .pages.
        output_path: path of the PDF file to create.
        start_page: first page of the range (table_of_contents numbering).
        end_page: last page of the range, inclusive.
        title: label used in the failure message.
        page_offset: value added to both page numbers to get the index into
            input_pdf_reader.pages (defaults to 9, the offset used for this volume).

    Returns:
        True when the file was written; False when the range is invalid or any
        step raised. A status line is printed in both cases.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an empty range would
        # otherwise produce a page-less PDF that is reported as a success, and
        # a negative index could be read as "counted from the end".
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            print(f"❌ fail to extract：{title} - invalid page range {start_page}-{end_page} (PDF has {total_pages} pages)")
            return False

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Chapter list for Volume 37: (title, first page, last page), expanded into the
# {"title", "start_page", "end_page"} dicts that main() iterates over.
table_of_contents = [
    {"title": chapter_title, "start_page": first_page, "end_page": last_page}
    for chapter_title, first_page, last_page in (
        # Book III - The Process of Capitalist Production as a Whole
        # Part I - The Conversion of Surplus Value into Profit and of the Rate of Surplus Value into the Rate of Profit
        ("v37_K_Marx_Chapter_1_Cost_Price_and_Profit", 27, 44),
        ("v37_K_Marx_Chapter_2_The_Rate_of_Profit", 45, 51),
        ("v37_K_Marx_Chapter_3_The_Relation_of_the_Rate_of_Profit_to_the_Rate_of_Surplus_Value", 52, 72),
        ("v37_K_Marx_Chapter_4_The_Effect_of_the_Turnover_on_the_Rate_of_Profit", 73, 79),
        ("v37_K_Marx_Chapter_5_Economy_in_the_Employment_of_Constant_Capital", 80, 105),
        ("v37_K_Marx_Chapter_6_The_Effect_of_Price_Fluctuations", 106, 136),
        ("v37_K_Marx_Chapter_7_Supplementary_Remarks", 137, 140),

        # Part II - Conversion of Profit into Average Profit
        ("v37_K_Marx_Chapter_8_Different_Compositions_of_Capitals_in_Different_Branches_of_Production_and_Resulting_Differences_in_Rates_of_Profit", 141, 152),
        ("v37_K_Marx_Chapter_9_Formation_of_a_General_Rate_of_Profit_Average_Rate_of_Profit_and_Transformation_of_the_Values_of_Commodities_into_Prices_of_Production", 153, 170),
        ("v37_K_Marx_Chapter_10_Equalisation_of_the_General_Rate_of_Profit_Through_Competition_Market_Prices_and_Market_Values_Surplus_Profit", 171, 197),
        ("v37_K_Marx_Chapter_11_Effects_of_General_Wage_Fluctuations_on_Prices_of_Production", 198, 201),
        ("v37_K_Marx_Chapter_12_Supplementary_Remarks", 202, 208),

        # Part III - The Law of the Tendency of the Rate of Profit to Fall
        ("v37_K_Marx_Chapter_13_The_Law_as_Such", 209, 229),
        ("v37_K_Marx_Chapter_14_Counteracting_Influences", 230, 238),
        ("v37_K_Marx_Chapter_15_Exposition_of_the_Internal_Contradictions_of_the_Law", 239, 265),

        # Part IV - Conversion of Commodity Capital and Money Capital into Commercial Capital and Money-Dealing Capital (Merchant's Capital)
        ("v37_K_Marx_Chapter_16_Commercial_Capital", 266, 278),
        ("v37_K_Marx_Chapter_17_Commercial_Profit", 279, 300),
        ("v37_K_Marx_Chapter_18_The_Turnover_of_Merchants_Capital_Prices", 301, 312),
        ("v37_K_Marx_Chapter_19_Money_Dealing_Capital", 313, 320),
        ("v37_K_Marx_Chapter_20_Historical_Facts_About_Merchants_Capital", 321, 335),

        # Part V - Division of Profit into Interest and Profit of Enterprise. Interest-Bearing Capital
        ("v37_K_Marx_Chapter_21_Interest_Bearing_Capital", 336, 355),
        ("v37_K_Marx_Chapter_22_Division_of_Profit_Rate_of_Interest_Natural_Rate_of_Interest", 356, 366),
        ("v37_K_Marx_Chapter_23_Interest_and_Profit_of_Enterprise", 367, 387),
        ("v37_K_Marx_Chapter_24_Externalisation_of_the_Relations_of_Capital_in_the_Form_of_Interest_Bearing_Capital", 388, 396),
        ("v37_K_Marx_Chapter_25_Credit_and_Fictitious_Capital", 397, 411),
        ("v37_K_Marx_Chapter_26_Accumulation_of_Money_Capital_Its_Influence_on_the_Interest_Rate", 412, 431),
        ("v37_K_Marx_Chapter_27_The_Role_of_Credit_in_Capitalist_Production", 432, 438),
        ("v37_K_Marx_Chapter_28_Medium_of_Circulation_and_Capital_Views_of_Tooke_and_Fullarton", 439, 460),
        ("v37_K_Marx_Chapter_29_Component_Parts_of_Bank_Capital", 461, 474),
        ("v37_K_Marx_Chapter_30_Money_Capital_and_Real_Capital_I", 475, 492),
        ("v37_K_Marx_Chapter_31_Money_Capital_and_Real_Capital_II_Continued", 493, 501),
        ("v37_K_Marx_Chapter_32_Money_Capital_and_Real_Capital_III_Concluded", 502, 516),
        ("v37_K_Marx_Chapter_33_The_Medium_of_Circulation_in_the_Credit_System", 517, 541),
        ("v37_K_Marx_Chapter_34_The_Currency_Principle_and_the_English_Bank_Legislation_of_1844", 542, 558),
        ("v37_K_Marx_Chapter_35_Precious_Metal_and_Rate_of_Exchange", 559, 587),
        ("v37_K_Marx_Chapter_36_Precapitalist_Relationships", 588, 607),

        # Part VI - Transformation of Surplus Profit into Ground Rent
        ("v37_K_Marx_Chapter_37_Introduction", 608, 632),
        ("v37_K_Marx_Chapter_38_Differential_Rent_General_Remarks", 633, 641),
        ("v37_K_Marx_Chapter_39_First_Form_of_Differential_Rent_Differential_Rent_I", 642, 665),
        ("v37_K_Marx_Chapter_40_Second_Form_of_Differential_Rent_Differential_Rent_II", 666, 676),
        ("v37_K_Marx_Chapter_41_Differential_Rent_II_First_Case_Constant_Price_of_Production", 677, 683),
        ("v37_K_Marx_Chapter_42_Differential_Rent_II_Second_Case_Falling_Price_of_Production", 684, 699),
        ("v37_K_Marx_Chapter_43_Differential_Rent_II_Third_Case_Rising_Price_of_Production", 700, 725),
        ("v37_K_Marx_Chapter_44_Differential_Rent_Also_on_the_Worst_Cultivated_Soil", 726, 733),
        ("v37_K_Marx_Chapter_45_Absolute_Ground_Rent", 734, 758),
        ("v37_K_Marx_Chapter_46_Building_Site_Rent_Rent_in_Mining_Price_of_Land", 759, 767),
        ("v37_K_Marx_Chapter_47_Genesis_of_Capitalist_Ground_Rent", 768, 800),

        # Part VII - Revenues and Their Sources
        ("v37_K_Marx_Chapter_48_The_Trinity_Formula", 801, 817),
        ("v37_K_Marx_Chapter_49_Concerning_the_Analysis_of_the_Process_of_Production", 818, 838),
        ("v37_K_Marx_Chapter_50_Illusions_Created_by_Competition", 839, 852),
        ("v37_K_Marx_Chapter_51_Distribution_Relations_and_Production_Relations", 853, 869),
        ("v37_K_Marx_Chapter_52_Classes", 870, 872),
        ("v37_F_Engels_Supplement_to_Capital_Volume_Three", 873, 900),
    )
]

def main():
    """
    Split the Volume 37 PDF into one PDF per table_of_contents entry.

    Opens the source PDF from the user's Downloads folder, writes each entry
    to marx_chapters_v37/<sanitized title>.pdf through extract_chapter, then
    prints a summary and the PDFs found in the output folder. Exceptions
    raised while processing are caught and printed instead of propagating.
    """
    # The source PDF is expected in the current user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 37_ K - Karl Marx.pdf")

    output_dir = "marx_chapters_v37"

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        # Create the folder only after the PDF has opened, so a missing or
        # unreadable input does not leave an empty output folder behind.
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0
        total = len(table_of_contents)

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print("\nextraction finished with errors")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, so files left over from earlier runs
        # are included in this count as well.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run the Volume 37 extraction when this code is executed directly.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 37_ K - Karl Marx.pdf
folder_name: marx_chapters_v37
expected_file_numbers 53

[1/53] is processing: v37_K_Marx_Chapter_1_Cost_Price_and_Profit
✓ generated：marx_chapters_v37/v37_K_Marx_Chapter_1_Cost_Price_and_Profit.pdf (page_num 27-44 -> PDF_page 37-54)
[2/53] is processing: v37_K_Marx_Chapter_2_The_Rate_of_Profit
✓ generated：marx_chapters_v37/v37_K_Marx_Chapter_2_The_Rate_of_Profit.pdf (page_num 45-51 -> PDF_page 55-61)
[3/53] is processing: v37_K_Marx_Chapter_3_The_Relation_of_the_Rate_of_Profit_to_the_Rate_of_Surplus_Value
✓ generated：marx_chapters_v37/v37_K_Marx_Chapter_3_The_Relation_of_the_Rate_of_Profit_to_the_Rate_of_Surplus_Value.pdf (page_num 52-72 -> PDF_page 62-82)
[4/53] is processing: v37_K_Marx_Chapter_4_The_Effect_of_the_Turnover_on_the_Rate_of_Profit
✓ generated：marx_chapters_v37/v37_K_Marx_Chapter_4_The_Effect_of_the_Turnover_on_the_Rate_of_Profit.pdf (page_num 73-79 -> PDF_page 83-89

In [36]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """Drop the characters < > : " / \\ | ? * from a title; return "Unknown" if nothing is left."""
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    if cleaned:
        return cleaned
    return "Unknown"

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=28):
    """
    Copy one page range of the source PDF into its own file.

    start_page and end_page are the page numbers passed in by the caller
    (inclusive). Adding page_offset turns them into 0-based indexes into
    input_pdf_reader.pages.

    Args:
        input_pdf_reader: reader object exposing the source pages as .pages.
        output_path: path of the PDF file to create.
        start_page: first page of the range (caller's numbering).
        end_page: last page of the range, inclusive.
        title: label used in the failure message.
        page_offset: value added to both page numbers to get the index into
            input_pdf_reader.pages (defaults to 28, the offset used for this volume).

    Returns:
        True when the file was written; False when the range is invalid or any
        step raised. A status line is printed in both cases.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an empty range would
        # otherwise produce a page-less PDF that is reported as a success, and
        # a negative index could be read as "counted from the end".
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            print(f"❌ fail to extract：{title} - invalid page range {start_page}-{end_page} (PDF has {total_pages} pages)")
            return False

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# Letters by Marx in Volume 46: (title, page). Every letter is listed with a
# single page, so start_page and end_page of each generated dict are equal.
table_of_contents = [
    {"title": letter_title, "start_page": letter_page, "end_page": letter_page}
    for letter_title, letter_page in (
        ("v46_Marx_Letter_2_to_Charles_Walstone_Waldstein", 5),
        ("v46_Marx_Letter_4_to_Bernhard_Kraus", 6),
        ("v46_Marx_Letter_9_to_Paul_Lafargue", 15),
        ("v46_Marx_Letter_10_to_Ferdinand_Domela_Nieuwenhuis", 16),
        ("v46_Marx_Letter_12_to_Philip_Stephen_King", 19),
        ("v46_Marx_Letter_15_to_John_Swinton", 22),
        ("v46_Marx_Letter_17_to_Friedrich_Adolph_Sorge", 24),
        ("v46_Marx_Letter_21_to_Nikolai_Danielson", 30),
        ("v46_Marx_Letter_24_to_Ferdinand_Fleckles", 36),
        ("v46_Marx_Letter_28_to_John_Swinton", 40),
        ("v46_Marx_Letter_29_to_Friedrich_Adolph_Sorge", 42),
        ("v46_Marx_Letter_30_to_Friedrich_Adolph_Sorge", 46),
        ("v46_Marx_Letter_31_to_Ferdinand_Fleckles", 47),
        ("v46_Marx_Letter_32_to_Achille_Loria", 48),
        ("v46_Marx_Letter_33_to_Henry_Mayers_Hyndman", 49),
        ("v46_Marx_Letter_35_to_Carl_Hirsch", 51),
        ("v46_Marx_Letter_36_to_Charles_Longuet", 52),
        ("v46_Marx_Letter_37_to_an_Unknown_Correspondent", 55),
        ("v46_Marx_Letter_40_to_Carl_Pearson", 60),
        ("v46_Marx_Letter_41_to_Nikolai_Danielson", 60),
        ("v46_Marx_Letter_42_to_Ferdinand_Domela_Nieuwenhuis", 65),
        ("v46_Marx_Letter_44_to_Vera_Zasulich", 71),
        ("v46_Marx_Letter_51_to_Jenny_Longuet", 81),
        ("v46_Marx_Letter_54_to_Jenny_Longuet", 89),
        ("v46_Marx_Letter_56_to_John_Swinton", 93),
        ("v46_Marx_Letter_57_to_Friedrich_Adolph_Sorge", 94),
        ("v46_Marx_Letter_59_to_Jenny_Longuet", 95),
        ("v46_Marx_Letter_61_to_Friedrich_Adolph_Sorge", 98),
        ("v46_Marx_Letter_62_to_Henry_Mayers_Hyndman", 102),
        ("v46_Marx_Letter_65_to_Jenny_Longuet", 106),
        ("v46_Marx_Letter_66_to_Engels", 107),
        ("v46_Marx_Letter_68_to_Engels", 110),
        ("v46_Marx_Letter_70_to_Carl_Hirsch", 115),
        ("v46_Marx_Letter_71_to_Engels", 115),
        ("v46_Marx_Letter_72_to_Laura_Lafargue", 118),
        ("v46_Marx_Letter_76_to_Engels", 124),
        ("v46_Marx_Letter_77_to_Pyotr_Lavrov", 124),
        ("v46_Marx_Letter_81_to_Engels", 132),
        ("v46_Marx_Letter_82_to_Jenny_Longuet", 134),
        ("v46_Marx_Letter_83_to_Engels", 136),
        ("v46_Marx_Letter_84_to_Jenny_Longuet", 136),
        ("v46_Marx_Letter_87_to_Karl_Kautsky", 142),
        ("v46_Marx_Letter_88_to_Minna_Kautsky", 143),
        ("v46_Marx_Letter_92_to_Jenny_Longuet", 156),
        ("v46_Marx_Letter_93_to_Johann_Philipp_Becker", 159),
        ("v46_Marx_Letter_94_to_Nikolai_Danielson", 160),
        ("v46_Marx_Letter_95_to_Friedrich_Adolph_Sorge", 161),
        ("v46_Marx_Letter_96_to_Jenny_Longuet", 164),
        ("v46_Marx_Letter_99_to_Roland_Daniels", 167),
        ("v46_Marx_Letter_101_to_Laura_Lafargue", 169),
        ("v46_Marx_Letter_102_to_Engels", 171),
        ("v46_Marx_Letter_105_to_Engels", 176),
        ("v46_Marx_Letter_107_to_Amalie_Daniels", 180),
        ("v46_Marx_Letter_109_to_Engels", 183),
        ("v46_Marx_Letter_110_to_Pyotr_Lavrov", 184),
        ("v46_Marx_Letter_114_to_Engels", 198),
        ("v46_Marx_Letter_116_to_Engels", 202),
        ("v46_Marx_Letter_119_to_Jenny_Longuet", 212),
        ("v46_Marx_Letter_120_to_Engels", 213),
        ("v46_Marx_Letter_121_to_Engels", 216),
        ("v46_Marx_Letter_122_to_Jenny_Longuet", 217),
        ("v46_Marx_Letter_123_to_Paul_Lafargue", 220),
        ("v46_Marx_Letter_124_to_Engels", 222),
        ("v46_Marx_Letter_125_to_Jenny_Longuet", 223),
        ("v46_Marx_Letter_126_to_Engels", 225),
        ("v46_Marx_Letter_128_to_Engels", 228),
        ("v46_Marx_Letter_129_to_Jenny_Longuet", 230),
        ("v46_Marx_Letter_130_to_Engels", 233),
        ("v46_Marx_Letter_134_to_Laura_Lafargue", 238),
        ("v46_Marx_Letter_136_to_Engels", 245),
        ("v46_Marx_Letter_138_to_Engels", 248),
        ("v46_Marx_Letter_139_to_Jenny_Longuet", 250),
        ("v46_Marx_Letter_141_to_Laura_Lafargue", 252),
        ("v46_Marx_Letter_142_to_Engels", 253),
        ("v46_Marx_Letter_143_to_Jenny_Longuet", 255),
        ("v46_Marx_Letter_146_to_Engels", 261),
        ("v46_Marx_Letter_147_to_Eleanor_Marx", 264),
        ("v46_Marx_Letter_148_to_Jenny_Longuet", 265),
        ("v46_Marx_Letter_149_to_Eleanor_Marx", 266),
        ("v46_Marx_Letter_150_to_Engels", 270),
        ("v46_Marx_Letter_151_to_Jenny_Longuet", 271),
        ("v46_Marx_Letter_152_to_Engels", 272),
        ("v46_Marx_Letter_153_to_Engels", 275),
        ("v46_Marx_Letter_154_to_Engels", 276),
        ("v46_Marx_Letter_155_to_Laura_Lafargue", 277),
        ("v46_Marx_Letter_158_to_Engels", 283),
        ("v46_Marx_Letter_159_to_Engels", 284),
        ("v46_Marx_Letter_161_to_Engels", 290),
        ("v46_Marx_Letter_165_to_Engels", 295),
        ("v46_Marx_Letter_168_to_Engels", 303),
        ("v46_Marx_Letter_172_to_Engels", 308),
        ("v46_Marx_Letter_173_to_Engels", 310),
        ("v46_Marx_Letter_177_to_Engels", 317),
        ("v46_Marx_Letter_181_to_Engels", 326),
        ("v46_Marx_Letter_186_to_Engels", 337),
        ("v46_Marx_Letter_187_to_Engels", 338),
        ("v46_Marx_Letter_188_to_Laura_Lafargue", 339),
        ("v46_Marx_Letter_199_to_Engels", 364),
        ("v46_Marx_Letter_200_to_Philipp_Stephen_King", 367),
        ("v46_Marx_Letter_202_to_Eleanor_Marx", 371),
        ("v46_Marx_Letter_204_to_Engels", 374),
        ("v46_Marx_Letter_206_to_Engels", 377),
        ("v46_Marx_Letter_208_to_Engels", 380),
        ("v46_Marx_Letter_211_to_Engels", 385),
        ("v46_Marx_Letter_214_to_Engels", 392),
        ("v46_Marx_Letter_215_to_Engels", 393),
        ("v46_Marx_Letter_219_to_Laura_Lafargue", 398),
        ("v46_Marx_Letter_225_to_Engels", 409),
        ("v46_Marx_Letter_229_to_Eleanor_Marx", 417),
        ("v46_Marx_Letter_230_to_James_M_Williamson", 419),
        ("v46_Marx_Letter_231_to_Eleanor_Marx", 420),
        ("v46_Marx_Letter_233_to_Eleanor_Marx", 422),
        ("v46_Marx_Letter_234_to_Engels", 424),
        ("v46_Marx_Letter_235_to_Eleanor_Marx", 426),
        ("v46_Marx_Letter_236_to_Mrs_James_M_Williamson", 429),
        ("v46_Marx_Letter_237_to_James_M_Williamson", 429),
    )
]
def main():
    """
    Split the Volume 46 PDF into one PDF per table_of_contents entry.

    Opens the source PDF from the user's Downloads folder, writes each entry
    to marx_chapters_v46/<NNN>_<sanitized title>.pdf through extract_chapter,
    then prints a summary and the PDFs found in the output folder. Exceptions
    raised while processing are caught and printed instead of propagating.
    """
    # The source PDF is expected in the current user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 46_ Ka - Karl Marx.pdf")

    output_dir = "marx_chapters_v46"

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        # Create the folder only after the PDF has opened, so a missing or
        # unreadable input does not leave an empty output folder behind.
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0
        total = len(table_of_contents)

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            # Also take the page after the listed one.
            # NOTE(review): presumably because a letter can run onto the
            # following page - confirm this is intended.
            end_page = item["end_page"] + 1

            clean_title = sanitize_filename(title)
            # Add index number to filename to ensure uniqueness
            output_filename = f"{i:03d}_{clean_title}.pdf"
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print("\nextraction finished with errors")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, so files left over from earlier runs
        # are included in this count as well.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run the Volume 46 extraction when this code is executed directly.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 46_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v46
expected_file_numbers 116

[1/116] is processing: v46_Marx_Letter_2_to_Charles_Walstone_Waldstein
✓ generated：marx_chapters_v46/001_v46_Marx_Letter_2_to_Charles_Walstone_Waldstein.pdf (page_num 5-6 -> PDF_page 34-35)
[2/116] is processing: v46_Marx_Letter_4_to_Bernhard_Kraus
✓ generated：marx_chapters_v46/002_v46_Marx_Letter_4_to_Bernhard_Kraus.pdf (page_num 6-7 -> PDF_page 35-36)
[3/116] is processing: v46_Marx_Letter_9_to_Paul_Lafargue
✓ generated：marx_chapters_v46/003_v46_Marx_Letter_9_to_Paul_Lafargue.pdf (page_num 15-16 -> PDF_page 44-45)
[4/116] is processing: v46_Marx_Letter_10_to_Ferdinand_Domela_Nieuwenhuis
✓ generated：marx_chapters_v46/004_v46_Marx_Letter_10_to_Ferdinand_Domela_Nieuwenhuis.pdf (page_num 16-17 -> PDF_page 45-46)
[5/116] is processing: v46_Marx_Letter_12_to_Philip_Stephen_King
✓ generated：marx_chapters_v46/005_v46_Marx_Letter_1

In [20]:
pip install pypdf


Collecting pypdf
  Downloading pypdf-5.6.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.6.0-py3-none-any.whl (304 kB)
Installing collected packages: pypdf
Successfully installed pypdf-5.6.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pdfplumber

def extract_marx_letters(pdf_path):
    """
    Find the letters written by Marx in a PDF by scanning each page's text.

    A page whose extracted text starts with "MARX TO " or "Marx to " opens a
    new letter, and the first line of that text becomes its title. A letter
    ends on the page before the next such heading; the last letter ends on the
    final page of the document.

    Args:
        pdf_path: path of the PDF file, passed to pdfplumber.open.

    Returns:
        A list of dicts with the keys "title", "start_page" and "end_page".
        Page numbers are 1-based and inclusive. The list is empty when no
        heading is found.
    """
    marx_letters = []
    with pdfplumber.open(pdf_path) as pdf:
        current_letter = None
        for page_num, page in enumerate(pdf.pages, start=1):
            # extract_text() can come back as None for a page without text
            # (seen with some pdfplumber versions); treat that as empty.
            text = page.extract_text() or ""

            # A heading written by Marx starts a new letter.
            if text.startswith(("MARX TO ", "Marx to ")):
                if current_letter:
                    # The previous letter ends on the page before this one.
                    current_letter["end_page"] = page_num - 1
                    marx_letters.append(current_letter)
                # Start a new record: title is the first text line of the page.
                current_letter = {
                    "title": text.splitlines()[0].strip(),
                    "start_page": page_num,
                    "end_page": None,
                }
            # Any other page belongs to the letter in progress (if there is
            # one), so there is nothing to record for it.

        # Close the last letter at the end of the document.
        if current_letter:
            current_letter["end_page"] = len(pdf.pages)
            marx_letters.append(current_letter)
    return marx_letters

if __name__ == "__main__":
    import os

    # Path of the PDF to scan. A leading "~" is not resolved when the file is
    # opened, so expand it to the user's home directory first.
    pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 39_ L - Karl Marx.pdf")
    result = extract_marx_letters(pdf_path)
    # Print title and page range of every letter that was found.
    for index, letter in enumerate(result, start=1):
        print(f"第 {index} 封马克思信件：")
        print(f"标题：{letter['title']}")
        print(f"页面范围：{letter['start_page']}-{letter['end_page']}\n")

In [70]:
from PyPDF2 import PdfReader, PdfWriter
import os
import re

def sanitize_filename(filename):
    """
    Turn a title into a string that is safe to use as a file name.

    Removes the characters < > : " / \\ | ? * . , ( ) [ ] and replaces every
    space with an underscore.

    Args:
        filename: the title to clean (str).

    Returns:
        The cleaned name, or "Unknown" when nothing but underscores or
        whitespace is left (e.g. an empty, blank or punctuation-only title).
    """
    # One pass for every character that is simply dropped.
    filename = re.sub(r'[<>:"/\\|?*.,()\[\]]', '', filename)
    filename = filename.replace(' ', '_')

    # Spaces are underscores by now, so a blank title shows up as a run of
    # underscores; treat that the same as an empty result.
    if not filename.strip().strip('_'):
        return "Unknown"

    return filename

def extract_chapter(input_pdf_reader, output_path, start_page, end_page, title, page_offset=30):
    """
    Copy one page range of the source PDF into its own file.

    start_page and end_page are the page numbers used in table_of_contents
    (inclusive). Adding page_offset turns them into 0-based indexes into
    input_pdf_reader.pages.

    Args:
        input_pdf_reader: reader object exposing the source pages as .pages.
        output_path: path of the PDF file to create.
        start_page: first page of the range (table_of_contents numbering).
        end_page: last page of the range, inclusive.
        title: label used in the failure message.
        page_offset: value added to both page numbers to get the index into
            input_pdf_reader.pages (defaults to 30, the offset used for this volume).

    Returns:
        True when the file was written; False when the range is invalid or any
        step raised. A status line is printed in both cases.
    """
    pdf_start = start_page + page_offset
    pdf_end = end_page + page_offset

    try:
        total_pages = len(input_pdf_reader.pages)

        # Reject bad ranges before touching the disk: an empty range would
        # otherwise produce a page-less PDF that is reported as a success, and
        # a negative index could be read as "counted from the end".
        if pdf_start < 0 or pdf_end < pdf_start or pdf_end >= total_pages:
            print(f"❌ fail to extract：{title} - invalid page range {start_page}-{end_page} (PDF has {total_pages} pages)")
            return False

        writer = PdfWriter()
        for page_num in range(pdf_start, pdf_end + 1):
            writer.add_page(input_pdf_reader.pages[page_num])

        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        # The message shows 1-based PDF page numbers, hence the +1.
        print(f"✓ generated：{output_path} (page_num {start_page}-{end_page} -> PDF_page {pdf_start+1}-{pdf_end+1})")
        return True

    except Exception as e:
        print(f"❌ fail to extract：{title} - {str(e)}")
        return False

# No entries have been defined for this volume yet, so main() has nothing to extract.
table_of_contents = []

def main():
    """
    Split the Volume 9 PDF into one PDF per table_of_contents entry.

    Opens the source PDF from the user's Downloads folder, writes each entry
    to marx_chapters_v9/<sanitized title>.pdf through extract_chapter, then
    prints a summary and the PDFs found in the output folder. Exceptions
    raised while processing are caught and printed instead of propagating.
    """
    # The source PDF is expected in the current user's Downloads folder.
    input_pdf_path = os.path.expanduser("~/Downloads/Marx & Engels Collected Works Volume 9_ Ka - Karl Marx.pdf")

    output_dir = "marx_chapters_v9"

    print(f"start processing file: {input_pdf_path}")
    print(f"folder_name: {output_dir}")
    print(f"expected_file_numbers {len(table_of_contents)}\n")

    try:
        reader = PdfReader(input_pdf_path)

        # Create the folder only after the PDF has opened, so a missing or
        # unreadable input does not leave an empty output folder behind.
        os.makedirs(output_dir, exist_ok=True)

        success_count = 0
        failed_count = 0
        total = len(table_of_contents)

        for i, item in enumerate(table_of_contents, 1):
            title = item["title"]
            start_page = item["start_page"]
            end_page = item["end_page"]

            clean_title = sanitize_filename(title)
            output_filename = f"{clean_title}.pdf"
            output_path = os.path.join(output_dir, output_filename)

            print(f"[{i}/{total}] is processing: {title}")

            if extract_chapter(reader, output_path, start_page, end_page, title):
                success_count += 1
            else:
                failed_count += 1

        # Only announce success when every entry was actually written.
        if failed_count == 0:
            print(f"\nsucessful extraction！")
        else:
            print("\nextraction finished with errors")
        print(f"✓ sucessful files: {success_count} ")
        print(f"❌ failed files: {failed_count}")
        print(f"📁 folder_name: {output_dir}")

        # Lists every PDF in the folder, so files left over from earlier runs
        # are included in this count as well.
        generated_files = [f for f in os.listdir(output_dir) if f.endswith('.pdf')]
        print(f"\ntotal number of files（ {len(generated_files)}）:")
        for filename in sorted(generated_files):
            print(f"  - {filename}")

    except Exception as e:
        print(f"❌ error occurs when processing: {str(e)}")

# Entry point: run the Volume 9 extraction when this code is executed directly.
if __name__ == "__main__":
    main()

start processing file: /Users/sienn/Downloads/Marx & Engels Collected Works Volume 9_ Ka - Karl Marx.pdf
folder_name: marx_chapters_v9
expected_file_numbers 0

❌ error occurs when processing: [Errno 2] No such file or directory: '/Users/sienn/Downloads/Marx & Engels Collected Works Volume 9_ Ka - Karl Marx.pdf'
