In [20]:
!pip install pymupdf 



In [24]:
import os
import fitz  # PyMuPDF library for working with PDF files

def setup_folders():
    """
    Make sure the working folders used by the converter are present.

    Two folders, relative to the current working directory, are used:
    'pdf_files' (where the user places the PDFs to convert) and
    'converted_images' (where the generated images are written).
    Each folder that is missing is created and reported with a printed
    message; folders that already exist are left untouched.

    Returns:
        dict: maps 'input' and 'output' to their folder paths.
    """
    folders = dict(
        input='pdf_files',           # source PDFs go here
        output='converted_images',   # rendered images end up here
    )

    for path in folders.values():
        # Nothing to do for a path that is already present
        if os.path.exists(path):
            continue
        os.makedirs(path)
        print(f"Created folder: {path}")

    return folders

In [22]:
def convert_pdf(pdf_path, output_folder, dpi=150):
    """
    Convert a single PDF file into PNG images, one image per page.

    Images are written to `output_folder` as '<name>_page<N>.png', where
    <name> is the PDF's file name without its extension and N starts at 1.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Existing folder that receives the PNG files.
        dpi: Rendering resolution passed to PyMuPDF (default 150).

    Returns:
        True if every page was saved, False if any error occurred
        (the error is printed, not raised).
    """
    try:
        # splitext drops only the final extension, whatever its case, so
        # 'Scan.PDF' -> 'Scan' and 'my.pdf_notes.pdf' -> 'my.pdf_notes'.
        filename = os.path.splitext(os.path.basename(pdf_path))[0]

        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_path) as doc:
            # Read the count while the document is still open.
            page_count = len(doc)

            for page_num, page in enumerate(doc, start=1):
                # Render the page to a raster image at the requested DPI
                image = page.get_pixmap(dpi=dpi)

                # Save image with the 1-based page number in the filename
                image.save(
                    os.path.join(output_folder, f"{filename}_page{page_num}.png")
                )

        print(f"Converted: {filename} ({page_count} pages)")
        return True

    except Exception as e:
        # Best-effort: report the failure and let the caller carry on
        # with the remaining files.
        print(f"Failed to convert {pdf_path}: {str(e)}")
        return False

In [25]:
def main():
    """
    Entry point: prepare the folders and discover the PDFs to convert.

    Steps performed by the code visible here:
    - Calls setup_folders() to create the input/output folders if missing.
    - Lists the input folder and keeps the entries whose name ends in
      '.pdf' (case-insensitive).
    - If none are found, prints a hint for the user and returns early.
    - Otherwise prints how many PDFs were found.

    NOTE(review): no call to convert_pdf() appears in this part of the
    function -- confirm that the conversion step follows.
    """

    # Create folders (if they don't exist) and get their paths
    folders = setup_folders()

    # os.listdir yields bare names (no directory prefix), so each entry must
    # be joined with folders['input'] before it can be opened.
    pdf_files = [
        f for f in os.listdir(folders['input'])
        if f.lower().endswith('.pdf')  # Case-insensitive: also matches '.PDF'
    ]

    # If no PDFs are found, inform the user and stop (implicitly returns None)
    if not pdf_files:
        print(f"\nNo PDF files found in '{folders['input']}' folder")
        print("Please add PDF files and try again")
        return

    print(f"\nFound {len(pdf_files)} PDF(s) to convert")