In [1]:
import os
from PyPDF2 import PdfReader, PdfWriter
from PIL import Image
import io

In [2]:
# Function to concatenate PDFs
def concat_pdfs(pdf_files, output_path):
    """Concatenate several PDF files into one PDF.

    Pages are appended in the order the paths appear in ``pdf_files``.

    Args:
        pdf_files: Iterable of paths to the input PDF files.
        output_path: Path the merged PDF is written to. Inputs are fully
            read before this file is opened for writing.
    """
    pdf_writer = PdfWriter()

    for pdf_file in pdf_files:
        # Read the whole file into memory so the reader's stream is still
        # usable when the writer serialises the pages. Binding the reader to
        # a file handle that is closed before write() can fail with
        # "seek of closed file" when page contents are resolved lazily.
        with open(pdf_file, "rb") as f:
            pdf_reader = PdfReader(io.BytesIO(f.read()))
        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

    with open(output_path, "wb") as f_out:
        pdf_writer.write(f_out)

# Function to convert images to PDFs and return the file paths
def image_to_pdf(image_files):
    """Convert each image to a PDF saved next to the source image.

    Args:
        image_files: Iterable of image file paths.

    Returns:
        List of the PDF paths that were written, in input order. A path
        gets a numeric suffix (``name_1.pdf``, ``name_2.pdf``, ...) when
        ``name.pdf`` already exists, so no existing file is overwritten.
    """
    pdf_files = []

    for img_file in image_files:
        # Replace only the trailing extension. A plain str.replace on the
        # extension text would also rewrite matching text elsewhere in the
        # path (e.g. a folder called "png_scans").
        stem, _ = os.path.splitext(img_file)
        pdf_path = stem + '.pdf'

        # Do not clobber an existing PDF with the same stem (for example a
        # submission containing both "hw.pdf" and "hw.jpg").
        counter = 1
        while os.path.exists(pdf_path):
            pdf_path = f"{stem}_{counter}.pdf"
            counter += 1

        # The context manager releases the image's file handle promptly.
        with Image.open(img_file) as img:
            img.convert('RGB').save(pdf_path)  # Convert and save as PDF
        pdf_files.append(pdf_path)

    return pdf_files

# Function to find all 'all_files.pdf' in subdirectories
def find_pdf_files(base_directory, filename='all_files.pdf'):
    """Collect the paths of every file named ``filename`` under a directory tree.

    Args:
        base_directory: Root of the tree to search (the root itself is
            searched too).
        filename: Exact file name to look for. Defaults to 'all_files.pdf'.

    Returns:
        List of matching paths in a stable order: directories are visited
        top-down with sibling directories in sorted name order.
    """
    pdf_files = []

    for root, dirs, files in os.walk(base_directory):
        # os.walk lists entries in arbitrary filesystem order. Sorting the
        # directory list in place fixes the traversal order, so the result
        # is the same on every run and machine.
        dirs.sort()
        if filename in files:
            pdf_files.append(os.path.join(root, filename))

    return pdf_files

# Main function to combine the found PDFs
def combine_all_files(base_directory, output_pdf):
    """Merge the PDFs located by ``find_pdf_files`` into ``output_pdf``.

    Prints a confirmation after merging, or a notice when the search
    returned nothing (in which case no output file is written).

    Args:
        base_directory: Directory passed to ``find_pdf_files``.
        output_pdf: Path passed to ``concat_pdfs`` as the merge target.
    """
    found = find_pdf_files(base_directory)

    # Guard clause: nothing to merge, so report it and stop.
    if not found:
        print("No 'all_files.pdf' found in subdirectories.")
        return

    concat_pdfs(found, output_pdf)
    print(f"Combined PDF saved to: {output_pdf}")


base_directory = 'Grading'
# Name of the per-student merged file. It is skipped when collecting inputs
# so that re-running this cell does not fold a previous result back into
# itself and duplicate pages.
combined_name = 'all_files.pdf'

# Iterate over student directories (sorted so the processing order is stable)
student_directories = sorted(os.listdir(base_directory))

for student_dir in student_directories:
    student_path = os.path.join(base_directory, student_dir)

    if os.path.isdir(student_path):
        pdf_files = []
        image_files = []

        # Collect the PDF and image files
        for filename in sorted(os.listdir(student_path)):
            file_path = os.path.join(student_path, filename)
            if filename == combined_name or not os.path.isfile(file_path):
                continue
            # Compare extensions case-insensitively so uploads such as
            # "scan.JPG" or "hw.PDF" are not silently skipped.
            lowered = filename.lower()
            if lowered.endswith('.pdf'):
                pdf_files.append(file_path)
            elif lowered.endswith(('.jpg', '.jpeg', '.png')):
                image_files.append(file_path)

        # Convert images to PDFs
        image_pdfs = image_to_pdf(image_files)

        try:
            # Combine all PDFs (including converted images)
            all_pdfs = pdf_files + image_pdfs

            if all_pdfs:
                output_pdf_path = os.path.join(student_path, combined_name)
                concat_pdfs(all_pdfs, output_pdf_path)
        finally:
            # Remove the intermediate image PDFs even if merging failed, so
            # they are not picked up as regular PDFs on the next run.
            for image_pdf in image_pdfs:
                os.remove(image_pdf)

# Specify the base directory and output file path
destination_path = 'Grading' # Change this to your desired directory path
output_pdf = os.path.join(destination_path, 'output_combined.pdf')

combine_all_files(base_directory, output_pdf)

Combined PDF saved to: Grading/output_combined.pdf
