In [13]:
import os
import PyPDF2
import numpy as np
from PyPDF2 import PdfReader
import math
import csv

In [16]:

def get_pdf_page_count(pdf_file):
    """
    Return how many pages ``pdf_file`` contains, or ``None`` on failure.

    Any exception raised while opening or parsing the file is caught,
    reported on stdout, and converted into a ``None`` result so that a
    single bad PDF does not abort the caller.
    """
    page_count = None
    try:
        with open(pdf_file, "rb") as handle:
            page_count = len(PdfReader(handle).pages)
    except Exception as err:
        # Best-effort: report the problem and fall through to return None.
        print(f"Error reading {pdf_file}: {err}")
    return page_count

def analyze_pdfs_in_subfolder(subfolder_path, output_folder, short_threshold=5, long_threshold=66):
    """
    Analyze all PDFs in a subfolder and save the results to a CSV file.

    One row is written per readable PDF, followed by a "Totals" row. The
    report is saved as ``<subfolder name>_report.csv`` inside
    ``output_folder`` and its location is printed.

    Args:
        subfolder_path: Directory whose PDF files are analyzed (not recursive).
        output_folder: Directory that receives the CSV; created if missing.
        short_threshold: A PDF with fewer pages than this is marked "Yes" in
            the "Less Than <short_threshold> Pages" column. Defaults to 5.
        long_threshold: A PDF with more pages than this is marked "Yes" in
            the "Greater Than <long_threshold> Pages" column. Defaults to 66.

    Returns:
        None.

    Notes:
        A PDF whose page count cannot be read gets no row of its own, but it
        is still included in the file count of the "Totals" row.
    """
    short_column = f"Less Than {short_threshold} Pages"
    long_column = f"Greater Than {long_threshold} Pages"
    fieldnames = ["Blank", "File Name", short_column, long_column, "Page Count"]

    results = []
    total_files = 0
    total_pages = 0
    short_count = 0
    long_count = 0

    # os.listdir() returns entries in arbitrary order; sort so the report
    # rows are stable from run to run.
    for file_name in sorted(os.listdir(subfolder_path)):
        pdf_file_path = os.path.join(subfolder_path, file_name)

        # Match the extension case-insensitively so "SCAN.PDF" is not
        # silently skipped, and ignore directories that merely end in ".pdf".
        if not file_name.lower().endswith(".pdf"):
            continue
        if not os.path.isfile(pdf_file_path):
            continue

        total_files += 1
        num_pages = get_pdf_page_count(pdf_file_path)
        if num_pages is None:
            # Unreadable PDF: already reported by get_pdf_page_count.
            continue

        is_short = num_pages < short_threshold
        is_long = num_pages > long_threshold

        # Count for summary
        total_pages += num_pages
        if is_short:
            short_count += 1
        if is_long:
            long_count += 1

        # Append individual file results
        results.append({
            "Blank": "",  # Blank column
            "File Name": file_name,
            short_column: "Yes" if is_short else "No",
            long_column: "Yes" if is_long else "No",
            "Page Count": num_pages
        })

    # Add summary row
    results.append({
        "Blank": "Totals",
        "File Name": total_files,
        short_column: short_count,
        long_column: long_count,
        "Page Count": total_pages
    })

    # normpath strips a trailing separator; without it basename() would be ""
    # and the report would be named just "_report.csv".
    subfolder_name = os.path.basename(os.path.normpath(subfolder_path))

    # Save results to CSV
    os.makedirs(output_folder, exist_ok=True)
    csv_file_path = os.path.join(output_folder, f"{subfolder_name}_report.csv")
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

    print(f"CSV saved for subfolder '{subfolder_name}': {csv_file_path}")

def process_all_subfolders(root_directory=None, output_folder_name="reports"):
    """
    Process all subfolders of a directory and generate CSV reports.

    Each immediate subdirectory is passed to ``analyze_pdfs_in_subfolder``.
    Reports are written to ``<root_directory>/<output_folder_name>``, which
    is created if it does not exist.

    Args:
        root_directory: Directory whose subfolders are processed. Defaults to
            the current working directory.
        output_folder_name: Name of the folder, inside ``root_directory``,
            that receives the reports. Defaults to "reports".

    Returns:
        None.
    """
    if root_directory is None:
        root_directory = os.getcwd()
    output_folder = os.path.join(root_directory, output_folder_name)
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so subfolders are processed in a predictable order.
    for subfolder_name in sorted(os.listdir(root_directory)):
        subfolder_path = os.path.join(root_directory, subfolder_name)

        # Skip if not a directory
        if not os.path.isdir(subfolder_path):
            continue

        # The output folder lives inside the scanned directory; analyzing it
        # would write a meaningless report about the reports themselves.
        if os.path.samefile(subfolder_path, output_folder):
            continue

        # Hidden folders (e.g. ".ipynb_checkpoints", ".git") are tooling
        # artifacts rather than document folders.
        if subfolder_name.startswith("."):
            continue

        print(f"Processing Subfolder: {subfolder_name}")
        analyze_pdfs_in_subfolder(subfolder_path, output_folder)

# Run the report generation: every subfolder of the current working directory
# is analyzed and its CSV is written into the "reports" folder there.
process_all_subfolders()


Processing Subfolder: 20241021
CSV saved for subfolder '20241021': c:\Users\smk19\Documents\co\LlamaTime\reports\20241021_report.csv
Processing Subfolder: 20241022
CSV saved for subfolder '20241022': c:\Users\smk19\Documents\co\LlamaTime\reports\20241022_report.csv
Processing Subfolder: 20241024
CSV saved for subfolder '20241024': c:\Users\smk19\Documents\co\LlamaTime\reports\20241024_report.csv
Processing Subfolder: 20241028
CSV saved for subfolder '20241028': c:\Users\smk19\Documents\co\LlamaTime\reports\20241028_report.csv
Processing Subfolder: 20241029
CSV saved for subfolder '20241029': c:\Users\smk19\Documents\co\LlamaTime\reports\20241029_report.csv
Processing Subfolder: 20241126
CSV saved for subfolder '20241126': c:\Users\smk19\Documents\co\LlamaTime\reports\20241126_report.csv
Processing Subfolder: 20241202
CSV saved for subfolder '20241202': c:\Users\smk19\Documents\co\LlamaTime\reports\20241202_report.csv
Processing Subfolder: 20241203
CSV saved for subfolder '20241203': c:

In [None]:
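# NOTE(review): disabled scratch calculation, kept for reference.
# `total_pages_array` is not defined in any cell shown here, so this cannot
# run on a fresh kernel as written. It presumably held one total page count
# per subfolder (TODO confirm), and `days` is that count divided by 66 pages,
# rounded up.
#pages = np.array(total_pages_array)

#days = np.ceil(pages/66).astype(int)
#days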

array([20, 17, 21, 47, 28])

: 