In [15]:
import logging
import os
from contextlib import ExitStack

from PyPDF2 import PdfReader, PdfWriter
# Route INFO-and-above records to a log file (path is relative, so it is created in the
# current working directory); each record is prefixed with a timestamp and its level.
logging.basicConfig(filename='final_folders_11_12.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Root folder on a UNC network share; the commented-out calls further down pass it as the
# `base_path` argument, where it is joined with a date folder name.
base_path = r"\\lawoffice\Applications\ScanDocs\ΔΕΗ\Έργο Αγωγών 2024\2. Έγγραφα"

def process_folder(folder_path,log_number):
    """Merge every PDF directly inside ``folder_path`` into ``merged_document.pdf``.

    The PDFs are ordered by the third-from-last underscore-separated field of
    their file name and then handed to ``merge_pdfs``.

    Args:
        folder_path: Directory whose PDF files are merged (not recursive).
        log_number: Label forwarded to ``merge_pdfs`` for its log messages.

    Raises:
        OSError: If ``folder_path`` cannot be listed.
    """
    merged_name = "merged_document.pdf"
    merged_pdf_path = os.path.join(folder_path, merged_name)

    def date_token(path):
        # Use the file name only: splitting the full path lets underscores in
        # parent directory names leak into the key when the file name itself
        # has fewer than three fields. Such files get an empty key (sort first).
        fields = os.path.basename(path).split("_")
        return fields[-3] if len(fields) >= 3 else ""

    pdf_files = [
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        # Accept ".PDF" as well as ".pdf".
        if name.lower().endswith('.pdf')
        # Never feed a previous merge result back into the new merge.
        and os.path.normcase(name) != os.path.normcase(merged_name)
    ]

    # NOTE(review): the token is compared as text, so the order is chronological
    # only if the file names carry a sortable date format (e.g. YYYYMMDD) — confirm.
    pdf_files_sorted = sorted(pdf_files, key=date_token)
    merge_pdfs(pdf_files_sorted, merged_pdf_path,log_number)

def merge_pdfs(pdf_list, output_path, log_number):
    """Merge multiple PDFs into a single PDF.

    Pages are appended in the order of ``pdf_list`` and the result is written
    to ``output_path``. Progress and a final summary are logged at INFO level.

    Args:
        pdf_list: Paths of the source PDF files, in the desired output order.
        output_path: Path of the merged PDF to create (overwritten if present).
        log_number: Label included in the per-file log messages.

    Raises:
        OSError: If a source file cannot be opened or the output cannot be written.
    """
    merged_document = PdfWriter()
    total_pages = 0

    # Keep every source stream open until the writer has finished serialising.
    # PdfReader reads from its stream on demand, and depending on the PyPDF2
    # release the page content may only be pulled from the source when write()
    # runs; closing the file right after add_page() can then fail at write time.
    with ExitStack() as open_sources:
        for i, pdf in enumerate(pdf_list):
            mfile = open_sources.enter_context(open(pdf, 'rb'))
            reader = PdfReader(mfile)
            pages = len(reader.pages)
            for page in reader.pages:
                merged_document.add_page(page)
            total_pages += pages
            logging.info(f"    Added {i + 1}/{len(pdf_list)} {pdf} with {pages} pages to the merged document {log_number} ({i + 1}/{len(pdf_list)})")

        with open(output_path, 'wb') as output_file:
            merged_document.write(output_file)

    logging.info(f"    Merged {len(pdf_list)} PDFs into {output_path} with {total_pages} pages")


def delete_existing_merged_files(main_folder):
    """Delete earlier merge outputs anywhere below ``main_folder``.

    The whole directory tree is walked and every file whose name *contains*
    "merged_document.pdf" (substring match, not equality) is removed.
    """
    marker = "merged_document.pdf"
    for current_dir, _subdirs, filenames in os.walk(main_folder):
        stale_names = [name for name in filenames if marker in name]
        for name in stale_names:
            os.remove(os.path.join(current_dir, name))

def merge_script(paroxi_folder):
    """Rebuild the merged PDF for one folder.

    Steps, in order: remove any earlier merge output below ``paroxi_folder``,
    then merge the PDFs found in its "Λογαριασμοί" subfolder. The folder's
    base name is used as the label in console and log messages.
    """
    # Clear stale outputs first so they cannot end up inside the new merge.
    delete_existing_merged_files(paroxi_folder)

    label = os.path.basename(paroxi_folder)
    print(f"Processing {label}")

    process_folder(os.path.join(paroxi_folder, "Λογαριασμοί"), label)
    print(f"    Merged {label} successfully!")

def get_date_dict(paroxi_tuples):
    """Group the first-column values of a pasted two-column table by the second column.

    Each non-blank line must hold exactly two fields. Fields are separated by a
    tab; if a line contains no tab, any run of whitespace is accepted instead
    (tabs are easily converted to spaces when text is pasted). Surrounding
    whitespace is stripped from both fields and blank lines are ignored.

    Args:
        paroxi_tuples: Multi-line string, one ``<key><TAB><value>`` pair per line.

    Returns:
        dict mapping each distinct second-column value to the list of
        first-column values that appeared with it, in input order.

    Raises:
        ValueError: If a non-blank line does not contain exactly two fields.
    """
    date_dict = {}
    for line_number, raw_line in enumerate(paroxi_tuples.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue

        if '\t' in line:
            # Tab-separated: keep inner spaces, drop padding and empty fields
            # produced by doubled tabs.
            fields = [field.strip() for field in line.split('\t')]
            fields = [field for field in fields if field]
        else:
            fields = line.split()

        if len(fields) != 2:
            raise ValueError(
                f"Line {line_number}: expected 2 fields, got {len(fields)}: {raw_line!r}"
            )

        key, value = fields
        date_dict.setdefault(value, []).append(key)
    return date_dict
def merge_specific_paroxes_list_in_specific_date_folder(paroxes_list,date,base_path):
    """Run ``merge_script`` for selected subfolders of one date folder.

    Args:
        paroxes_list: Folder names to process, either as one whitespace-separated
            string or as an iterable of names.
        date: Name of the date folder inside ``base_path``.
        base_path: Directory that contains the date folders.

    Requested names with no matching subdirectory are reported (log warning and
    console message) instead of being silently ignored.

    Raises:
        OSError: If the date folder cannot be scanned.
    """
    date_path = os.path.join(base_path,date)
    if isinstance(paroxes_list, str):
        wanted = set(paroxes_list.split())
    else:
        wanted = set(paroxes_list)

    # Scan the directory once and release the handle before the (slow) merging starts.
    with os.scandir(date_path) as entries:
        matches = [
            (entry.name, entry.path)
            for entry in entries
            if entry.name in wanted and entry.is_dir()
        ]

    # Surface missing folders up front so a typo does not go unnoticed.
    found = {name for name, _path in matches}
    for paroxi in sorted(wanted - found):
        logging.warning(f"    Folder {paroxi} not found in {date_path} - skipped")
        print(f"    Folder {paroxi} not found in {date_path} - skipped")

    for _name, folder in matches:
        merge_script(folder)
def merge_specific_dict_dates_paroxes(date_dict,base_path):
    """Process every folder listed in ``date_dict``, one date folder at a time.

    Args:
        date_dict: Mapping of date folder name to a list of folder names inside
            it (the structure returned by ``get_date_dict``).
        base_path: Directory that contains the date folders.
    """
    for date, paroxes in date_dict.items():
        if not paroxes:
            continue
        # Hand over all names of this date in one call so the date folder is
        # scanned once rather than once per name. The callee accepts a
        # whitespace-separated string of names.
        merge_specific_paroxes_list_in_specific_date_folder(" ".join(paroxes),date,base_path)
           

In [26]:
## merge_script(case_folder_path)
## merge_specific_paroxes_list_in_specific_date_folder(paroxes_list,date,base_path)
## merge_specific_dict_dates_paroxes(date_dict,base_path)


# Arguments for merge_script
# case_folder_path = paroxi

# Arguments for merge_specific_paroxes_list_in_specific_date_folder
# paroxes_list = """ 70337111702 """
# date = "08_11_2024"

## Arguments for merge_specific_dict_dates_paroxes(date_dict,base_path)
# self-explanatory

In [31]:
# Pasted two-column table, one tab-separated pair per line: the first column is the
# folder name to process, the second is the date folder it is looked up in.
paroxi_tuples = """
11793727302	20_9_2024
60034564305	1_10_2024
60045198902	1_10_2024
60426621203	30_9_2024
60430742202	30_9_2024
61440889402	30_9_2024
70322090103	30_9_2024
70337111702	08_11_2024
70501152301	30_9_2024
70612046601	25_10_2024
71303092802	25_10_2024
71305204002	25_10_2024
71332096803	25_10_2024
71670729601	25_10_2024
"""
# Group the first-column values by their second-column value.
date_dict = get_date_dict(paroxi_tuples)

In [None]:
# merge_specific_dict_dates_paroxes(date_dict,base_path)