In [1]:
import mercury as mr
import os
import pdfplumber
from PyPDF2 import PdfReader, PdfWriter
from collections import Counter
import pandas as pd
from IPython.display import display, HTML

In [2]:
# Configure the Mercury application wrapper for this notebook.
# Keyword arguments are grouped by concern; their order has no effect on the call.
app = mr.App(
    # What the user sees in the app header / listing.
    title="UB Bill File Splitter",
    description="Utility to split Munis-generated bill files into single and multi page bill files.",
    # Presentation of the notebook itself.
    show_code=False,
    show_prompt=False,
    show_sidebar=True,
    full_screen=True,
    # Execution behaviour when widgets change.
    continuous_update=True,
    static_notebook=False,
    # Export options.
    allow_download=True,
    allow_share=True,
)

# UB Bill Splitter

### Instructions:<br/><br/>
#### 1. Enter the Route
#### 2. Enter the Commit
#### 3. Upload a file to process
#### 4. Click Process File<br/><br/>
#### When the file has finished processing, a message will appear below. Go to the Output Files tab on the left to download the processed files.

In [None]:
# Input widgets for the app. The creation order is kept as-is because it
# presumably determines the order in which Mercury lays them out -- confirm
# before reordering.

# Free-text identifiers that end up in the output file names.
route_input = mr.Text(value="", label="Route")
commit_input = mr.Text(value="", label="Commit")

# The Munis-generated PDF to split.
munis_bill_file_input = mr.File(max_file_size="100MB", label="Munis Bill File")

# Processing only starts once this button has been clicked.
submit_button = mr.Button(style="primary", label="Process File")

In [3]:
# Page-number accumulators filled in by the processing cell:
#   multi_bill_page_numbers      -- pages that belong to a bill spanning several pages
#   single_bill_page_num_buffer  -- candidate pages for single-page bills
multi_bill_page_numbers, single_bill_page_num_buffer = [], []

# Mercury output directory; files written under `save_folder.path` are the
# ones offered for download in the app.
save_folder = mr.OutputDir()

mercury.Text

mercury.Text

mercury.File

mercury.Button

In [None]:
# Split the uploaded Munis bill PDF into two PDFs written to the Mercury
# output directory: one holding every single-page bill and one holding every
# multi-page bill. Consecutive pages belong to the same bill when the text
# found inside BILL_NUMBER_BOUNDING_BOX is identical on both pages.

route = route_input.value
commit = commit_input.value
single_page_filename = f"Route_{route}-{commit}-single-page-bills.pdf"
multi_page_filename = f"Route_{route}-{commit}-multi-page-bills.pdf"

pdf_file_path = munis_bill_file_input.filepath
single_page_filepath = os.path.join(save_folder.path, single_page_filename)
multi_page_filepath = os.path.join(save_folder.path, multi_page_filename)

# Page region that is cropped to read the bill identifier, in pdfplumber's
# (x0, top, x1, bottom) bounding-box convention.
# NOTE(review): coordinates are specific to the current Munis bill layout.
BILL_NUMBER_BOUNDING_BOX = (249.1, 50.8887, 306.7, 69.8)


def _write_pages(reader, page_numbers, output_path):
    """Write the given 1-based pages of ``reader`` to ``output_path``.

    Nothing is written when ``page_numbers`` is empty, so no empty PDF is
    produced for a category that has no bills. The file is written once,
    after all pages have been collected.
    """
    if not page_numbers:
        return
    writer = PdfWriter()
    for page_number in page_numbers:
        writer.add_page(reader.pages[page_number - 1])
    with open(output_path, 'wb') as f:
        writer.write(f)


# Check the uploaded file *path* rather than the widget object, so that a click
# without an upload never reaches pdfplumber.open() with no path.
if submit_button.clicked and len(route) > 0 and len(commit) > 0 and pdf_file_path:
    # Read the bill identifier of every page, in page order.
    page_bill_numbers = []
    with pdfplumber.open(pdf_file_path) as pdf:
        total_pages = len(pdf.pages)
        print(f"Total Pages: {total_pages}")

        for page in pdf.pages:
            crop_area = page.crop(BILL_NUMBER_BOUNDING_BOX)
            # `or ''` guards against extract_text() yielding None for a blank crop.
            crop_text = (crop_area.extract_text() or '').split('\n')
            page_bill_numbers.append(crop_text)

    # Group consecutive pages that share a bill identifier. Each entry of
    # `bill_page_runs` is the list of 1-based page numbers of one bill.
    bill_page_runs = []
    previous_bill_number = None
    for page_number, bill_number in enumerate(page_bill_numbers, start=1):
        if bill_page_runs and bill_number == previous_bill_number:
            bill_page_runs[-1].append(page_number)
        else:
            bill_page_runs.append([page_number])
        previous_bill_number = bill_number

    # Rebuilt from scratch on every run so that re-executing this cell never
    # accumulates pages from an earlier run. Every page appears exactly once,
    # including the interior pages of bills with three or more pages.
    multi_bill_page_numbers = [
        page_number
        for run in bill_page_runs
        if len(run) > 1
        for page_number in run
    ]
    single_bill_page_numbers = [run[0] for run in bill_page_runs if len(run) == 1]

    # One reader is enough for both output files.
    pdf = PdfReader(pdf_file_path)

    print(f"List of Multi-Page Bill Pages: {multi_bill_page_numbers}")
    _write_pages(pdf, multi_bill_page_numbers, multi_page_filepath)

    print(f"List of Single Bill Pages: {single_bill_page_numbers}")
    _write_pages(pdf, single_bill_page_numbers, single_page_filepath)

    print("Go to Output Files to Download the Processed Bill Files")
elif submit_button.clicked:
    # Tell the user why nothing happened instead of silently doing nothing.
    print("Enter a Route and a Commit and upload a file before clicking Process File")


