In [None]:
### Some generic imports that will be used throughout the workshop
import datetime as dt
import time
from pathlib import Path

In [None]:
### We will use watchdog to monitor the directory for new files
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers import Observer

### We will use custom utility functions to extract invoice data from PDFs and to
### handle different file operations. Since they are not the key focus of this
### workshop, we will not go into their implementation details. But if you are
### curious, feel free to check out the code in the utils/ directory.
from utils.invoice_data_extractor import extract_invoice_data
from utils.file_handling import append_data_to_excel_file, wait_until_file_is_ready

### Location of the folder we watch for incoming invoices, and of the Excel
### overview file inside that folder to which extracted invoice data is appended.
WATCHED_DIR = Path("invoices")
INVOICE_DATA_FILE_PATH = WATCHED_DIR.joinpath("invoice_data.xlsx")

### Watchdog lets us define custom functionality for different file system events. File
### system events include file creation, modification, deletion, and movement. Here, we
### will define a custom event handler that reacts to new file creation events.
class InvoiceFileHandler(FileSystemEventHandler):
    """Event handler that processes newly created PDF invoices.

    For every created file whose suffix is ``.pdf`` (case-insensitive), the
    handler calls ``wait_until_file_is_ready``, extracts the invoice data
    with ``extract_invoice_data`` and appends the result to the Excel file
    at ``INVOICE_DATA_FILE_PATH``.
    """

    def on_created(self, event):
        """Handle a creation event; ignore anything that is not a new PDF file.

        Args:
            event: The event object passed in by watchdog. Only
                ``FileCreatedEvent`` instances are processed; every other
                event type is ignored.
        """
        if not isinstance(event, FileCreatedEvent):
            return

        invoice_path = Path(event.src_path)

        # Non-PDF files (including the Excel overview itself, which lives in
        # the watched directory) are not invoices and are skipped.
        if invoice_path.suffix.lower() != ".pdf":
            return

        # This callback is invoked by watchdog rather than by our own code, so
        # an exception escaping from here would propagate into watchdog's
        # dispatching thread. Report the failure and carry on, so that one
        # problematic invoice does not prevent later ones from being handled.
        try:
            wait_until_file_is_ready(invoice_path)
            print(f"New invoice detected: {invoice_path.name}")
            extracted_data = extract_invoice_data(invoice_path)
            append_data_to_excel_file(extracted_data, INVOICE_DATA_FILE_PATH)
        except Exception as exc:
            print(f"Failed to process invoice {invoice_path.name}: {exc!r}")

### Let's define a function that starts watching the directory for new files and
### acts when the defined file system events occur. To do so, we combine the custom
### event handler we defined above with watchdog's own observer.
def start_watching():
    """Watch ``WATCHED_DIR`` for new files until interrupted.

    Creates the watched directory if it does not exist yet, schedules an
    ``InvoiceFileHandler`` on it (non-recursively) and then blocks in a
    sleep loop. A ``KeyboardInterrupt`` ends the loop and the function
    returns normally; any other exception is propagated to the caller.
    In both cases the observer is stopped and joined before leaving.
    """
    WATCHED_DIR.mkdir(exist_ok=True)

    observer = Observer()
    observer.schedule(InvoiceFileHandler(), WATCHED_DIR, recursive=False)
    observer.start()
    print(f"Watching directory: {WATCHED_DIR}")

    try:
        # The observer works in its own thread; this loop only keeps the
        # calling thread alive until the user interrupts it.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Interrupting is the expected way to end the watch loop.
        pass
    finally:
        # Always shut the observer down, not only on KeyboardInterrupt, so
        # that no observer thread is left running if the loop exits for any
        # other reason.
        observer.stop()
        observer.join()

### Finally, let's start the file watching process and see it in action.
### Note: this call blocks and keeps running until it is interrupted with a
### KeyboardInterrupt (e.g. Ctrl+C in a terminal).
start_watching()