In [55]:
import tkinter as tk
from tkinterdnd2 import DND_FILES, TkinterDnD
from tkinter import scrolledtext, ttk
import shutil
from PIL import Image, ImageTk
from io import BytesIO
from pdf2image import convert_from_path
import openai
from openai import OpenAI
import os
import base64
import json
from dotenv import load_dotenv

#import all prompts
from prompts import (
    bank_statement_prompt,
    passport_prompt,
    council_tax_prompt,
    payslip_prompt,
    driving_licence_prompt,
    accountant_certificate_prompt,
    p60_prompt,
    tyo_prompt,
    sa302_prompt
)

# Set up the OpenAI client.
# The key is read from the API_KEY variable (typically supplied through a
# local .env file); an already exported OPENAI_API_KEY is used as a fallback.
load_dotenv()
api_key = os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
if not api_key:
    # os.environ only accepts strings, so assigning a missing key would fail
    # with an unhelpful "str expected, not NoneType" TypeError.
    raise RuntimeError(
        "No OpenAI API key found: set API_KEY (or OPENAI_API_KEY) in the "
        "environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = api_key
openai.api_key = api_key
client = OpenAI()

def decide_document(image_blocks):
    """Ask the model which kind of document the supplied images show.

    A single user message is sent, made of a fixed classification
    instruction followed by every entry of ``image_blocks``.

    Args:
        image_blocks: Iterable of content entries appended after the
            instruction text in the request.

    Returns:
        The model's text output, unmodified. The instruction asks for one of
        the listed document names or "Document not supported.".
    """
    instruction = {"type": "input_text", "text": """You are an extemely high level data analyst and your task is to extract useful information from several different types of documents used in banking. From the images given,
            identify what kind of document has been given to you. Only select from one of these options and output nothing else other than the option given. Bank Statement, Passport, Council Tax, Payslip, Driving Licence, 
            Accountant Certificate, P60, TYO, SA302. If you are unable to identify one of these documents, then output "Document not supported.". Take care when distinguishing between SA302 or TYO, the TYO is a less detailed overall
            view of the year with totals calculated, meanwhile the SA302 is a break down of taxes paid over the year and a lot more detailed."""}
    user_message = {"role": "user", "content": [instruction, *image_blocks]}
    reply = client.responses.create(model="gpt-4.1-mini", input=[user_message])
    return reply.output_text

def convert_pdf_to_png(
    file_path,
    poppler_path=r'C:\Users\ranvi\poppler-24.08.0\Library\bin',
    dpi=300,
):
    """Render every page of a PDF to PNG and wrap the pages as image blocks.

    Each page is also written to ``page_<n>.png`` in the current working
    directory.

    Args:
        file_path: Path of the PDF to render.
        poppler_path: Directory holding the poppler binaries, forwarded to
            ``convert_from_path``.
        dpi: Rendering resolution, forwarded to ``convert_from_path``.

    Returns:
        A list with one ``{"type": "input_image", "image_url": ...}`` dict per
        page, where the URL is a base64 ``data:image/png`` URI. The list is
        empty when no pages are rendered.
    """
    pages = convert_from_path(file_path, dpi=dpi, poppler_path=poppler_path)

    # Built once, outside any per-page rebuild, so a PDF that yields no pages
    # returns an empty list instead of hitting an unbound local.
    image_blocks = []
    for page_number, page in enumerate(pages, start=1):
        page.save(f"page_{page_number}.png", "PNG")

        # Encode the page in memory as base64 PNG for the API request.
        buffered = BytesIO()
        page.save(buffered, format="PNG")
        encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
        image_blocks.append(
            {"type": "input_image", "image_url": f"data:image/png;base64,{encoded}"}
        )
    return image_blocks
    
def convert_tif_to_png(file_path):
    """
    Convert every page of a .tif file into a PNG image block.

    Args:
        file_path: Path of the TIFF file. An object exposing a ``pdf_path``
            attribute is accepted as well, in which case that attribute is
            used as the path.

    Returns:
        A list with one ``{"type": "input_image", "image_url": ...}`` dict per
        page, where the URL is a base64 ``data:image/png`` URI. This is the
        same format that ``convert_pdf_to_png`` returns, so the result can be
        passed straight into the model request.
    """
    # handle_file passes a plain string, which has no ``pdf_path`` attribute.
    path = getattr(file_path, "pdf_path", file_path)

    # Modes the PNG writer can store directly; anything else (e.g. CMYK) is
    # converted to RGB first.
    png_modes = ("1", "L", "LA", "P", "RGB", "RGBA", "I", "I;16")

    image_blocks = []
    with Image.open(path) as tif:
        for index in range(getattr(tif, "n_frames", 1)):
            tif.seek(index)
            # Copy the current frame so it stays valid after the next seek.
            page = tif.copy()
            if page.mode not in png_modes:
                page = page.convert("RGB")

            buffered = BytesIO()
            # Record the DPI at 150 in the encoded PNG.
            page.save(buffered, format="PNG", dpi=(150, 150))
            encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
            image_blocks.append(
                {"type": "input_image", "image_url": f"data:image/png;base64,{encoded}"}
            )
    return image_blocks
    
def extract_json_from_document(prompt, image_blocks):
    """Send ``prompt`` together with the document images to the model.

    Args:
        prompt: Instruction text placed first in the user message.
        image_blocks: Iterable of content entries appended after the prompt.

    Returns:
        The model's text output, unmodified.
    """
    message_content = [{"type": "input_text", "text": prompt}]
    message_content.extend(image_blocks)
    reply = client.responses.create(
        model="gpt-4.1-mini",
        input=[{"role": "user", "content": message_content}],
    )
    return reply.output_text

def handle_file(file_path):
    """Convert a dropped document to images, classify it and extract its data.

    Args:
        file_path: Path of a ``.pdf``, ``.tif`` or ``.tiff`` file. The
            extension is matched case-insensitively, and one pair of
            surrounding braces is removed if present.

    Returns:
        A ``(text, is_other)`` tuple:

        * ``("Error: File must be pdf or tif", False)`` when the extension is
          not supported;
        * ``(model_output, False)`` when the document was recognised and the
          matching extraction prompt was run;
        * ``(summary, True)`` when the document type is not one of the
          supported ones and the model was asked for a free-form summary.
    """
    # Drag-and-drop sources may wrap a path that contains spaces in braces.
    path = file_path.strip()
    if path.startswith("{") and path.endswith("}"):
        path = path[1:-1]

    # Decide on the real extension; a substring test would also accept names
    # such as "report.pdf.txt" or a ".pdf" appearing in a directory name.
    extension = os.path.splitext(path)[1].lower()
    if extension == ".pdf":
        image_blocks = convert_pdf_to_png(path)
    elif extension in (".tif", ".tiff"):
        image_blocks = convert_tif_to_png(path)
    else:
        return ("Error: File must be pdf or tif", False)

    # The label is compared verbatim below, so drop stray whitespace or a
    # trailing newline from the model's answer.
    document_type = decide_document(image_blocks).strip()

    # Selection based on document type.
    prompts_by_type = {
        "Bank Statement": bank_statement_prompt,
        "Passport": passport_prompt,
        "Council Tax": council_tax_prompt,
        "Payslip": payslip_prompt,
        "Driving Licence": driving_licence_prompt,
        "Accountant Certificate": accountant_certificate_prompt,
        "P60": p60_prompt,
        "TYO": tyo_prompt,
        "SA302": sa302_prompt,
    }
    prompt = prompts_by_type.get(document_type)
    if prompt is not None:
        return (extract_json_from_document(prompt, image_blocks), False)

    # Unsupported document: still try to extract any useful information.
    response = client.responses.create(
        model="gpt-4.1-mini",
        input=[{
            "role": "user",
            "content": [
                {"type": "input_text", "text": """You are a high level data extraction model and have been given a document that isn't supported by the
                    software. You should still try identify what the document is and try summarise it. Your response should start exactly like this 
                    'Document type not supported but document appears to be ......' """},
                *image_blocks
            ]
        }]
    )
    return (response.output_text, True)
    
def display_json(data_dict):
    """Pretty-print ``data_dict`` as 4-space-indented JSON in the output box.

    The value is serialised first; the output box is then revealed if needed
    and its content replaced, exactly as ``display_text`` does for plain text.
    """
    display_text(json.dumps(data_dict, indent=4))

def display_text(text):
    """Replace the content of the output box with ``text``.

    The box is packed first if it is not currently shown. It is switched to
    the editable state only for the duration of the update and disabled
    again afterwards.
    """
    box_is_hidden = not text_box.winfo_ismapped()
    if box_is_hidden:
        text_box.pack(padx=10, pady=10)

    text_box.configure(state=tk.NORMAL)
    text_box.delete("1.0", tk.END)
    text_box.insert(tk.END, text)
    text_box.configure(state=tk.DISABLED)
    
def drop(event):
    """Handle a file dropped on the drop zone.

    Shows the progress bar, runs ``handle_file`` on the dropped path and
    displays the outcome: formatted JSON for a recognised document, or plain
    text for an error message, an unsupported-document summary, or a model
    answer that is not valid JSON. The status bar is updated in every case.

    Args:
        event: The drop event; ``event.data`` carries the dropped path(s).
    """
    clear_output()
    status_bar.config(text="Processing document...")

    # The payload is a Tcl list, so a path containing spaces arrives wrapped
    # in braces; splitlist() unwraps it. Only the first dropped file is used.
    dropped = root.tk.splitlist(event.data)
    file_path = dropped[0] if dropped else event.data
    output_label.config(text=f"File dropped:\n{file_path}")

    progress.pack(pady=10)
    progress.start()
    root.update_idletasks()
    try:
        output_text, is_other = handle_file(file_path)
    except Exception as exc:
        # Top-level UI boundary: report the failure instead of leaving the
        # window stuck on "Processing document...".
        print("Failed to process document:", exc)
        display_text(f"Error: {exc}")
        status_bar.config(text="Processing failed")
        return
    finally:
        # Always hide the progress bar, even when processing raised.
        progress.stop()
        progress.pack_forget()

    if is_other:
        display_text(output_text)
        status_bar.config(text="Document type not supported")
        return

    # handle_file reports an unsupported extension as an "Error: ..." string.
    if output_text.startswith("Error:"):
        display_text(output_text)
        status_bar.config(text="File type not supported")
        return

    try:
        data = json.loads(output_text)
    except json.JSONDecodeError as e:
        # Fall back to showing the raw answer rather than failing on an
        # undefined result.
        print("Failed to decode JSON:", e)
        display_text(output_text)
        status_bar.config(text="Document processed (response was not valid JSON)")
        return

    display_json(data)
    status_bar.config(text="Document processed")
        
    
def clear_output() -> None:
    """Hide the output text box by removing it from the pack layout."""
    text_box.pack_forget()
    
# --- Main window ---------------------------------------------------------
root = TkinterDnD.Tk()
root.title("Drag and Drop Document")
# Initial size; it is replaced by the centred 800x600 geometry set below.
root.geometry("400x200")

# --- Heading -------------------------------------------------------------
title_label = tk.Label(
    root,
    text="🧠 OCR Document Extractor Project",
    font=("Segoe UI", 18, "bold"),
    fg="#004080",
    pady=10,
)
title_label.pack(side="top", fill="x")

# --- Drop zone -----------------------------------------------------------
drop_label = tk.Label(
    root,
    text="📄 Drag and drop a file here (pdf)",
    font=("Segoe UI", 12),
    bg="#f0f0f0",
    fg="#333",
    relief="groove",
    bd=2,
    width=40,
    height=5,
)
drop_label.pack(pady=20)

# Register the label as a file drop target and route drops to drop().
drop_label.drop_target_register(DND_FILES)
drop_label.dnd_bind('<<Drop>>', drop)

# --- Output widgets ------------------------------------------------------
output_label = tk.Label(root, text="", fg="green")
output_label.pack()

# Created read-only and left unpacked; display_text() packs it on demand.
text_box = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=100, height=35)
text_box.configure(state=tk.DISABLED)

# --- Window placement: 800x600, centred on the screen ----------------------
window_width = 800
window_height = 600
_x_offset = (root.winfo_screenwidth() - window_width) // 2
_y_offset = (root.winfo_screenheight() - window_height) // 2
root.geometry(f"{window_width}x{window_height}+{_x_offset}+{_y_offset}")

# --- Status bar and progress indicator -------------------------------------
status_bar = tk.Label(root, text="Waiting for file...", bd=1, relief="sunken", anchor="w")
status_bar.pack(fill="x", side="bottom")

progress = ttk.Progressbar(root, mode='indeterminate', length=200)
progress.pack(pady=10)
progress.pack_forget()  # hidden until a file is being processed

root.mainloop()