In [None]:
!pip install openai pandas pdf2image

In [None]:
import base64
import json
import mimetypes
import os

import openai
import pandas as pd
from pdf2image import convert_from_path
from PIL import Image

# Read the key from the environment so the secret is never saved inside the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this notebook.")

In [None]:
def convert_pdf_to_images(pdf_folder_path, image_output_folder):
    """Render every page of every PDF in a folder to a JPEG file.

    Args:
        pdf_folder_path: Directory scanned (non-recursively) for PDF files.
            The ``.pdf`` extension is matched case-insensitively.
        image_output_folder: Directory the JPEGs are written to; it is
            created if it does not exist yet.

    Returns:
        List of the written image paths, ordered by PDF file name and then by
        page. Each file is named ``<pdf name without extension>_page_<n>.jpg``
        with ``n`` starting at 1.
    """
    os.makedirs(image_output_folder, exist_ok=True)
    all_images = []
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for file in sorted(os.listdir(pdf_folder_path)):
        # Case-insensitive so files such as "CHEQUE.PDF" are not silently skipped.
        if not file.lower().endswith(".pdf"):
            continue
        stem = file[:-4]  # strip the 4-character ".pdf" suffix
        pages = convert_from_path(os.path.join(pdf_folder_path, file), dpi=300)
        for page_number, page in enumerate(pages, start=1):
            image_path = os.path.join(image_output_folder, f"{stem}_page_{page_number}.jpg")
            page.save(image_path, "JPEG")
            all_images.append(image_path)
    return all_images

In [None]:
def extract_text_from_image_with_gpt_vision(image_path):
    """Send one cheque image to the GPT-4 Vision chat model and return its text.

    The image file is read, base64-encoded and embedded in the request as a
    data URL together with an OCR instruction.

    Args:
        image_path: Path of the image file to transcribe.

    Returns:
        Tuple ``(ocr_text, tokens_used)``: the stripped text of the first
        choice (an empty string when the reply carries no text content) and
        the ``total_tokens`` figure reported in the response usage.
    """
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Label the data URL with the file's actual image type instead of always
    # claiming JPEG; fall back to JPEG when the type cannot be guessed.
    mime_type = mimetypes.guess_type(image_path)[0]
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    response = openai.ChatCompletion.create(
        model="gpt-4-vision-preview",
        messages=[
            {"role": "system", "content": "You are an OCR assistant for extracting clean, readable text from loan-related cheque images."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract all visible text from this cheque image."},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
                ],
            },
        ],
        max_tokens=1024,
    )
    # Guard against a reply whose content is None so .strip() cannot raise.
    content = response.choices[0].message.content
    ocr_text = (content or "").strip()
    tokens_used = response.usage.total_tokens
    return ocr_text, tokens_used

In [None]:
def _load_json_object(raw_text):
    """Decode a model reply as JSON, tolerating Markdown fences or prose around the object."""
    cleaned = (raw_text or "").strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span, which drops ```json fences
        # and any explanatory text the model added around the object.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(cleaned[start:end + 1])


def parse_cheque_text_via_gpt(ocr_text: str) -> tuple:
    """Ask the GPT-3.5 chat model to turn OCR text into structured cheque fields.

    The prompt requests Date, Payee Name, Amount, Loan Account Number,
    IFSC Code and MICR Code in JSON format.

    Args:
        ocr_text: Raw text previously extracted from a cheque image.

    Returns:
        Tuple ``(parsed, tokens_used)``: the decoded JSON reply and the
        ``total_tokens`` figure reported in the response usage.

    Raises:
        json.JSONDecodeError: If no JSON can be decoded from the reply, even
            after stripping surrounding fences or prose.
    """
    system_msg = "You are a data extraction assistant. Extract loan cheque information."

    user_msg = f"""Extract the following fields and return in JSON format:
    - Date
    - Payee Name
    - Amount
    - Loan Account Number
    - IFSC Code
    - MICR Code

    OCR Text:
    {ocr_text}
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg}
        ],
        temperature=0
    )
    # Attribute access, matching how the vision helper reads its response.
    reply_text = response.choices[0].message.content
    tokens_used = response.usage.total_tokens
    return _load_json_object(reply_text), tokens_used

In [None]:
def process_cheques_and_export(
    image_paths,
    output_file="loan_cheques_parsed.xlsx",
    vision_inr_per_1k_tokens=0.85,
    parse_inr_per_1k_tokens=0.04,
):
    """OCR and parse each cheque image, export the rows to Excel, and print a cost estimate.

    For every path the image is transcribed with
    ``extract_text_from_image_with_gpt_vision`` and the text is structured with
    ``parse_cheque_text_via_gpt``. The parsed records are written to
    ``output_file`` and token totals with estimated costs are printed.

    Args:
        image_paths: Iterable of image file paths (a generator is accepted).
        output_file: Path of the Excel workbook to write.
        vision_inr_per_1k_tokens: Estimated INR cost per 1000 tokens of the
            vision step.
        parse_inr_per_1k_tokens: Estimated INR cost per 1000 tokens of the
            parsing step.

    Returns:
        None. The results are the written workbook and the printed summary.
    """
    # Materialise once so one-shot iterables can be both looped over and counted.
    image_paths = list(image_paths)

    records = []
    total_tokens_vision = 0
    total_tokens_parse = 0

    for path in image_paths:
        print(f"Processing: {path}")
        ocr_text, tokens_vision = extract_text_from_image_with_gpt_vision(path)
        parsed, tokens_parse = parse_cheque_text_via_gpt(ocr_text)
        parsed['Source Image'] = os.path.basename(path)  # Internal tracking only
        records.append(parsed)
        total_tokens_vision += tokens_vision
        total_tokens_parse += tokens_parse

    df = pd.DataFrame(records)

    # The tracking column is internal; keep it out of the exported sheet.
    if 'Source Image' in df.columns:
        df = df.drop(columns=['Source Image'])

    df.to_excel(output_file, index=False)

    # Estimate costs (token-based), in INR.
    cost_inr_vision = (total_tokens_vision / 1000) * vision_inr_per_1k_tokens
    cost_inr_parse = (total_tokens_parse / 1000) * parse_inr_per_1k_tokens
    total_cost_inr = cost_inr_vision + cost_inr_parse

    print(f"✅ Exported to {output_file}")
    print(f"📊 Total GPT-4 Vision tokens: {total_tokens_vision} (~₹{cost_inr_vision:.2f})")
    print(f"🧠 Total GPT-3.5 tokens: {total_tokens_parse} (~₹{cost_inr_parse:.2f})")
    print(f"🧾 Total Estimated Cost: ₹{total_cost_inr:.2f} INR for {len(image_paths)} cheques")

In [None]:
# Input folder holding the cheque PDFs, and the folder that receives the page images.
pdf_folder = "pdfs"
image_folder = "images"

# Step 1: render every PDF page to an image file.
image_files = convert_pdf_to_images(
    pdf_folder_path=pdf_folder,
    image_output_folder=image_folder,
)
# Step 2: OCR and parse each image, then export the results and print the cost summary.
process_cheques_and_export(image_paths=image_files)