<a href="https://colab.research.google.com/github/Delta021/Bill-Data-Extraction-API/blob/main/Main_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import asyncio
import io
import json
import logging
import os
from typing import List

import google.generativeai as genai
import requests
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pdf2image import convert_from_bytes
from PIL import Image

# Import models
from models import ExtractRequest, ExtractResponse, TokenUsage, ExtractedData, PageLineItems, BillItem

# Populate the process environment via python-dotenv before reading settings
load_dotenv()

# Gemini credentials: fail fast at import time when the key is missing or empty
API_KEY = os.environ.get("GOOGLE_API_KEY")
if not API_KEY:
    raise ValueError("GOOGLE_API_KEY is not set in environment variables")

# Register the key with the google.generativeai client library
genai.configure(api_key=API_KEY)

# Initialize FastAPI
app = FastAPI(title="Bajaj Health Datathon - Bill Extractor")

# CORS: the API is open to every origin, method and header.
# Credentials (cookies / Authorization headers) are deliberately NOT allowed:
# wildcard origins must not be combined with allow_credentials=True, and this
# service does not rely on cookie-based sessions.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared Gemini model instance.
# 'gemini-1.5-flash' is used for speed and cost-efficiency with high accuracy
# on tabular data; responses are requested as JSON by default.
model = genai.GenerativeModel(
    'gemini-1.5-flash',
    generation_config={"response_mime_type": "application/json"},
)

def download_file(url: str, timeout: float = 10) -> bytes:
    """Download the document at ``url`` and return its raw bytes.

    Args:
        url: Location of the document to fetch.
        timeout: Timeout in seconds handed to ``requests.get``. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        The response body as bytes.

    Raises:
        HTTPException: 400 when the request fails or the server replies with
            an error status (any ``requests.exceptions.RequestException``).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx replies into an exception so they are reported as a bad document URL
        response.raise_for_status()
        return response.content
    except requests.exceptions.RequestException as e:
        # Chain the original error so the root cause stays visible in tracebacks
        raise HTTPException(status_code=400, detail=f"Failed to download document: {str(e)}") from e

def process_document(file_bytes: bytes, content_type: str = "application/pdf") -> List[Image.Image]:
    """Turn a downloaded document into a list of PIL images, one per page.

    The format is detected from the file header, not from ``content_type``:
    data starting with ``%PDF`` is rasterised with ``convert_from_bytes``;
    anything else is opened as a single image.

    Args:
        file_bytes: Raw bytes of the document.
        content_type: Currently unused; kept so existing callers keep working.

    Returns:
        A non-empty list of PIL images.

    Raises:
        HTTPException: 400 when the bytes cannot be converted or decoded, or
            when the conversion yields no pages.
    """
    try:
        # Check header to see if it is actually a PDF
        if file_bytes.startswith(b'%PDF'):
            pages = convert_from_bytes(file_bytes)
        else:
            # Assume it's an image. Image.open() only identifies the file lazily,
            # so force a full decode here: a truncated or corrupt image is then
            # reported as a 400 instead of failing later during the model call.
            image = Image.open(io.BytesIO(file_bytes))
            image.load()
            pages = [image]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to process file format: {str(e)}") from e

    # Guard outside the try block so this 400 is not re-wrapped by the handler above
    if not pages:
        raise HTTPException(status_code=400, detail="Failed to process file format: document contains no pages")
    return pages

def construct_prompt() -> str:
    """Return the fixed instruction text sent to the LLM ahead of the page images.

    The text covers page classification, line-item extraction rules,
    double-counting prevention and the expected JSON output format.
    """
    extraction_instructions = """
    You are an expert AI Invoice Auditor. Your task is to extract line item details from medical or retail bills with extreme precision.

    RULES FOR EXTRACTION:
    1. **Iterate Page by Page**: Analyze each image provided.
    2. **Classify Page Type**: Determine if the page is 'Bill Detail' (contains list of items), 'Pharmacy' (medicine list), or 'Final Bill' (summary page).
    3. **Extract Line Items**:
       - Extract 'item_name', 'item_amount', 'item_rate', and 'item_quantity'.
       - If 'item_quantity' is missing, default to 1.0.
       - If 'item_rate' is missing, infer it from amount/quantity or set to amount.
       - Ensure 'item_amount' is the Net Amount (after discount).
    4. **Double Counting Prevention**:
       - If a 'Final Bill' page only summarizes categories (e.g., "Pharmacy Total", "Consultation Total") that were already listed in detail on previous pages, DO NOT treat them as new line items.
       - Only extract granular line items.
    5. **Output Format**:
       - Return JSON matching exactly the requested schema.
       - Ensure all numbers are floats/integers, no currency symbols.
    """
    return extraction_instructions

@app.post("/extract-bill-data", response_model=ExtractResponse)
async def extract_bill_data(request: ExtractRequest):
    """Extract bill line items from the document referenced by the request.

    Pipeline: download ``request.document``, convert it to page images, send
    the prompt plus all images to Gemini with ``ExtractedData`` as the
    response schema, then validate the returned JSON against ``ExtractedData``.

    Args:
        request: Request body; ``request.document`` is passed to
            ``download_file`` as the URL to fetch.

    Returns:
        An ``ExtractResponse`` with ``is_success=True``, the token usage
        reported by the model and the validated extracted data.

    Raises:
        HTTPException: 400 (propagated unchanged) when the document cannot be
            downloaded or converted; 500 for any other failure.
    """
    try:
        # 1. Download Document
        # The helpers and the SDK call are blocking; run them in worker threads
        # so this coroutine does not stall the event loop for other requests.
        file_bytes = await asyncio.to_thread(download_file, request.document)

        # 2. Convert to Images (Gemini handles images natively)
        images = await asyncio.to_thread(process_document, file_bytes)

        # 3. Prepare Prompt: the prompt text followed by every page image
        inputs = [construct_prompt(), *images]

        # 4. Call Gemini API
        # We force the response schema to match our Pydantic model structure
        response = await asyncio.to_thread(
            model.generate_content,
            inputs,
            generation_config=genai.GenerationConfig(
                response_mime_type="application/json",
                response_schema=ExtractedData,
            ),
        )

        # 5. Parse Response
        # With response_schema set, the reply text is expected to be a JSON document
        llm_output = json.loads(response.text)

        # 6. Token usage as reported in the response's usage metadata
        usage = response.usage_metadata
        token_usage = TokenUsage(
            total_tokens=usage.total_token_count,
            input_tokens=usage.prompt_token_count,
            output_tokens=usage.candidates_token_count,
        )

        # 7. Construct Final Response
        # We validate the LLM output against our internal model to ensure safety
        validated_data = ExtractedData(**llm_output)

        return ExtractResponse(
            is_success=True,
            token_usage=token_usage,
            data=validated_data,
        )

    except HTTPException:
        # Errors already mapped to an HTTP status (e.g. the 400s raised by the
        # download/convert helpers) must reach the client unchanged instead of
        # being re-wrapped as a 500 below.
        raise
    except Exception as e:
        # Anything unexpected is a server-side failure: log it with the
        # traceback and answer 500.
        logging.exception("Error processing request: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

if __name__ == "__main__":
    # Script entry point: serve the app referenced by the "main:app" import
    # string on all interfaces, port 8000, with auto-reload enabled.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )