# IDP Backend Server

This notebook runs the FastAPI backend server and exposes it through an ngrok tunnel.

In [None]:
# Install required packages
# The "+cpu" torch build is a local-version wheel that is published on the
# PyTorch wheel index rather than PyPI, so that index is added as an extra source.
# The uvicorn requirement is quoted so the shell cannot treat "[standard]" as a glob.
# NOTE(review): paddleocr needs the separate `paddlepaddle` package at import time
#   and, as far as I can tell, does not pull it in itself - confirm the runtime
#   provides it or add a pinned paddlepaddle here.
# NOTE(review): python-magic-bin appears to ship wheels for Windows/macOS only; on a
#   Linux runtime `python-magic` plus the system libmagic may be required - verify.
!pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \
    fastapi==0.110.0 \
    "uvicorn[standard]==0.27.1" \
    python-multipart==0.0.9 \
    torch==2.2.1+cpu \
    transformers==4.38.2 \
    Pillow==10.2.0 \
    paddleocr==2.7.0.3 \
    python-magic-bin==0.4.14 \
    spacy==3.7.4 \
    pyngrok==7.1.5 \
    python-dotenv==1.0.1

In [None]:
# Download spaCy model
# Fetches the small English pipeline that app.py later loads with
# spacy.load('en_core_web_sm'); run this once per fresh environment.
!python -m spacy download en_core_web_sm

In [None]:
# Create all code in a single file
%%writefile app.py

from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from typing import Dict, Any, List, Optional
import torch
from PIL import Image
import io
import numpy as np
from paddleocr import PaddleOCR
import spacy
from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
import re
import magic

class DocumentProcessor:
    """Extract structured fields from a document image.

    The pipeline chains PaddleOCR (text extraction), LayoutLMv3 (token
    classification over the OCR output), spaCy (NER enhancement) and a
    pattern-matching pass, then classifies the document type and computes a
    confidence value.

    Attributes are documented inline in ``__init__``. The private helper
    methods called by ``process_image`` are not part of this listing.
    """

    def __init__(self):
        """Load the OCR engine, the spaCy pipeline and the LayoutLMv3 model.

        All models are loaded eagerly, so constructing an instance is
        expensive and is meant to happen once per process.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Label mappings.
        # These must exist BEFORE the LayoutLMv3 model is created, because the
        # size of its classification head is derived from len(self.label2id).
        self.label2id = {
            "O": 0,
            "B-invoice_number": 1,
            "B-date": 2,
            "B-total_amount": 3,
            "B-tax_amount": 4,
            "B-vendor_name": 5,
            "B-customer_name": 6,
            "B-line_item": 7,
            "B-quantity": 8,
            "B-unit_price": 9,
            "B-description": 10
        }
        # Reverse lookup: class index -> label string.
        self.id2label = {v: k for k, v in self.label2id.items()}

        # Initialize OCR
        self.ocr = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            use_gpu=torch.cuda.is_available(),
            show_log=False
        )

        # Initialize spaCy
        self.nlp = spacy.load('en_core_web_sm')

        # Initialize LayoutLMv3
        # NOTE(review): "microsoft/layoutlmv3-base" is the base checkpoint, so the
        # token-classification head built here is presumably freshly initialised -
        # confirm fine-tuned weights are loaded somewhere before trusting its labels.
        layout_model_name = "microsoft/layoutlmv3-base"
        self.layout_processor = LayoutLMv3Processor.from_pretrained(layout_model_name)
        self.layout_model = LayoutLMv3ForTokenClassification.from_pretrained(
            layout_model_name,
            num_labels=len(self.label2id)
        ).to(self.device)

    def process_image(self, image: Image.Image) -> Dict[str, Any]:
        """Run the full extraction pipeline on one image.

        Args:
            image: The document page as a PIL image.

        Returns:
            A dict with three keys: ``"fields"`` (the extracted fields after
            all enhancement passes), ``"documentType"`` and ``"confidence"``.
        """
        # PaddleOCR is fed a numpy array; the LayoutLMv3 step gets the PIL image.
        img_array = np.array(image)

        # Extract text with PaddleOCR
        ocr_result = self.ocr.ocr(img_array)
        text_blocks = self._extract_text_blocks(ocr_result)

        # Process with LayoutLMv3
        layout_fields = self._process_with_layout(image, text_blocks)

        # Enhance with spaCy NER
        enhanced_fields = self._enhance_with_spacy(layout_fields)

        # Apply pattern matching
        final_fields = self._apply_pattern_matching(enhanced_fields)

        # Determine document type
        doc_type = self._classify_document_type(final_fields)

        return {
            "fields": final_fields,
            "documentType": doc_type,
            "confidence": self._calculate_confidence(final_fields)
        }

    # [Previous methods remain the same...]

# ASGI application object; the server cell starts it as "app:app".
app = FastAPI()

# MIME types the upload endpoint accepts (compared against the sniffed
# content type, not the client-supplied one).
ALLOWED_MIME_TYPES = [
    'image/jpeg', 'image/png', 'image/tiff', 'application/pdf',
]

# CORS: every origin, method and header is allowed, with credentials enabled.
# NOTE(review): wildcard origins together with credentials is very permissive -
# confirm this is acceptable beyond a demo setup.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
    allow_credentials=True,
)

# Single shared processor, built once at import time because it loads all models.
doc_processor = DocumentProcessor()

@app.post("/process-document")
async def process_document(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Accept an uploaded document and return the fields extracted from it.

    The content type is sniffed from the uploaded bytes with libmagic and
    checked against ``ALLOWED_MIME_TYPES`` before the bytes are decoded as an
    image and handed to ``doc_processor.process_image``.

    Args:
        file: The multipart upload.

    Returns:
        The dict produced by ``doc_processor.process_image``.

    Raises:
        HTTPException: 400 if the sniffed type is not allowed or the bytes
            cannot be decoded as an image; 500 for any other failure.
    """
    try:
        content = await file.read()
        mime = magic.Magic(mime=True)
        file_type = mime.from_buffer(content)

        if file_type not in ALLOWED_MIME_TYPES:
            raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}")

        try:
            image = Image.open(io.BytesIO(content))
        except OSError as exc:
            # Pillow signals undecodable input with an OSError subclass. This
            # also covers allowed types it cannot open (e.g. PDF), which is a
            # client-side problem rather than a server fault.
            raise HTTPException(
                status_code=400,
                detail=f"Could not decode {file_type} content as an image: {exc}",
            ) from exc

        result = doc_processor.process_image(image)
        return result

    except HTTPException:
        # HTTPException is itself an Exception; without this clause the 400s
        # raised above would be rewrapped as 500s by the handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/health")
async def health_check():
    """Liveness probe: always answers with a fixed "healthy" status payload."""
    status_payload = {"status": "healthy"}
    return status_payload

In [None]:
# Setup ngrok tunnel
import os

from pyngrok import ngrok

# Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
# The token is read from the NGROK_AUTHTOKEN environment variable when it is set,
# so it does not have to be saved inside the notebook; otherwise the placeholder
# below is used and must be replaced by hand.
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN", "your_ngrok_token_here")  # Replace with your token
ngrok.set_auth_token(NGROK_TOKEN)

# Start ngrok tunnel (port must match the one uvicorn listens on)
tunnel = ngrok.connect(8000)
# connect() returns an NgrokTunnel object; formatting that object directly would
# print its description instead of the bare URL the frontend .env needs.
public_url = tunnel.public_url
print(f"\nBackend URL: {public_url}")
print("\nUpdate your frontend .env file with:")
print(f"VITE_API_URL={public_url}")

In [None]:
# Start the FastAPI server
import uvicorn
import nest_asyncio

# Patch asyncio first; presumably needed so uvicorn can run its event loop
# from inside the already-running notebook kernel.
nest_asyncio.apply()

# Serves the `app` object from app.py on all interfaces, port 8000.
# This call blocks the cell for as long as the server is running.
uvicorn.run("app:app", port=8000, host="0.0.0.0")