In [14]:
"""
Test script for the Document Intelligence System
Creates sample images with text to test OCR and classification
"""

import requests
from PIL import Image, ImageDraw, ImageFont
import io
import os

def create_sample_invoice():
    """Create a sample invoice image for testing.

    Draws a fixed list of invoice lines in black on a white 800x600 RGB
    canvas. Text starts at (50, 50) and every entry, including the blank
    spacer entries, moves the next line down by 35 pixels. "BFI TECH" and
    "INVOICE" use the larger heading font; all other lines use the smaller one.

    Returns:
        The image created by ``Image.new`` with the invoice text drawn on it.
    """
    img = Image.new('RGB', (800, 600), color='white')
    draw = ImageDraw.Draw(img)

    # Prefer Arial and fall back to Pillow's built-in font when it cannot be
    # loaded. Only "font unavailable" failures are caught (OSError: file
    # missing/unreadable; ImportError: Pillow without FreeType support) so
    # that Ctrl-C and genuine programming errors are no longer swallowed.
    try:
        font = ImageFont.truetype("arial.ttf", 20)
        small_font = ImageFont.truetype("arial.ttf", 16)
    except (OSError, ImportError):
        font = ImageFont.load_default()
        small_font = ImageFont.load_default()

    # Lines rendered with the larger heading font.
    headings = ("BFI TECH", "INVOICE")

    # Invoice content; empty strings act as vertical spacing.
    text_lines = [
        "BFI TECH",
        "Software Development Company",
        "",
        "INVOICE",
        "",
        "Bill to: Client Company Ltd",
        "Date: 2025-07-31",
        "",
        "Services:",
        "Software Development    IDR 2,000,000",
        "Consulting              IDR 450,000",
        "",
        "Total Amount: IDR 2,450,000"
    ]

    for index, line in enumerate(text_lines):
        line_font = font if line in headings else small_font
        draw.text((50, 50 + 35 * index), line, fill='black', font=line_font)

    return img

def create_sample_receipt():
    """Create a sample receipt image for testing.

    Draws a fixed list of receipt lines in black on a white 400x500 RGB
    canvas using a single font. Text starts at (20, 30) and every entry,
    including the blank spacer entries, moves the next line down by 25 pixels.

    Returns:
        The image created by ``Image.new`` with the receipt text drawn on it.
    """
    img = Image.new('RGB', (400, 500), color='white')
    draw = ImageDraw.Draw(img)

    # Fall back to Pillow's built-in font only when Arial cannot be loaded
    # (OSError: file missing/unreadable; ImportError: Pillow without FreeType
    # support). Anything else, including Ctrl-C, is allowed to propagate.
    try:
        font = ImageFont.truetype("arial.ttf", 16)
    except (OSError, ImportError):
        font = ImageFont.load_default()

    # Receipt content; empty strings act as vertical spacing.
    text_lines = [
        "SUPERMARKET ABC",
        "123 Main Street",
        "",
        "RECEIPT",
        "",
        "Date: 2025-07-31",
        "Cashier: John",
        "",
        "Items:",
        "Bread         5,000",
        "Milk         12,000",
        "Eggs          8,000",
        "",
        "Total Paid: IDR 25,000",
        "",
        "Thank you for shopping!"
    ]

    for index, line in enumerate(text_lines):
        draw.text((20, 30 + 25 * index), line, fill='black', font=font)

    return img

def create_sample_id():
    """Create a sample ID card image for testing.

    Builds a 600x400 light-blue RGB canvas with a dark-blue border, draws the
    card fields in black at fixed positions along x=30, and adds an outlined
    square with a gray "PHOTO" label as a photo placeholder.

    Returns:
        The image created by ``Image.new`` with the card contents drawn on it.
    """
    img = Image.new('RGB', (600, 400), color='lightblue')
    draw = ImageDraw.Draw(img)

    # Draw a simple border
    draw.rectangle([(10, 10), (590, 390)], outline='darkblue', width=3)

    # Fall back to Pillow's built-in font only when Arial cannot be loaded
    # (OSError: file missing/unreadable; ImportError: Pillow without FreeType
    # support). Anything else, including Ctrl-C, is allowed to propagate.
    try:
        title_font = ImageFont.truetype("arial.ttf", 18)
        font = ImageFont.truetype("arial.ttf", 16)
        small_font = ImageFont.truetype("arial.ttf", 14)
    except (OSError, ImportError):
        title_font = ImageFont.load_default()
        font = ImageFont.load_default()
        small_font = ImageFont.load_default()

    # ID card content as (text, font, y). Each entry carries its own vertical
    # position, so no blank spacer entries are needed.
    text_lines = [
        ("REPUBLIC OF INDONESIA", title_font, 30),
        ("IDENTITY CARD", title_font, 55),
        ("ID Number: 1234567890123456", font, 100),
        ("Full Name: JOHN SMITH ANDERSON", font, 140),
        ("Date of Birth: 15/03/1990", font, 180),
        ("Nationality: Indonesian", font, 220),
        ("Address: Jl. Merdeka No. 123", small_font, 260),
        ("Jakarta Pusat, DKI Jakarta", small_font, 280),
        ("Valid Until: 15/03/2030", small_font, 320)
    ]

    for line, font_type, y in text_lines:
        draw.text((30, y), line, fill='black', font=font_type)

    # Draw a simple photo placeholder
    draw.rectangle([(450, 100), (550, 200)], outline='black', width=2)
    draw.text((470, 140), "PHOTO", fill='gray', font=small_font)

    return img

def test_api_endpoint(image, filename):
    """Test the /analyze endpoint with an image"""
    
    # Save image to bytes
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='PNG')
    img_byte_arr.seek(0)
    
    try:
        # Make API request
        response = requests.post(
            "http://localhost:8000/analyze",
            files={"file": (filename, img_byte_arr, "image/png")}
        )
        
        if response.status_code == 200:
            result = response.json()
            print(f"\n✅ {filename} Analysis Results:\n")
            print(f"Document Type: {result['document_type']}")
            print("Extracted Fields:")
            for key, value in result['fields'].items():
                print(f"  {key}: {value}")
            print(f"Raw Text Preview:\n {result['raw_text']}.")
        else:
            print(f"❌ Error {response.status_code}: {response.text}")
            
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to API. Make sure the FastAPI server is running on http://localhost:8000")
    except Exception as e:
        print(f"❌ Error: {e}")

def main():
    """Run the whole demo: build the samples, save them, send each to the API."""
    divider = "=" * 50
    print("🧪 Testing Document Intelligence System")
    print(divider)

    # Build every sample document up front, paired with its file name.
    print("📄 Creating sample documents...")
    samples = [
        ("sample_invoice.png", create_sample_invoice()),
        ("sample_receipt.png", create_sample_receipt()),
        ("sample_id.png", create_sample_id()),
    ]

    # Keep a copy on disk for reference.
    os.makedirs("samples", exist_ok=True)
    for sample_name, sample_image in samples:
        sample_image.save("samples/" + sample_name)
    print("✅ Sample images saved to 'samples/' directory")

    # Send each sample through the API, in the order it was created.
    print("\n🔍 Testing API endpoints...")
    for sample_name, sample_image in samples:
        test_api_endpoint(sample_image, sample_name)

    print("\n" + divider)
    print("🎯 Test completed!")
    print("💡 You can also test with your own images using:")
    print('   curl -X POST "http://localhost:8000/analyze" -F "file=@your_image.jpg"')

# if __name__ == "__main__":
#     main()

In [15]:
# Build the three in-memory sample documents used by the following cells.
invoice_img, receipt_img, id_img = (
    create_sample_invoice(),
    create_sample_receipt(),
    create_sample_id(),
)

In [16]:
# Write each sample to samples/<name>.png so it can be inspected by hand.
os.makedirs("samples", exist_ok=True)
for sample_stem, sample_image in (
    ("sample_invoice", invoice_img),
    ("sample_receipt", receipt_img),
    ("sample_id", id_img),
):
    sample_image.save(f"samples/{sample_stem}.png")
print("✅ Sample images saved to 'samples/' directory")

✅ Sample images saved to 'samples/' directory


In [13]:
# Show the equivalent curl command for uploading an arbitrary image.
print('   curl -X POST "http://localhost:8000/analyze" -F "file=@your_image.jpg"')

   curl -X POST "http://localhost:8000/analyze" -F "file=@your_image.jpg"


In [12]:
# Send the invoice and the receipt to the API, with a ruler between the two
# reports. The ID card sample is not sent in this cell.
test_api_endpoint(image=invoice_img, filename="sample_invoice.png")
print("=" * 100)
test_api_endpoint(image=receipt_img, filename="sample_receipt.png")


✅ sample_invoice.png Analysis Results:

Document Type: invoice
Extracted Fields:
  company: Client Company Ltd
Date
  total_amount: IDR 2,000,000
  date: 2025-07-31
Raw Text Preview:
 BFI TECH

Software Development Company

INVOICE

Bill to: Client Company Ltd
Date: 2025-07-31

Services:

Software Development IDR 2,000,000

Consulting IDR 450,000

Total Amount: IDR 2,450,000.

✅ sample_receipt.png Analysis Results:

Document Type: receipt
Extracted Fields:
  vendor: SUPERMARKET ABC
  total_paid: Paid: IDR 25,000
  date: 2025-07-31
Raw Text Preview:
 SUPERMARKET ABC
123 Main Street

RECEIPT

Date: 2025-07-31
Cashier: John

Items
Bread 5,000
Milk 12,000
Eggs 8,000

Total Paid: IDR 25,000

Thank you for shopping!.
