In [1]:
# %pip install retab

We strongly recommend using [Pydantic](https://docs.pydantic.dev/latest/) to build schemas.

Pydantic is the most widely used data validation library for Python.

In [2]:
# Draft a Schema using Pydantic

from pydantic import BaseModel

class Invoice(BaseModel):
    # Schema describing the fields to pull out of an invoice document.
    # Every field is a required `str` (no defaults, no Optional), so the
    # generated JSON schema lists all nine properties as required strings.
    # NOTE(review): a class docstring is deliberately avoided here, since
    # Pydantic would copy it into the generated schema's "description".
    date: str
    invoice_number: str
    total: str  # typed as text, not a number
    status: str
    customer: str  # presumably the customer's name; confirm against the source document
    customer_address: str
    customer_email: str
    customer_phone: str
    customer_website: str

In [3]:
# Get Structured Data

from dotenv import load_dotenv
from retab import Retab

# We recommend creating a .env file containing your RETAB_API_KEY=sk_retab_***
load_dotenv()

client = Retab()

# Name the request inputs up front so the extraction call reads at a glance.
invoice_image = "../assets/code/invoice.jpeg"
invoice_schema = Invoice.model_json_schema()

response = client.documents.extract(
    documents=[invoice_image],
    model="gpt-4o-mini",  # or any model your plan supports
    json_schema=invoice_schema,
    modality="text",
)

# Show the complete response as indented JSON.
print(response.model_dump_json(indent=4))

{
    "id": "chatcmpl-ByhhlMki2637MzgF8CZNoAWzXRboU",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "{\"date\": \"2021-11-24\", \"invoice_number\": \"1437\", \"total\": \"$12,113.67\", \"status\": \"\", \"customer\": \"Johnson Carrie\", \"customer_address\": \"45 Lightning Road, Arizona, AZ 88776\", \"customer_email\": \"proprietor@abcxyz.com\", \"customer_phone\": \"321-321-1234\", \"customer_website\": \"www.amnoshsuppliers.com\"}",
                "refusal": null,
                "role": "assistant",
                "annotations": null,
                "audio": null,
                "function_call": null,
                "tool_calls": null,
                "parsed": {
                    "date": "2021-11-24",
                    "invoice_number": "1437",
                    "total": "$12,113.67",
                    "status": "",
                    "customer":