### Set up your environment variables

In [1]:
import os

# Placeholder credentials: substitute your own keys for the two values below.
os.environ.update(
    {
        "OPENAI_API_KEY": "YOUR API KEY",
        "UIFORM_API_KEY": "YOUR API KEY",  # go to https://uiform.com to create your API Key
    }
)

In [2]:
from dotenv import load_dotenv
# Load environment variables from a .env file (existing values are overridden).
# The call is kept outside the `assert` so that it still runs when assertions
# are disabled (python -O), and the assertion carries an explicit message.
dotenv_loaded = load_dotenv("../.env.local", override=True)
assert dotenv_loaded, "load_dotenv did not load any environment variable from ../.env.local"

# Get started

In [3]:
from uiform import UiForm
from uiform._utils.json_schema import filter_reasoning_fields_json
# Instantiate the UiForm client with its default arguments.
# NOTE(review): presumably it reads UIFORM_API_KEY from the environment configured above — confirm.
uiclient = UiForm()
# Retrieve the models list through the client; `models_list` is not referenced
# again in the visible cells.
models_list = uiclient.models.list()

  from .autonotebook import tqdm as notebook_tqdm


# Example 1 - Analyze information from a document

#### Option A - Use our preprocess endpoint with the OpenAI client

In [4]:
from pydantic import BaseModel

# Pydantic model describing a calendar event with three required fields.
# NOTE(review): not referenced by the other cells visible in this notebook — confirm it is still needed.
class CalendarEvent(BaseModel):
    name: str  # event name
    date: str  # kept as a plain string; no date format is enforced here
    participants: list[str]  # one string per participant

In [5]:
import json
from uiform import UiForm, Schema
from openai import OpenAI

# Load the JSON schema that describes the fields to extract.
# The encoding is given explicitly so the file is decoded as UTF-8 on every
# platform instead of falling back to the locale's default encoding.
with open("freight/schema.json", "r", encoding="utf-8") as f:
    json_schema = json.load(f)


uiclient = UiForm()

# Turn the document into chat messages via the UiForm client.
doc_msg = uiclient.documents.create_messages(document="freight/booking_confirmation.jpg")

# Wrap the raw JSON schema loaded above in a UiForm Schema object.
schema_obj = Schema(json_schema=json_schema)



In [6]:

# Now you can use your favorite model to analyze your document.
client = OpenAI()
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    # Pin the temperature to 0 so that the call matches the `temperature = 0`
    # recorded when this completion is logged in the cells further down.
    temperature=0,
    # Schema-derived messages first, then the document messages.
    messages=schema_obj.openai_messages + doc_msg.openai_messages,
    # Request structured output that follows the inference JSON schema.
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": schema_obj.id,
            "schema": schema_obj.inference_json_schema,
            "strict": True,
        },
    },
)
print(completion.choices[0].message.content)

{"reasoning___root":"Analysis shows:\n- Clear booking confirmation document from ACME Corporation\n- Involves a single shipment with specific details related to dangerous goods\n- Client identified as ACME Corporation from the letterhead and includes relevant VAT number\n- Total price and payment terms clearly stated \n- All required signatures and dates present, but verification of VAT format needed","booking_id":"BC-67890","payment":{"total_price":1500,"currency":"EUR"},"client":{"company_name":"ACME Corporation","reasoning___VAT_number":"Found VAT number GB123456789 in the header section labeled as 'VAT Number'. Format conforms to UK VAT number standards.","VAT_number":"GB123456789","city":"London","postal_code":"WC2N 5DU","country":"GB","code":null,"email":"client@acme.com"},"reasoning___shipments":"Shipment Analysis:\n1: 500 kg of liquid hazardous material (UN1202) classified as flammable liquid. Requires specific storage conditions as indicated.","shipments":[{"shipment_id":"SHIP

In [7]:
# Strip the reasoning fields from the raw answer, then validate what remains
# against the original schema.
raw_content = completion.choices[0].message.content
assert raw_content is not None
content_without_reasoning = filter_reasoning_fields_json(raw_content)
extraction = schema_obj.pydantic_model.model_validate(content_without_reasoning)
extraction.model_dump()

{'booking_id': 'BC-67890',
 'payment': {'total_price': 1500, 'currency': 'EUR'},
 'client': {'company_name': 'ACME Corporation',
  'VAT_number': 'GB123456789',
  'city': 'London',
  'postal_code': 'WC2N 5DU',
  'country': 'GB',
  'code': None,
  'email': 'client@acme.com'},
 'shipments': [{'shipment_id': 'SHIP-001',
   'sender': {'company_name': 'ACME Corporation',
    'address': {'city': 'London',
     'postal_code': 'WC2N 5DU',
     'country': 'GB',
     'line1': '123 Elm Street',
     'line2': 'Suite 500'},
    'phone_number': '+44 20 7946 0958',
    'email_address': 'client@acme.com',
    'pickup_datetime': {'date': '2023-02-05',
     'start_time': '08:00:00',
     'end_time': '12:00:00'},
    'observations': 'Ensure compliance with hazardous goods handling requirements.'},
   'recipient': {'company_name': 'Beta Industries',
    'address': {'city': 'Munich',
     'postal_code': '80331',
     'country': 'DE',
     'line1': '769 Pine Street',
     'line2': ''},
    'phone_number': '+

In [9]:
# Optional: log the extraction so that it is kept in our database.
# Limitation: we don't get the likelihoods.
# For now, the messages can be sent in the UiForm-compatible format.
uiclient.documents.extractions.log(
    document="freight/booking_confirmation.jpg",
    messages=schema_obj.messages + doc_msg.messages,
    completion=completion,
    json_schema=json_schema,
    model="gpt-4o-mini",
    temperature=0,
)

{'extraction_id': 'extr_AXl9Rs_HhUM_ARcHJ0awe',
 'status': 'success',
 'error_message': None}

In [8]:
# You can use the messages from different providers (but you cannot mix them).
# Arguments that are identical for both logging calls below.
shared_log_kwargs = {
    "document": "freight/booking_confirmation.jpg",
    "completion": completion,
    "json_schema": json_schema,
    "model": "gpt-4o-mini",
    "temperature": 0,
}

# Log with the messages in OpenAI format.
out_log_openai = uiclient.documents.extractions.log(
    openai_messages=schema_obj.openai_messages + doc_msg.openai_messages,
    **shared_log_kwargs,
)
print(out_log_openai)

# Log with the messages in Anthropic format; the system prompt is passed separately.
out_log_anthropic = uiclient.documents.extractions.log(
    anthropic_messages=schema_obj.anthropic_messages + doc_msg.anthropic_messages,
    anthropic_system_prompt=schema_obj.anthropic_system_prompt,
    **shared_log_kwargs,
)
print(out_log_anthropic)





{'extraction_id': 'extr_5LK9bnCsx8MDs3TdeTvbx', 'status': 'success', 'error_message': None}


#### Option B - Using UiForm `extract` endpoint

In [19]:
import json
from uiform.client import UiForm

uiclient = UiForm()

# Let UiForm handle the schema, the document and the model call in one request.
completion = uiclient.documents.extractions.parse(
    json_schema="freight/schema.json",
    document="freight/booking_confirmation.jpg",
    model="gpt-4o-mini",
    temperature=0,
)

# `content` may be None; fall back to an empty JSON object, as the consensus
# example later in this notebook does, before stripping the reasoning fields.
filter_reasoning_fields_json(completion.choices[0].message.content or "{}")

{'booking_id': 'BC-67890',
 'payment': {'total_price': 1500, 'currency': 'EUR'},
 'client': {'company_name': 'ACME CORPORATION',
  'VAT_number': 'GB123456789',
  'city': 'London',
  'postal_code': 'WC2N 5DU',
  'country': 'GB',
  'code': None,
  'email': 'client@acme.com'},
 'shipments': [{'shipment_id': 'SHIP-001',
   'sender': {'company_name': 'ACME Corporation',
    'address': {'city': 'London',
     'postal_code': 'WC2N 5DU',
     'country': 'GB',
     'line1': '123 Elm Street',
     'line2': 'Suite 500'},
    'phone_number': '+44 20 7946 0958',
    'email_address': 'client@acme.com',
    'pickup_datetime': {'date': '2023-02-05',
     'start_time': '08:00:00',
     'end_time': '12:00:00'},
    'observations': None},
   'recipient': {'company_name': 'Beta Industries',
    'address': {'city': 'Munich',
     'postal_code': '80331',
     'country': 'DE',
     'line1': '769 Pine Street',
     'line2': None},
    'phone_number': '+49 89 123456',
    'email_address': None,
    'delivery_d

## Appendix A - Use text as modality

In [22]:
from uiform.client import UiForm

uiclient = UiForm()

# Same extraction request, this time with `modality` set to 'text'.
completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    modality='text',
    model="gpt-4o-mini-2024-07-18",
    temperature=0,
)

# Print the parsed object attached to the first choice.
text_extraction = completion.choices[0].message.parsed
print(text_extraction)

booking_id='SHIP-001' payment=TransportPriceData(total_price=1500.0, currency='EUR') client=ClientData(company_name='ACME Corporation', VAT_number=None, city='London', postal_code='WC2N S5DU', country='GB', code=None, email='client@acme.com') shipments=[ShipmentData(shipment_id='SHIP-001', sender=SenderData(company_name='ACME Corporation', address=AddressDataSimple(city='Manchester', postal_code='M1 4WP', country='GB', line1='456 Oak Avenue', line2='Floor 3 Suite 500'), phone_number='+442079460958', email_address='client@acme.com', pickup_datetime=PickupDatetimeData(date='2023-02-05', start_time='08:00:00', end_time='12:00:00'), observations=None), recipient=RecipientData(company_name='Beta Industries', address=AddressDataSimple(city='Munich', postal_code='80331', country='DE', line1='789 Pine Street', line2=None), phone_number='+4989123456', email_address=None, delivery_datetime=DeliveryDatetimeData(date='2023-03-05', start_time='10:00:00', end_time='16:00:00'), observations=None), go

## Appendix B - Add image settings

In [20]:
from uiform.client import UiForm

uiclient = UiForm()

# Image settings forwarded with the extraction request.
image_settings = dict(
    dpi=72,
    browser_canvas='A4',
    image_to_text='ocr',
    correct_image_orientation=True,
)

completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    image_settings=image_settings,
    model="gpt-4o-mini-2024-07-18",
    temperature=0,
)

# Print the parsed object attached to the first choice.
print(completion.choices[0].message.parsed)

booking_id='SHIP-001' payment=TransportPriceData(total_price=1500, currency='EUR') client=ClientData(company_name='ACME Corporation', VAT_number='GB123456789', city='London', postal_code='WC2N 5DU', country='GB', code='BC-67890', email='client@acme.com') shipments=[ShipmentData(shipment_id='SHIP-001', sender=SenderData(company_name='ACME Corporation', address=AddressDataSimple(city='London', postal_code='WC2N 5DU', country='GB', line1='123 Elm Street', line2='Suite 500'), phone_number='+44 20 7946 0958', email_address='client@acme.com', pickup_datetime=PickupDatetimeData(date='2023-05-02', start_time='08:00:00', end_time='12:00:00'), observations=None), recipient=RecipientData(company_name='Beta Industries', address=AddressDataSimple(city='Munich', postal_code='80331', country='DE', line1='789 Pine Street', line2=None), phone_number='+49 89 123456', email_address=None, delivery_datetime=DeliveryDatetimeData(date='2023-05-03', start_time='10:00:00', end_time='16:00:00'), observations=No

## Appendix C - Use o3-mini model with reasoning effort

In [None]:
from uiform.client import UiForm

uiclient = UiForm()

# Image settings forwarded with the extraction request.
image_settings = dict(
    dpi=72,
    browser_canvas='A4',
    image_to_text='ocr',
    correct_image_orientation=True,
)

# NOTE(review): this cell has no recorded run. Reasoning models may not accept
# a custom `temperature`; confirm that `temperature=0` is accepted together
# with `reasoning_effort` for "o3-mini".
completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    image_settings=image_settings,
    model="o3-mini",
    reasoning_effort="low",
    temperature=0,
)

# Print the parsed object attached to the first choice.
print(completion.choices[0].message.parsed)

## Appendix D - Consensus extraction

You can also benefit from consensus: run multiple extractions (with a non-zero temperature) and aggregate them into a single best result with more reliable likelihoods.

Be careful: the cost of consensus grows linearly with the number of consensus runs ($Total = N \times Cost(model)$).

In [16]:
from uiform.client import UiForm

uiclient = UiForm()

# Request 5 consensus runs at a non-zero temperature.
# `image_settings` is deliberately not passed in this example.
completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    model="gpt-4o-mini",
    n_consensus=5,
    temperature=0.5,
)

# Fall back to an empty JSON object when the message has no content.
consensus_content = completion.choices[0].message.content or "{}"
filter_reasoning_fields_json(consensus_content)

{'booking_id': 'BC-67890',
 'payment': {'total_price': 1500, 'currency': 'EUR'},
 'client': {'company_name': 'ACME Corporation',
  'VAT_number': 'GB123456789',
  'city': 'London',
  'postal_code': 'WC2N 5DU',
  'country': 'GB',
  'code': None,
  'email': 'client@acme.com'},
 'shipments': [{'shipment_id': 'SHIP-001',
   'sender': {'company_name': 'ACME Corporation',
    'address': {'city': 'London',
     'postal_code': 'WC2N 5DU',
     'country': 'GB',
     'line1': '123 Elm Street',
     'line2': 'Suite 500'},
    'phone_number': '+44 20 7946 0958',
    'email_address': 'client@acme.com',
    'pickup_datetime': {'date': '2023-02-05',
     'start_time': '08:00:00',
     'end_time': '12:00:00'},
    'observations': None},
   'recipient': {'company_name': 'Beta Industries',
    'address': {'city': 'Munich',
     'postal_code': '80331',
     'country': 'DE',
     'line1': '789 Pine Street',
     'line2': None},
    'phone_number': '+49 89 123456',
    'email_address': None,
    'delivery_d

In [17]:
# Display the likelihoods attached to the completion from the previous cell.
completion.likelihoods

{'reasoning___root': 1.0,
 'booking_id': 1.0,
 'payment': {'total_price': 1.0, 'currency': 1.0, '_consensus_score': 1.0},
 'client': {'company_name': 1.0,
  'reasoning___VAT_number': 1.0,
  'VAT_number': 1.0,
  'city': 1.0,
  'postal_code': 1.0,
  'country': 1.0,
  'code': 0.6,
  'email': 1.0,
  '_consensus_score': 0.94286},
 'reasoning___shipments': 1.0,
 'shipments': [{'shipment_id': 0.8,
   'reasoning___sender': 1.0,
   'sender': {'company_name': 1.0,
    'address': {'city': 0.8,
     'postal_code': 0.8,
     'country': 1.0,
     'line1': 0.8,
     'line2': 0.8,
     '_consensus_score': 0.84},
    'phone_number': 1.0,
    'email_address': 1.0,
    'pickup_datetime': {'date': 1.0,
     'start_time': 1.0,
     'end_time': 1.0,
     '_consensus_score': 1.0},
    'observations': 0.6,
    '_consensus_score': 0.90667},
   'reasoning___recipient': 1.0,
   'recipient': {'company_name': 1.0,
    'address': {'city': 1.0,
     'postal_code': 1.0,
     'country': 1.0,
     'line1': 0.8,
     'l