In [None]:
import base64
import mimetypes
import os

from dotenv import load_dotenv
from openai import OpenAI

1. Setup Environment  

In [None]:
# Generous per-request timeout (seconds) passed to the OpenAI client.
REQUEST_TIMEOUT_SECONDS = 600.0

# Pull variables from a local .env file into the process environment before
# the API key is looked up.
load_dotenv()

client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    timeout=REQUEST_TIMEOUT_SECONDS,
)

2. Helper function to encode local images

In [None]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path to the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` via UTF-8.

    Raises:
        OSError: Propagated from ``open`` if the file cannot be read.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

3. Define the input image and extraction prompt

In [None]:
# Alternative input (receipt example) -- uncomment these three lines and
# comment out the active block below to use it instead:
# image_path = "WalmartReceipt.png"
# base64_image = encode_image(image_path)
# user_prompt = "Extract the merchant name, date, total amount, and items into JSON format."

# Active input: a document image whose text is requested as
# field-name / value pairs.
image_path = "Document.jpg"
base64_image = encode_image(image_path)
user_prompt = (
    "Extract all text on the document, separated into field name "
    "and then value found, into JSON format."
)


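4. The GPT-5.2 API Call  
We use 'gpt-5.2' with an explicit reasoning-effort setting for OCR. Higher effort (e.g. 'high') is intended to improve accuracy and reduce hallucinations; the request below currently sends 'low'.  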

In [None]:
# Derive the MIME type for the data URL from the file extension so that
# non-JPEG inputs (e.g. a .png) are labelled correctly. Fall back to JPEG
# when the extension is missing or unrecognised.
image_mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

response = client.chat.completions.create(
    model="gpt-5.2",
    messages=[
        {
            "role": "system",
            "content": "You are a professional data extraction agent. Extract the requested fields into valid JSON."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": user_prompt},
                {
                    # The image is sent inline as a base64 data URL.
                    "type": "image_url",
                    "image_url": {"url": f"data:{image_mime_type};base64,{base64_image}"}
                }
            ]
        }
    ],
    # JSON mode: asks for a syntactically valid JSON object. This is not the
    # schema-enforcing Structured Outputs feature ("json_schema"), so field
    # names are not guaranteed. JSON mode expects the word "JSON" to appear in
    # the messages, which the system prompt above provides.
    response_format={"type": "json_object"},
    # Reasoning effort for OCR precision, sent through the raw request body.
    # Values are believed to be: none, low, medium, high, xhigh -- TODO confirm
    # against the model documentation. Currently "low"; raise it (e.g. "high")
    # when accuracy matters more than latency and token usage.
    extra_body={"reasoning_effort": "low"}
)

5. Output the result

In [None]:
# The first choice carries the model's reply text (requested as JSON).
extracted_text = response.choices[0].message.content
print(extracted_text)

6. Get the token usage metrics

In [None]:
# Token accounting reported by the API for this request.
token_usage = response.usage
print(f"Prompt Tokens:     {token_usage.prompt_tokens}")
print(f"Completion Tokens: {token_usage.completion_tokens}")
print(f"Total Tokens:      {token_usage.total_tokens}")

7. Reasoning tokens used

In [None]:
# Detailed breakdown for reasoning models.
# The API's `completion_tokens` total already includes the hidden reasoning
# ("thinking") tokens, so the visible output is the difference between the two.
completion_details = response.usage.completion_tokens_details

# `completion_tokens_details` (or its `reasoning_tokens` field) may be missing
# or None on some responses; treat that as zero reasoning tokens.
reasoning_tokens = getattr(completion_details, "reasoning_tokens", None) or 0
visible_output_tokens = response.usage.completion_tokens - reasoning_tokens

print(f"Visible Output Tokens: {visible_output_tokens}")
print(f"Reasoning (Thinking) Tokens: {reasoning_tokens}")