In [1]:
import requests
import fitz
import json
import os
import base64

# Enables the display of multiple outputs when running a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
apikey = os.getenv('OPENAI_API_KEY')

In [3]:
def encode_image(image_path: str):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [4]:
path = 'E:/Downloads/invoice.pdf'

pdf_base64 = encode_image(path)
pdf_base64[:20] + ' ...TRIMMED'

'JVBERi0xLjQKJb662+4K ...TRIMMED'

In [10]:
pdf_bytes = base64.b64decode(pdf_base64)
str(pdf_bytes[:20]) + ' ...TRIMMED'

"b'%PDF-1.4\\n%\\xbe\\xba\\xdb\\xee\\n1 0 o' ...TRIMMED"

In [62]:
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
png_base64_list = []

for page_num in range(len(doc)):
    page = doc.load_page(page_num)
    pix = page.get_pixmap(alpha=True)
    png_bytes = pix.tobytes(output="png")
    png_base64 = base64.b64encode(png_bytes).decode('utf-8')
    png_base64_list.append(png_base64)

png_base64 = png_base64_list[0]
png_base64

'iVBORw0KGgoAAAANSUhEUgAAAw8AAARUCAYAAAAwMi9sAAAACXBIWXMAAA7EAAAOxAGVKw4bAAJ6FElEQVR4nOy9ZZwbR7b+f9/8f/fepbuY3WyyyYbJsR0ndsiYmJkdM4zZMTMzMzPDmJnHPMwMHmae8aAhyb3nX6eUllsaSdOaGY3k8fPi+5HUOl1dfbrUOk/Xqar/KHj0EwEAAAAAAACAJZ4+/V/6D3tXAgAAAAAAAOD4QDwAAAAAAAAANAHxAAAAAAAAANAExAMAAAAAAABAExAPAAAAAAAAAE1APAAAAAAAAAA0AfEAAAAAAAAA0ATEAwAAAAAAAEATEA8AAAAAAAAATUA8AAAAAAAAADQB8QAAAAAAAADQBMQDAAAAAAAAQBMQDwAAAAAAAABNQDwAAAAAAAAANAHxAAAAAAAAANAExAMAAAAAAABAExAPAAAAAAAAAE1APAAAAAAAAAA0AfEAAAAAAAAA0ATEAwAAAAAAAEATEA8AAAAAAAAATUA8AAAAAAAAADQB8QAAAAAAAADQBMQDAAAAAAAAQBMQDwAAAAAAoFzycp9Qfv5Tys0pLddWi401cHl87Ly8J5r3eaSyN1cfZXt53/O5c3n2vgaOAMQDAAAAAACwSFHhT1Ra+pRKSpgnVFz8s0m7ErH98WN+NR1oczlPxPeFhYbbCwt+oseluu+N9ykueiaO95hKxPELC80H8CUlujKUz6WlP8m6ynqbqE9pyc/670tLy4qS4iLV/uK8CwuemTwul1NcbP9rVF1APAAAAAAAALNwYH/tmhv17T+OGnzdm7p0G00XL96nHNXTehYDCQkZdPjwFRo0ZCat3XBAfF9Spqy42AyaNHklubkHy895uaUUHZ1Mx5wv0+KlaykxKU1vy0IiMjKZps9YQ1837C+ZOWsdxcWllhEZj/Kf0ZZtB2jrjuOyvlyXufM20lff9qYvvuxNU6evoZCQGGH7TH6fk11I+/efpcbN+lHtz7p

In [63]:
url = 'https://api.openai.com/v1/chat/completions'

headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {apikey}'
}

data = {
    "model": "gpt-4-turbo",
    "response_format": { "type": "json_object" },
    "messages": [
    {
        "role": "user",
        "content": [
        {
            "type": "text",
            "text": "Extract the invoice identifier and the total amount without money symbol. Output must be in a json structure with the following keys: identifier and amount."
        },
        {
            "type": "image_url",
            "image_url": {
            "url": f"data:image/jpeg;base64,{png_base64}"
            }
        }
        ]
    }
    ]
}

In [64]:
response = requests.post(url=url, headers=headers, json=data)
response.text

'{\n  "id": "chatcmpl-9hh63QaKMxEQRvhvwX9UrXDK2RYdw",\n  "object": "chat.completion",\n  "created": 1720200795,\n  "model": "gpt-4-turbo-2024-04-09",\n  "choices": [\n    {\n      "index": 0,\n      "message": {\n        "role": "assistant",\n        "content": "{\\n  \\"identifier\\": \\"INVO-005\\",\\n  \\"amount\\": \\"425\\"\\n}"\n      },\n      "logprobs": null,\n      "finish_reason": "stop"\n    }\n  ],\n  "usage": {\n    "prompt_tokens": 1140,\n    "completion_tokens": 19,\n    "total_tokens": 1159\n  },\n  "system_fingerprint": "fp_7d35882d38"\n}\n'

In [66]:
json_data = response.json()
json_data

{'id': 'chatcmpl-9hh63QaKMxEQRvhvwX9UrXDK2RYdw',
 'object': 'chat.completion',
 'created': 1720200795,
 'model': 'gpt-4-turbo-2024-04-09',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': '{\n  "identifier": "INVO-005",\n  "amount": "425"\n}'},
   'logprobs': None,
   'finish_reason': 'stop'}],
 'usage': {'prompt_tokens': 1140,
  'completion_tokens': 19,
  'total_tokens': 1159},
 'system_fingerprint': 'fp_7d35882d38'}

In [67]:
content = json_data['choices'][0]['message']['content'] if 'choices' in json_data else None
cleaned = json.loads(content)
cleaned

{'identifier': 'INVO-005', 'amount': '425'}