In [1]:
from typing import Dict, Any, Optional
import os
import json
import base64
from pathlib import Path
from io import BytesIO
import re

from mistralai import Mistral
from mistralai.client import MistralClient
from mistralai.models import File

from dotenv import load_dotenv

In [2]:
# Load environment variables via python-dotenv so that later cells can read
# MISTRAL_API_KEY from the process environment. The call is the last expression
# of the cell, so its return value is what the notebook displays.
load_dotenv()

True

In [4]:
# Build the Mistral API client from the MISTRAL_API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, so a missing
# key is not reported here — presumably it surfaces on the first API call; confirm.
client = Mistral(api_key=os.getenv('MISTRAL_API_KEY'))

In [5]:
def _encode_image(image_path: str) -> str:
    """
    Encode an image file to base64 string.
    
    Args:
        image_path: Path to the image file
        
    Returns:
        Base64 encoded string of the image
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

In [6]:
# Sample image used as the OCR input in the cells below. The path is relative,
# so it resolves against the kernel's current working directory.
image_path = "tests/test_data/IMG_1356.jpeg"

In [7]:
# Sanity check: displays True only if image_path points at an existing regular
# file, so a wrong working directory is caught before the OCR request is sent.
os.path.isfile(image_path)

True

In [8]:
# Send the local image to the Mistral OCR endpoint inline, as a base64 data URI
# passed in the "image_url" document field.
# The media type is declared as "image/jpeg": that is the registered MIME type
# for JPEG data ("image/jpg" is not a registered type), and image_path points
# at a .jpeg file.
ocr_response = client.ocr.process(
    model="mistral-ocr-latest",
    include_image_base64=True,
    document={
        "type": "image_url",
        "image_url": f"data:image/jpeg;base64,{_encode_image(image_path)}"
    }
)

In [9]:
# Display the markdown text of the first page in the OCR response.
# Indexing pages[0] assumes the response contains at least one page.
ocr_response.pages[0].markdown

"# GROCERY \n\n284061239 FAIRLIFE\nNF \\$19.96\n4 @ \\$4.99 ea\n284031158 GG EGGS\nNF \\$7.39\n266010019 GG CITRUS\nNF \\$4.99\n071051828 WAVY LAYS\nNF \\$5.89\n071050637 LAY'S\nNF \\$5.98\n2 @ \\$2.99 ea\nRegular Price \\$3.99\nBOG050\\% Circle\nNON RETAIL\n004100019 TARGET BAG\nT P \\$0.00\n2 @ \\$0.00 ea\nBag Fee\n\\$0.20\nSUBTOTAL \\$44.41\nTOTAL \\$11.1"