In [7]:
import sys
from dotenv import load_dotenv

# Add the parent directory to the import search path before the project imports
# below run — presumably so the `karanta` package resolves when the notebook is
# started from a subdirectory of the repository (TODO confirm the layout).
sys.path.append("..")

from karanta.llm_clients.azure_client import AzureOPENAILLM
from karanta.data.process_pdf_utils import render_pdf_to_base64png
from karanta.constants import TARGET_IMAGE_DIM
from karanta.data.utils import load_prompt_template, create_vision_message

# Read variables from a .env file into the process environment; the return value
# is what the notebook echoes as this cell's output.
# NOTE(review): presumably this supplies the Azure credentials used by the
# client later in the notebook — confirm.
load_dotenv()

True

In [8]:
# PDF used as the input document for this experiment.
# NOTE(review): absolute, user-specific path — this cell will not run on another
# machine as written; consider a path relative to a configurable data directory.
local_pdf_path = "/Users/odunayoogundepo/Desktop/test_images_karanta/pdf/no_type/complete-works_page_38.pdf"
# Page number handed to the renderer — presumably 1-based; confirm against
# render_pdf_to_base64png.
page = 1

# Render the selected page. Judging by the helper's name the result is a
# base64-encoded PNG sized according to TARGET_IMAGE_DIM — TODO confirm.
image_base64 = render_pdf_to_base64png(local_pdf_path, page, TARGET_IMAGE_DIM)

In [9]:
# Fetch the prompt named "text_present_prompt" from the create_tests.yaml prompt
# config (presumably a lookup by key inside that YAML file — confirm against
# load_prompt_template).
# NOTE(review): absolute, user-specific path — not portable to other machines.
prompt_template = load_prompt_template(
    "text_present_prompt",
    "/Users/odunayoogundepo/Desktop/ocr_training/karanta-ocr/configs/prompts/create_tests.yaml",
)

In [None]:
def text_order_response_format() -> dict:
    """Build the ``json_schema`` response format for "text_order" tests.

    The returned dict describes an object with a single required ``tests``
    array. Every array element is an object with exactly four required string
    fields: ``test_type`` (restricted to ``"text_order"``), ``before``,
    ``after`` and ``target``. Extra properties are rejected at both levels and
    the schema is flagged ``strict``.

    Returns:
        dict: A freshly built response-format dictionary on every call.
    """

    def _string_field(description: str) -> dict:
        # Plain string property carrying only a description.
        return {"type": "string", "description": description}

    item_properties = {
        "test_type": {
            "type": "string",
            "enum": ["text_order"],
            "description": "The type of test to be performed.",
        },
        "before": _string_field(
            "The text that should appear before the target text."
        ),
        "after": _string_field(
            "The text that should appear after the target text."
        ),
        "target": _string_field(
            "The target text whose order is to be verified."
        ),
    }

    test_item = {
        "type": "object",
        "properties": item_properties,
        "additionalProperties": False,
        # Every declared property is required, in declaration order.
        "required": list(item_properties),
    }

    tests_array = {
        "type": "array",
        "items": test_item,
        "description": "A list of tests to check for the presence of specific text in the image.",
    }

    root_schema = {
        "type": "object",
        "properties": {"tests": tests_array},
        "additionalProperties": False,
        "required": ["tests"],
    }

    return {
        "type": "json_schema",
        "json_schema": {
            "name": "text_order_response",
            "schema": root_schema,
            "strict": True,
        },
    }

In [15]:
def text_present_response_format() -> dict:
    """Build the ``json_schema`` response format for "text_present" tests.

    The returned dict describes an object with a single required ``tests``
    array. Every array element is an object with five required fields:
    ``test_type`` (restricted to ``"text_present"``), ``text`` (string),
    ``case_sensitive`` (boolean), and ``first_n`` / ``last_n`` (each a string
    or null). Extra properties are rejected at both levels and the schema is
    flagged ``strict``.

    Returns:
        dict: A freshly built response-format dictionary on every call.
    """

    def _optional_count(which: str) -> dict:
        # `first_n` and `last_n` differ only in one word of their description.
        return {
            "type": ["string", "null"],
            "description": (
                f"If provided, only the {which} N characters of the text should be "
                "considered for matching. If null, consider the full text."
            ),
        }

    item_properties = {
        "test_type": {
            "type": "string",
            "enum": ["text_present"],
            "description": "The type of test to be performed.",
        },
        "text": {
            "type": "string",
            "description": "The text that is present in the image.",
        },
        "case_sensitive": {
            "type": "boolean",
            "description": "Indicates whether the text matching should be case sensitive.",
        },
        "first_n": _optional_count("first"),
        "last_n": _optional_count("last"),
    }

    test_item = {
        "type": "object",
        "properties": item_properties,
        "additionalProperties": False,
        # Every declared property is required, in declaration order.
        "required": list(item_properties),
    }

    tests_array = {
        "type": "array",
        "items": test_item,
        "description": "A list of tests to check for the presence of specific text in the image.",
    }

    root_schema = {
        "type": "object",
        "properties": {"tests": tests_array},
        "additionalProperties": False,
        "required": ["tests"],
    }

    return {
        "type": "json_schema",
        "json_schema": {
            "name": "text_present_response",
            "schema": root_schema,
            "strict": True,
        },
    }

In [None]:
def text_absent_response_format() -> dict:
    """Build the ``json_schema`` response format for "text_absent" tests.

    The returned dict describes an object with a single required ``tests``
    array. Every array element is an object with five required fields:
    ``test_type`` (restricted to ``"text_absent"``), ``text`` (string),
    ``case_sensitive`` (boolean), and ``first_n`` / ``last_n`` (each a string
    or null). Extra properties are rejected at both levels and the schema is
    flagged ``strict``.

    Returns:
        dict: A freshly built response-format dictionary on every call.
    """
    return {
        "type": "json_schema",
        "json_schema": {
            "name": "text_absent_response",
            "schema": {
                "type": "object",
                "properties": {
                    "tests": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "test_type": {
                                    "type": "string",
                                    # Must match this schema's own test kind: with
                                    # a strict schema the enum is the only value
                                    # the model is allowed to emit here.
                                    "enum": ["text_absent"],
                                    "description": "The type of test to be performed.",
                                },
                                "text": {
                                    "type": "string",
                                    "description": "The text that should be absent from the image.",
                                },
                                "case_sensitive": {
                                    "type": "boolean",
                                    "description": "Indicates whether the text matching should be case sensitive.",
                                },
                                # NOTE(review): N is a character count but is typed
                                # as a string — kept as-is for compatibility with
                                # existing consumers; confirm whether an integer
                                # type is intended.
                                "first_n": {
                                    "type": ["string", "null"],
                                    "description": "If provided, only the first N characters of the text should be considered for matching. If null, consider the full text.",
                                },
                                "last_n": {
                                    "type": ["string", "null"],
                                    "description": "If provided, only the last N characters of the text should be considered for matching. If null, consider the full text.",
                                },
                            },
                            "additionalProperties": False,
                            "required": [
                                "test_type",
                                "text",
                                "case_sensitive",
                                "first_n",
                                "last_n",
                            ],
                        },
                        "description": "A list of tests to check for the absence of specific text in the image.",
                    }
                },
                "additionalProperties": False,
                "required": ["tests"],
            },
            "strict": True,
        },
    }

In [16]:
# Project wrapper around the Azure-hosted model; "gpt-4.1" is presumably the
# model/deployment name — confirm against AzureOPENAILLM.
client = AzureOPENAILLM("gpt-4.1")
# Combine the prompt with the rendered page image into one message. The second
# argument is passed as None; its meaning is not visible from this notebook —
# TODO confirm against create_vision_message.
messages = create_vision_message(prompt_template, None, image_base64)

# Make API call
# Top-level `await` works here because the notebook kernel already runs an
# event loop; in a plain script this call would need to live inside an
# `async def` driven by asyncio.
response = await client.completion(
    [messages],  # Azure client expects nested structure
    # Response schema matching the "text_present_prompt" loaded earlier.
    text_present_response_format(),
    temperature=0.1,
    max_tokens=6000,
)

In [17]:
# Echo the completion result as the cell output for manual inspection.
response

[ModelCompletion(generation={'tests': [{'text': 'SOCIETY OF YOUNG NIGERIAN WRITERS', 'case_sensitive': True, 'first_n': '10', 'last_n': None}, {'text': 'D.O. FAGUNWA CORRESPONDENCE COURSE', 'case_sensitive': True, 'first_n': '15', 'last_n': None}, {'text': 'BY E-MAIL AND POSTAL MAIL', 'case_sensitive': True, 'first_n': '20', 'last_n': None}, {'text': 'OGBOJU ODE NINU IGBO IRUNMOLE', 'case_sensitive': True, 'first_n': None, 'last_n': '20'}, {'text': 'Questions Set and Edited by: Wole Adedoyin', 'case_sensitive': True, 'first_n': None, 'last_n': '20'}]}, model='gpt-4o')]