In [None]:
from datetime import date
from google import genai
from google.genai import types

from pydantic import BaseModel, Field

from dotenv import load_dotenv
# Load environment variables (e.g. from a local .env file) before any client is created.
load_dotenv()

# Client used below for `client.models.generate_content(...)`; no credentials are passed explicitly.
# NOTE(review): presumably the API key is picked up from the environment loaded above — confirm.
client = genai.Client()


# Structured schema for the fields read off a Zimbabwean national registration card.
# Documented with `#` comments (not a docstring) so the generated JSON schema is unchanged.
class ZimbabweNationalRegistrationCard(BaseModel):
    id_number: str
    surname: str
    first_name: str
    date_of_birth: date
    # Declared nullable but without a default, so the key is still required.
    village_of_origin: str | None
    place_of_birth: str
    date_of_issue: date

    # Pydantic v2 configuration. The class-based `Config` is deprecated in v2
    # and emits a deprecation warning; `model_config` carries the same setting.
    # Dates are rendered as DD/MM/YYYY when the model is dumped to JSON.
    model_config = {
        "json_encoders": {
            date: lambda v: v.strftime('%d/%m/%Y')
        }
    }


from pydantic import BaseModel
from datetime import date
from typing import Optional

# Agent section of the form; nested under `InsuranceDocument.agent_details`.
# Both values are kept as strings (the agent number is not parsed as an integer).
class AgentDetails(BaseModel):
    full_name_and_surname: str
    agent_number: str

# Next-of-kin sub-section; nested under `InsuredDetails.next_of_kin_details`.
class NextOfKinDetails(BaseModel):
    full_name_and_surname: str
    contact_number: str  # kept as a string, not parsed as a number

# Insured person's section of the form; nested under `InsuranceDocument.insured_details`.
class InsuredDetails(BaseModel):
    full_name_and_surname: str
    title: str
    id_number: str
    contact_number: str
    residential_address: str
    gender: str  # free-form string; no enum constraint is applied
    date_of_birth: date
    # Nullable, but declared without a default, so the key itself is still required.
    email_address: Optional[str]
    next_of_kin_details: NextOfKinDetails

# Bank section of the form; nested under `InsuranceDocument.bank_details`.
class BankDetails(BaseModel):
    bank_name: str
    account_number: str
    branch_code: str
    # A date of birth is unusual in a bank section, but it is kept here because
    # the source form image includes it in this section.
    date_of_birth: date  # seems unusual here, but as per image

# Top-level schema for the insurance form; passed as `response_schema` to the
# Gemini call so the model returns the agent, insured and bank sections.
# Documented with `#` comments (not a docstring) so the generated JSON schema is unchanged.
class InsuranceDocument(BaseModel):
    agent_details: AgentDetails
    insured_details: InsuredDetails
    bank_details: BankDetails

    # Pydantic v2 configuration. The class-based `Config` is deprecated in v2
    # and emits a deprecation warning; `model_config` carries the same setting.
    # Dates are rendered as DD-MM-YYYY when the model is dumped to JSON.
    model_config = {
        "json_encoders": {
            date: lambda v: v.strftime('%d-%m-%Y')
        }
    }

# Read the scanned form as raw bytes so it can be sent inline with the prompt.
with open('images/form1.jpg', 'rb') as f:
    image_bytes = f.read()

# Ask Gemini for JSON constrained to the InsuranceDocument schema.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        types.Part.from_bytes(
            data=image_bytes,
            mime_type='image/jpeg',
        ),
        'Extract that information?'
    ],
    config={
        "response_mime_type": "application/json",
        "response_schema": InsuranceDocument,
    },
)

# `response.parsed` holds the schema-validated object. It is None when the
# returned text could not be parsed into the schema, so fail with a message
# that includes the raw text instead of an AttributeError on `None`.
card_info = response.parsed
if card_info is None:
    raise ValueError(
        "Gemini response could not be parsed into InsuranceDocument; "
        f"raw response text: {response.text!r}"
    )
print(card_info.model_dump_json(indent=4))

### ID

In [None]:
"""
This code sample shows Prebuilt ID Document operations with the Azure AI Document Intelligence client library.
The async versions of the samples require Python 3.8 or later.

To learn more, please visit the documentation - Quickstart: Document Intelligence (formerly Form Recognizer) SDKs
https://learn.microsoft.com/azure/ai-services/document-intelligence/quickstarts/get-started-sdks-rest-api?pivots=programming-language-python
"""

from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, DocumentAnalysisFeature

from dotenv import load_dotenv
import os
import base64

load_dotenv()

# Azure Document Intelligence connection settings, read from the environment.
endpoint = os.getenv("AZURE_ENDPOINT")
key = os.getenv("AZURE_KEY")
if not endpoint or not key:
    # Fail fast with a clear message rather than an obscure SDK error later on.
    raise RuntimeError(
        "AZURE_ENDPOINT and AZURE_KEY must be set in the environment (e.g. via a .env file)."
    )

# sample document
formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/DriverLicense.png"

document_intelligence_client = DocumentIntelligenceClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Alternative: analyze the hosted sample document instead of a local file.
# poller = document_intelligence_client.begin_analyze_document(
#     "prebuilt-idDocument", AnalyzeDocumentRequest(url_source=formUrl)
# )

# Local ID image, sent inline as base64.
local_image_path = r'D:\D_Documents\Dataal Africa\Ops Code\InsureFlow\Backend\images\id5.jpg'
with open(local_image_path, "rb") as f:
    image_data = f.read()
base64_image = base64.b64encode(image_data).decode("utf-8")

# Start the long-running analysis with the prebuilt ID model, additionally
# requesting two query fields that are read back in the next cell.
poller = document_intelligence_client.begin_analyze_document(
    "prebuilt-idDocument",
    body={"base64Source": base64_image},
    features=[DocumentAnalysisFeature.QUERY_FIELDS],    # Specify which add-on capabilities to enable.
    query_fields=["PlaceOfBirth", "VillageOfOrigin"]
)


In [None]:
# Block until the analysis finishes, then report each recognized ID document.
id_documents = poller.result()

for idx, id_document in enumerate(id_documents.documents):
    print("--------Recognizing ID document #{}--------".format(idx + 1))

    # Each field is looked up by name and printed only when it is present.
    if first_name := id_document.fields.get("FirstName"):
        print("First Name: {} has confidence: {}".format(first_name.value_string, first_name.confidence))

    if last_name := id_document.fields.get("LastName"):
        print("Last Name: {} has confidence: {}".format(last_name.value_string, last_name.confidence))

    if document_number := id_document.fields.get("DocumentNumber"):
        print("Document Number: {} has confidence: {}".format(document_number.value_string, document_number.confidence))

    # Date fields are read through `value_date` for the prebuilt model.
    if dob := id_document.fields.get("DateOfBirth"):
        print("Date of Birth: {} has confidence: {}".format(dob.value_date, dob.confidence))

    if doe := id_document.fields.get("DateOfExpiration"):
        print("Date of Expiration: {} has confidence: {}".format(doe.value_date, doe.confidence))

    # Query fields requested via `query_fields`; place of birth is printed from its raw content.
    if pob := id_document.fields.get("PlaceOfBirth"):
        print("Place Of Birth: {} has confidence: {}".format(pob.content, pob.confidence))

    if voo := id_document.fields.get("VillageOfOrigin"):
        print("Village Of Origin: {} has confidence: {}".format(voo.value_string, voo.confidence))


In [None]:
# Report the dimensions of every analyzed page.
result = poller.result()
for page in result.pages:
    print(f"----Analyzing layout from page #{page.page_number}----")
    print(f"Page has width: {page.width} and height: {page.height}, measured with unit: {page.unit}")

### ID 2 — custom model

In [1]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, DocumentAnalysisFeature

from dotenv import load_dotenv
import os
import base64

load_dotenv()

# Azure Document Intelligence connection settings, read from the environment.
endpoint = os.getenv("AZURE_ENDPOINT")
key = os.getenv("AZURE_KEY")
if not endpoint or not key:
    # Fail fast with a clear message rather than an obscure SDK error later on.
    raise RuntimeError(
        "AZURE_ENDPOINT and AZURE_KEY must be set in the environment (e.g. via a .env file)."
    )


document_intelligence_client = DocumentIntelligenceClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Alternative kept for reference (note: `formUrl` is not defined in this cell).
# poller = document_intelligence_client.begin_analyze_document(
#     "prebuilt-idDocument", AnalyzeDocumentRequest(url_source=formUrl)
# )

# Local ID image, sent inline as base64.
local_image_path = r'D:\D_Documents\Dataal Africa\Ops Code\InsureFlow\Backend\images\id1.jpg'
with open(local_image_path, "rb") as f:
    image_data = f.read()
base64_image = base64.b64encode(image_data).decode("utf-8")

# Start the long-running analysis against the model with this ID (not a "prebuilt-*" model).
poller = document_intelligence_client.begin_analyze_document(
    "Zimbabwe_National_ID_Extractor_v1",
    body={"base64Source": base64_image}
)


In [None]:
# Block until the analysis finishes, then report each recognized ID document.
id_documents = poller.result()

for idx, id_document in enumerate(id_documents.documents):
    print("--------Recognizing ID document #{}--------".format(idx + 1))

    # Each field is looked up by name and printed only when it is present.
    if first_name := id_document.fields.get("FirstName"):
        print("First Name: {} has confidence: {}".format(first_name.value_string, first_name.confidence))

    if last_name := id_document.fields.get("LastName"):
        print("Last Name: {} has confidence: {}".format(last_name.value_string, last_name.confidence))

    # This model exposes the document number under the "IdNumber" key.
    if document_number := id_document.fields.get("IdNumber"):
        print("Document Number: {} has confidence: {}".format(document_number.value_string, document_number.confidence))

    # Dates are read as strings here (`value_string`), not as parsed dates.
    if dob := id_document.fields.get("DateOfBirth"):
        print("Date of Birth: {} has confidence: {}".format(dob.value_string, dob.confidence))

    if doe := id_document.fields.get("DateOfExpiration"):
        print("Date of Expiration: {} has confidence: {}".format(doe.value_string, doe.confidence))

    # Place of birth is printed from the field's raw content.
    if pob := id_document.fields.get("PlaceOfBirth"):
        print("Place Of Birth: {} has confidence: {}".format(pob.content, pob.confidence))

    if voo := id_document.fields.get("VillageOfOrigin"):
        print("Village Of Origin: {} has confidence: {}".format(voo.value_string, voo.confidence))


In [None]:
# Display the raw list of analyzed documents from the previous cell's result (shown as the cell output).
id_documents.documents

## Claim Form

In [6]:
from PIL import Image

# Source pages that are merged into a single two-page PDF.
# NOTE(review): both paths currently point at the same logo file, not at
# scanned form pages — confirm the intended inputs.
local_image_path_11 = r"D:\D_Pictures\Project Logos\MyShopLogoDark.jpg"
local_image_path_12 = r"D:\D_Pictures\Project Logos\MyShopLogoDark.jpg"

# Destination of the combined PDF.
combined_image_path = r'D:\D_Documents\Dataal Africa\Ops Code\InsureFlow\Experiments\images\form\combined.pdf'

# Open both pages with context managers so the underlying file handles are
# closed once the PDF has been written (the original left them open).
with Image.open(local_image_path_11) as image_11, Image.open(local_image_path_12) as image_12:
    image_11.save(combined_image_path, "PDF", save_all=True, append_images=[image_12])

# Read the PDF back and encode it for an inline (base64) request body.
with open(combined_image_path, "rb") as f:
    combined_image_data = f.read()
base64_combined_image = base64.b64encode(combined_image_data).decode("utf-8")

# Start the long-running analysis against the proposal-form extraction model.
poller = document_intelligence_client.begin_analyze_document(
    "Temporary_Loss_Of_Income_Protection_Proposal_Form_Extractor_v1",
    body={"base64Source": base64_combined_image}
)

In [7]:
id_documents = poller.result()


def _print_field(label, field, value_attr="value_string"):
    """Print one extracted field as '<label>: <value> has confidence: <confidence>'.

    Args:
        label: Human-readable name printed before the value.
        field: Field object returned by ``fields.get(...)``, or None when the
            model did not return that field.
        value_attr: Name of the attribute on ``field`` that holds the value.

    Absent (falsy) fields are skipped, matching the guarded printing used for
    the ID documents, instead of raising AttributeError on None.
    """
    if field:
        print("{}: {} has confidence: {}".format(label, getattr(field, value_attr), field.confidence))


for idx, id_document in enumerate(id_documents.documents):
    print("--------Recognizing ID document #{}--------".format(idx + 1))

    # Extract fields (each is None when the model did not return it).
    agent_full_name = id_document.fields.get("AgentFullName")
    agent_number = id_document.fields.get("AgentNumber")
    full_name = id_document.fields.get("FullName")
    title = id_document.fields.get("Title")
    gender = id_document.fields.get("Gender")
    id_number = id_document.fields.get("IdNumber")
    date_of_birth = id_document.fields.get("DateOfBirth")
    contact_number = id_document.fields.get("ContactNumber")
    email = id_document.fields.get("Email")
    address = id_document.fields.get("Address")
    nok_full_name = id_document.fields.get("NOK_FullName")
    nok_contact_number = id_document.fields.get("NOK_ContactNumber")
    bank_name = id_document.fields.get("BankName")
    b_date_of_birth = id_document.fields.get("B_DateOfBirth")
    account_number = id_document.fields.get("AccountNumber")
    branch_code = id_document.fields.get("BranchCode")
    existing_insurance_with_other_company = id_document.fields.get("ExistingInsuranceWithOtherCompany")
    existing_chronic_condition = id_document.fields.get("ExistingChronicCondition")
    claim_ailment = id_document.fields.get("ClaimAilment")
    claim_amount = id_document.fields.get("ClaimAmount")
    declined_coverage = id_document.fields.get("DeclinedCoverage")
    declined_cover_reason = id_document.fields.get("DeclinedCoverReason")
    dependents = id_document.fields.get("Dependents")

    # Print extracted fields; missing ones are skipped by the helper.
    _print_field("Agent Full Name", agent_full_name)
    _print_field("Agent Number", agent_number)
    _print_field("Full Name", full_name)
    _print_field("Title", title)
    _print_field("Gender", gender)
    _print_field("ID Number", id_number)
    _print_field("Date of Birth", date_of_birth)
    _print_field("Contact Number", contact_number)
    _print_field("Email", email)
    _print_field("Address", address)
    _print_field("NOK Full Name", nok_full_name)
    _print_field("NOK Contact Number", nok_contact_number)
    _print_field("Bank Name", bank_name)
    _print_field("Beneficiary Date of Birth", b_date_of_birth)
    _print_field("Account Number", account_number)
    _print_field("Branch Code", branch_code)
    _print_field("Existing Insurance With Other Company", existing_insurance_with_other_company)
    _print_field("Existing Chronic Condition", existing_chronic_condition)
    _print_field("Claim Ailment", claim_ailment)
    _print_field("Claim Amount", claim_amount)
    # This field is read through `value_selection_mark` rather than `value_string`.
    _print_field("Declined Coverage", declined_coverage, "value_selection_mark")
    _print_field("Declined Cover Reason", declined_cover_reason)

    # Dependents is an array of objects; print the known sub-fields of each entry.
    if dependents and dependents.value_array:
        print("Dependents:")
        for dependent in dependents.value_array:
            if dependent.value_object:
                for label, field_key in (
                    ("Full Name", "FullName"),
                    ("ID Number", "IdNumber"),
                    ("Date of Birth", "DateOfBirth"),
                    ("Age", "Age"),
                    ("Gender", "Gender"),
                    ("Relationship", "Relationship"),
                ):
                    _print_field("  - " + label, dependent.value_object.get(field_key))

--------Recognizing ID document #1--------
Agent Full Name: None has confidence: 0.938
Agent Number: None has confidence: 0.88
Full Name: None has confidence: 0.936
Title: None has confidence: 0.94
Gender: None has confidence: 0.954
ID Number: None has confidence: 0.916
Date of Birth: None has confidence: 0.874
Contact Number: None has confidence: 0.84
Email: None has confidence: 0.965
Address: None has confidence: 0.921
NOK Full Name: None has confidence: 0.939
NOK Contact Number: None has confidence: 0.841
Bank Name: None has confidence: 0.758
Beneficiary Date of Birth: None has confidence: 0.954
Account Number: None has confidence: 0.936
Branch Code: None has confidence: 0.933
Existing Insurance With Other Company: None has confidence: 0.82
Existing Chronic Condition: None has confidence: 0.937
Claim Ailment: None has confidence: 0.931
Claim Amount: None has confidence: 0.875
Declined Coverage: None has confidence: 0.99
Declined Cover Reason: None has confidence: 0.916


In [8]:
# Inspect the string value of `declined_coverage`, which is left over from the last iteration of the loop above.
# NOTE(review): that loop prints this field via `value_selection_mark`, not `value_string` — confirm which one is populated.
declined_coverage.value_string

In [9]:
# Overall confidence of the last document iterated above (`id_document` is the leaked loop variable).
id_document.confidence

0.001

## Freehand

In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
import numpy as np

from dotenv import load_dotenv
import os
import base64

load_dotenv()

# Azure Document Intelligence connection settings, read from the environment;
# used by analyze_read() when it builds its client.
endpoint = os.getenv("AZURE_ENDPOINT")
key = os.getenv("AZURE_KEY")
if not endpoint or not key:
    # Fail fast with a clear message rather than an obscure SDK error later on.
    raise RuntimeError(
        "AZURE_ENDPOINT and AZURE_KEY must be set in the environment (e.g. via a .env file)."
    )


def format_bounding_box(bounding_box):
    """Render a flat coordinate sequence as a string of "[x, y]" pairs.

    Args:
        bounding_box: Flat sequence of coordinates (x0, y0, x1, y1, ...),
            or a falsy value (None / empty) when no box is available.

    Returns:
        "N/A" for a falsy input, otherwise the pairs joined by ", ",
        e.g. "[1, 2], [3, 4]".
    """
    if bounding_box:
        # View the flat sequence as rows of (x, y).
        points = np.array(bounding_box).reshape(-1, 2)
        formatted_points = []
        for point in points:
            formatted_points.append("[{}, {}]".format(point[0], point[1]))
        return ", ".join(formatted_points)
    return "N/A"

def analyze_read(
    image_path=r'D:\D_Documents\Dataal Africa\Ops Code\InsureFlow\Experiments\images\form\free tests\Handwriting-test-dataset-for-OCR-operation.png',
):
    """Run the "prebuilt-read" model on a local image and print what it found.

    Prints the full recognized text, whether each detected style is
    handwritten, and for every page its dimensions, its lines (with bounding
    boxes) and each word's confidence.

    Args:
        image_path: Path of the image to analyze. Defaults to the handwriting
            test image this notebook was written against, so existing
            no-argument calls behave as before.

    Uses the module-level ``endpoint`` and ``key`` to build the client.
    """
    # sample document
    # formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

    document_intelligence_client = DocumentIntelligenceClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    # Send the image inline as base64.
    with open(image_path, "rb") as f:
        image_data = f.read()
    base64_image = base64.b64encode(image_data).decode("utf-8")

    poller = document_intelligence_client.begin_analyze_document(
        "prebuilt-read", body={"base64Source": base64_image}
    )
    result = poller.result()

    print ("Document contains content: ", result.content)

    # `styles`, `lines` and `words` are optional in the result and may be None;
    # fall back to an empty sequence so the loops do not raise TypeError.
    for style in result.styles or []:
        print(
            "Document contains {} content".format(
                "handwritten" if style.is_handwritten else "no handwritten"
            )
        )

    for page in result.pages:
        print("----Analyzing Read from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines or []):
            print(
                "...Line # {} has text content '{}' within bounding box '{}'".format(
                    line_idx,
                    line.content,
                    format_bounding_box(line.polygon),
                )
            )

        for word in page.words or []:
            print(
                "...Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )

    print("----------------------------------------")


# Run the read analysis with its default image when this code executes as the main module.
if __name__ == "__main__":
    analyze_read()


NameError: name 'base64_combined_image' is not defined