In [None]:
from datetime import date
from google import genai
from google.genai import types

from pydantic import BaseModel, Field

from dotenv import load_dotenv
# Load settings from a local .env file into the process environment before any
# client is created.
load_dotenv()

# Shared Gemini client used by the extraction request later in this cell.
# NOTE(review): no credentials are passed explicitly, so the client presumably
# reads its API key from the environment populated above — confirm.
client = genai.Client()


# Structured-output schema for the fields read off a Zimbabwe national
# registration card.
# Documented with comments rather than a docstring on purpose: Pydantic copies a
# model docstring into the generated JSON schema's `description`, which would
# change the schema handed to any model that consumes it.
class ZimbabweNationalRegistrationCard(BaseModel):
    id_number: str
    surname: str
    first_name: str
    date_of_birth: date
    # Nullable, but has no default: the key must still be present in the input.
    village_of_origin: str | None
    place_of_birth: str
    date_of_issue: date

    # Pydantic v2 style configuration. The nested `class Config` form is
    # deprecated in v2 and emits a deprecation warning at class-creation time;
    # `model_config` carries the same settings without the warning.
    # Dates are rendered as DD/MM/YYYY when the model is dumped to JSON.
    model_config = {
        "json_encoders": {
            date: lambda v: v.strftime('%d/%m/%Y')
        }
    }


from pydantic import BaseModel
from datetime import date
from typing import Optional

# Agent section of the insurance form; used as the type of
# InsuranceDocument.agent_details.
class AgentDetails(BaseModel):
    full_name_and_surname: str
    agent_number: str  # held as text, not as a numeric type

# Next-of-kin contact block; used as the type of
# InsuredDetails.next_of_kin_details.
class NextOfKinDetails(BaseModel):
    full_name_and_surname: str
    contact_number: str  # held as text, not as a numeric type

# Insured-person section of the insurance form; used as the type of
# InsuranceDocument.insured_details.
class InsuredDetails(BaseModel):
    full_name_and_surname: str
    title: str
    id_number: str
    contact_number: str
    residential_address: str
    gender: str
    date_of_birth: date
    # Nullable, but has no default: the key must still be present in the input.
    email_address: Optional[str]
    next_of_kin_details: NextOfKinDetails

# Bank section of the insurance form; used as the type of
# InsuranceDocument.bank_details.
class BankDetails(BaseModel):
    bank_name: str
    account_number: str  # held as text, not as a numeric type
    branch_code: str
    date_of_birth: date  # seems unusual here, but as per image

# Top-level structured-output schema for the insurance form: one nested model
# per section of the form.
# Documented with comments rather than a docstring on purpose: Pydantic copies a
# model docstring into the generated JSON schema's `description`, which would
# change the schema sent with the extraction request.
class InsuranceDocument(BaseModel):
    agent_details: AgentDetails
    insured_details: InsuredDetails
    bank_details: BankDetails

    # Pydantic v2 style configuration. The nested `class Config` form is
    # deprecated in v2 and emits a deprecation warning at class-creation time;
    # `model_config` carries the same settings without the warning.
    # The encoder renders dates as DD-MM-YYYY.
    # NOTE(review): this model has no date field of its own; the dates live on
    # the nested section models. Under Pydantic v2 a parent's json_encoders may
    # not apply to fields declared on nested models — confirm the dumped date
    # format, and move the encoder onto the nested models if it is ISO instead.
    model_config = {
        "json_encoders": {
            date: lambda v: v.strftime('%d-%m-%Y')
        }
    }

# Read the scanned form as raw bytes. The path is relative to the notebook's
# working directory.
with open('images/form1.jpg', 'rb') as f:
    image_bytes = f.read()

# Send the image and a short instruction to Gemini, asking for a JSON reply
# that conforms to the InsuranceDocument schema.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        types.Part.from_bytes(
            data=image_bytes,
            mime_type='image/jpeg',
        ),
        'Extract that information?'
    ],
    config={
        "response_mime_type": "application/json",
        "response_schema": InsuranceDocument,
    },
)

# `parsed` is None when the reply could not be turned into an InsuranceDocument.
# Fail with the raw reply text in that case instead of an opaque AttributeError
# on the next line.
card_info = response.parsed
if card_info is None:
    raise ValueError(
        "Gemini response could not be parsed into InsuranceDocument; "
        f"raw response text: {response.text!r}"
    )
print(card_info.model_dump_json(indent=4))

### ID

In [30]:
"""
This code sample shows Prebuilt ID Document operations with the Azure AI Document Intelligence client library.
The async versions of the samples require Python 3.8 or later.

To learn more, please visit the documentation - Quickstart: Document Intelligence (formerly Form Recognizer) SDKs
https://learn.microsoft.com/azure/ai-services/document-intelligence/quickstarts/get-started-sdks-rest-api?pivots=programming-language-python
"""

from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, DocumentAnalysisFeature

from dotenv import load_dotenv
import os
import base64

load_dotenv()

# Azure Document Intelligence connection settings, taken from the environment.
endpoint = os.getenv("AZURE_ENDPOINT")
key = os.getenv("AZURE_KEY")

# os.getenv returns None for an unset variable. Stop here with the variable's
# name rather than letting None reach the client constructor.
missing_settings = [
    name
    for name, value in (("AZURE_ENDPOINT", endpoint), ("AZURE_KEY", key))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# sample document
formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/DriverLicense.png"

document_intelligence_client = DocumentIntelligenceClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Alternative request that analyses the hosted sample above instead of a local file:
# poller = document_intelligence_client.begin_analyze_document(
#     "prebuilt-idDocument", AnalyzeDocumentRequest(url_source=formUrl)
# )

# NOTE(review): machine-specific absolute path; this cell only runs on a machine
# with this exact directory layout.
local_image_path = r'D:\D_Documents\Dataal Africa\Ops Code\InsureFlow\Backend\images\id5.jpg'
with open(local_image_path, "rb") as f:
    image_data = f.read()

# The request body carries the image as base64 text.
base64_image = base64.b64encode(image_data).decode("utf-8")

# Start the analysis with the prebuilt ID model. The query-fields add-on asks
# for two extra fields by name.
poller = document_intelligence_client.begin_analyze_document(
    "prebuilt-idDocument",
    body={"base64Source": base64_image},
    features=[DocumentAnalysisFeature.QUERY_FIELDS],    # Specify which add-on capabilities to enable.
    query_fields=["PlaceOfBirth", "VillageOfOrigin"]
)


In [31]:
# Wait for the analysis started in the previous cell and print each recognised
# field together with its confidence.
id_documents = poller.result()

# (field key in the result, label to print, attribute that holds the value).
# The value attribute differs per field, so it is part of the table.
id_field_specs = (
    ("FirstName", "First Name", "value_string"),
    ("LastName", "Last Name", "value_string"),
    ("DocumentNumber", "Document Number", "value_string"),
    ("DateOfBirth", "Date of Birth", "value_date"),
    ("DateOfExpiration", "Date of Expiration", "value_date"),
    ("PlaceOfBirth", "Place Of Birth", "content"),
    ("VillageOfOrigin", "Village Of Origin", "value_string"),
)

# `documents` and `fields` are optional in the analysis result; treat a missing
# value as empty so an image with no recognised document prints nothing instead
# of raising TypeError / AttributeError.
for idx, id_document in enumerate(id_documents.documents or []):
    print("--------Recognizing ID document #{}--------".format(idx + 1))
    document_fields = id_document.fields or {}
    for field_key, label, value_attr in id_field_specs:
        field = document_fields.get(field_key)
        if field:
            print(
                "{}: {} has confidence: {}".format(
                    label, getattr(field, value_attr), field.confidence
                )
            )


--------Recognizing ID document #1--------
First Name: CHIMUNDEGE EXPETROLLECADOLATOR has confidence: 0.179
Last Name: CHIMUNDEGE has confidence: 0.541
Document Number: 45- 190221 E 45 has confidence: 0.503
Date of Birth: 1994-04-03 has confidence: 0.584
Place Of Birth: KAJOKOTO has confidence: 0.443
Village Of Origin: MOUNT DARWIN has confidence: 0.981


In [14]:
# Summarise every analysed page: a banner line followed by its dimensions.
result = poller.result()
for page in result.pages:
    page_banner = f"----Analyzing layout from page #{page.page_number}----"
    print(page_banner)
    page_dimensions = f"Page has width: {page.width} and height: {page.height}, measured with unit: {page.unit}"
    print(page_dimensions)

----Analyzing layout from page #1----
Page has width: 296.0 and height: 170.0, measured with unit: LengthUnit.PIXEL


ID 2

In [6]:
"""
This code sample shows Prebuilt ID Document operations with the Azure AI Document Intelligence client library.
The async versions of the samples require Python 3.8 or later.

To learn more, please visit the documentation - Quickstart: Document Intelligence (formerly Form Recognizer) SDKs
https://learn.microsoft.com/azure/ai-services/document-intelligence/quickstarts/get-started-sdks-rest-api?pivots=programming-language-python
"""

from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, DocumentAnalysisFeature

from dotenv import load_dotenv
import os
import base64

load_dotenv()

# Azure Document Intelligence connection settings, taken from the environment.
endpoint = os.getenv("AZURE_ENDPOINT")
key = os.getenv("AZURE_KEY")

# os.getenv returns None for an unset variable. Stop here with the variable's
# name rather than letting None reach the client constructor.
missing_settings = [
    name
    for name, value in (("AZURE_ENDPOINT", endpoint), ("AZURE_KEY", key))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# sample document
formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/DriverLicense.png"

document_intelligence_client = DocumentIntelligenceClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Alternative request that analyses the hosted sample above instead of a local file:
# poller = document_intelligence_client.begin_analyze_document(
#     "prebuilt-idDocument", AnalyzeDocumentRequest(url_source=formUrl)
# )

# NOTE(review): machine-specific absolute path; this cell only runs on a machine
# with this exact directory layout.
local_image_path = r'D:\D_Documents\Dataal Africa\Ops Code\InsureFlow\Backend\images\id1.jpg'
with open(local_image_path, "rb") as f:
    image_data = f.read()

# The request body carries the image as base64 text.
base64_image = base64.b64encode(image_data).decode("utf-8")

# Start the analysis with the model id "Zimbabwe_National_ID_Extractor_v1"
# (not the prebuilt ID model used in the earlier section).
poller = document_intelligence_client.begin_analyze_document(
    "Zimbabwe_National_ID_Extractor_v1",
    body={"base64Source": base64_image}
)


In [10]:
# Wait for the analysis started in the previous cell and print each recognised
# field together with its confidence.
id_documents = poller.result()

# (field key in the result, label to print, attribute that holds the value).
# Dates are read through value_string here, and the ID number is looked up
# under "IdNumber".
id_field_specs = (
    ("FirstName", "First Name", "value_string"),
    ("LastName", "Last Name", "value_string"),
    ("IdNumber", "Document Number", "value_string"),
    ("DateOfBirth", "Date of Birth", "value_string"),
    ("DateOfExpiration", "Date of Expiration", "value_string"),
    ("PlaceOfBirth", "Place Of Birth", "content"),
    ("VillageOfOrigin", "Village Of Origin", "value_string"),
)

# `documents` and `fields` are optional in the analysis result; treat a missing
# value as empty so an image with no recognised document prints nothing instead
# of raising TypeError / AttributeError.
for idx, id_document in enumerate(id_documents.documents or []):
    print("--------Recognizing ID document #{}--------".format(idx + 1))
    document_fields = id_document.fields or {}
    for field_key, label, value_attr in id_field_specs:
        field = document_fields.get(field_key)
        # A field can be present with an empty value; it is then still printed,
        # with the value shown as None.
        if field:
            print(
                "{}: {} has confidence: {}".format(
                    label, getattr(field, value_attr), field.confidence
                )
            )


--------Recognizing ID document #1--------
First Name: ANDREW WILLIAM L has confidence: 0.586
Last Name: ROBERTS has confidence: 0.878
Document Number: 75- 191961 R 00 has confidence: 0.866
Date of Birth: 17/11/1959 has confidence: 0.902
Place Of Birth: HARARE has confidence: 0.882
Village Of Origin: None has confidence: 0.973


In [9]:
# Display the raw list of analysed documents (each field with its type, value,
# bounding polygon, confidence and span) for manual inspection.
id_documents.documents

[{'docType': 'Zimbabwe_National_ID_Extractor_v1', 'boundingRegions': [{'pageNumber': 1, 'polygon': [0, 0, 296, 0, 296, 170, 0, 170]}], 'fields': {'IdNumber': {'type': 'string', 'valueString': '75- 191961 R 00', 'content': '75- 191961 R 00', 'boundingRegions': [{'pageNumber': 1, 'polygon': [144, 33, 222, 33, 222, 43, 144, 43]}], 'confidence': 0.866, 'spans': [{'offset': 52, 'length': 15}]}, 'LastName': {'type': 'string', 'valueString': 'ROBERTS', 'content': 'ROBERTS', 'boundingRegions': [{'pageNumber': 1, 'polygon': [144, 46, 196, 45, 196, 56, 144, 56]}], 'confidence': 0.878, 'spans': [{'offset': 82, 'length': 7}]}, 'FirstName': {'type': 'string', 'valueString': 'ANDREW WILLIAM L', 'content': 'ANDREW WILLIAM L', 'boundingRegions': [{'pageNumber': 1, 'polygon': [144, 58, 248, 58, 248, 69, 144, 69]}], 'confidence': 0.586, 'spans': [{'offset': 101, 'length': 16}]}, 'DateOfBirth': {'type': 'string', 'valueString': '17/11/1959', 'content': '17/11/1959', 'boundingRegions': [{'pageNumber': 1, 