In [2]:
import pandas as pd
import numpy as np
# Raise pandas' display limits so wide/long frames are rendered with less truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 200)  # full option name instead of the abbreviated 'max_colwidth'

# `display` and `HTML` are imported from IPython.display; importing them from
# IPython.core.display is deprecated and emits a DeprecationWarning.
from IPython.display import display, HTML

# Inject CSS that forces elements with the `.container` class to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import os

# SECURITY: an Anthropic API key used to be hardcoded here in plain text. It has been
# removed on purpose; since it was stored in the notebook, treat that key as compromised
# and revoke/rotate it.
# Supply the key from outside the notebook instead: export ANTHROPIC_API_KEY before
# starting Jupyter, or keep it in a git-ignored .env file (a later cell calls
# load_dotenv() and then reads os.environ["ANTHROPIC_API_KEY"]).


def df_stats(df):
    """Print a per-column summary of ``df`` and return its first rows.

    Prints the frame's shape, then a psql-style table (rendered with
    ``tabulate``) holding, for every column, its name, null count, number of
    distinct values and dtype.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        ``df.head()``.
    """
    # Imported lazily so tabulate is only required when the helper is actually called.
    from tabulate import tabulate

    print("\n***** Shape: ", df.shape, " *****\n")

    # One (name, nulls, distinct, dtype) tuple per column, in column order.
    per_column = zip(
        df.columns.values.tolist(),
        df.isna().sum().values.tolist(),
        df.nunique().values.tolist(),
        df.dtypes.tolist(),
    )
    summary = pd.DataFrame(list(per_column), columns=['Name', 'Null', 'Unique', 'Dtypes'])

    print(tabulate(summary, headers='keys', tablefmt='psql'))
    return df.head()

  from IPython.core.display import display, HTML


In [16]:
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader  # not used in the visible cells; kept for later use
from llama_index.core.multi_modal_llms.generic_utils import load_image_urls
from llama_index.multi_modal_llms.anthropic import AnthropicMultiModal

# Load variables from a local .env file into the process environment, then read the key.
load_dotenv()
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
if not anthropic_api_key:
    # os.environ.get() returns None when the variable is missing; stop here with an
    # actionable message instead of handing None to the client.
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set. Export it in your shell or add it to a .env file."
    )

# Multimodal Anthropic client used by the extraction cell below.
anthropic_mm_llm = AnthropicMultiModal(
    max_tokens=300,
    model="claude-3-sonnet-20240229",
    anthropic_api_key=anthropic_api_key,
)

# Load the image to analyse as image documents from its URL.
image_documents = load_image_urls(["https://i.ibb.co/vmGcz7Z/IMG-0387.jpg"])



In [18]:
import re
from pydantic import BaseModel, Field
from datetime import datetime
from typing import Optional, List

class PersonInfo(BaseModel):
    """Person details extracted from an image, validated by pydantic.

    Instances are built by ``extract_person_info`` from the JSON object the
    model returns.
    """
    # Given name and family name; both required.
    first_name: str
    last_name: str
    # Birth date held as a datetime; extract_person_info parses it with "%Y/%m/%d",
    # so no time-of-day information is carried.
    date_of_birth: datetime
    # Optional; defaults to None. extract_person_info sets it to "female"/"male"
    # based on one character of the RAMQ number, or leaves it None.
    gender: Optional[str] = None
    # Required; must match four uppercase A-Z letters followed by eight digits.
    ramq: str = Field(..., pattern=r"^[A-Z]{4}\d{8}$", description="RAMQ number in format AAAA00000000")


def _parse_json_object(text: str):
    """Decode ``text`` as JSON, tolerating extra text or Markdown fences around the object."""
    import json

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Model replies are sometimes wrapped in ```json fences or prefixed with a
        # sentence; retry on the outermost {...} span before giving up.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(text[start:end + 1])


def _gender_from_ramq(ramq) -> Optional[str]:
    """Return "female"/"male" from the character at index 6 of ``ramq``, else None."""
    # NOTE(review): index 6 is presumably the tens digit of the birth-month field,
    # which appears to be offset by 50 for women -- confirm against the RAMQ format spec.
    if not isinstance(ramq, str) or len(ramq) <= 6:
        return None  # malformed value: leave it to PersonInfo validation to report
    marker = ramq[6]
    if marker in ("5", "6"):
        return "female"
    if marker in ("0", "1"):
        return "male"
    return None


def extract_person_info(text: str) -> Optional[PersonInfo]:
    """Build a ``PersonInfo`` from the model's JSON reply.

    Args:
        text: Reply expected to contain a JSON object with the keys
            ``first_name``, ``last_name``, ``date_of_birth`` (``%Y/%m/%d``)
            and ``ramq``. Text surrounding the object is ignored.

    Returns:
        A validated ``PersonInfo``. ``gender`` is inferred from ``ramq`` and is
        None when it cannot be inferred. This function never returns None
        itself; the Optional annotation is kept for existing callers.

    Raises:
        ValueError: No JSON could be decoded (``json.JSONDecodeError``), or
            ``date_of_birth`` does not match ``%Y/%m/%d``.
        KeyError: A required key is missing from the JSON object.
        Exception: Whatever ``PersonInfo`` raises when a field fails
            validation, e.g. a ``ramq`` that does not match its pattern.
    """
    data = _parse_json_object(text)

    dob = datetime.strptime(data['date_of_birth'], "%Y/%m/%d")

    # Read once: ramq is required by PersonInfo, so a missing key is an error
    # rather than something to guard against silently.
    ramq = data['ramq']

    return PersonInfo(
        first_name=data['first_name'],
        last_name=data['last_name'],
        date_of_birth=dob,
        gender=_gender_from_ramq(ramq),
        ramq=ramq,
    )


# Ask the multimodal model for the identity fields as a compact JSON object.
response = anthropic_mm_llm.complete(
    prompt=(
        "Extract the person's full name, date of birth, gender, and RAMQ number from the image "
        "and output as JSON using keys first_name, last_name, date_of_birth in %Y/%m/%d format "
        "and ramq which should have 4 letters and 8 digits. "
        "Make sure you get the right answer in JSON. Do not be verbose."
    ),
    image_documents=image_documents,
)
# NOTE(review): the printed reply contains personal data (name, birth date, RAMQ number);
# clear this cell's output before sharing or committing the notebook.
print(response)

# Parse and validate the reply; on any failure show the error together with the raw text.
try:
    person_info = extract_person_info(response.text)
    print(person_info if person_info else "Could not extract all required information. ")
except Exception as exc:
    print(f"Error processing response: {exc}")
    print("Raw response:", response.text)

{
  "first_name": "Jacques",
  "last_name": "Soulieres",
  "date_of_birth": "1945/01/17",
  "ramq": "SOUJ45011713"
}
first_name='Jacques' last_name='Soulieres' date_of_birth=datetime.datetime(1945, 1, 17, 0, 0) gender='male' ramq='SOUJ45011713'


In [1]:
# Run the standalone script on the same image URL used earlier in this notebook.
# NOTE(review): the output recorded below pairs each label with the wrong field
# (e.g. "RAMQ:" is printed next to first_name, "Gender:" next to ramq). main.py is not
# part of this notebook; presumably it zips a fixed label list against the model's
# (field, value) pairs in a different order -- verify and fix it there.
!python3 main.py "https://i.ibb.co/vmGcz7Z/IMG-0387.jpg"

RAMQ: ('first_name', 'Jacques')
Last Name: ('last_name', 'Soulieres')
First Name: ('date_of_birth', datetime.datetime(1945, 1, 17, 0, 0))
Date of Birth: ('gender', 'male')
Gender: ('ramq', 'SOUJ45011713')
