In [1]:
import pandas as pd
import numpy as np
# Import from the public `IPython.display` module: importing these names from
# `IPython.core.display` is deprecated since IPython 7.14 and emits a DeprecationWarning.
from IPython.display import display, HTML

# Notebook-wide pandas display limits so wide/long frames are shown without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 200)

# Widen the notebook's `.container` element to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import os


def df_stats(df):
    """Print a per-column summary of ``df`` and return its first rows.

    The summary has one row per column of ``df`` with:
      * ``Name``   - the column label,
      * ``Null``   - number of null entries (``df.isnull().sum()``),
      * ``Unique`` - number of distinct values (``df.nunique()``),
      * ``Dtypes`` - the column dtype.

    The table is rendered with ``tabulate`` (``psql`` format) when that package
    is installed; otherwise it falls back to pandas' plain-text rendering so the
    function still works without the optional dependency.

    Args:
        df: The pandas DataFrame to summarize.

    Returns:
        ``df.head()``, so the first rows are displayed when the call is the
        last expression of a notebook cell.
    """
    print("\n***** Shape: ", df.shape," *****\n")

    # One (name, null count, unique count, dtype) record per column.
    per_column = list(zip(
        df.columns.values.tolist(),
        df.isnull().sum().values.tolist(),
        df.nunique().values.tolist(),
        df.dtypes.tolist(),
    ))
    df_stat_val = pd.DataFrame(per_column, columns=['Name', 'Null', 'Unique', 'Dtypes'])

    try:
        # Optional pretty-printer; imported lazily so its absence is not fatal.
        from tabulate import tabulate
    except ImportError:
        print(df_stat_val.to_string())
    else:
        print(tabulate(df_stat_val, headers='keys', tablefmt='psql'))
    return df.head()

  from IPython.core.display import display, HTML


In [2]:
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader
from llama_index.core.multi_modal_llms.generic_utils import load_image_urls
from llama_index.multi_modal_llms.anthropic import AnthropicMultiModal

# Load environment variables via python-dotenv, then read the API key from the environment
# (this is None when ANTHROPIC_API_KEY is not set).
load_dotenv()
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")

# Multi-modal Anthropic client reused by the extraction cells below.
anthropic_mm_llm = AnthropicMultiModal(
    model="claude-3-opus-20240229",
    max_tokens=300,
    anthropic_api_key=anthropic_api_key,
)

# Turn each sample image URL into an image document for the client.
image_documents = load_image_urls(
    [
        "https://i.ibb.co/7Jm0KvX/IMG-2618.jpg",
        "https://i.ibb.co/m92CFv2/IMG-2608.jpg",
        "https://i.ibb.co/3MdsCGv/IMG-2610.jpg",
        "https://i.ibb.co/vV9MCcj/IMG-2611.jpg",
        "https://i.ibb.co/1qjwRbb/Screenshot-2024-09-13-at-9-25-47-PM.png",
    ]
)

In [3]:
from anthropic_vision_script import validate_ramq
# Smoke-check the validator on one sample identifier; as the last expression of
# the cell, its return value is what the notebook displays.
validate_ramq("MORC64581817")

True

In [4]:
import anthropic_vision_script
from anthropic_vision_script import get_ramq as get_ramq_from_image

# Screenshots to run through the RAMQ extractor.
image_urls = [
    "https://i.ibb.co/x3Y2Jjh/Screenshot-2024-09-20-at-3-22-48-PM.png",
    "https://i.ibb.co/GPTwzx5/Screenshot-2024-09-20-at-3-22-43-PM.png",
]

for url in image_urls:
    try:
        person_info = get_ramq_from_image(url, is_image=True)
        # The result is read by position: 0 = RAMQ, 1 = last name, 2 = first name,
        # 3 = date of birth, 5 = validity flag, 6 = MRN.
        print("RAMQ: {}".format(person_info[0]))
        print("Name: {} {}".format(person_info[2], person_info[1]))
        # .date() keeps only the calendar date part of the value.
        print("Date of Birth: {}".format(person_info[3].date()))
        print("RAMQ is valid: {}".format(person_info[5]))
        print("MRN: {}".format(person_info[6]))
    except Exception as e:
        # Report the failure and move on to the next image.
        print("Error processing image {}: {}".format(url, e))
    # Separator between images, printed on both the success and the error path.
    print("---")

Message(id='msg_01GnvpA2u3d2zNST75HPVgzA', content=[TextBlock(text='{\n    "first_name": "Jacques",\n    "last_name": "Soulieres",\n    "ramq": "SOUJ45011713",\n    "mrn": "N52101"\n}', type='text')], model='claude-3-5-sonnet-20241022', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=558, output_tokens=54, cache_creation_input_tokens=0, cache_read_input_tokens=0))
{
    "first_name": "Jacques",
    "last_name": "Soulieres",
    "ramq": "SOUJ45011713",
    "mrn": "N52101"
}
RAMQ: SOUJ45011713
Name: Jacques Soulieres
Date of Birth: 1945-01-17
RAMQ is valid: True
MRN: N52101
---
Message(id='msg_014DebAjeCreYKCwKaDBMT5R', content=[TextBlock(text='{\n    "first_name": "Line",\n    "last_name": "Masson",\n    "ramq": "MASL64521523",\n    "mrn": "115997"\n}', type='text')], model='claude-3-5-sonnet-20241022', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=627, output_tokens=52, cache

In [8]:
import re
from pydantic import BaseModel, Field
from datetime import datetime
from typing import Optional, List

# Pydantic model holding the identity fields extracted for one person.
# Field values are validated when an instance is constructed.
class PersonInfo(BaseModel):
    first_name: str
    last_name: str
    # In this notebook, extract_person_info builds this value from the date digits
    # embedded in the RAMQ rather than from a separately extracted date.
    date_of_birth: datetime
    # Optional; stays None when no gender is supplied.
    gender: Optional[str] = None
    # Required; must match the pattern below (4 uppercase letters followed by 8 digits).
    ramq: str = Field(..., pattern=r"^[A-Z]{4}\d{8}$", description="RAMQ number should have 4 letters followed by 8 digits")


def extract_person_info(text: str) -> Optional[PersonInfo]:
    """Build a ``PersonInfo`` from a JSON response containing a RAMQ number.

    The date of birth and gender are decoded from the RAMQ itself, whose
    expected layout is ``LLLL YY MM DD NN``: 4 letters, a 2-digit year, a
    2-digit month (offset by 50 for women), a 2-digit day and 2 trailing digits.

    Args:
        text: Model output containing a JSON object with the keys
            ``first_name``, ``last_name`` and ``ramq``. Text surrounding the
            JSON object (prose, Markdown code fences) is ignored.

    Returns:
        A validated ``PersonInfo``. The RAMQ is returned normalized (whitespace
        removed, upper-cased). This function does not return ``None``; failures
        are reported by raising.

    Raises:
        KeyError: If a required key is missing from the JSON object.
        ValueError: If the text is not valid JSON (``json.JSONDecodeError``),
            the RAMQ is not 4 letters followed by 8 digits, or the encoded
            month/day do not form a real calendar date.
    """
    import json
    import re

    # Models sometimes wrap the JSON in prose or a ```json fence; parse only the
    # outermost {...} span when one is present.
    json_match = re.search(r"\{.*\}", text, re.DOTALL)
    data = json.loads(json_match.group(0) if json_match else text)

    # Cards and OCR output often contain spaces or lowercase letters; normalize
    # before slicing so the fixed offsets below line up.
    ramq = re.sub(r"\s+", "", str(data['ramq'])).upper()
    if not re.fullmatch(r"[A-Z]{4}\d{8}", ramq):
        raise ValueError(
            f"RAMQ {ramq!r} is not 4 letters followed by 8 digits"
        )

    year = int(ramq[4:6])
    month = int(ramq[6:8])
    day = int(ramq[8:10])

    # Expand the 2-digit year: values above 50 are taken as 19xx, others as 20xx,
    # and anything that lands after the current year is moved back a century.
    now = datetime.now()
    year += 1900 if year > 50 else 2000
    if year > now.year:
        year -= 100

    # The month field doubles as the gender marker: 51-62 means a woman born in
    # month (value - 50), 01-12 means a man.
    if 51 <= month <= 62:
        gender = "female"
        month -= 50
    elif 1 <= month <= 12:
        gender = "male"
    else:
        raise ValueError(
            f"RAMQ {ramq!r} has an invalid month code {ramq[6:8]!r}"
        )

    # datetime() raises ValueError for impossible days (e.g. day 88).
    dob = datetime(year, month, day)
    if dob > now:
        # Same 2-digit year as the current one but a date that has not happened
        # yet: the person must have been born a century earlier.
        dob = datetime(year - 100, month, day)

    return PersonInfo(
        first_name=data['first_name'],
        last_name=data['last_name'],
        date_of_birth=dob,
        gender=gender,
        ramq=ramq,
    )


# Instruction sent with every image: OCR the card and answer with a JSON object only.
ocr_prompt = "Perform OCR. Extract the RAMQ number, which MUST have exactly 4 letters followed by exactly 8 digits, totaling 12 characters. Remove all spaces from RAMQ. The first 3 letters of RAMQ are the person's last name use that to look up the last name in the text. First name starts with the 4th letter of the RAMQ AND Should be a name! Extract the person's first name, last name, date of birth, and RAMQ number. Output as JSON with keys: 'first_name', 'last_name', and 'ramq'. Ensure the RAMQ is exactly 12 characters (4 letters + 8 digits). Double-check your output before responding. Do not be VERBOSE and DO NOT include any text outside the JSON object."

for idx, image_doc in enumerate(image_documents):
    # One request per image so each response maps to exactly one document.
    response = anthropic_mm_llm.complete(
        prompt=ocr_prompt,
        image_documents=[image_doc],
    )
    print(response)

    try:
        person_info = extract_person_info(response.text)
        if not person_info:
            print("Could not extract all required information. ")
        else:
            print(person_info)
    except Exception as e:
        # Keep going with the remaining images; show what the model returned.
        print("Error processing response: {}".format(e))
        print("Raw response:", response.text)

{
  "first_name": "ALICE",
  "last_name": "PERREAULT",
  "ramq": "PERA51020264"
}
first_name='ALICE' last_name='PERREAULT' date_of_birth=datetime.datetime(1951, 2, 2, 0, 0) gender='male' ramq='PERA51020264'
{
  "first_name": "CLAUDETTE",
  "last_name": "GARAND",
  "dob": "1943-11-14",
  "ramq": "GARC43611416"
}
first_name='CLAUDETTE' last_name='GARAND' date_of_birth=datetime.datetime(1943, 11, 14, 0, 0) gender='female' ramq='GARC43611416'
{
  "first_name": "DENISE",
  "last_name": "DURAND",
  "ramq": "DURD42601912"
}
first_name='DENISE' last_name='DURAND' date_of_birth=datetime.datetime(1942, 10, 19, 0, 0) gender='female' ramq='DURD42601912'
{
  "first_name": "Line",
  "last_name": "Masson",
  "dob": "1964-02-15",
  "ramq": "MASL64521523"
}
first_name='Line' last_name='Masson' date_of_birth=datetime.datetime(1964, 2, 15, 0, 0) gender='female' ramq='MASL64521523'
{
  "last_name": "MEZOURI",
  "first_name": "LANOUARIA",
  "ramq": "MEZL69552812"
}
first_name='LANOUARIA' last_name='MEZOURI

In [7]:
import re
from pydantic import BaseModel, Field
from datetime import datetime
from typing import Optional, List



# Free-text sample (no image) used to exercise the same extraction path.
# NOTE(review): the RAMQ digits in this sample read 13-01-88, so decoding them as
# year-month-day is expected to fail on the day value - confirm this is intentional.
person_text = "Le paitent est Robert-Calin Avrma, né le 13 janvier 88, ramq AVRR13018805"
prompt = f"Extract the person's full name, date of birth, gender, and RAMQ number from the text and output as JSON using keys first_name, last_name, and ramq which should have 4 letters and 8 digits. Make sure you get the right answer in JSON. Do not be verbose. Here is the text:{person_text}"

# Text-only request: no image documents are attached.
response = anthropic_mm_llm.complete(prompt=prompt, image_documents=None)
print(response)

try:
    person_info = extract_person_info(response.text)
    if not person_info:
        print("Could not extract all required information. ")
    else:
        print(person_info)
except Exception as e:
    # Surface the parsing/validation error together with the raw model output.
    print("Error processing response: {}".format(e))
    print("Raw response:", response.text)


{
  "first_name": "Robert-Calin",
  "last_name": "Avrma",
  "date_of_birth": "1988-01-13",
  "gender": "male",
  "ramq": "AVRR13018805"
}
Error processing response: day is out of range for month
Raw response: {
  "first_name": "Robert-Calin",
  "last_name": "Avrma",
  "date_of_birth": "1988-01-13",
  "gender": "male",
  "ramq": "AVRR13018805"
}


In [11]:
import argparse
import re
from anthropic_vision_script import get_ramq

# Images to run through get_ramq.
image_urls = [
    "https://i.ibb.co/7Jm0KvX/IMG-2618.jpg",
    "https://i.ibb.co/m92CFv2/IMG-2608.jpg",
    "https://i.ibb.co/3MdsCGv/IMG-2610.jpg",
    "https://i.ibb.co/vV9MCcj/IMG-2611.jpg",
]


def process_url(url, is_image=True):
    """Run ``get_ramq`` on ``url`` and print the extracted fields.

    Args:
        url: Input passed straight through to ``get_ramq``.
        is_image: Forwarded to ``get_ramq`` as its second argument.

    Returns:
        None. The fields are printed when ``get_ramq`` returns a 7-item tuple
        ``(ramq, last_name, first_name, dob, gender, is_valid, mrn)``; any other
        result is reported instead of being silently dropped.
    """
    result = get_ramq(url, is_image)
    if not (isinstance(result, tuple) and len(result) == 7):
        # Make an unexpected result visible rather than printing nothing at all.
        print(f"Unexpected result for {url}: {result!r}")
        return

    ramq, last_name, first_name, dob, gender, is_valid, mrn = result
    print(f"RAMQ: {ramq}")
    print(f"Last Name: {last_name}")
    print(f"First Name: {first_name}")
    print(f"Date of Birth: {dob}")
    print(f"Gender: {gender}")
    print(f"MRN: {mrn}")
    print(f"Valid RAMQ: {is_valid}")


for url in image_urls:
    process_url(url)

Message(id='msg_01K34ZNumvjN5EZLbnCgBoww', content=[TextBlock(text='{\n  "first_name": "Alice",\n  "last_name": "Perreault",\n  "ramq": "PERA51072611",\n  "mrn": "102687"\n}', type='text')], model='claude-3-5-sonnet-20241022', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=650, output_tokens=53, cache_creation_input_tokens=0, cache_read_input_tokens=0))
{
  "first_name": "Alice",
  "last_name": "Perreault",
  "ramq": "PERA51072611",
  "mrn": "102687"
}
RAMQ: PERA51072611
Last Name: Perreault
First Name: Alice
Date of Birth: 1951-07-26 00:00:00
Gender: male
MRN: 102687
Valid RAMQ: False
Message(id='msg_01A4U9sBFv2CPWVqh66Mkf5k', content=[TextBlock(text='{\n    "first_name": "CLAUDETTE",\n    "last_name": "GARAND",\n    "ramq": "GARC43611416",\n    "mrn": "1F04-0"\n}', type='text')], model='claude-3-5-sonnet-20241022', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=650, output_

### Patient list

In [None]:
from anthropic_vision_script import get_patient_list

# Test with text input
text_input = """
Patient List:
1. John Doe, Patient #12345, Room 101
2. Jane Smith, Patient #67890, Room 202
3. Bob Johnson
"""


def print_patient_list(patient_list):
    """Print each entry of ``patient_list.patients``.

    The name is always printed; the patient number and room number are printed
    only when they are truthy. Each entry is followed by a ``---`` separator.
    """
    for patient in patient_list.patients:
        print(f"Name: {patient.first_name} {patient.last_name}")
        if patient.patient_number:
            print(f"Patient Number: {patient.patient_number}")
        if patient.room_number:
            print(f"Room Number: {patient.room_number}")
        print("---")


patient_list = get_patient_list(text_input, is_image=False)
print("Patient List from Text:")
print_patient_list(patient_list)


# Test with additional prompt
additional_prompt = "Also extract the patient's age if available."

# This cell previously passed an `image_url` name that is never defined in the
# notebook, so the image test always ended in a swallowed NameError. Set the URL
# explicitly here to run it.
patient_list_image_url = None  # TODO: URL of a patient-list image

if patient_list_image_url is None:
    print("Skipping image test: patient_list_image_url is not set.")
else:
    try:
        patient_list = get_patient_list(
            patient_list_image_url,
            is_image=True,
            additional_prompt=additional_prompt,
        )
        print("Patient List with Additional Prompt:")
        print_patient_list(patient_list)
    except Exception as e:
        print(f"An error occurred: {str(e)}")