In [2]:
import pandas as pd
import numpy as np
# Widen pandas' rendering limits so large frames are shown without truncation.
# Full "display.*" keys are used: abbreviated keys are resolved by pattern
# matching, which pandas documents as fragile across versions.
# pd.set_option('display.height', 1000)  # kept disabled, as before
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 200)

# `IPython.display` is the public import location for these names; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject CSS so elements with class "container" span the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import os


def df_stats(df):
    """Print a per-column summary of ``df`` and return its first rows.

    The shape is printed first, followed by a table with one row per column:
    column name, null count (``isnull().sum()``), unique count (``nunique()``)
    and dtype.

    The table is rendered with ``tabulate`` when that package is installed.
    When it is not, the summary frame is printed as plain text instead of
    failing on the import.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        ``df.head()``.
    """
    print("\n***** Shape: ", df.shape," *****\n")

    columns_list = df.columns.values.tolist()
    isnull_list = df.isnull().sum().values.tolist()
    isunique_list = df.nunique().values.tolist()
    dtypes_list = df.dtypes.tolist()

    list_stat_val = list(zip(columns_list, isnull_list, isunique_list, dtypes_list))
    df_stat_val = pd.DataFrame(list_stat_val, columns=['Name', 'Null', 'Unique', 'Dtypes'])

    # tabulate is an optional pretty-printer; keep the summary usable without it.
    try:
        from tabulate import tabulate
    except ImportError:
        print(df_stat_val.to_string())
    else:
        print(tabulate(df_stat_val, headers='keys', tablefmt='psql'))
    return df.head()

  from IPython.core.display import display, HTML


In [None]:
from dotenv import load_dotenv
from llama_index.multi_modal_llms.anthropic import AnthropicMultiModal

# Load variables from a .env file, then read the Anthropic key from the
# environment (None when the variable is not set).
load_dotenv()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")

# Multi-modal Anthropic client shared by the extraction cells below.
anthropic_mm_llm = AnthropicMultiModal(
    model="claude-3-opus-20240229",
    anthropic_api_key=anthropic_api_key,
    max_tokens=300,
)

from llama_index.core.multi_modal_llms.generic_utils import load_image_urls
from llama_index.core import SimpleDirectoryReader

# Turn the sample image URLs into image documents for the multi-modal client.
image_documents = load_image_urls(
    [
        "https://i.ibb.co/7Jm0KvX/IMG-2618.jpg",
        "https://i.ibb.co/m92CFv2/IMG-2608.jpg",
        "https://i.ibb.co/3MdsCGv/IMG-2610.jpg",
        "https://i.ibb.co/vV9MCcj/IMG-2611.jpg",
        "https://i.ibb.co/1qjwRbb/Screenshot-2024-09-13-at-9-25-47-PM.png",
    ]
)



In [None]:

import anthropic_vision_script
from anthropic_vision_script import get_ramq as get_ramq_from_image
# Screenshot URLs to run through the imported RAMQ helper, one at a time.
image_urls = [
    "https://i.ibb.co/x3Y2Jjh/Screenshot-2024-09-20-at-3-22-48-PM.png",
    "https://i.ibb.co/GPTwzx5/Screenshot-2024-09-20-at-3-22-43-PM.png",
]

for url in image_urls:
    try:
        person_info = get_ramq_from_image(url, is_image=True)
        # The result is indexed positionally: 0 = RAMQ, 1 and 2 = name parts
        # (printed as [2] then [1]), 3 = an object exposing .date().
        print(f"RAMQ: {person_info[0]}")
        print(f"Name: {person_info[2]} {person_info[1]}")
        # .date() drops the time-of-day part before printing.
        print(f"Date of Birth: {person_info[3].date()}")
        print("---")
    except Exception as exc:
        # A failure on one image is reported and the loop moves on to the next.
        print(f"Error processing image {url}: {exc}")
        print("---")

In [None]:
import re
from pydantic import BaseModel, Field
from datetime import datetime
from typing import Optional, List

class PersonInfo(BaseModel):
    # Required identity fields.
    first_name: str
    last_name: str
    date_of_birth: datetime
    # Optional; defaults to None when no gender is supplied.
    gender: Optional[str] = Field(default=None)
    # Must match the pattern below: four capital letters then eight digits.
    ramq: str = Field(
        ...,
        description="RAMQ number should have 4 letters followed by 8 digits",
        pattern=r"^[A-Z]{4}\d{8}$",
    )


def extract_person_info(text: str) -> Optional[PersonInfo]:
    """Build a ``PersonInfo`` from a JSON reply, deriving DOB and gender from the RAMQ.

    The JSON must contain ``first_name``, ``last_name`` and ``ramq``. Any
    ``date_of_birth`` key in the JSON is not read: the birth date is decoded
    from the RAMQ characters instead (``[4:6]`` year, ``[6:8]`` month,
    ``[8:10]`` day). When the first month digit is 5 or 6, the month carries a
    +50 offset and the gender is "female"; when it is 0 or 1, the gender is
    "male"; otherwise the gender stays ``None``.

    Two-digit years above 50 map to 19xx, others to 20xx. If the resulting
    date would lie after today, the year is moved back one century. The whole
    date is compared, not just the year, so a code for later this year is
    never read as a future birth date.

    Args:
        text: JSON document as returned by the model.

    Returns:
        The populated ``PersonInfo``. The annotation is ``Optional`` but no
        code path returns ``None``.

    Raises:
        json.JSONDecodeError: ``text`` is not valid JSON.
        KeyError: a required key is missing.
        ValueError: the RAMQ digits are not numeric or do not form a valid date.
        Exception: whatever ``PersonInfo`` raises when field validation fails.
    """
    import json

    data = json.loads(text)
    ramq = data['ramq']

    year = int(ramq[4:6])
    month = int(ramq[6:8])
    day = int(ramq[8:10])

    # The first month digit doubles as the gender marker.
    gender = None
    gender_digit = int(ramq[6])
    if gender_digit in (5, 6):
        gender = "female"
        month -= 50
    elif gender_digit in (0, 1):
        gender = "male"

    # Expand the two-digit year, then roll back a century if the full date
    # (not only the year) is still ahead of today.
    year += 1900 if year > 50 else 2000
    today = datetime.now()
    if (year, month, day) > (today.year, today.month, today.day):
        year -= 100

    # Raises ValueError for impossible dates (e.g. month 13, day 31 in June).
    dob = datetime(year, month, day)

    return PersonInfo(
        first_name=data['first_name'],
        last_name=data['last_name'],
        date_of_birth=dob,
        gender=gender,
        ramq=data['ramq'],
    )


for idx, image_doc in enumerate(image_documents):
    # One request per image: the model is asked to OCR it and answer with JSON only.
    response = anthropic_mm_llm.complete(
        image_documents=[image_doc],
        prompt="Perform OCR. Extract the RAMQ number, which MUST have exactly 4 letters followed by exactly 8 digits, totaling 12 characters. Remove all spaces from RAMQ. The first 3 letters of RAMQ are the person's last name use that to look up the last name in the text. First name starts with the 4th letter of the RAMQ AND Should be a name! Extract the person's first name, last name, date of birth, and RAMQ number. Output as JSON with keys: 'first_name', 'last_name', 'date_of_birth' (in %Y/%m/%d format), and 'ramq'. Ensure the RAMQ is exactly 12 characters (4 letters + 8 digits). Double-check your output before responding. Do not be VERBOSE and DO NOT include any text outside the JSON object.",
    )
    print(response)

    # Parse and validate the reply; on any failure show the raw text for debugging.
    try:
        person_info = extract_person_info(response.text)
        if not person_info:
            print("Could not extract all required information. ")
        else:
            print(person_info)
    except Exception as exc:
        print(f"Error processing response: {exc}")
        print("Raw response:", response.text)

In [None]:
import re
from pydantic import BaseModel, Field
from datetime import datetime
from typing import Optional, List


class PersonInfo(BaseModel):
    # Name and birth date are mandatory.
    first_name: str
    last_name: str
    date_of_birth: datetime
    # None unless a gender is passed in.
    gender: Optional[str] = Field(default=None)
    # Required; validated against four capital letters followed by eight digits.
    ramq: str = Field(
        ...,
        description="RAMQ number in format AAAA00000000",
        pattern=r"^[A-Z]{4}\d{8}$",
    )


def extract_person_info(text: str) -> Optional[PersonInfo]:
    """Turn a JSON reply into a ``PersonInfo``.

    The birth date comes from the ``date_of_birth`` key, parsed with the
    ``%Y/%m/%d`` format. Gender is read from the character at index 6 of
    ``ramq``: 5 or 6 gives "female", 0 or 1 gives "male", any other digit
    leaves it as ``None``.

    Args:
        text: JSON document with ``first_name``, ``last_name``,
            ``date_of_birth`` and ``ramq`` keys.

    Returns:
        The populated ``PersonInfo``.
    """
    import json

    payload = json.loads(text)

    birth_date = datetime.strptime(payload["date_of_birth"], "%Y/%m/%d")

    # Digit-to-gender lookup; digits not listed here map to None.
    gender = None
    if "ramq" in payload:
        marker = int(payload["ramq"][6])
        gender = {5: "female", 6: "female", 0: "male", 1: "male"}.get(marker)

    return PersonInfo(
        first_name=payload["first_name"],
        last_name=payload["last_name"],
        date_of_birth=birth_date,
        gender=gender,
        ramq=payload["ramq"],
    )

# Free-text sample sent to the model instead of an image.
person_text = "Le paitent est Robert-Calin Avrma, né le 13 janvier 88, ramq AVRR13018805"

# Instruction text with the sample appended directly after it (no separator).
prompt = (
    "Extract the person's full name, date of birth, gender, and RAMQ number from the text and output as JSON using keys first_name, last_name, date_of_birth in %Y/%m/%d format and ramq which should have 4 letters and 8 digits. Make sure you get the right answer in JSON. Do not be verbose. Here is the text:"
    f"{person_text}"
)

# Text-only request: no image documents are attached.
response = anthropic_mm_llm.complete(image_documents=None, prompt=prompt)
print(response)

# Parse and validate the reply; on any failure show the raw text for debugging.
try:
    person_info = extract_person_info(response.text)
    if not person_info:
        print("Could not extract all required information. ")
    else:
        print(person_info)
except Exception as exc:
    print(f"Error processing response: {exc}")
    print("Raw response:", response.text)


In [None]:
# Image URLs handed to the main.py command-line script, one invocation per URL.
image_urls = [
    "https://i.ibb.co/7Jm0KvX/IMG-2618.jpg",
    "https://i.ibb.co/m92CFv2/IMG-2608.jpg",
    "https://i.ibb.co/3MdsCGv/IMG-2610.jpg",
    "https://i.ibb.co/vV9MCcj/IMG-2611.jpg",
]

for url in image_urls:
    # IPython shell escape: `{url}` is replaced with the Python variable's value
    # before the command runs. The value is quoted but not otherwise escaped,
    # so only use this with trusted URLs.
    !python3 main.py "{url}" --is_image=True
    print("\n")  # Blank lines between runs so each script's output is easy to tell apart

In [None]:
from llama_index.core.multi_modal_llms.generic_utils import load_image_urls

from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt

image_url = "https://i.ibb.co/m92CFv2/IMG-2608.jpg"

# Load the URL as image documents and show what the loader produced.
image_documents = load_image_urls([image_url])
print(image_documents)

# Download the same image directly so it can be rendered inline.
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors here rather than as an opaque
# "cannot identify image" failure when PIL is handed an error page.
img_response = requests.get(image_url, timeout=30)
img_response.raise_for_status()
print(image_documents[0])
img = Image.open(BytesIO(img_response.content))
plt.imshow(img);  # trailing ';' hides the AxesImage repr in the cell output

In [None]:
# IPython shell escape: run main.py on a free-text sample instead of an image URL.
# NOTE(review): presumably --is_image=False makes the script treat the argument as text; confirm in main.py.
!python3 main.py "Le paitent est Robert-Calin Avrma, né le 13 janvier 88, ramq AVRR13018805" --is_image=False


In [None]:
### Patient list

In [None]:
import os
import json
import re
from datetime import datetime
from typing import List, Optional
import tempfile

import requests
from io import BytesIO
from PIL import Image

from dotenv import load_dotenv
from pydantic import BaseModel, Field

from llama_index.multi_modal_llms.anthropic import AnthropicMultiModal
from llama_index.core.schema import ImageDocument

from typing import List, Optional


class PatientInfo(BaseModel):
    # Both name parts are required.
    first_name: str
    last_name: str
    # Optional identifiers; each stays None when not provided.
    patient_number: Optional[str] = Field(default=None)
    room_number: Optional[str] = Field(default=None)


class PatientList(BaseModel):
    # Required wrapper field holding the individual patient records.
    patients: List[PatientInfo] = Field(...)


def _load_json_object(raw_text: str):
    """Parse JSON from a model reply, tolerating text wrapped around the object."""
    try:
        return json.loads(raw_text)
    except json.JSONDecodeError:
        # Replies are sometimes wrapped in code fences or a short preamble;
        # retry on the outermost {...} span and re-raise if there is none.
        start, end = raw_text.find("{"), raw_text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(raw_text[start : end + 1])


def get_patient_list(
    input_data: str, is_image: bool = True, additional_prompt: str = ""
) -> PatientList:
    """Ask the multi-modal model for a list of patients and return it as a ``PatientList``.

    Uses the module-level ``anthropic_mm_llm`` client, which must be defined
    before this function is called.

    Args:
        input_data: An image URL when ``is_image`` is true, otherwise the raw
            text to extract patients from.
        is_image: Selects the image branch (download, save to a temporary PNG,
            send as an image document) or the text branch (text appended to
            the prompt).
        additional_prompt: Extra instructions appended verbatim to the base prompt.

    Returns:
        A ``PatientList`` with one ``PatientInfo`` per entry under the reply's
        ``"patients"`` key.

    Raises:
        ValueError: any failure in the image branch (download, HTTP error
            status, decoding, saving, or the model call), chained to the
            original exception.
        json.JSONDecodeError: the reply contains no parseable JSON.
        KeyError: the reply lacks ``"patients"`` or a patient lacks a name key.
    """
    base_prompt = "Extract a list of patients from the image or text. For each patient, provide their first name and last name. If available, also include their patient number (should be string and digits or digit) and room number (should be a 3-4 digits witgh dashes). Output as JSON with a 'patients' key containing a list of patient objects. Each patient object should have keys: first_name, last_name, and optionally patient_number and room_number. "
    prompt = base_prompt + additional_prompt

    if is_image:
        try:
            # Bounded wait plus an explicit status check, so a dead link fails
            # fast with an HTTP error instead of a confusing PIL decode error.
            http_response = requests.get(input_data, timeout=30)
            http_response.raise_for_status()
            img = Image.open(BytesIO(http_response.content))

            # The model call happens inside the `with` block because the
            # temporary file is deleted as soon as the block exits.
            with tempfile.TemporaryDirectory() as temp_dir:
                image_file = os.path.join(temp_dir, "temp_image.png")
                img.save(image_file)
                response = anthropic_mm_llm.complete(
                    prompt=prompt,
                    image_documents=[ImageDocument(image_path=image_file)],
                )
        except Exception as e:
            raise ValueError(f"Error loading image: {str(e)}") from e
    else:
        response = anthropic_mm_llm.complete(
            prompt=f"{prompt} Here is the text: {input_data}",
            image_documents=None,
        )

    data = _load_json_object(response.text)

    # Names are required (KeyError if absent); the identifiers are optional.
    patients = [
        PatientInfo(
            first_name=patient_data["first_name"],
            last_name=patient_data["last_name"],
            patient_number=patient_data.get("patient_number"),
            room_number=patient_data.get("room_number"),
        )
        for patient_data in data["patients"]
    ]

    return PatientList(patients=patients)


In [None]:
# Load variables from a .env file, then read the Anthropic key from the
# environment (None when the variable is not set).
load_dotenv()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")

# Bind the shared client name to the Sonnet model for the patient-list calls.
anthropic_mm_llm = AnthropicMultiModal(
    model="claude-3-sonnet-20240229",
    anthropic_api_key=anthropic_api_key,
    max_tokens=300,
)


def _show_patient_list(heading, patient_list):
    """Print ``heading``, then each patient's name and any identifiers, ending each entry with '---'."""
    print(heading)
    for patient in patient_list.patients:
        print(f"Name: {patient.first_name} {patient.last_name}")
        # Identifier lines are printed only when the value is truthy.
        if patient.patient_number:
            print(f"Patient Number: {patient.patient_number}")
        if patient.room_number:
            print(f"Room Number: {patient.room_number}")
        print("---")


# Test with text input
text_input = """
Patient List:
1. John Doe, Patient #12345, Room 101
2. Jane Smith, Patient #67890, Room 202
3. Bob Johnson
"""

try:
    patient_list = get_patient_list(text_input, is_image=False)
    _show_patient_list("Patient List from Text:", patient_list)
except Exception as exc:
    print(f"An error occurred: {exc}")

# Test with additional prompt
# `image_url` is not defined in this cell; it must already exist from an earlier cell.
additional_prompt = "Also extract the patient's age if available."

try:
    patient_list = get_patient_list(
        image_url, is_image=True, additional_prompt=additional_prompt
    )
    _show_patient_list("Patient List with Additional Prompt:", patient_list)
except Exception as exc:
    print(f"An error occurred: {exc}")