In [22]:
import snowflake.connector
import pandas as pd
from google.cloud import aiplatform

In [None]:
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

AI READ

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


In [39]:
# Snowflake connection details
SNOWFLAKE_USER = "DOLPHIN"
SNOWFLAKE_PASSWORD = "Maapaa@1603"
SNOWFLAKE_ACCOUNT = "URB63596"
SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
SNOWFLAKE_SCHEMA = "staging_mimc"
DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"

In [40]:
# Vertex AI endpoint details
PROJECT_ID = "168237001903"  # Replace with your GCP Project ID
ENDPOINT_ID = "2125990394700234752"  # Replace with your Endpoint ID
LOCATION = "us-central1"  # Replace with your endpoint region


---

MODEL - ruslanmv/Medical-Llama3-8B (Version 1)

---

In [41]:
# Query to extract clinical discharge notes
QUERY = """
SELECT SUBJECT_ID, HADM_ID, TEXT 
FROM MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE
WHERE TEXT IS NOT NULL
LIMIT 2
"""

In [42]:
def truncate_text(text, max_length=1024):
    """
    Truncates the input text to ensure it fits within the model's token limit.

    Args:
        text (str): The input clinical note.
        max_length (int): Maximum allowed length for the text in tokens.

    Returns:
        str: The truncated text.
    """
    truncated = text[:max_length]
    print(f"Truncated text to {len(truncated)} characters (max {max_length}).")
    return truncated

In [43]:
# Fetch clinical notes from Snowflake
def fetch_clinical_notes():
    try:
        # Connect to Snowflake
        conn = snowflake.connector.connect(
            user=SNOWFLAKE_USER,
            password=SNOWFLAKE_PASSWORD,
            account=SNOWFLAKE_ACCOUNT,
            warehouse=SNOWFLAKE_WAREHOUSE,
            database=SNOWFLAKE_DATABASE,
            schema=SNOWFLAKE_SCHEMA,
        )
        # Execute the query and fetch results
        df = pd.read_sql(QUERY, conn)
        conn.close()
        return df
    except Exception as e:
        print(f"Error fetching data from Snowflake: {e}")
        return pd.DataFrame()

In [44]:
def format_instance(note):
    """
    Formats the clinical note with an instruction for ICD-10 code generation.

    Args:
        note (str): The input clinical note.

    Returns:
        str: The formatted instance for the model.
    """
    prompt = f"""
    You are a clinical language model specialized in generating ICD-10 codes.
    Below is a clinical note. Analyze it and generate the top 10 most relevant ICD-10 codes.

    Clinical Note:
    {note}

    Please return the ICD-10 codes as a Python list of strings.
    """
    print(f"Formatted prompt: {prompt[:200]}...")  # Display the first 200 characters for debugging
    return prompt


In [45]:
def predict_custom_trained_model_sample(project, endpoint_id, location, instances):
    try:
        # Initialize the Vertex AI Prediction client
        aiplatform.init(project=project, location=location)

        # Load the endpoint
        endpoint = aiplatform.Endpoint(endpoint_name=f"projects/{project}/locations/{location}/endpoints/{endpoint_id}")

        # Format the payload to match the expected input structure with truncation
        formatted_instances = [{"inputs": truncate_text(instance["input"])} for instance in instances]


        # Send the prediction request
        response = endpoint.predict(instances=formatted_instances)

        # Extract the predictions
        return response.predictions
    except Exception as e:
        print(f"Error generating predictions: {e}")
        return None


In [46]:
def predict_icd_codes(project, endpoint_id, location, instances):
    """
    Sends clinical notes to a Vertex AI endpoint and retrieves ICD-10 code predictions.

    Args:
        project (str): GCP project ID.
        endpoint_id (str): Vertex AI endpoint ID.
        location (str): Location of the endpoint (e.g., "us-central1").
        instances (list): List of clinical notes to process.

    Returns:
        list: Predictions from the model, or None if an error occurs.
    """
    try:
        print("\nInitializing Vertex AI client...")
        aiplatform.init(project=project, location=location)

        # Load the endpoint
        print(f"Loading endpoint: projects/{project}/locations/{location}/endpoints/{endpoint_id}")
        endpoint = aiplatform.Endpoint(endpoint_name=f"projects/{project}/locations/{location}/endpoints/{endpoint_id}")

        # Format the payload with instructions
        print("Formatting instances with instructions for ICD-10 code generation...")
        formatted_instances = [{"inputs": truncate_text(format_instance(instance["input"]), max_length=896)} for instance in instances]

        # Send the prediction request
        print("Sending prediction request to the endpoint...")
        response = endpoint.predict(
            instances=formatted_instances,
            parameters={"max_new_tokens": 128}  # Reserve tokens for output
        )

        # Extract predictions
        print(f"Predictions received: {response.predictions}")
        return response.predictions

    except Exception as error:
        print(f"Error generating predictions: {error}")
        return None

In [51]:
def fetch_clinical_notes():
    """
    Fetches clinical notes from the Snowflake database.

    Returns:
        DataFrame: A pandas DataFrame containing the clinical notes.
    """
    # Snowflake connection details
# Snowflake connection details
    SNOWFLAKE_USER = "DOLPHIN"
    SNOWFLAKE_PASSWORD = "Maapaa@1603"
    SNOWFLAKE_ACCOUNT = "URB63596"
    SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
    SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
    SNOWFLAKE_SCHEMA = "staging_mimc"
    DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"

    query = """
    SELECT SUBJECT_ID, HADM_ID, TEXT 
    FROM MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE
    WHERE TEXT IS NOT NULL
    LIMIT 1
    """

    try:
        print("\nConnecting to Snowflake...")
        conn = snowflake.connector.connect(
            user=SNOWFLAKE_USER,
            password=SNOWFLAKE_PASSWORD,
            account=SNOWFLAKE_ACCOUNT,
            warehouse=SNOWFLAKE_WAREHOUSE,
            database=SNOWFLAKE_DATABASE,
            schema=SNOWFLAKE_SCHEMA,
        )
        print("Executing query to fetch clinical notes...")
        df = pd.read_sql(query, conn)
        conn.close()
        print(f"Fetched {len(df)} records from Snowflake.")
        return df

    except Exception as e:
        print(f"Error fetching data from Snowflake: {e}")
        return pd.DataFrame()


In [52]:
if __name__ == "__main__":
    # Vertex AI configuration
    PROJECT_ID = "168237001903"  # Replace with your GCP Project ID
    ENDPOINT_ID = "2125990394700234752"  # Replace with your Endpoint ID
    LOCATION = "us-central1"  # Replace with your endpoint region

    print("\nStarting ICD-10 Code Prediction Workflow...\n")

    # Step 1: Fetch clinical notes from Snowflake
    print("Step 1: Fetching clinical notes...")
    clinical_notes_df = fetch_clinical_notes()

    # Step 2: Process each clinical note and generate ICD-10 codes
    if not clinical_notes_df.empty:
        print("Step 2: Processing clinical notes...")
        for _, row in clinical_notes_df.iterrows():
            subject_id = row["SUBJECT_ID"]
            hadm_id = row["HADM_ID"]
            clinical_note = row["TEXT"]

            print(f"\nProcessing SUBJECT_ID: {subject_id}, HADM_ID: {hadm_id}")
            print(f"Clinical Note: {clinical_note[:200]}...")  # Display the first 200 characters

            # Prepare instances with correct structure
            instances = [{"input": clinical_note}]

            # Generate ICD-10 codes using the Vertex AI endpoint
            icd_codes = predict_icd_codes(
                project=PROJECT_ID,
                endpoint_id=ENDPOINT_ID,
                location=LOCATION,
                instances=instances
            )

            # Print the results
            if icd_codes:
                print(f"Generated ICD-10 Codes for SUBJECT_ID {subject_id}: {icd_codes}")
            else:
                print(f"Failed to generate ICD-10 codes for SUBJECT_ID {subject_id}.")
    else:
        print("No clinical notes found in the Snowflake table.")

    print("\nICD-10 Code Prediction Workflow Completed.")


Starting ICD-10 Code Prediction Workflow...

Step 1: Fetching clinical notes...

Connecting to Snowflake...
Executing query to fetch clinical notes...


  df = pd.read_sql(query, conn)


Fetched 1 records from Snowflake.
Step 2: Processing clinical notes...

Processing SUBJECT_ID: 15992303, HADM_ID: 22502053
Clinical Note:  
Name:  ___               Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   M
 
Service: MEDICINE
 
Allergies: 
Amoxicillin / azithromy...

Initializing Vertex AI client...
Loading endpoint: projects/168237001903/locations/us-central1/endpoints/2125990394700234752
Formatting instances with instructions for ICD-10 code generation...
Formatted prompt: 
    You are a clinical language model specialized in generating ICD-10 codes.
    Below is a clinical note. Analyze it and generate the top 10 most relevant ICD-10 codes.

    Clinical Note:
     
Na...
Truncated text to 896 characters (max 896).
Sending prediction request to the endpoint...
Predictions received: ['04 therapy, so after evaluation with characteristic\nradiographic findings in the liver, transplutaminase and AST\nsubsequently

---

 meta-llama/Llama-3.1-8B-Instruct
 
 https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct

---

---

SERVERLESS

---

In [None]:
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="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")

messages = [
	{
		"role": "user",
		"content": "What is the capital of France?"
	}
]

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct", 
	messages=messages, 
	max_tokens=500
)

print(completion.choices[0].message)

In [None]:
import snowflake.connector
from huggingface_hub import InferenceClient
import pandas as pd 

In [2]:
SNOWFLAKE_USER = "DOLPHIN"
SNOWFLAKE_PASSWORD = "Maapaa@1603"
SNOWFLAKE_ACCOUNT = "URB63596"
SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
SNOWFLAKE_SCHEMA = "staging_mimc"
DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"


In [3]:
query = """
SELECT SUBJECT_ID, HADM_ID, TEXT 
FROM MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE
WHERE TEXT IS NOT NULL
LIMIT 1
"""

In [4]:
print("\n[INFO] Connecting to Snowflake...")
conn = snowflake.connector.connect(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,)

print("[INFO] Executing query to fetch clinical notes...\n")
clinical_notes_df = pd.read_sql(query, conn)
conn.close()

print("\n[INFO] printing dataframe data...\n")
print(clinical_notes_df)


[INFO] Connecting to Snowflake...
[INFO] Executing query to fetch clinical notes...



  clinical_notes_df = pd.read_sql(query, conn)



[INFO] printing dataframe data...

   SUBJECT_ID   HADM_ID                                               TEXT
0    15992303  22502053   \nName:  ___               Unit No:   ___\n \...


In [12]:
HF_API_KEY = "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"

# Initialize the Hugging Face Inference Client
print("\n[INFO] Initializing Hugging Face client...\n")
client = InferenceClient(api_key=HF_API_KEY)

print("\n[INFO] Step 2: Processing clinical notes...")

for _, row in clinical_notes_df.iterrows():
    print("====================================================================================")
    subject_id = row["SUBJECT_ID"]
    hadm_id = row["HADM_ID"]
    clinical_note = row["TEXT"]

    print(f"\n[INFO] Processing SUBJECT_ID: {subject_id} \n[INFO] Processing HADM_ID: {hadm_id}")
    
    print("\n[INFO] Formatting message for the model...")

    #TOP 10 ICD CODES
    # message = [
    #     {
    #         "role": "user",
    #         "content": f"""
    #         You are an intelligent clinical language model specialized in generating ICD-10 codes.
    #         Below is a patient's clinical note. Generate only the 10 most relevant ICD-10 codes as a Python list of strings. No explanation and code required in the output.

    #         Clinical Note:
    #         {clinical_note}

    #         Return the output in the following format:

    #         ["ICD10_CODE_1", "ICD10_CODE_2", "ICD10_CODE_3", ..., "ICD10_CODE_10"]
    #         """
    #     }
    # ]

    # Generate top 5 ICD-10 codes with descriptions
    message = [
        {
            "role": "user",
            "content": f"""
            You are an intelligent clinical language model specialized in generating ICD-10 codes.
            Below is a patient's clinical note. Generate only the 5 most relevant ICD-10 codes along with their descriptions. No explanation or additional information is required in the output.

            Clinical Note:
            {clinical_note}

            Return the output in the following format:
            [
                {{"ICD10_CODE_1": "ICD10_CODE_1_DESCRIPTION"}},
                {{"ICD10_CODE_2": "ICD10_CODE_2_DESCRIPTION"}},
                {{"ICD10_CODE_3": "ICD10_CODE_3_DESCRIPTION"}},
                {{"ICD10_CODE_4": "ICD10_CODE_4_DESCRIPTION"}},
                {{"ICD10_CODE_5": "ICD10_CODE_5_DESCRIPTION"}}
            ]
            """
        }
    ]


    print("\n[INFO] Sending request to the Hugging Face model...")
    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        messages=message,
        max_tokens=500
    )

    print("\n [INFO] completion...")
    print(completion)

    icd_response = completion["choices"][0]["message"]["content"].strip()


    print("\n [INFO] response...")
    print(icd_response)

    print("====================================================================================")
    #print(f"[DEBUG] Clinical Note: {clinical_note[:200]}...")  # Show first 200 characters



[INFO] Initializing Hugging Face client...


[INFO] Step 2: Processing clinical notes...

[INFO] Processing SUBJECT_ID: 15992303 
[INFO] Processing HADM_ID: 22502053

[INFO] Formatting message for the model...

[INFO] Sending request to the Hugging Face model...

 [INFO] completion...
ChatCompletionOutput(choices=[ChatCompletionOutputComplete(finish_reason='stop', index=0, message=ChatCompletionOutputMessage(role='assistant', content='[\n    {"K70.9": "Chronic liver disease, unspecified"},\n    {"R73.0": "Jaundice, unspecified"},\n    {"K75.859A": "Acute hepatitis A with hepatic failure"},\n    {"D69.3": "Hemophagocytic lymphohistiocytosis (HLH)"},\n    {"B00.3": "Herpes simplex virus type 1 infection"}\n]', tool_calls=None), logprobs=None)], created=1732729906, id='', model='meta-llama/Llama-3.1-8B-Instruct', system_fingerprint='2.3.1-sha-a094729', usage=ChatCompletionOutputUsage(completion_tokens=91, prompt_tokens=5252, total_tokens=5343))

 [INFO] response...
[
    {"K70.9": "Chron

---

Implementing GCP

---

In [56]:
from google.oauth2 import service_account

# Path to your service account JSON key
SERVICE_ACCOUNT_FILE = "/Users/shreyajaiswal/Desktop/Start-to-Complete/MediAssist-Healthcare-Data-Solutions/vertex-ai.json"

# Authenticate using the service account
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE)


In [57]:
import snowflake.connector
import pandas as pd
import requests
from google.oauth2 import service_account
import google.auth.transport.requests
import json

In [58]:
# Define required SCOPES
SCOPES = ['https://www.googleapis.com/auth/cloud-platform']

# Service account JSON file
SERVICE_ACCOUNT_FILE = '/Users/shreyajaiswal/Desktop/Start-to-Complete/MediAssist-Healthcare-Data-Solutions/vertex-ai.json'

# Vertex AI Configuration
PROJECT_ID = "168237001903"
ENDPOINT_ID = "282329297245437952"
LOCATION = "us-central1"

# Construct the Endpoint URL
ENDPOINT_URL = f"https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:predict"


In [59]:
# Authenticate using the service account
print("[INFO] Authenticating with Google Cloud...")
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES
)

# Refresh credentials to obtain the bearer token
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)
AUTH_TOKEN = credentials.token


[INFO] Authenticating with Google Cloud...


In [60]:
# Snowflake Configuration
SNOWFLAKE_USER = "DOLPHIN"
SNOWFLAKE_PASSWORD = "Maapaa@1603"
SNOWFLAKE_ACCOUNT = "URB63596"
SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
SNOWFLAKE_SCHEMA = "staging_mimc"
DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"

In [61]:
# Snowflake Query
query = """
SELECT SUBJECT_ID, HADM_ID, TEXT 
FROM MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE
WHERE TEXT IS NOT NULL
LIMIT 1
"""

In [62]:
# Step 1: Connect to Snowflake and fetch clinical notes
print("[INFO] Connecting to Snowflake...")
connection = snowflake.connector.connect(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA
)

try:
    clinical_notes_df = pd.read_sql(query, connection)
    print("[INFO] Clinical notes fetched successfully!")
finally:
    connection.close()

[INFO] Connecting to Snowflake...


  clinical_notes_df = pd.read_sql(query, connection)


[INFO] Clinical notes fetched successfully!


In [66]:
# Step 2: Process each clinical note and send to Vertex AI
for _, row in clinical_notes_df.iterrows():
    print("=" * 80)
    subject_id = row["SUBJECT_ID"]
    hadm_id = row["HADM_ID"]
    clinical_note = row["TEXT"]

    print(f"[INFO] Processing SUBJECT_ID: {subject_id}, HADM_ID: {hadm_id}")

    # Prepare the instance payload with token configuration
    payload = {
        "instances": [
            {
                "inputs": f"""
                You are an intelligent clinical language model specialized in generating ICD-10 codes.
                Below is a patient's clinical note. Generate only the 5 most relevant ICD-10 codes along with their descriptions. No explanation or additional information is required in the output.

                Clinical Note:
                {clinical_note}

                Return the output in the following format:
                [
                    {{"ICD10_CODE_1": "ICD10_CODE_1_DESCRIPTION"}},
                    {{"ICD10_CODE_2": "ICD10_CODE_2_DESCRIPTION"}},
                    {{"ICD10_CODE_3": "ICD10_CODE_3_DESCRIPTION"}},
                    {{"ICD10_CODE_4": "ICD10_CODE_4_DESCRIPTION"}},
                    {{"ICD10_CODE_5": "ICD10_CODE_5_DESCRIPTION"}}
                ]
                """,
                "parameters": {
                    "max_input_tokens": 1500,
                    "max_output_tokens": 1000
                }
            }
        ]
    }

    headers = {
        "Authorization": f"Bearer {AUTH_TOKEN}",
        "Content-Type": "application/json"
    }

    # Send the request to Vertex AI
    try:
        print("[INFO] Sending request to Vertex AI...")
        response = requests.post(ENDPOINT_URL, headers=headers, json=payload)

        if response.status_code == 200:
            predictions = response.json().get("predictions", [])
            print("[INFO] Top 5 ICD Codes with Descriptions:")
            for prediction in predictions:
                print(json.dumps(prediction, indent=2))
        else:
            print(f"[ERROR] Failed to get prediction for SUBJECT_ID {subject_id}, HADM_ID {hadm_id}")
            print(f"Status Code: {response.status_code}, Response: {response.text}")

    except Exception as e:
        print(f"[ERROR] An error occurred while processing SUBJECT_ID {subject_id}, HADM_ID {hadm_id}: {e}")

    print("=" * 80)

print("[INFO] Workflow completed.")


[INFO] Processing SUBJECT_ID: 15992303, HADM_ID: 22502053
[INFO] Sending request to Vertex AI...
[ERROR] Failed to get prediction for SUBJECT_ID 15992303, HADM_ID 22502053
Status Code: 500, Response: {"error":"Incomplete generation","error_type":"Incomplete generation"}
[INFO] Workflow completed.


---
---

In [None]:
import snowflake.connector
import pandas as pd
from google.cloud import aiplatform

In [62]:
def fetch_clinical_notes():
    """
    Fetches clinical notes from the Snowflake table MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE.

    Returns:
        DataFrame: A pandas DataFrame containing SUBJECT_ID, HADM_ID, and TEXT.
    """
    # Snowflake connection details
    # Snowflake connection details
    SNOWFLAKE_USER = "DOLPHIN"
    SNOWFLAKE_PASSWORD = "Maapaa@1603"
    SNOWFLAKE_ACCOUNT = "URB63596"
    SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
    SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
    SNOWFLAKE_SCHEMA = "staging_mimc"
    DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"

    query = """
    SELECT SUBJECT_ID, HADM_ID, TEXT 
    FROM MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE
    WHERE TEXT IS NOT NULL
    LIMIT 10
    """

    try:
        print("\nConnecting to Snowflake...")
        conn = snowflake.connector.connect(
            user=SNOWFLAKE_USER,
            password=SNOWFLAKE_PASSWORD,
            account=SNOWFLAKE_ACCOUNT,
            warehouse=SNOWFLAKE_WAREHOUSE,
            database=SNOWFLAKE_DATABASE,
            schema=SNOWFLAKE_SCHEMA,
        )
        print("Executing query to fetch clinical notes...")
        df = pd.read_sql(query, conn)
        conn.close()
        print(f"Fetched {len(df)} records from Snowflake.")
        return df

    except Exception as e:
        print(f"Error fetching data from Snowflake: {e}")
        return pd.DataFrame()


In [68]:
def truncate_text(text, max_length=2000):
    """
    Truncates the input text to ensure it fits within the model's token limit.

    Args:
        text (str): The input clinical note.
        max_length (int): Maximum allowed length for the text in tokens.

    Returns:
        str: The truncated text.
    """
    truncated = text[:max_length]
    print(f"[INFO] Truncated text to {len(truncated)} characters (max {max_length}).")
    return truncated


In [63]:
def format_instance(note):
    """
    Formats the clinical note with an instruction for ICD-10 code generation.

    Args:
        note (str): The input clinical note.

    Returns:
        dict: The formatted instance for the model.
    """
    prompt = f"""
    You are a clinical language model specialized in generating ICD-10 codes.
    Below is a clinical note. Analyze it and generate the top 10 most relevant ICD-10 codes.

    Clinical Note:
    {note}

    Please return the ICD-10 codes as a Python list of strings.
    """
    return {"inputs": prompt}

In [69]:
def predict_icd_codes(project, endpoint_id, location, clinical_notes):
    """
    Sends clinical notes to a Vertex AI endpoint and retrieves ICD-10 code predictions.

    Args:
        project (str): GCP project ID.
        endpoint_id (str): Vertex AI endpoint ID.
        location (str): Location of the endpoint (e.g., "us-central1").
        clinical_notes (list): List of clinical notes to process.

    Returns:
        list: Predictions from the model for each note.
    """
    try:
        print("\nInitializing Vertex AI client...")
        aiplatform.init(project=project, location=location)

        # Load the endpoint
        print(f"Loading endpoint: projects/{project}/locations/{location}/endpoints/{endpoint_id}")
        endpoint = aiplatform.Endpoint(endpoint_name=f"projects/{project}/locations/{location}/endpoints/{endpoint_id}")

        predictions = []
        for note in clinical_notes:
            # Format the payload
            # Format the payload
            instance = format_instance(truncate_text(note, max_length=1900))  # Reserve tokens for max_new_tokens
            print(f"Sending prediction request for clinical note: {note[:200]}...")  # Show the first 200 characters

            # Send the prediction request
            response = endpoint.predict(instances=[instance], parameters={"max_new_tokens": 128})
            predictions.append(response.predictions[0])  # Assume the model returns a list of predictions

        return predictions

    except Exception as error:
        print(f"Error generating predictions: {error}")
        return []

In [70]:
if __name__ == "__main__":
    # Vertex AI configuration
# Vertex AI endpoint details
    PROJECT_ID = "168237001903"  # Replace with your GCP Project ID
    ENDPOINT_ID = "6289568250204258304"  # Replace with your Endpoint ID
    LOCATION = "us-central1"  # Replace with your endpoint region

    print("\n[INFO] Starting ICD-10 Code Prediction Workflow...\n")

    # Step 1: Fetch clinical notes from Snowflake
    print("[INFO] Step 1: Fetching clinical notes...")
    clinical_notes_df = fetch_clinical_notes()

    # Step 2: Process each clinical note and generate ICD-10 codes
    if not clinical_notes_df.empty:
        print("[INFO] Step 2: Processing clinical notes...\n")

        # Extract clinical notes from the DataFrame
        clinical_notes = clinical_notes_df["TEXT"].tolist()

        # Generate ICD-10 codes using the Vertex AI endpoint
        print("[INFO] Sending clinical notes for prediction...")
        icd_codes_predictions = predict_icd_codes(
            project=PROJECT_ID,
            endpoint_id=ENDPOINT_ID,
            location=LOCATION,
            clinical_notes=clinical_notes
        )

        # Display results
        print("\n[INFO] Results:")
        for idx, row in clinical_notes_df.iterrows():
            subject_id = row["SUBJECT_ID"]
            hadm_id = row["HADM_ID"]
            icd_codes = icd_codes_predictions[idx] if idx < len(icd_codes_predictions) else None

            print(f"\n[RESULT] SUBJECT_ID: {subject_id}, HADM_ID: {hadm_id}")
            if icd_codes:
                print(f"[RESULT] Generated ICD-10 Codes: {icd_codes}")
            else:
                print("[WARNING] Failed to generate ICD-10 codes for this clinical note.")
    else:
        print("[WARNING] No clinical notes found in the Snowflake table.")

    print("\n[INFO] ICD-10 Code Prediction Workflow Completed.")


[INFO] Starting ICD-10 Code Prediction Workflow...

[INFO] Step 1: Fetching clinical notes...

Connecting to Snowflake...
Executing query to fetch clinical notes...


  df = pd.read_sql(query, conn)


Fetched 10 records from Snowflake.
[INFO] Step 2: Processing clinical notes...

[INFO] Sending clinical notes for prediction...

Initializing Vertex AI client...
Loading endpoint: projects/168237001903/locations/us-central1/endpoints/6289568250204258304
[INFO] Truncated text to 1900 characters (max 1900).
Sending prediction request for clinical note:  
Name:  ___               Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   M
 
Service: MEDICINE
 
Allergies: 
Amoxicillin / azithromy...
[INFO] Truncated text to 1900 characters (max 1900).
Sending prediction request for clinical note:  
Name:  ___                 Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   F
 
Service: MEDICINE
 
Allergies: 
Patient recorded as h...
[INFO] Truncated text to 1900 characters (max 1900).
Sending prediction request for clinical note:  
Name:  ___                 Unit No:   _

KeyboardInterrupt: 

In [53]:
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="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")

messages = [
	{
		"role": "user",
		"content": "What is the capital of France?"
	}
]

completion = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct", 
	messages=messages, 
	max_tokens=500
)

print(completion.choices[0].message)

ChatCompletionOutputMessage(role='assistant', content='The capital of France is Paris!', tool_calls=None)


---
 meta-llama/Llama-3.1-8B-Instruct

 ---