In [1]:
import pandas as pd
import snowflake.connector
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value

In [2]:
# Snowflake Connection Parameters
SNOWFLAKE_USER = "DOLPHIN"
SNOWFLAKE_PASSWORD = "Maapaa@1603"
SNOWFLAKE_ACCOUNT = "URB63596"
SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
SNOWFLAKE_SCHEMA = "staging_mimc"
DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"

In [3]:
# Vertex AI endpoint configuration
PROJECT_ID = "168237001903"
ENDPOINT_ID = "490761517499678720"
LOCATION = "us-central1"

In [7]:
# Query to fetch clinical notes
query = f"""
SELECT SUBJECT_ID, HADM_ID, TEXT 
FROM {DISCHARGE_TABLE}
WHERE TEXT IS NOT NULL
LIMIT 1
"""

In [5]:
def predict_custom_trained_model_sample(project, endpoint_id, instances, location="us-central1"):
    """
    Sends a prediction request to the Vertex AI endpoint.
    """
    client_options = {"api_endpoint": f"{location}-aiplatform.googleapis.com"}
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)

    instances = [json_format.ParseDict(instance_dict, Value()) for instance_dict in instances]
    parameters_dict = {}
    parameters = json_format.ParseDict(parameters_dict, Value())
    endpoint = client.endpoint_path(project=project, location=location, endpoint=endpoint_id)

    response = client.predict(endpoint=endpoint, instances=instances, parameters=parameters)
    print("Response from Vertex AI:")
    print(" Deployed model ID:", response.deployed_model_id)
    predictions = response.predictions
    for prediction in predictions:
        print(" Prediction:", prediction)
    return predictions

In [8]:
# Step 1: Fetch clinical notes from Snowflake
print("[INFO] Connecting to Snowflake...")
conn = snowflake.connector.connect(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
)

print("[INFO] Executing query to fetch clinical notes...")
clinical_notes_df = pd.read_sql(query, conn)
conn.close()

print("[INFO] Retrieved clinical notes:")
print(clinical_notes_df)

[INFO] Connecting to Snowflake...
[INFO] Executing query to fetch clinical notes...


  clinical_notes_df = pd.read_sql(query, conn)


[INFO] Retrieved clinical notes:
   SUBJECT_ID   HADM_ID                                               TEXT
0    15992303  22502053   \nName:  ___               Unit No:   ___\n \...


In [13]:
# Step 2: Process each clinical note and send to Vertex AI
for _, row in clinical_notes_df.iterrows():
    subject_id = row["SUBJECT_ID"]
    hadm_id = row["HADM_ID"]
    clinical_note = row["TEXT"]

    print(f"\n[INFO] Processing SUBJECT_ID: {subject_id}, HADM_ID: {hadm_id}")

    # Prepare the instance payload with the required "inputs" field
    instance = {
        "inputs": f"""
        You are an intelligent clinical language model specialized in generating ICD-10 medical codes only.
        Below is a patient's clinical note. Generate only the 5 most important ICD-10 codes as a Python list of strings. 
        No explanation or code required in the output. Read the Clinical Note below and follow the expected output format.
        Do not write the solution to the problem in the Python code. Only generate and print the ICD-10 codes.

        Clinical Note:
        {clinical_note[:50]}

        Return the output in the following format:
        ["ICD10_CODE_1", "ICD10_CODE_2", "ICD10_CODE_3", ..., "ICD10_CODE_5"]
        """
    }

    try:
        # Send the prediction request to Vertex AI
        predictions = predict_custom_trained_model_sample(
            project=PROJECT_ID,
            endpoint_id=ENDPOINT_ID,
            instances=[instance],
            location=LOCATION,
        )

        # Display predictions
        print(f"[INFO] Predicted ICD-10 Codes for SUBJECT_ID {subject_id}: {predictions}")

    except Exception as e:
        print(f"[ERROR] Failed to get predictions for SUBJECT_ID {subject_id}: {e}")



[INFO] Processing SUBJECT_ID: 15992303, HADM_ID: 22502053


Python(46587) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Response from Vertex AI:
 Deployed model ID: 484881329314332672
 Prediction: 


# In this problem, and continuing with our clinical note example.
        Output[:]
[SN James]
Tachycardia: patient has a heart rate of 120/min, is short of breath, and has chest pain. 

Dehydration: Patient states they have not passed urine in 12 hours. 
Dehydration: Patient has dry mouth and dark yellow urine.
        """

# Step 5: Generate the recommended ICD-10 codes for the patient's clinical condition based on the clinical note provided.
# Generate the ICD-10 codes by reading the clinical note carefully, following the given steps.

#  ICD-10 Codes
icd_codes = ["T49.3", "E12.00", "E11.52","T78.4","R03.1"]
# List of selected ICD-10 codes for this patient's clinical conditions.
print(icd_codes)        


# Let's assume the answer to this problem actually looks like below for the sake of formatting.


# ["T49.3", "T38.9", "T78.4","R03.1","R07.0"]         


# Because we were asked to demonstrate how to p

In [16]:
import snowflake.connector
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
import json

# Snowflake connection details
SNOWFLAKE_USER = "DOLPHIN"
SNOWFLAKE_PASSWORD = "Maapaa@1603"
SNOWFLAKE_ACCOUNT = "URB63596"
SNOWFLAKE_WAREHOUSE = "ANIMAL_TASK_WH"
SNOWFLAKE_DATABASE = "mimic_iv_medi_assist"
SNOWFLAKE_SCHEMA = "staging_mimc"
DISCHARGE_TABLE = "MIMIC_IV_MEDI_ASSIST.STAGING_MIMIC.STG_DISCHARGE"

# Vertex AI endpoint details
PROJECT = "168237001903"
ENDPOINT_ID = "490761517499678720"
LOCATION = "us-central1"

# Initialize Vertex AI Prediction Service client
client_options = {"api_endpoint": f"{LOCATION}-aiplatform.googleapis.com"}
client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)


Python(47283) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [17]:
# Adjust input payload format
payload = {
    "instances": [
        {
            "inputs": f"""
            You are an intelligent clinical language model specialized in generating ICD-10 codes.
            Below is a patient's clinical note. Generate only the 5 most relevant ICD-10 codes as a Python list of strings. No explanation or code required in the output.

            Clinical Note:
            {clinical_note}

            Return the output in the following format:
            ["ICD10_CODE_1", "ICD10_CODE_2", "ICD10_CODE_3", "ICD10_CODE_4", "ICD10_CODE_5"]
            """
        }
    ]
}

# Response parsing
response = client.predict(endpoint=endpoint, instances=payload["instances"], parameters=parameters)

# Parse the predictions
if response.predictions:
    raw_output = response.predictions[0].get("content", "").strip()
    try:
        icd_codes = json.loads(raw_output)
        print(f"[INFO] Predicted ICD-10 Codes: {icd_codes}")
    except json.JSONDecodeError:
        print(f"[ERROR] Failed to parse the response: {raw_output}")
else:
    print("[ERROR] No predictions received.")


InternalServerError: 500 {"error":"Incomplete generation","error_type":"Incomplete generation"}