In [42]:
! pip install snowflake-connector-python

Python(77035) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.




---

IMPORTS

---

In [43]:
import ast
import os
import re
from datetime import datetime, timezone

import pandas as pd
import snowflake.connector
from huggingface_hub import InferenceClient


---

SNOWFLAKE

---

In [44]:
# Snowflake Configuration
def load_snowflake_config(env=None):
    """
    Build the Snowflake connection settings from environment variables.

    Credentials must never live in the notebook itself: the password is read
    from SNOWFLAKE_PASSWORD and has no default. The remaining settings fall
    back to the values this notebook has always used, so only the password
    needs to be exported before running.

    NOTE(review): a password was previously committed in this cell; it should
    be rotated, since it remains visible in the notebook's history.

    Args:
        env (Mapping[str, str] | None): Source of settings. Defaults to
            ``os.environ``; a plain dict can be passed for testing.

    Returns:
        dict: Keyword arguments for ``snowflake.connector.connect``. The
        "password" entry is ``None`` when SNOWFLAKE_PASSWORD is not set, in
        which case connecting is expected to fail.
    """
    if env is None:
        env = os.environ
    return {
        "user": env.get("SNOWFLAKE_USER", "DOLPHIN"),
        "password": env.get("SNOWFLAKE_PASSWORD"),
        "account": env.get("SNOWFLAKE_ACCOUNT", "URB63596"),
        "warehouse": env.get("SNOWFLAKE_WAREHOUSE", "ANIMAL_TASK_WH"),
        "database": env.get("SNOWFLAKE_DATABASE", "mimic_iv_medi_assist"),
        "schema": env.get("SNOWFLAKE_SCHEMA", "PROD_MIMIC"),
    }


SNOWFLAKE_CONFIG = load_snowflake_config()

In [45]:
# SQL Queries
# These are plain strings (no f-prefix): nothing is interpolated by Python.
# The %s markers are bind placeholders filled in by the database cursor.

# All discharge notes that have text.
SELECT_QUERY = """
SELECT DIS_RECORD_ID, DIS_NOTE_TEXT
FROM MIMIC_IV_MEDI_ASSIST.PROD_MIMIC.DIM_DISCHARGE
WHERE DIS_NOTE_TEXT IS NOT NULL
"""

# Discharge notes that do not yet have any row in DIM_MEDICAL_CODES_LLM.
# NOT EXISTS is used instead of NOT IN because `x NOT IN (subquery)` evaluates
# to NULL for every row as soon as the subquery returns a single NULL, which
# would silently make this query return nothing.
UNPROCESSED_RECORDS_QUERY = """
SELECT d.DIS_RECORD_ID, d.DIS_NOTE_TEXT
FROM MIMIC_IV_MEDI_ASSIST.PROD_MIMIC.DIM_DISCHARGE d
WHERE d.DIS_NOTE_TEXT IS NOT NULL
  AND NOT EXISTS (
      SELECT 1
      FROM MIMIC_IV_MEDI_ASSIST.PROD_MIMIC.DIM_MEDICAL_CODES_LLM l
      WHERE l.DIS_RECORD_ID = d.DIS_RECORD_ID
  )
"""


# One row per (discharge record, ICD code); parameters: record id, code, timestamp.
INSERT_MEDICAL_CODES_QUERY = """
INSERT INTO MIMIC_IV_MEDI_ASSIST.PROD_MIMIC.DIM_MEDICAL_CODES_LLM
(DIS_RECORD_ID, DMC_ICD_CODE, CREATED_AT)
VALUES (%s, %s, %s)
"""

# Store a generated summary on a discharge record; parameters: summary, record id.
UPDATE_SUMMARY_QUERY = """
UPDATE MIMIC_IV_MEDI_ASSIST.PROD_MIMIC.DIM_DISCHARGE
SET DIS_NOTE_SUMMARY = %s
WHERE DIS_RECORD_ID = %s
"""


In [46]:
# Helper: Connect to Snowflake
def connect_to_snowflake():
    """Open a new Snowflake connection using SNOWFLAKE_CONFIG and return it."""
    print("Connecting to Snowflake...")
    connection = snowflake.connector.connect(**SNOWFLAKE_CONFIG)
    return connection

---
LLM CODE GENERATION

---

In [47]:
# Helper: Call LLM for ICD-10 Code Generation
def _parse_icd_code_list(response_text):
    """Extract the list of code strings from raw LLM output without executing it."""
    if not isinstance(response_text, str):
        raise ValueError("LLM response has no text content")

    # The model may wrap the list in prose or a markdown fence, so pull out
    # the first bracketed span instead of parsing the whole reply.
    bracketed = re.search(r"\[.*?\]", response_text, re.DOTALL)
    if bracketed is None:
        raise ValueError("no bracketed list found in LLM response")

    # literal_eval only accepts literals; unlike eval() it cannot run code
    # that the model (or a crafted clinical note) places in the reply.
    parsed = ast.literal_eval(bracketed.group(0))

    # Keep only strings so downstream string cleaning cannot crash.
    return [item for item in parsed if isinstance(item, str)]


def call_llm_for_icd(clinical_note, client=None):
    """
    Call an LLM to generate ICD-10 codes from a clinical note.

    Args:
        clinical_note (str): Clinical text to analyze.
        client: Optional pre-built chat client exposing
            ``chat.completions.create``. When omitted, an ``InferenceClient``
            is created using the token in the HF_TOKEN environment variable.

    Returns:
        list: A list of ICD-10 code strings; empty if the response could not
        be parsed as a list.

    NOTE(review): an API token was previously hardcoded in this function; it
    should be revoked, since it remains visible in the notebook's history.
    """
    print("Calling LLM for ICD-10 code generation...")

    # Initialize the LLM client (never embed the token in the notebook)
    if client is None:
        client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

    # Define the prompt for ICD-10 code generation
    prompt_medical_code = f"""
        You are an advanced clinical language model specializing in analyzing patient clinical notes to generate accurate ICD-10 codes.
        Carefully analyze the following clinical note and provide the 5 most relevant ICD-10 codes as a Python list of strings. Do not include explanations, additional text, or any code in the output.

        Clinical Note:
        {clinical_note}

        Return the output in this exact format:

        ["ICD10_CODE_1", "ICD10_CODE_2", "ICD10_CODE_3", ..., "ICD10_CODE_5"]
    """

    # LLM API Call
    messages = [{"role": "user", "content": prompt_medical_code}]
    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=messages,
        max_tokens=500
    )

    # Parse the response
    icd_codes = completion.choices[0].message["content"]

    try:
        print("Parsing ICD-10 codes...")
        return _parse_icd_code_list(icd_codes)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error parsing ICD-10 codes: {e}")
        return []


In [48]:
# Main workflow
def process_discharge_notes():
    """
    Read clinical notes from Snowflake and generate ICD-10 codes (print only).

    This version only prints the generated codes; nothing is written back.
    NOTE: a later cell defines another function with this same name, which
    replaces this one once that cell has been run.

    Any error is reported with a message rather than raised. The cursor and
    connection are always closed, including when processing fails part-way.
    """
    try:
        # Connect to Snowflake
        conn = connect_to_snowflake()
        try:
            cursor = conn.cursor()
            try:
                print("Fetching unprocessed discharge notes...")
                # Execute the query to fetch clinical notes
                cursor.execute(UNPROCESSED_RECORDS_QUERY)
                records = cursor.fetchall()

                print(f"Fetched {len(records)} records.")

                for record_id, clinical_note in records:
                    print(f"Processing Record ID: {record_id}")

                    # Generate ICD-10 codes for each clinical note
                    icd_codes = call_llm_for_icd(clinical_note)

                    # Print the generated ICD-10 codes
                    print(f"Record ID: {record_id}")
                    print(f"ICD-10 Codes: {icd_codes}\n")
            finally:
                # Release the cursor even if a record failed.
                cursor.close()
        finally:
            # Release the connection even if a record failed.
            conn.close()
        print("Processing completed and connection closed.")

    except Exception as e:
        print(f"Error processing discharge notes: {e}")

In [49]:
# Helper: Clean ICD-10 Codes
def clean_icd_codes(icd_codes):
    """
    Clean ICD-10 codes by removing periods and trimming whitespace.

    Entries that are not strings, or that are empty after cleaning, are
    dropped. Duplicates are removed while keeping first-seen order, because
    two raw spellings (e.g. "E11.9" and "E119") can collapse to the same code.

    Args:
        icd_codes (list): List of raw ICD-10 codes. ``None`` or an empty list
            yields an empty result; a single string is treated as one code.

    Returns:
        list: Cleaned, unique ICD-10 codes.
    """
    print("Cleaning ICD-10 codes...")
    if not icd_codes:
        return []
    if isinstance(icd_codes, str):
        # Avoid iterating a bare string character by character.
        icd_codes = [icd_codes]

    cleaned = []
    seen = set()
    for code in icd_codes:
        if not isinstance(code, str):
            continue
        normalized = code.replace(".", "").strip()
        if normalized and normalized not in seen:
            seen.add(normalized)
            cleaned.append(normalized)
    return cleaned

In [50]:
# Main Workflow
def process_discharge_notes():
    """
    Process discharge notes to generate and insert ICD-10 codes into Snowflake.

    For every unprocessed discharge note: ask the LLM for codes, clean them,
    and insert one row per code. Insert failures are reported per record and
    do not stop the run; any other error stops the run and is reported with a
    message rather than raised. The cursor and connection are always closed,
    including when processing fails part-way.
    """
    try:
        # Connect to Snowflake
        conn = connect_to_snowflake()
        try:
            cursor = conn.cursor()
            try:
                print("Fetching unprocessed discharge notes...")
                cursor.execute(UNPROCESSED_RECORDS_QUERY)
                records = cursor.fetchall()

                print(f"Fetched {len(records)} records.")

                for dis_record_id, clinical_note in records:
                    print(f"Processing Record ID: {dis_record_id}")

                    # Generate ICD-10 codes
                    raw_icd_codes = call_llm_for_icd(clinical_note)
                    cleaned_icd_codes = clean_icd_codes(raw_icd_codes)

                    print(f"Raw ICD-10 Codes: {raw_icd_codes}")
                    print(f"Cleaned ICD-10 Codes: {cleaned_icd_codes}")

                    # Nothing to insert: say so instead of reporting a successful insert.
                    if not cleaned_icd_codes:
                        print(f"No ICD-10 codes generated for Discharge Record ID: {dis_record_id}; skipping insert.")
                        continue

                    # Insert cleaned ICD-10 codes into Snowflake.
                    # Same naive-UTC ISO string as datetime.utcnow().isoformat(),
                    # without calling the deprecated utcnow().
                    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
                    icd_records = [(dis_record_id, code, created_at) for code in cleaned_icd_codes]

                    try:
                        cursor.executemany(INSERT_MEDICAL_CODES_QUERY, icd_records)
                        conn.commit()
                        print(f"Inserted ICD-10 codes for Discharge Record ID: {dis_record_id}")
                    except Exception as e:
                        print(f"Error inserting ICD-10 codes for Discharge Record ID {dis_record_id}: {e}")
            finally:
                # Release the cursor even if a record failed.
                cursor.close()
        finally:
            # Release the connection even if a record failed.
            conn.close()
        print("Processing completed. Connection closed.")

    except Exception as e:
        print(f"Error processing discharge notes: {e}")


---
Asynchronous implementation

---

In [51]:
# Run the Workflow
# Entry-point guard: start the discharge-note processing only when this code is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    print("Starting discharge note processing...")
    process_discharge_notes()
    print("Discharge note processing finished.")

Starting discharge note processing...
Connecting to Snowflake...
Fetching unprocessed discharge notes...


KeyboardInterrupt: 