In [1]:
# Initial setup
import os
from openai import OpenAI
import json
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
from pathlib import Path
import pickle

import pyodbc
import warnings
warnings.filterwarnings("ignore", message="pandas only supports SQLAlchemy connectable")

from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score
import matplotlib.pyplot as plt 
import seaborn as sns

In [2]:
# Set up local configuration for Llama 3
# The OpenAI client is pointed at an endpoint on localhost:1234 instead of the hosted API.
# NOTE(review): 'lm-studio' looks like a placeholder key for the local server rather than a real secret -- confirm.
client = OpenAI(
    base_url = "http://localhost:1234/v1",
    api_key = 'lm-studio'
)
# Model identifier passed as `model` on every chat completion request in this notebook
deployment_name = 'bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf'

In [None]:
# Test LLM: send a single short user message and print whatever comes back
start_phrase = 'Tagline for an ice cream shop'
response = client.chat.completions.create(
    model=deployment_name,
    messages=[{"role": "user", "content": start_phrase}],
    max_tokens=10,
)
print(response.choices[0].message.content)

In [24]:
# Part 1 of the prompt for the LLM (entered in as the system context)
# This version asks about immunosuppressive conditions from the past medical history.
# get_completion() reads the module-level name `system_context` when it is called, so the
# assignment that ran most recently is the prompt that actually gets sent.
system_context = """This prompt consists of two sections: "Instructions" and "Input," which contains a clinical note for you to parse.

Section 1: Instructions

Based on the past medical history data of the clinical note given in the Input Section, estimate the value and the certainty degree (CD: 0.00 to 1.00) for the following immunosuppression attributes with allowed values in the bracket:

solid organ transplant: [yes, no]
stem cell transplant: [yes, no]
HIV: [yes, no]
acute leukemia: [yes, no]
lymphoma: [yes, no]
multiple myeloma: [yes, no]
immunoglobulin deficiency: [yes, no]

Please follow the requirements below:

1. For each attribute, create 3 key-value pairs:
<attribute_estimation>: <the estimated value based on reading the note>
<attribute_CD>: <the certainty degree of your estimation: [0.00, 1.00]>
<attribute_evidence>: <the supporting evidence for your estimation>

2. Please make sure to make valid inference for attribute estimate based on evidence. If there is no available evidence provided to make estimation, please answer the value as "unknown".

3. Please make sure to output the whole set of answers together as a single JSON file, and don't output anything beyond the required JSON file."""

# Purpose: send one prompt to the LLM, with the current system_context as the system message, and return the reply plus token counts
# Params:
#   prompt: string, the user message (sent after the system_context)
# Produces (as a 4-tuple, in this order):
#   string, the text of the first choice in the LLM's response
#   number, total tokens reported for the request
#   number, tokens in the prompt
#   number, tokens in the completion
def get_completion(prompt):
    # Sampling parameters are fixed here; temperature 0 is used for every request
    request_kwargs = dict(
        model = deployment_name,
        messages = [
            {"role": "system", "content": system_context},
            {"role": "user", "content": prompt},
        ],
        temperature = 0,
        max_tokens = 4000,
        top_p = 1,
        frequency_penalty = 0,
        presence_penalty = 0,
    )
    reply = client.chat.completions.create(**request_kwargs)

    usage = reply.usage
    return (
        reply.choices[0].message.content,
        usage.total_tokens,
        usage.prompt_tokens,
        usage.completion_tokens,
    )

# Purpose: use get_completion() from above to process one row of the notes dataframe containing one note
# Params:
#   row: row of a dataframe, has study ID ('SCRIPT_study_id'), note ID ('note_id'), note text ('text')
# Produces:
#   result, dictionary. On success: Status 1, total/prompt/completion tokens (as strings), study ID, note ID and the
#   raw LLM response (as a string). After 3 failed attempts: Status 0 and the note ID only.
# Side effects:
#   prints progress/errors, and pickles each successful result to temp_<timestamp>_<note_id>.pkl in the working directory
def process_row(row):
    # Don't try more than 3 times if something is going wrong
    max_tries = 3

    for _ in range(max_tries):
        try:
            # Built inside the try so a bad/missing note text counts as a failed attempt instead of crashing the run
            prompt = "Section 2: Input\n\n" + row['text']
            # Get the LLM response and reformat as needed
            response, total_tokens, prompt_tokens, completion_tokens = get_completion(prompt)
            # Store LLM output
            result = {"Status": 1, "Total Tokens": str(total_tokens), "Prompt Tokens": str(prompt_tokens), "Completion Tokens": str(completion_tokens), "SCRIPT_study_id": row['SCRIPT_study_id'], "note_id": row['note_id'], "response": str(response)}
        except Exception as e:
            print(f"Error processing note {row['note_id']}: {e}")
            continue

        print ("Current id: " + str(row['note_id']) + " | tokens: " + str(total_tokens))

        # Save each row output into its own file as we go. os.path.join keeps the original ".\temp_..." location on
        # Windows while also producing a sensible path on other platforms.
        file_path = os.path.join(".", "temp_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + "_" + str(row['note_id']) + ".pkl")
        try:
            with open(file_path, 'wb') as checkpoint_file:
                pickle.dump(result, checkpoint_file)
        except (OSError, pickle.PickleError) as e:
            # The checkpoint is only a safety net: a failed write must not throw away a response we already have
            print(f"Could not save checkpoint for note {row['note_id']} to {file_path}: {e}")

        return result

    # Every attempt raised
    print(str(row['note_id'])+ ": failed note")
    return {"Status": 0, "note_id": row['note_id']}

In [7]:
# Redefine system context if doing meds
# NOTE: this rebinds the same `system_context` name that get_completion() reads at call time, so once this
# cell has run every later request uses the medication prompt. Skip this cell (or re-run the cell that defines
# the conditions prompt) when extracting conditions; a top-to-bottom "Run All" always ends on the medication prompt.
system_context = """This prompt consists of two sections: "Instructions" and "Input," which contains a clinical note for you to parse.

Section 1: Instructions

Based on the current medication use information from the clinical note given in the Input Section, estimate the value and the certainty degree (CD: 0.00 to 1.00) for the current/ongoing use of the following immunosuppressive medications with allowed values in the bracket:

azathioprine (includes azathioprine, imuran, azasan): [yes, no]
corticosteroids (only includes oral or IV steroids greater than 5 mg per day): [yes, no]
cyclosporine (includes cyclosporine, neoral, sandimmune, gengraf): [yes, no]
cyclophosphamide (includes cyclophosphamide, cytoxan): [yes, no]
mycophenolate (includes mycophenolate, cellcept, myfortic): [yes, no]
myelosuppressive chemotherapy: [yes, no]
rituximab (includes rituximab, rituxan, truxima, ruxience, riabni): [yes, no]
tacrolimus (includes tacrolimus, prograf, advagraf, astagraf, envarsus, hecoria): [yes, no]

Please follow the requirements below:

1. For each attribute, create 3 key-value pairs:
<attribute_estimation>: <the estimated value based on reading the note>
<attribute_CD>: <the certainty degree of your estimation: [0.00, 1.00]>
<attribute_evidence>: <the supporting evidence for your estimation>

2. Please make sure to make valid inference for attribute estimate based on evidence. If there is no available evidence provided to make estimation, please answer the value as "unknown".

3. Please make sure to output the whole set of answers together as a single JSON file, and don't output anything beyond the required JSON file."""

In [None]:
# Purpose: read in all notes from a folder and put together with corresponding metadata
# Params:
#   file: Path, path to metadata file (read with pd.read_excel; needs note_id, SCRIPT_study_id and report_type columns)
#   folder_path: string or Path, folder containing the note_<note_id>.txt files (defaults to the placeholder path)
# Produces:
#   notes_df: dataframe, has study ID, note ID, note text and report type, one row per note file that was found.
#   Notes whose file is missing are reported with a print and left out. The four columns are always present,
#   even when no note could be read.
def load_notes(file, folder_path = r'PATH TO FOLDER CONTAINING NOTES'):
    metadata_df = pd.read_excel(file)
    data = []

    for _, row in metadata_df.iterrows():
        note_id = row['note_id']
        script_study_id = row['SCRIPT_study_id']
        note_path = os.path.join(folder_path, f'note_{note_id}.txt')
        try:
            # Separate handle name so the `file` parameter is not shadowed
            with open(note_path, 'r', encoding='utf-8') as note_file:
                text = note_file.read()
        except FileNotFoundError:
            print(f'File not found: {note_path}')  # Handle missing files
            continue
        data.append({'SCRIPT_study_id': script_study_id, 'note_id': note_id, 'text': text, 'report_type': row['report_type']})

    # Fixing the columns keeps the schema stable for downstream cells when `data` is empty
    notes_df = pd.DataFrame(data, columns = ['SCRIPT_study_id', 'note_id', 'text', 'report_type'])
    return notes_df

# Read in all the notes
# hpi_df gets one row per note whose text file was found (SCRIPT_study_id, note_id, text, report_type).
# The path below is a placeholder and has to be replaced with the real metadata file before running.
hpi_df = load_notes(Path(r'PATH TO NOTES METADATA'))

In [None]:
# Process each row sequentially, keeping every non-None result dictionary
results = [
    outcome
    for outcome in (process_row(note_row) for _, note_row in hpi_df.iterrows())
    if outcome is not None
]

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results)
results_df.to_csv(r"./results.csv", index = False)

In [10]:
# Use/modify as needed to get results into the proper format
# Purpose: expand the JSON in one result row's 'response' into flat per-condition columns
# Params:
#   row: row of a dataframe (Series), must have a 'response' entry holding the raw LLM reply
# Produces:
#   df: single-row dataframe. On success it has every original field plus, for each top-level key of the JSON,
#   <key>_attribute_estimation, <key>_attribute_CD and <key>_attribute_evidence. If the response is not a string,
#   has no parseable JSON object, or any condition is not an object with all three attributes, the row is
#   returned unchanged (no partial columns).
def process_immunosuppression_data(row):
    # (name asked for in the prompt, accepted alternative key) for each required attribute
    required_attrs = [('attribute_estimation', 'value'),
                      ('attribute_CD', 'CD'),
                      ('attribute_evidence', 'evidence')]

    def unchanged_row():
        return pd.DataFrame([row.to_dict()])

    json_string = row['response']
    # Failed notes have no response (NaN once they are in a dataframe), so there is nothing to parse
    if not isinstance(json_string, str):
        return unchanged_row()

    # Extract the JSON part from the string: everything from the first '{' to the last '}'
    json_start = json_string.find('{')
    json_end = json_string.rfind('}') + 1

    # Parse the JSON data
    try:
        data = json.loads(json_string[json_start:json_end])
    except json.JSONDecodeError:
        return unchanged_row()

    # Initialize a dictionary to store the processed data
    processed_data = row.to_dict()

    # Process each condition
    for condition, attributes in data.items():
        # A condition mapped to a bare value (number, string, null, list) cannot carry the three attributes
        if not isinstance(attributes, dict):
            return unchanged_row()

        for attr, alt_name in required_attrs:
            if attr in attributes:
                key = attr
            elif alt_name in attributes:
                key = alt_name
            else:
                # A required attribute is missing: give up on the whole row
                return unchanged_row()
            # Columns are always named after the canonical attribute, whichever key was found
            processed_data[f"{condition}_{attr}"] = attributes[key]

    # Create a DataFrame with a single row
    return pd.DataFrame([processed_data])

In [44]:
# Establish SQL server connection
# SERVER is a placeholder and has to be replaced with the real server name before running
SERVER = 'INSERT SERVER NAME'
conn = pyodbc.connect(
    f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER}',
    Trusted_Connection='Yes',
)

# Statements are committed as they run; `conn` is the connection generate_cm() reads its labels through
conn.autocommit = True
cursor = conn.cursor()

In [None]:
# Replace yes/no/unk with 1/0/0 (applied to every cell of results_df that equals one of these strings)
# NOTE(review): results_df as built by the processing cell only has the Status/token/ID/response columns. The
# *_attribute_estimation columns selected below only exist once process_immunosuppression_data() has been applied
# to each row, and no cell in this notebook does that -- confirm where that step is meant to happen.
results_df = results_df.replace({'yes': 1, 'no': 0, 'unknown': 0})

# Get just the study ID and pred columns so agg works as expected; *_attribute_estimation becomes *_pred
predictions_df = (
    results_df[['SCRIPT_study_id'] + [name for name in results_df.columns if name.endswith('attribute_estimation')]]
    .rename(columns=lambda name: name.replace('_attribute_estimation', '_pred'))
)

In [46]:
# Purpose:
#   load the REDCap gold standard immunosuppression labels for SCRIPT 1.0 and SCRIPT 2 patients
# Produces:
#   labels_df: dataframe, has pt_study_id (int) and one column per immunosuppressive condition/medication; a column
#   is 1 when the REDCap free text matches that condition and missing (NULL/NaN) otherwise
# Notes:
#   reads through the module-level `conn` connection and runs both queries on every call
def _load_gold_labels():
    # Query for SCRIPT 1.0 patients
    redcap='''
            SET NOCOUNT ON
            drop table if exists #imc
            select * 
            into #imc 
            from (
            select distinct pt_study_id,type_immunocomp
            from FSM_SCRIPT.fsm_script_redcap_dm.redcap_PROJECTID_demographics
            ) x

            select pt_study_id,
                case when type_immunocomp like '%Acute leukemia%' then 1 end as Leukemia,
                case when type_immunocomp like '%Azathioprine%' then 1 end as Azathioprine,
                case when type_immunocomp like '%Chronic corticosteroids%' then 1 end as Chronic_corticosteroids,
                case when type_immunocomp like '%Cyclosporine%' then 1 end as Cyclosporine,
                case when type_immunocomp like '%Cytoxan%' then 1 end as Cytoxan,
                case when type_immunocomp like '%HIV%' then 1 end as HIV,
                case when type_immunocomp like '%Immunoglobulin deficiency%' then 1 end as Immunoglobulin_deficiency,
                case when type_immunocomp like '%Lymphoma%' then 1 end as Lymphoma,
                case when type_immunocomp like '%Mycophenolate (MMF)%' then 1 end as Mycophenolate,
                case when type_immunocomp like '%Multiple myeloma%' then 1 end as Myeloma,
                case when type_immunocomp like '%Myelosuppressive chemotherapy%' then 1 end as Myelosuppressive_chemo,
                case when type_immunocomp like '%Rituximab%' then 1 end as Rituximab,
                case when type_immunocomp like '%Solid organ transplant%' then 1 end as SOT,
                case when type_immunocomp like '%Stem cell transplant%' then 1 end as Stem_cell_transplant,
                case when type_immunocomp like '%Tacrolimus%' then 1 end as Tacrolimus
            from #imc
            '''

    # Same query as above but for SCRIPT 2 patients
    redcap2='''
            SET NOCOUNT ON
            drop table if exists #imc2
            select * 
            into #imc2
            from (
            select distinct record_id,emr_ic_type
            from FSM_SCRIPT.fsm_script_redcap_dm.redcap_PROJECTID_emr_info
            ) x

            select record_id as pt_study_id,
                case when emr_ic_type like '%Acute leukemia%' then 1 end as Leukemia,
                case when emr_ic_type like '%Azathioprine%' then 1 end as Azathioprine,
                case when emr_ic_type like '%Chronic corticosteroids%' then 1 end as Chronic_corticosteroids,
                case when emr_ic_type like '%Cyclosporine%' then 1 end as Cyclosporine,
                case when emr_ic_type like '%Cytoxan%' then 1 end as Cytoxan,
                case when emr_ic_type like '%HIV%' then 1 end as HIV,
                case when emr_ic_type like '%Immunoglobulin deficiency%' then 1 end as Immunoglobulin_deficiency,
                case when emr_ic_type like '%Lymphoma%' then 1 end as Lymphoma,
                case when emr_ic_type like '%Mycophenolate (MMF)%' then 1 end as Mycophenolate,
                case when emr_ic_type like '%Multiple myeloma%' then 1 end as Myeloma,
                case when emr_ic_type like '%Myelosuppressive chemotherapy%' then 1 end as Myelosuppressive_chemo,
                case when emr_ic_type like '%Rituximab%' then 1 end as Rituximab,
                case when emr_ic_type like '%Solid organ transplant%' then 1 end as SOT,
                case when emr_ic_type like '%Stem cell transplant%' then 1 end as Stem_cell_transplant,
                case when emr_ic_type like '%Tacrolimus%' then 1 end as Tacrolimus
            from #imc2
            '''

    # This dataframe will have study ID and then every immunosuppressive condition listed above and named as above
    labels_df_1 = pd.read_sql(redcap, conn)
    labels_df_1['pt_study_id'] = labels_df_1['pt_study_id'].astype(int)

    # Same dataframe as above but for SCRIPT 2 patients
    labels_df_2 = pd.read_sql(redcap2, conn)
    labels_df_2['pt_study_id'] = labels_df_2['pt_study_id'].astype(int)

    # Put together the labels for SCRIPT 1 and SCRIPT 2
    return pd.concat([labels_df_1, labels_df_2], ignore_index=True)


# Purpose:
#   generate a confusion matrix for LLM identification of a given immunosuppressive condition and return operating characteristics
# Params:
#   predictions_df: dataframe, has study ID and all pred columns
#   immune_condition: string, how the immunosuppressive condition is named in the prediction column
#   gold_label: string, how the immunosuppressive condition is named in the gold standard column
#   show_confusion_matrix: boolean, True makes confusion matrix be displayed (and the metrics printed), False suppresses both
#   return_metrics_instead: boolean, True returns metrics_df (see below), False returns final_df (see below)
#   is_corrections_applied: boolean, accepted because every caller in this notebook passes it. There is no manual chart
#       review correction step in this function, so gold standard labels are always used as they are; passing True
#       only emits a warning. NOTE(review): restore the correction logic here if corrected labels are required.
# Produces:
#   final_df: dataframe, has study ID, all immune condition predictions, all immune condition gold labels
#   metrics_df: dataframe (one row), has all calculated metrics
# Raises:
#   KeyError when immune_condition or gold_label is not a column after predictions and labels are joined
def generate_cm(predictions_df, immune_condition, gold_label, show_confusion_matrix = True, return_metrics_instead = False, is_corrections_applied = False):

    if is_corrections_applied:
        # Make the gap visible instead of silently reporting uncorrected metrics as corrected ones
        warnings.warn(
            "is_corrections_applied=True was requested, but generate_cm() has no correction step; "
            "gold standard labels are used as they are",
            stacklevel = 2,
        )

    # Load in the IC label data
    labels_df = _load_gold_labels()

    # Inner join prediction_df to labels_df to put predictions and labels together (TODO: in ICD version it's right join, make sure this doesn't matter)
    final_df = pd.merge(left = predictions_df, right = labels_df, left_on = 'SCRIPT_study_id', right_on = 'pt_study_id', how = 'inner')
    # Missing labels/predictions count as 0 (absent)
    final_df = final_df.fillna(0)

    for column in (gold_label, immune_condition):
        if column not in final_df.columns:
            raise KeyError(f"'{column}' is not a column of the joined predictions/labels dataframe")

    # Make sure the types match for confusion matrix input
    final_df[gold_label] = final_df[gold_label].astype(int)
    final_df[immune_condition] = final_df[immune_condition].astype(int)

    # Create the confusion matrix (rows: gold standard 0/1, columns: prediction 0/1)
    cm = confusion_matrix(final_df[gold_label], final_df[immune_condition], labels = [0, 1])

    # Calculate metrics
    tn, fp, fn, tp = cm.ravel()
    sensitivity = recall_score(final_df[gold_label], final_df[immune_condition])
    # NaN (rather than a divide-by-zero warning) when there are no gold negatives / no predicted negatives
    specificity = tn / (tn + fp) if (tn + fp) > 0 else float('nan')
    ppv = precision_score(final_df[gold_label], final_df[immune_condition])
    npv = tn / (tn + fn) if (tn + fn) > 0 else float('nan')
    accuracy = accuracy_score(final_df[gold_label], final_df[immune_condition])
    f1 = f1_score(final_df[gold_label], final_df[immune_condition])

    # Only display the CM if asked for
    if show_confusion_matrix:
        # Flip both axes so the positive class comes first, matching the tick labels below
        cm1 = cm[::-1, ::-1]
        plt.figure(figsize=(6, 6))
        sns.heatmap(cm1, annot=True, fmt='d', cmap='Purples',
                xticklabels=['LLM 1', 'LLM 0'],
                yticklabels=['REDCap 1', 'REDCap 0'])
        plt.title(f'Confusion Matrix - {immune_condition}')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()

    # Create DataFrame with metrics
    metrics_df = pd.DataFrame({
        'Comparison': [immune_condition],
        'True Negative': [tn],
        'False Positive': [fp],
        'False Negative': [fn],
        'True Positive': [tp],
        'Sensitivity': [sensitivity],
        'Specificity': [specificity],
        'PPV': [ppv],
        'NPV': [npv],
        'Accuracy': [accuracy],
        'F1 Score': [f1]
    })

    # And only print the metrics if asked for
    if show_confusion_matrix:
        print(metrics_df)

    if return_metrics_instead:
        return metrics_df
    else:
        return final_df


In [None]:
# For doing conditions
# pred_list names the prediction columns; gold_list names the matching gold standard columns, position by position

pred_list = [
    'solid_organ_transplant_pred',
    'stem_cell_transplant_pred',
    'HIV_pred',
    'acute_leukemia_pred',
    'lymphoma_pred',
    'multiple_myeloma_pred',
    'immunoglobulin_deficiency_pred',
]
gold_list = [
    'SOT',
    'Stem_cell_transplant',
    'HIV',
    'Leukemia',
    'Lymphoma',
    'Myeloma',
    'Immunoglobulin_deficiency',
]

# Run generate_cm() for all the conditions with corrections_applied
for pred, gold in zip(pred_list, gold_list):
    generate_cm(predictions_df, pred, gold, is_corrections_applied = True)

In [None]:
# For doing meds
# This rebinds pred_list and gold_list, so any later cell that reads them sees the medication pairs

pred_list = [
    'azathioprine_pred',
    'corticosteroids_pred',
    'cyclosporine_pred',
    'cyclophosphamide_pred',
    'mycophenolate_pred',
    'myelosuppressive_chemotherapy_pred',
    'rituximab_pred',
    'tacrolimus_pred',
]
gold_list = [
    'Azathioprine',
    'Chronic_corticosteroids',
    'Cyclosporine',
    'Cytoxan',
    'Mycophenolate',
    'Myelosuppressive_chemo',
    'Rituximab',
    'Tacrolimus',
]

# Run generate_cm() for all the conditions with corrections_applied
for pred, gold in zip(pred_list, gold_list):
    generate_cm(predictions_df, pred, gold, is_corrections_applied = True)

In [None]:
# Compile corrected metrics
# Uses whichever pred_list / gold_list pair was assigned most recently (conditions or meds)

# One single-row metrics DataFrame per prediction/gold pair, with plots and printing suppressed
metrics_list = [
    generate_cm(predictions_df, pred, gold, show_confusion_matrix=False, is_corrections_applied=True, return_metrics_instead=True)
    for pred, gold in zip(pred_list, gold_list)
]

# Concatenate all individual metric DataFrames into a single DataFrame
final_metrics_df = pd.concat(metrics_list, ignore_index=True)

# Save metrics as a CSV in this directory for subsequent figure generation
final_metrics_df.to_csv(Path('metrics.csv'), index = False)