GitHub Repository Link: https://github.com/Fabliha-Bd/AI-HealthCare-LLM/

In [None]:
import pandas as pd
import numpy as np
import openai
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score
from openai import OpenAI


# Data Loading

In [2]:
conditions = pd.read_csv("data/conditions.csv")
print(conditions.head())
medications = pd.read_csv("data/medications.csv")
print(medications.head())
encounters = pd.read_csv("data/encounters.csv")
print(encounters.head())
patients = pd.read_csv("data/patients.csv")
print(patients.head())


        START        STOP                               PATIENT  \
0  2019-02-15  2019-08-01  f0f3bc8d-ef38-49ce-a2bd-dfdda982b271   
1  2019-10-30  2020-01-30  f0f3bc8d-ef38-49ce-a2bd-dfdda982b271   
2  2020-03-01  2020-03-30  f0f3bc8d-ef38-49ce-a2bd-dfdda982b271   
3  2020-03-01  2020-03-01  f0f3bc8d-ef38-49ce-a2bd-dfdda982b271   
4  2020-03-01  2020-03-30  f0f3bc8d-ef38-49ce-a2bd-dfdda982b271   

                              ENCOUNTER       CODE         DESCRIPTION  
0  d5ee30a9-362f-429e-a87a-ee38d999b0a5   65363002        Otitis media  
1  8bca6d8a-ab80-4cbf-8abb-46654235f227   65363002        Otitis media  
2  681c380b-3c84-4c55-80a6-db3d9ea12fee  386661006     Fever (finding)  
3  681c380b-3c84-4c55-80a6-db3d9ea12fee  840544004  Suspected COVID-19  
4  681c380b-3c84-4c55-80a6-db3d9ea12fee  840539006            COVID-19  


# Data Processing

Identifying covid-19 patients

In [6]:
covid_conditions = conditions[
    conditions["DESCRIPTION"].str.contains("COVID-19", case=False, na=False)
]
covid_patient_ids = covid_conditions["PATIENT"].unique()

In [7]:
covid_meds = medications[medications["PATIENT"].isin(covid_patient_ids)]
covid_encounters = encounters[encounters["PATIENT"].isin(covid_patient_ids)]
covid_patients = patients[patients["Id"].isin(covid_patient_ids)]
print(covid_meds.shape)
print(covid_encounters.shape)
print(covid_patients.shape)

(262959, 13)
(205207, 15)
(9106, 25)


Determine survival (DEATHDATE is null = survived)

In [8]:
covid_patients["SURVIVED"] = covid_patients["DEATHDATE"].isna().astype(int)
print(covid_patients.shape)


(9106, 26)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_patients["SURVIVED"] = covid_patients["DEATHDATE"].isna().astype(int)


In [47]:
# Aggregate medications into list
med_summary = covid_meds.groupby("PATIENT")["DESCRIPTION"].apply(lambda x: list(set(x))).reset_index()

# Merge everything into a summary table
summary = covid_patients.merge(med_summary, left_on="Id", right_on="PATIENT", how="left")
summary = summary[["Id", "DESCRIPTION", "SURVIVED"]]
summary.columns = ["PATIENT", "MEDICATIONS", "SURVIVED"]
summary.dropna(subset=["MEDICATIONS"], inplace=True)
print(summary.shape)
print(summary.head())


(6770, 3)
                                PATIENT  \
0  f0f3bc8d-ef38-49ce-a2bd-dfdda982b271   
1  067318a4-db8f-447f-8b6e-f2f61e9baaa5   
2  ae9efba3-ddc4-43f9-a781-f72019388548   
3  199c586f-af16-4091-9998-ee4cfc02ee7a   
5  f58bf921-cba1-475a-b4f8-dc6fa3b8f89c   

                                         MEDICATIONS  SURVIVED  
0  [Amoxicillin 250 MG Oral Capsule, Acetaminophe...         1  
1  [Acetaminophen 160 MG Chewable Tablet, Penicil...         1  
2  [amLODIPine 5 MG / Hydrochlorothiazide 12.5 MG...         1  
3                            [Jolivette 28 Day Pack]         1  
5  [1 ML Enoxaparin sodium 150 MG/ML Prefilled Sy...         1  


In [50]:
min_count = summary['SURVIVED'].value_counts().min()

balanced_summary = summary.groupby("SURVIVED").apply(lambda x: x.sample(n=min_count, random_state=1) if len(x) >= 25 else x)
balanced_summary = balanced_summary.sample(n=50, random_state=1).reset_index(drop=True)

train_df, test_df = train_test_split(balanced_summary, test_size=0.2, random_state=42)
test_df.shape


  balanced_summary = summary.groupby("SURVIVED").apply(lambda x: x.sample(n=min_count, random_state=1) if len(x) >= 25 else x)


(10, 3)

In [68]:
test_df.to_csv("test_df_covid_survival.csv", index=False)
print(test_df.shape)
train_df.to_csv("train_df_covid_survival.csv", index=False)
print(train_df.shape)


(10, 15)
(40, 3)


# openAI gpt calls using multiple techniques

openAI API key setup

In [None]:

client = OpenAI(api_key="use-your-openAIsecretKey-here")

In [89]:
# create prompt
def create_prompt(row):
    meds = ", ".join(row["MEDICATIONS"][:5])
    return f"""Patient was treated for COVID-19 with medications: {meds}.
            Respond only with one word: 'Survived' or 'Did not survive'."""

test_df["LLM_PROMPT"] = test_df.apply(create_prompt, axis=1)


In [91]:
# chain of thought
def create_cot_prompt(row):
    meds = ", ".join(row["MEDICATIONS"][:5])
    return f"""The patient had COVID-19 and was treated with: {meds}.
These medications are commonly used for managing COVID-19 symptoms.
Based on the medications listed, what is the likely outcome for this patient?
Respond with one word : 'Survived' or 'Did not survive'."""

test_df["LLM_PROMPT_COT"] = test_df.apply(create_cot_prompt, axis=1)

In [92]:
# tree of thought
def create_tot_prompt(row):
    meds = ", ".join(row["MEDICATIONS"][:5])
    return f"""Patient was treated for COVID-19 with medications: {meds}.

Step 1: List three possible interpretations of the patient's condition based on the medications.
Step 2: For each interpretation, consider what the outcome might be.
Step 3: Choose the most likely outcome and respond with only one word: 'Survived' or 'Did not survive'."""

test_df["LLM_PROMPT_TOT"] = test_df.apply(create_tot_prompt, axis=1)

In [None]:

def ask_openai(prompt):
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a medical assistant helping assess COVID-19 patient outcomes."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7
        )
        return response.choices[0].message.content.strip()

    except Exception as e:
        return str(e)



In [59]:
# test

llm_outputs = []
for prompt in tqdm(test_df["LLM_PROMPT"], desc="Running GPT calls"):
    llm_outputs.append(ask_openai(prompt))
test_df["LLM_RESPONSE"] = llm_outputs
test_df[["LLM_PROMPT","LLM_RESPONSE","SURVIVED"]]


Running GPT calls:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,LLM_PROMPT,LLM_RESPONSE,SURVIVED
13,Patient was treated for COVID-19 with medicati...,Survived,1
39,Patient was treated for COVID-19 with medicati...,Survived,1
30,Patient was treated for COVID-19 with medicati...,Survived,1
45,Patient was treated for COVID-19 with medicati...,"As an AI language model developed by OpenAI, I...",0
17,Patient was treated for COVID-19 with medicati...,Survived,0
48,Patient was treated for COVID-19 with medicati...,"As an AI model developed by OpenAI, I don't ha...",0
26,Patient was treated for COVID-19 with medicati...,Survived,1
25,Patient was treated for COVID-19 with medicati...,Survived,0
32,Patient was treated for COVID-19 with medicati...,Survived,0
19,Patient was treated for COVID-19 with medicati...,Survived,1


In [98]:
test_df["LLM_PROMPT_TOT"][13]

"Patient was treated for COVID-19 with medications: NDA020800 0.3 ML Epinephrine 1 MG/ML Auto-Injector, Fexofenadine hydrochloride 30 MG Oral Tablet, Seasonique 91 Day Pack.\n\nStep 1: List three possible interpretations of the patient's condition based on the medications.\nStep 2: For each interpretation, consider what the outcome might be.\nStep 3: Choose the most likely outcome and respond with only one word: 'Survived' or 'Did not survive'."

In [60]:
# few shots
few_shot_intro = """
Example 1:
Patient was treated for COVID-19 with medications: Acetaminophen, Amoxicillin.
Answer: Survived

Example 2:
Patient was treated for COVID-19 with medications: Ibuprofen, Penicillin.
Answer: Did not survive

Now answer the following with one word: 'Survived' or 'Did not Survive' 
"""
test_df["FEW_SHOT_PROMPT"] = few_shot_intro + test_df["LLM_PROMPT"]
few_shot_outputs = []
for prompt in tqdm(test_df["FEW_SHOT_PROMPT"], desc="Running Few-shot GPT calls"):
    few_shot_outputs.append(ask_openai(prompt))
test_df["LLM_FEW_SHOT_RESPONSE"] = few_shot_outputs

test_df[["FEW_SHOT_PROMPT","LLM_FEW_SHOT_RESPONSE","SURVIVED"]]

Running Few-shot GPT calls:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,FEW_SHOT_PROMPT,LLM_FEW_SHOT_RESPONSE,SURVIVED
13,\nExample 1:\nPatient was treated for COVID-19...,Survived,1
39,\nExample 1:\nPatient was treated for COVID-19...,Survived,1
30,\nExample 1:\nPatient was treated for COVID-19...,Survived,1
45,\nExample 1:\nPatient was treated for COVID-19...,Survived,0
17,\nExample 1:\nPatient was treated for COVID-19...,Survived,0
48,\nExample 1:\nPatient was treated for COVID-19...,Survived,0
26,\nExample 1:\nPatient was treated for COVID-19...,Survived,1
25,\nExample 1:\nPatient was treated for COVID-19...,Survived,0
32,\nExample 1:\nPatient was treated for COVID-19...,Survived,0
19,\nExample 1:\nPatient was treated for COVID-19...,Survived,1


In [61]:
# chain of thoughts
cot_outputs = []
for prompt in tqdm(test_df["LLM_PROMPT_COT"], desc="Running CoT GPT calls"):
    cot_outputs.append(ask_openai(prompt))
test_df["LLM_COT_RESPONSE"] = cot_outputs
test_df[["LLM_PROMPT_COT", "LLM_COT_RESPONSE", "SURVIVED"]]

Running CoT GPT calls:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,LLM_PROMPT_COT,LLM_COT_RESPONSE,SURVIVED
13,The patient had COVID-19 and was treated with:...,Survived,1
39,The patient had COVID-19 and was treated with:...,Did not survive,1
30,The patient had COVID-19 and was treated with:...,"The medications listed, Ferrous sulfate and Ca...",1
45,The patient had COVID-19 and was treated with:...,Survived,0
17,The patient had COVID-19 and was treated with:...,"As an AI, I don't have real-time access to ind...",0
48,The patient had COVID-19 and was treated with:...,Survived,0
26,The patient had COVID-19 and was treated with:...,Survived,1
25,The patient had COVID-19 and was treated with:...,Survived,0
32,The patient had COVID-19 and was treated with:...,"As an AI, I don't have the ability to predict ...",0
19,The patient had COVID-19 and was treated with:...,'Survived',1


In [62]:
# tree of thoughts
tot_outputs = []
for prompt in tqdm(test_df["LLM_PROMPT_TOT"], desc="Running ToT GPT calls"):
    tot_outputs.append(ask_openai(prompt))
test_df["LLM_TOT_RESPONSE"] = tot_outputs
print(test_df[["LLM_PROMPT_TOT", "LLM_TOT_RESPONSE", "SURVIVED"]])
print(test_df['LLM_TOT_RESPONSE'][30])

Running ToT GPT calls:   0%|          | 0/10 [00:00<?, ?it/s]

In [63]:
# print(test_df[["LLM_PROMPT_TOT", "LLM_TOT_RESPONSE", "SURVIVED"]])
# print(test_df['LLM_TOT_RESPONSE'][30])

                                       LLM_PROMPT_TOT  \
13  Patient was treated for COVID-19 with medicati...   
39  Patient was treated for COVID-19 with medicati...   
30  Patient was treated for COVID-19 with medicati...   
45  Patient was treated for COVID-19 with medicati...   
17  Patient was treated for COVID-19 with medicati...   
48  Patient was treated for COVID-19 with medicati...   
26  Patient was treated for COVID-19 with medicati...   
25  Patient was treated for COVID-19 with medicati...   
32  Patient was treated for COVID-19 with medicati...   
19  Patient was treated for COVID-19 with medicati...   

                                     LLM_TOT_RESPONSE  SURVIVED  
13  Step 1: \n1. The patient might have experience...         1  
39  Step 1: \na) The patient might have cancer: Le...         1  
30  Step 1:\n1. The patient might have been anemic...         1  
45  Step 1: \n1. The patient might have been suffe...         0  
17  Step 1:\n1) The patient has severe COV

In [93]:
# cleaning the response as some responses are large and ambigous, keeping the response in one word
def clean_response(text):
    text = text.lower().strip()
    if "did not survive" in text:
        return 0
    elif "survived" in text:
        return 1
    return -1


# Evaluation

In [65]:
test_df["LLM_CLEAN"] = test_df["LLM_RESPONSE"].apply(clean_response)
test_df["LLM_FEW_SHOT_CLEAN"] = test_df["LLM_FEW_SHOT_RESPONSE"].apply(clean_response)
test_df["LLM_COT_CLEAN"] = test_df["LLM_COT_RESPONSE"].apply(clean_response)
test_df["LLM_TOT_CLEAN"] = test_df["LLM_TOT_RESPONSE"].apply(clean_response)


true_labels = test_df["SURVIVED"]
pred_llm = test_df["LLM_CLEAN"]
pred_few = test_df["LLM_FEW_SHOT_CLEAN"]
pred_cot = test_df["LLM_COT_CLEAN"]
pred_tot = test_df["LLM_TOT_CLEAN"]



In [66]:
test_df[['SURVIVED','LLM_RESPONSE','LLM_CLEAN', 'LLM_FEW_SHOT_RESPONSE','LLM_FEW_SHOT_CLEAN','LLM_COT_CLEAN', 'LLM_TOT_CLEAN']]

Unnamed: 0,SURVIVED,LLM_RESPONSE,LLM_CLEAN,LLM_FEW_SHOT_RESPONSE,LLM_FEW_SHOT_CLEAN,LLM_COT_CLEAN,LLM_TOT_CLEAN
13,1,Survived,1,Survived,1,1,1
39,1,Survived,1,Survived,1,0,1
30,1,Survived,1,Survived,1,-1,1
45,0,"As an AI language model developed by OpenAI, I...",-1,Survived,1,1,0
17,0,Survived,1,Survived,1,-1,0
48,0,"As an AI model developed by OpenAI, I don't ha...",-1,Survived,1,1,0
26,1,Survived,1,Survived,1,1,1
25,0,Survived,1,Survived,1,1,0
32,0,Survived,1,Survived,1,-1,0
19,1,Survived,1,Survived,1,1,1


In [67]:
auroc_llm = roc_auc_score(true_labels, pred_llm)
auprc_llm = average_precision_score(true_labels, pred_llm)
auroc_few = roc_auc_score(true_labels, pred_few)
auprc_few = average_precision_score(true_labels, pred_few)
auroc_cot = roc_auc_score(true_labels, pred_cot)
auprc_cot = average_precision_score(true_labels, pred_cot)
auroc_tot = roc_auc_score(true_labels, pred_tot)
auprc_tot = average_precision_score(true_labels, pred_tot)

print(f"LLM AUROC: {auroc_llm:.3f}, AUPRC: {auprc_llm:.3f}")
print(f"Few-shot AUROC: {auroc_few:.3f}, AUPRC: {auprc_few:.3f}")
print(f"CoT AUROC: {auroc_cot:.3f}, AUPRC: {auprc_cot:.3f}")
print(f"ToT AUROC: {auroc_tot:.3f}, AUPRC: {auprc_tot:.3f}")

LLM AUROC: 0.700, AUPRC: 0.625
Few-shot AUROC: 0.500, AUPRC: 0.500
CoT AUROC: 0.540, AUPRC: 0.514
ToT AUROC: 1.000, AUPRC: 1.000


# Prediction of survivality of covid patients using chatgpt embeddings and Logistic Regression

In [80]:
def generate_embeddings(text, model="text-embedding-ada-002"):
    response = client.embeddings.create(input=text, model=model)
    return response.data[0].embedding

In [81]:
train_df["TEXT"] = train_df["MEDICATIONS"].apply(lambda meds: ", ".join(meds[:10]))
test_df["TEXT"] = test_df["MEDICATIONS"].apply(lambda meds: ", ".join(meds[:10]))

In [84]:
# Generate embeddings 
train_embeddings = []
for text in tqdm(train_df["TEXT"], desc="Generating train embeddings"):
    train_embeddings.append(generate_embeddings(text))

test_embeddings = []
for text in tqdm(test_df["TEXT"], desc="Generating test embeddings"):
    test_embeddings.append(generate_embeddings(text))

import numpy as np
X_train = np.array(train_embeddings)
y_train = train_df["SURVIVED"].values

X_test = np.array(test_embeddings)
y_test = test_df["SURVIVED"].values

Generating train embeddings:   0%|          | 0/40 [00:00<?, ?it/s]

Generating test embeddings:   0%|          | 0/10 [00:00<?, ?it/s]

In [88]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test)
y_prob = clf.predict_proba(X_test)[:,1]
auroc = roc_auc_score(y_test, y_prob)
auprc = average_precision_score(y_test, y_prob)
print('\nAUROC:', auroc, '\nAUPRC', auprc)


AUROC: 0.8 
AUPRC 0.835
