In [1]:
from langchain_openai import AzureChatOpenAI
from langchain.prompts import ChatPromptTemplate
import os

import truststore
truststore.inject_into_ssl()
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
import seaborn as sns
import numpy as np
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
os.environ["APP_CLIENT_ID"] = "zizhang-chen-research-app"
os.environ["APP_CLIENT_SECRET"] = "cQPcueFP7tDrimbf8NW2GAHcHeQa"
from llm_idam_token_generator.idam_token_generator import get_idam_token
from langchain_community.callbacks import get_openai_callback
from langchain_openai import OpenAI
import tiktoken

In [2]:
end_point_list = [
    'https://lmaas-beta.ai.gehealthcare.com',
    "https://openai-llm-frontdoor-hma7evbthrd4cugn.a01.azurefd.net"
]

model_list = ["gpt-35-turbo-16k", "gpt-4-32k-beta", "gpt-4o"]

# APIM_KEY = "8b96051ed6b84e4dad762fdc9f8c809e"

OPENAI_ENDPOINT = end_point_list[0]
OPENAI_DEPLOYMENT_MODEL = model_list[1]
OPENAI_AZURE_API_VERSION = "2023-12-01-preview"
# OPENAI_AZURE_API_VERSION = '2024-02-01'
OPENAI_TYPE="azure"

In [3]:
# help(AzureChatOpenAI)

llm = AzureChatOpenAI(
    api_key="xxx",  # This is not playing any role, but required as per OpenAI sdk. So any random could be passed.
    azure_endpoint=OPENAI_ENDPOINT,
    deployment_name=OPENAI_DEPLOYMENT_MODEL,
    openai_api_version=OPENAI_AZURE_API_VERSION,
    n = 2,
    default_headers={
        'Authorization': f'Bearer {get_idam_token()}',
        'Content-Type': 'application/json'
    }
)

PID:49922 INFO llm_idam_token_generator.idam_token_generator - Client ID: zizhang-chen-research-app - Generating new token.
PID:49922 INFO llm_idam_token_generator.idam_token_generator - Client ID: zizhang-chen-research-app - All required environment variables are present.
PID:49922 INFO llm_idam_token_generator.idam_token_generator - Client ID: zizhang-chen-research-app - IDAM Access Token is generated
PID:49922 INFO llm_idam_token_generator.idam_token_generator - Client ID: zizhang-chen-research-app - IDAM Exchange Access Token is generated


In [4]:
df = pd.read_csv('data/operation_patient_description.csv')
i_list = pd.read_csv(
    'results/gpt4/baseline/ICU_baseline.csv')['orig_index'].values
df = df.loc[i_list]
df = df.reset_index(drop=True)

gt_list = df.loc[:, ['icu_label', 'los_label', 'readmin_label']].values.astype(
    int)
gt_list = pd.DataFrame(gt_list)

description_list = df['description']
general_operation_list = [i.split('_')[0] for i in df.columns[5:]]

general_operation_list = [
    "Predict whether a patient’s total length of stay during a visit to the hospital will be at least 7 days. Answer 'Yes' if the patient will stay at least 7 days, 'No' otherwise.",
    "Predict whether a patient will be transferred to the ICU during a visit to the hospital. Answer 'Yes' if the patient will be transferred to the ICU, 'No' otherwise.",
    "Predict whether a patient will be re-admitted to the hospital within 30 days after being discharged from a visit. Answer 'Yes' if the patient will be re-admitted, 'No' otherwise."
]

In [5]:
def split_string_into_two_parts(text):
    lines = text.split('\n')
    first_part = lines[:1]
    second_part = lines[1:]

    first_part_string = '\n'.join(first_part)
    second_part_string = '\n'.join(second_part)

    return first_part_string, second_part_string

def combin_prompt_cot(general_prompt, medical_event_combined, template_events_answer_style):
    template_by_task = '**Task:**\n' + general_prompt + '\n'

    person_info, medical_event = split_string_into_two_parts(medical_event_combined)

    template_info = '**Patient age information:**\n' + person_info + '\n'

    template_events = '**Medical Events:**\n' + medical_event + '\n'

    return template_by_task + '\n' + '{events}' , template_info + '\n' + template_events + '\n' + template_events_answer_style

def calculate_row_entropy(df):
    def entropy(row):
        counts = row.value_counts(normalize=True)
        return -np.sum(counts * np.log2(counts + np.finfo(float).eps))

    return df.apply(entropy, axis=1)

def num_tokens_from_string(string, encoding_model = 'gpt-3.5-turbo') -> int:
    encoding = tiktoken.encoding_for_model(encoding_model)
    num_tokens = len(encoding.encode(string))
    return num_tokens


def generate_prompt_task(general_operation_list):

    multi_task_prompt = '\n'.join(general_operation_list)

    task = f"You are an experienced doctor. Based on the provided patient age, medical events starting at the admission date, and other predictions (Long length of stay, ICU transfer, Readmission) you made at the same time. Use your medical knowledge and reasoning to: \n{multi_task_prompt} right after the given medical events.\nPlease only answer with 'Yes' or 'No'. Yes if the patient lab results come back as normal, No if the patient lab results remain moderate or high. Please respond 'Yes' if it is at all plausible, only use 'No' if absolutely certain otherwise."
    return task


def generate_anwer_requirement():
    template_events_answer_style = "**Answer requirement with task names:**\n"

    answer_requirements = "Please only answer with 'Yes' or 'No'"

    general_operation_list = [
        "Predict whether a patient’s total length of stay during a visit to the hospital will be at least 7 days. Answer 'Yes' if the patient will stay at least 7 days, 'No' otherwise. Answer with task name: 'Long length of stay'",
        "Predict whether a patient will be transferred to the ICU during a visit to the hospital. Answer 'Yes' if the patient will be transferred to the ICU, 'No' otherwise. Answer with task name: 'ICU transfer'",
        "Predict whether a patient will be re-admitted to the hospital within 30 days after being discharged from a visit. Answer 'Yes' if the patient will be re-admitted, 'No' otherwise. Answer with task name: 'Readmission'"
    ]

    modified_list = [s + " " + answer_requirements for s in general_operation_list]

    final_string_requirements = '\n'.join(modified_list)

    final_string_requirements = template_events_answer_style + final_string_requirements

    example_answer = "\n\n**Answer example:**\n"

    example_prediction = "Long length of stay: Yes\n"+ "ICU transfer: No\n" + "Readmission: No\n"

    final_string_example = example_answer + example_prediction

    final_string = final_string_requirements + final_string_example

    return final_string

In [6]:
out_csv_names = 'multi_task_general_operation.csv'
with get_openai_callback() as cb:

    answer_list_all = []

    for i in tqdm(range(len(df))):
        # for i in tqdm(range(50, 100)):
        # for i in tqdm(range(1)):
        row = df.loc[i, :]
        answer_list_5 = []
        for _ in range(5):
            # for _ in range(1):
            general_prompt = generate_prompt_task(general_operation_list)
            current_description = description_list[i]
            anwer_requirement = generate_anwer_requirement()
            c, e = combin_prompt_cot(
                general_prompt=general_prompt,
                medical_event_combined=current_description,
                template_events_answer_style=anwer_requirement)
            prompt = ChatPromptTemplate.from_template(c)
            chain = prompt | llm
            result = chain.invoke({'events': e})
            answer_list_5.append(result.content)
        answer_list_all.append(answer_list_5)
    df_pred = pd.DataFrame(answer_list_all)
    df_pred.columns = ['pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5']
    df_pred = pd.concat([df_pred, gt_list], axis=1)
    # df_pred.to_csv(os.path.join('results/gpt35_new/cross_task_general_operation/',
    #                             out_csv_names),
    #                index=False)
    df_pred.to_csv(os.path.join('results/gpt4_new/cross_task_general_operation/', out_csv_names), index=False)

  0%|          | 0/100 [00:00<?, ?it/s]

PID:49922 INFO httpx - Client ID: zizhang-chen-research-app - HTTP Request: POST https://lmaas-beta.ai.gehealthcare.com/openai/deployments/gpt-4-32k-beta/chat/completions?api-version=2023-12-01-preview "HTTP/1.1 200 OK"
PID:49922 INFO httpx - Client ID: zizhang-chen-research-app - HTTP Request: POST https://lmaas-beta.ai.gehealthcare.com/openai/deployments/gpt-4-32k-beta/chat/completions?api-version=2023-12-01-preview "HTTP/1.1 200 OK"
PID:49922 INFO httpx - Client ID: zizhang-chen-research-app - HTTP Request: POST https://lmaas-beta.ai.gehealthcare.com/openai/deployments/gpt-4-32k-beta/chat/completions?api-version=2023-12-01-preview "HTTP/1.1 200 OK"
PID:49922 INFO httpx - Client ID: zizhang-chen-research-app - HTTP Request: POST https://lmaas-beta.ai.gehealthcare.com/openai/deployments/gpt-4-32k-beta/chat/completions?api-version=2023-12-01-preview "HTTP/1.1 200 OK"
PID:49922 INFO httpx - Client ID: zizhang-chen-research-app - HTTP Request: POST https://lmaas-beta.ai.gehealthcare.com/

In [7]:
print(c)

**Task:**
You are an experienced doctor. Based on the provided patient age, medical events starting at the admission date, and other predictions (Long length of stay, ICU transfer, Readmission) you made at the same time. Use your medical knowledge and reasoning to: 
Predict whether a patient’s total length of stay during a visit to the hospital will be at least 7 days. Answer 'Yes' if the patient will stay at least 7 days, 'No' otherwise.
Predict whether a patient will be transferred to the ICU during a visit to the hospital. Answer 'Yes' if the patient will be transferred to the ICU, 'No' otherwise.
Predict whether a patient will be re-admitted to the hospital within 30 days after being discharged from a visit. Answer 'Yes' if the patient will be re-admitted, 'No' otherwise. right after the given medical events.
Please only answer with 'Yes' or 'No'. Yes if the patient lab results come back as normal, No if the patient lab results remain moderate or high. Please respond 'Yes' if it is

In [8]:
print(e[-900:])

y venipuncture) occurred.

**Answer requirement with task names:**
Predict whether a patient’s total length of stay during a visit to the hospital will be at least 7 days. Answer 'Yes' if the patient will stay at least 7 days, 'No' otherwise. Answer with task name: 'Long length of stay' Please only answer with 'Yes' or 'No'
Predict whether a patient will be transferred to the ICU during a visit to the hospital. Answer 'Yes' if the patient will be transferred to the ICU, 'No' otherwise. Answer with task name: 'ICU transfer' Please only answer with 'Yes' or 'No'
Predict whether a patient will be re-admitted to the hospital within 30 days after being discharged from a visit. Answer 'Yes' if the patient will be re-admitted, 'No' otherwise. Answer with task name: 'Readmission' Please only answer with 'Yes' or 'No'

**Answer example:**
Long length of stay: Yes
ICU transfer: No
Readmission: No



In [9]:
cb

Tokens Used: 2491445
	Prompt Tokens: 2474445
	Completion Tokens: 17000
Successful Requests: 500
Total Cost (USD): $150.50670000000017