In [1]:
from aug.features import *
import warnings
warnings.filterwarnings("ignore")

In [2]:
def get_questions_by_lesion(lesion: str):
    questions = [
        # f"What is {lesion}?", # don't need the first one for extending features.
        f"What are the symptoms associated with {lesion}?",
        f"What can cause {lesion}?",        
        f"What are the patient’s symptoms that are relevant for {lesion}?",
        f"What are the relevant clinical signs for the etiological diagnosis of {lesion}?",
        f"What are the relevant laboratory data for the etiological diagnosis of {lesion}?",
        f"What are the relevant clinical characteristics for the etiological diagnosis of {lesion}",
        f"What are the patient’s personal relevant history for the etiological diagnosis of {lesion}",
    ]
    return questions

In [3]:
lesion = "atelectasis"
## adding prior knowledge from 8 questions.
questions = get_questions_by_lesion(lesion)
documents = request_documents(lesion)
responses = neo4jgraph_get_responses(questions, documents)
keywords = responses_to_keywords(lesion, responses)
keywords

In [None]:
identified_keywords = {
    "boolean": [
        "blockage of the airways",
        "chest pain",
        "chronic obstructive pulmonary disease (copd)",
        "collapse or closure of a lung",
        "coughing",
        "foreign object",
        "pneumonia",
        "prolonged bed rest",
        "rapid breathing",
        "reduced or absent gas exchange",
        "shortness of breath",
    ],
    "numerical": ["oxygen saturation"],  # 'low oxygen levels',
    # 'lung diseases',
    # 'mucus',
    # 'pneumonia',
    # 'surgery',
    # 'tumor'
}

In [None]:
# identified_keywords = {  # networkx
#     "boolean": [
#         "abnormal chest x-ray findings",
#         "abnormal lung sounds",
#         "collapsed lung tissue" "bluish skin color",
#         "chest pain",
#         "coughing",
#         "coughing up blood",
#         "crackling sounds in the lungs",
#         "decreased appetite",
#         "decreased breath sounds",
#         "decreased oxygen saturation",
#         "difficulty breathing",
#         "difficulty speaking",
#         "dyspnea",
#         "fatigue",
#         "history of being on mechanical ventilation",
#         "history of chest trauma or injury",
#         "history of chronic obstructive pulmonary disease (copd)",
#         "history of lung cancer",
#         "history of lung diseases",
#         "history of neuromuscular disorders",
#         "history of prolonged bed rest or immobility",
#         "history of recent surgery",
#         "history of respiratory infections",
#         "history of smoking for 20 years",
#         "advanced age",
#         "low oxygen levels",
#         "obesity",
#         "fungi in sputum culture",
#         "presence of bacteria",
#         "rapid breathing",
#         "shortness of breath",
#         "smoking",
#         "tachypnea",
#         "underlying lung diseases",
#         "viruses",
#         "wheezing",
#         "white-out appearance in the affected area of the lung",
#     ],
#     "numerical": ["levels of oxygen and carbon dioxide in the blood"],
#     #  'amount of air that can be inhaled and exhaled',
#     #  'use of accessory muscles',
#     #  'viruses',
# }

In [None]:
# identified_keywords = { # Neo4jVector
#     "boolean": [
#         "anesthesia",
#         "asthma",
#         "asymptomatic",
#         "blockage of the airway",
#         "breathing difficulty",
#         "chest pain",
#         "chronic obstructive pulmonary disease",
#         "cough",
#         "cyanosis",
#         "dilated aorta",
#         # "fever", # Can be consider as body temperature.
#         "hypoventilation",
#         "lower lung segments",
#         "lung tumor",
#         "muscle relaxation",
#         "obesity",
#         "pleural effusion",
#         "poor surfactant spreading during inspiration",
#         "post-surgical complications",
#         "pulmonary tuberculosis",
#         "small volume linear shadows",
#         "smoking",
#         "surfactant deficiency",
#         "type of anesthetic",
#         # Response: Risk factors for atelectasis include certain types of surgery, muscle relaxation, obesity, high oxygen, and lower lung segments.',
#         "high oxygen",  # A cause, not a symptom. Has the patient been the situation with high oxygen.
#     ],
#     "numerical": [
#         "heart rate", # "increased heart rate",
#         "oxygen saturation (%)", # "low oxygen saturation",
#         # "age", # "old age",
#     ],
# }

In [None]:
import pandas as pd
df = pd.read_csv('./spreadsheets/reflacx_clinical.csv')
sample_df = df.iloc[:100]

In [None]:
results = []
for idx, data in sample_df.iterrows():
    r = get_possible_values(data, identified_keywords)
    results.append(r)

In [None]:
for f in identified_keywords['numerical'] + identified_keywords['boolean']:
    sample_df[f] = None
    sample_df[f] = [ r[f] for r in results]

In [None]:
sample_df.to_csv('[prior-knowledge] neo4j-vector.csv')

In [None]:
raise StopIteration()

StopIteration: 

In [None]:
import os
from enum import Enum
from loader.radiopaedia import RadioWebLoader
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter

from langchain.indexes import GraphIndexCreator
from langchain.graphs.networkx_graph import KnowledgeTriple
from langchain.llms import OpenAI
from langchain.chains import GraphQAChain
from langchain.graphs import Neo4jGraph
from secret import *

from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain_core.documents import Document

os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

class DocumentSource(Enum):
    Wikipedia = "wikipedia"
    Radiopaedia = "radiopaedia"

class StoreType(Enum):
    Neo4jVectorIndex = "neo4jvectorindex"
    Neo4jGraph = "neo4jgraph"
    NetworkXGraph ="networkxgraph"

CLEAN_QUERY = """
    MATCH (n)
    DETACH DELETE n
    """

def get_questions_by_lesion(lesion: str):
    questions = [
        # f"What is {lesion}?", # don't need the first one for extending features.
        f"What are the symptoms associated with {lesion}?",
        f"What can cause {lesion}?",
        f"What are the patient’s symptoms that are relevant for {lesion}?",
        f"What are the relevant clinical signs for the etiological diagnosis of {lesion}?",
        f"What are the relevant laboratory data for the etiological diagnosis of {lesion}?",
        f"What are the relevant clinical characteristics for the etiological diagnosis of {lesion}",
        f"What are the patient’s personal relevant history for the etiological diagnosis of {lesion}",
    ]
    return questions

def request_documents(
        lesion: str,
        sources: list[DocumentSource] = [
            DocumentSource.Radiopaedia,
            DocumentSource.Wikipedia,
        ],
        top1_only=True,
        text_splitter = TokenTextSplitter(
            chunk_size=2048, chunk_overlap=24
        ),
    ):
        raw_documents = []
        # retrieve raw_documents
        if DocumentSource.Radiopaedia in sources:
            if top1_only:
                raw_documents.extend(
                    RadioWebLoader(lesion, only_first=True).load()
                )
            else:
                raw_documents.extend(
                    RadioWebLoader(lesion, only_first=False).load()
                )

        if DocumentSource.Wikipedia in sources:
            if top1_only:
                raw_documents.extend(
                    WikipediaLoader(query="Atelectasis", load_max_docs=1).load()
                )
            else:
                raw_documents.extend(WikipediaLoader(query="Atelectasis").load())

        # pre-process documents
        documents = text_splitter.split_documents(raw_documents)
        return documents


def clean_and_get_neo4jgraph(url: str=NOE4J_URL, username: str= NEO4J_USERNAME, password: str= NOE4J_PASSWORD,):
    graph = Neo4jGraph(url=url, username=username, password=password)
    graph.query(CLEAN_QUERY)
    graph.refresh_schema()
    print(graph.schema)
    return graph

def networkx_get_responses(questions: list[str], documents: list[Document]):
    index_creator = GraphIndexCreator(llm=OpenAI(temperature=0))
    graph = None
    for d in documents:
        g_temp = index_creator.from_text(d.page_content)
        if graph is None:
            graph = g_temp
        else:
            triplets = graph.get_triples()
            for t in triplets:
                graph.add_triple(knowledge_triple=KnowledgeTriple(*t))
    chain = GraphQAChain.from_llm(OpenAI(temperature=0), graph=graph, verbose=True)
    res_dict = {}
    for q in questions:
        res = chain.run(q)
        res_dict[q] = res.strip()
    return res_dict

def neo4jvector_get_responses(questions: list[str], documents: list[Document]):
    _ = clean_and_get_neo4jgraph()
    db = Neo4jVector.from_documents(
        documents, OpenAIEmbeddings(), url=NOE4J_URL, username=NEO4J_USERNAME, password=NOE4J_PASSWORD
    )
    retriever = db.as_retriever()
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k"), chain_type="stuff", retriever=retriever
    )
    res_dict = {}
    for q in questions:
        res = chain(
            {"question": q},
            return_only_outputs=True,
        )
        res_dict[q] = res["answer"].strip()
    return res_dict

from tqdm import tqdm
from aug.graph_doc import (
    get_extraction_chain,
    data_to_graph_doc,
    chain_run,
    add_graph_documents,
)
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain

def neo4jgraph_get_responses(questions: list[str], documents: list[Document]):
    graph = clean_and_get_neo4jgraph()

    llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

    allowed_nodes = None
    allowed_rels = None
    # allowed_nodes = ["Symptom", "Disease"]
    # allowed_rels = ["CAN_CAUSE", "DESCRIBE", "HAS"]

    extract_chain = get_extraction_chain(llm, allowed_nodes, allowed_rels)
    gds = []

    for d in tqdm(documents, total=len(documents)):
        data = chain_run(extract_chain, d.page_content)
        # data = extract_chain.run(d.page_content)
        # graph_document = GraphDocument(
        #     nodes=[map_to_base_node(node) for node in data.nodes],
        #     relationships=[map_to_base_relationship(rel) for rel in data.rels],
        #     source=d,
        # )
        graph_document = data_to_graph_doc(data, d)
        # add_graph_document(graph, graph_document)
        gds.append(graph_document)

    graph = add_graph_documents(graph, gds)
    graph.refresh_schema()
    print(graph.schema)

    chain = GraphCypherQAChain.from_llm(
        graph=graph,
        cypher_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k"),
        qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k"),
        validate_cypher=True,  # Validate relationship directions
        verbose=True,
    )

    res_dict = {}
    for q in questions:
        try:
            res = chain.run(q)
            res_dict[q] = res.strip()
        except:
            res_dict[q] = "Generated Cypher Statement is not valid"
    return res_dict

def get_responses_from_documents(questions: list[str], documents: list[Document], store_type: StoreType):
    if store_type == StoreType.NetworkXGraph:
        return networkx_get_responses(questions,documents)
    elif store_type == StoreType.Neo4jVectorIndex:
        return neo4jvector_get_responses(questions,documents)
    elif store_type == StoreType.Neo4jGraph:
        return neo4jgraph_get_responses(questions,documents)
    else:
        raise NotImplementedError(f"stor type {store_type} is not supported.")

import openai
from secret import *
openai.api_key = OPENAI_API_KEY

from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
    retry_if_exception_type
)  # for exponential backoff

@retry(
    retry=retry_if_exception_type((openai.error.APIError, openai.error.APIConnectionError, openai.error.RateLimitError, openai.error.ServiceUnavailableError, openai.error.Timeout)), 
    wait=wait_random_exponential(multiplier=1, max=60), 
    stop=stop_after_attempt(10)
)
def chat_completion_with_backoff(**kwargs):
    return openai.ChatCompletion.create(**kwargs)

def responses_to_keywords(lesion, responses: dict[str, str]):
    combined_response = " ".join(list(responses.values()))
    prompt = f"""For the subsequent paragraph, isolate solely those clinical keywords that are can be represented as symptoms or numerical values or boolean values (note: please separate the keywords by comma): 
    "{combined_response}\"
    """
    res = chat_completion_with_backoff(
                # model="gpt-3.5-turbo",
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "You are a helpful clinical expert."},
                    # {"role": "system", "content": "You are an experienced radiologist. Use a keyword-based report to answer the following questions."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0,
                n=1,
            )
    keywords = [k.lower().strip() for k in res['choices'][0]['message']['content'].replace(".", "").split(",") if lesion in k.lower().strip()  ]
    return set(keywords)    

In [None]:
lesion = "atelectasis"
questions = get_questions_by_lesion(lesion)
documents = request_documents(lesion)
responses = neo4jvector_get_responses(questions, documents)
keywords = responses_to_keywords(lesion, responses)

Node properties are the following:

Relationship properties are the following:

The relationships are the following:



In [None]:
responses

{'What are the symptoms associated with atelectasis?': 'The symptoms associated with atelectasis may include cough (not prominent), chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate. However, atelectasis does not cause fever.',
 'What can cause atelectasis?': 'Atelectasis can be caused by various medical conditions, including post-surgical complications, surfactant deficiency, pulmonary tuberculosis, and blockage of the airway. It can also be caused by factors such as poor surfactant spreading during inspiration and relaxation atelectasis. Risk factors for atelectasis include certain types of surgery, muscle relaxation, obesity, high oxygen, and lower lung segments.',
 'What are the patient’s symptoms that are relevant for atelectasis?': 'The relevant symptoms for atelectasis include cough (not prominent), chest pain (not common), breathing difficulty (fast and shal

In [None]:
keywords

{'75-90% of people undergoing general anesthesia',
 'asthma',
 'asymptomatic',
 'blockage of the airway',
 'breathing difficulty',
 'bronchoscopy',
 'certain types of surgery',
 'chest ct',
 'chest pain',
 'chest x-ray',
 'chronic obstructive pulmonary disease',
 'cough',
 'cyanosis',
 'dependent atelectasis',
 'dilated aorta',
 'fever',
 'high oxygen',
 'hypoventilation',
 'increased heart rate',
 'low oxygen saturation',
 'lower lung segments',
 'lung tumor',
 'muscle relaxation',
 'obesity',
 'old age',
 'passive atelectasis',
 'pleural effusion',
 'poor surfactant spreading during inspiration',
 'post-surgical complications',
 'pulmonary tuberculosis',
 'relaxation atelectasis',
 'small volume linear shadows',
 'smoking',
 'surfactant deficiency',
 'type of anesthetic'}

In [None]:
identified_keywords = {
    "boolean": [
        "anesthesia",
        "asthma",
        "asymptomatic",
        "blockage of the airway",
        "breathing difficulty",
        "chest pain",
        "chronic obstructive pulmonary disease",
        "cough",
        "cyanosis",
        "dilated aorta",
        # "fever", # Can be consider as body temperature.
        "hypoventilation",
        "lower lung segments",
        "lung tumor",
        "muscle relaxation",
        "obesity",
        "pleural effusion",
        "poor surfactant spreading during inspiration",
        "post-surgical complications",
        "pulmonary tuberculosis",
        "small volume linear shadows",
        "smoking",
        "surfactant deficiency",
        "type of anesthetic",
        # Response: Risk factors for atelectasis include certain types of surgery, muscle relaxation, obesity, high oxygen, and lower lung segments.',
        "high oxygen",  # A cause, not a symptom. Has the patient been the situation with high oxygen.
    ],
    "numerical": [
        "heart rate", # "increased heart rate",
        "oxygen saturation (%)", # "low oxygen saturation",
        # "age", # "old age",
    ],
}
# The features that triage table has have been commented.

In [None]:
from aug.gpt import get_diagnosis
import re

MIMIC_EYE_PATH = "F:\\mimic-eye"

REFLACX_LESION_LABEL_COLS = [
    # "Fibrosis",
    # "Quality issue",
    # "Wide mediastinum",
    # "Fracture",
    # "Airway wall thickening",

    ######################
    # "Hiatal hernia",
    # "Acute fracture",
    # "Interstitial lung disease",
    # "Enlarged hilum",
    # "Abnormal mediastinal contour",
    # "High lung volume / emphysema",
    # "Pneumothorax",
    # "Lung nodule or mass",
    # "Groundglass opacity",
    ######################
    "Pulmonary edema",
    "Enlarged cardiac silhouette",
    "Consolidation",
    "Atelectasis",
    "Pleural abnormality",
    # "Support devices",
]

CHEXPERT_LABEL_COLS = [
    "Atelectasis_chexpert",
    "Cardiomegaly_chexpert",
    "Consolidation_chexpert",
    "Edema_chexpert",
    "Enlarged Cardiomediastinum_chexpert",
    "Fracture_chexpert",
    "Lung Lesion_chexpert",
    "Lung Opacity_chexpert",
    "No Finding_chexpert",
    "Pleural Effusion_chexpert",
    "Pleural Other_chexpert",
    "Pneumonia_chexpert",
    "Pneumothorax_chexpert",
    "Support Devices_chexpert", 
]


def get_report(
    data,
    mimic_eye_path: str = MIMIC_EYE_PATH,
    label_cols: str = REFLACX_LESION_LABEL_COLS,
    report_format=True,
):
    # reflacx_id = data['id']
    patient_id = data["subject_id"]
    study_id = data["study_id"]
    # dicom_id = data['dicom_id']
    report_path = os.path.join(
        mimic_eye_path,
        f"patient_{patient_id}",
        "CXR-DICOM",
        f"s{study_id}.txt",
    )
    with open(report_path) as f:
        report = f.read()

    report = report.strip().replace("FINAL REPORT\n", "").replace("\n", "").strip()

    age = data["age"]
    gender = "Female" if data["gender"] == "F" else "Male"
    if report_format:
        return re.sub(
            "[^0-9A-Za-z.\s\:']",
            "",
            f"{report} LESIONS:{get_diagnosis(data, label_cols)}. AGE: {age}. GENDER: {gender}.",
        )
    else:
        # return f"A {age} years old {gender} patient diagnosed with{get_diagnosis(data, label_cols)}. And, This patient has the radiology report: \n{report}\nThis patients is most likely to have {feature_to_name[desired_clinical_feature]} around"
        # return f"A {age} years old {gender} patient diagnosed with{get_diagnosis(data, label_cols)}. And, This patient has the radiology report: \n{report}\nThe {feature_to_name[desired_clinical_feature]} of this patient is around".replace("_", "")
        return re.sub(
            "[^0-9A-Za-z.\s\:']",
            "",
            f"A {age} years old {gender} patient diagnosed with{get_diagnosis(data, label_cols)}. And, This patient has the radiology report: \n{report}",
        )

In [None]:
import pandas as pd
df = pd.read_csv('./spreadsheets/reflacx_clinical.csv')

In [None]:
def get_boolean_results(report, identified_keywords):
    if not "boolean" in identified_keywords or len(identified_keywords["boolean"]) <= 0:
        return None

    boolean_prompt = f"""{report}
    According to the report above, does the patient has the following attributes? (Return True or False only, and separate the answer for each attribute by comma.)
    """
    for i, k in enumerate(identified_keywords["boolean"]):
        boolean_prompt += f"{i}. {k}.\n"

    res = chat_completion_with_backoff(
        # model="gpt-3.5-turbo",
        model="gpt-4",
        messages=[
            # {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "system",
                "content": "You are a medical expert.",
            },
            {
                "role": "user",
                "content": boolean_prompt,
            },
        ],
        temperature=0,
        n=1,
    )

    values = res["choices"][0]["message"]["content"].strip().split(",")

    results = {
        k: v.strip().replace(".", "") == "True"
        for k, v in zip(identified_keywords["boolean"], values)
    }

    return results


def get_numerical_results(report, identified_keywords):
    if (
        not "numerical" in identified_keywords
        or len(identified_keywords["numerical"]) <= 0
    ):
        return None

    prompt = f"""{report}
    """
    for i, k in enumerate(identified_keywords["numerical"]):
        prompt += f"{k.upper()}: [MASK].\n"

    prompt += """
    What can be the numerical values covered by the token [MASK]? Please return only one single numerical values (not range) for each [MASK], and separate the answers by comma.
    """

    res = chat_completion_with_backoff(
        # model="gpt-3.5-turbo",
        model="gpt-4",
        messages=[
            # {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "system",
                "content": "You are a medical expert.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        temperature=0,
        n=1,
    )

    values = res["choices"][0]["message"]["content"].strip().split(",")
    values = [v.strip().replace(".", "") for v in values]
    values = [int(v) if v.isdecimal() else None for v in values]

    results = {k: v for k, v in zip(identified_keywords["numerical"], values)}

    return results


def get_possible_values(
    data: pd.Series,
    identified_keywords: dict[str, list[str]],
):
    report = get_report(data)

    boolean_results = get_boolean_results(report, identified_keywords)
    numerical_results = get_numerical_results(report, identified_keywords)

    results = {}
    results.update(boolean_results)
    results.update(numerical_results)

    return results

In [None]:
results = get_possible_values(df.iloc[0], identified_keywords)

In [None]:
results

{'anesthesia': False,
 'asthma': False,
 'asymptomatic': True,
 'blockage of the airway': False,
 'breathing difficulty': False,
 'chest pain': False,
 'chronic obstructive pulmonary disease': False,
 'cough': False,
 'cyanosis': False,
 'dilated aorta': False,
 'hypoventilation': False,
 'lower lung segments': False,
 'lung tumor': False,
 'muscle relaxation': False,
 'obesity': False,
 'pleural effusion': False,
 'poor surfactant spreading during inspiration': False,
 'post-surgical complications': False,
 'pulmonary tuberculosis': False,
 'small volume linear shadows': False,
 'smoking': False,
 'surfactant deficiency': False,
 'type of anesthetic': False,
 'heart rate': 60,
 'oxygen saturation (%)': 98}

In [None]:
data = df.iloc[0]

In [None]:
report = get_report(
               data
            )

In [None]:
print(report)

INDICATION:  Central venous line placement.  TECHNIQUE:  Frontal chest radiograph.  COMPARISON:  Chest radiograph 12:42 today.  FINDINGS:   A right subclavian catheter has been placed in the interim. The catheter terminates at the confluence of the brachiocephalic vein and superior vena cava and if indicated could be advanced 3.7 cm for termination within the low SVC.  There is no pleural effusion or pneumothorax. The cardiac silhouette remains mildly enlarged. There is no focal airspace consolidation worrisome for pneumonia.  High density material is again seen in the paritally imaged colon in the left abdomen. Cholecystectomy clips are noted. There are carotid calcifications left greater than right. LESIONS: Enlarged cardiac silhouett. AGE: 69. GENDER: Female.


In [None]:
prompt = f"""{report}
According to the report above, does the patient has the following attributes? (Return True or False only, and separate the answer for each attribute by comma.)
"""

In [None]:
prompt

'INDICATION:  Central venous line placement.  TECHNIQUE:  Frontal chest radiograph.  COMPARISON:  Chest radiograph 12:42 today.  FINDINGS:   A right subclavian catheter has been placed in the interim. The catheter terminates at the confluence of the brachiocephalic vein and superior vena cava and if indicated could be advanced 3.7 cm for termination within the low SVC.  There is no pleural effusion or pneumothorax. The cardiac silhouette remains mildly enlarged. There is no focal airspace consolidation worrisome for pneumonia.  High density material is again seen in the paritally imaged colon in the left abdomen. Cholecystectomy clips are noted. There are carotid calcifications left greater than right. LESIONS: Enlarged cardiac silhouett. AGE: 69. GENDER: Female.\nAccording to the report above, does the patient has the following attributes? (Return True or False only, and separate the answer for each attribute by comma)\n'

In [None]:
for i, k in enumerate(identified_keywords['boolean']):
    prompt += f"{i}. {k}.\n"

In [None]:
prompt

'INDICATION:  Central venous line placement.  TECHNIQUE:  Frontal chest radiograph.  COMPARISON:  Chest radiograph 12:42 today.  FINDINGS:   A right subclavian catheter has been placed in the interim. The catheter terminates at the confluence of the brachiocephalic vein and superior vena cava and if indicated could be advanced 3.7 cm for termination within the low SVC.  There is no pleural effusion or pneumothorax. The cardiac silhouette remains mildly enlarged. There is no focal airspace consolidation worrisome for pneumonia.  High density material is again seen in the paritally imaged colon in the left abdomen. Cholecystectomy clips are noted. There are carotid calcifications left greater than right. LESIONS: Enlarged cardiac silhouett. AGE: 69. GENDER: Female.\nAccording to the report above, does the patient has the following attributes? (Return True or False only, and separate the answer for each attribute by comma)\n0. anesthesia.\n1. asthma.\n2. asymptomatic.\n3. blockage of the 

In [None]:
res = chat_completion_with_backoff(
    # model="gpt-3.5-turbo",
    model="gpt-4",
    messages=[
        # {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "system",
            "content": "You are a medical expert.",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ],
    temperature=0,
    n=1,
)

In [None]:
res['choices'][0]['message']['conten

<OpenAIObject chat.completion id=chatcmpl-8kS2v281iRh0lMfVx3gZ7N2Sf4GDo at 0x234544b7b80> JSON: {
  "id": "chatcmpl-8kS2v281iRh0lMfVx3gZ7N2Sf4GDo",
  "object": "chat.completion",
  "created": 1706081589,
  "model": "gpt-4-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False."
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 379,
    "completion_tokens": 46,
    "total_tokens": 425
  },
  "system_fingerprint": null
}

In [None]:
prompt = f"""{report}
According to the report above, what are the possible values of following attributes for the patient? (Return numerical value only, and separate the answer for each attribute by comma.)
"""

In [None]:
prompt = f"""{report}
"""
for i, k in enumerate(identified_keywords['numerical']):
    prompt += f"{k.upper()}: [MASK].\n"

prompt+="""
What can be the numerical values covered by the token [MASK]? Please return only one single numerical values (not range) for each [MASK], and separate the answers by comma.
"""

In [None]:
# prompt +="""
# In case the report lacks the required information, please make an attempt to estimate a plausible value. 
# For the answers, please return numerical value only.
# """

In [None]:
prompt = f"""{report}
"""
for i, k in enumerate(identified_keywords['numerical']):
    prompt += f"{k.upper()}: [MASK].\n"

prompt+="""
What can be the numerical values covered by the token [MASK]? Please return only one single numerical values (not range) for each [MASK], and separate the answers by comma.
"""

res = chat_completion_with_backoff(
    # model="gpt-3.5-turbo",
    model="gpt-4",
    messages=[
        # {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "system",
            "content": "You are a medical expert.",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ],
    temperature=0,
    n=1,
)

In [None]:
res['choices'][0]['message']['content']

'60, 98'