In [1]:
# NOTE(review): the wildcard import hides where names come from. The visible cells call
# get_possible_values, which is not defined in this notebook and presumably comes from
# aug.features — confirm, and consider importing the needed names explicitly.
from aug.features import *
import warnings
# Suppresses every warning from here on (no category or module filter is given).
warnings.filterwarnings("ignore")

In [2]:
def get_questions_by_lesion(lesion: str):
    """Build the list of prior-knowledge questions about `lesion`.

    Every template below is filled with `lesion` and the resulting strings are
    returned in the order listed. The last two templates deliberately keep
    their original wording, which has no trailing question mark.
    """
    templates = (
        # "What is {lesion}?",  # don't need the first one for extending features.
        "What are the symptoms associated with {lesion}?",
        "What can cause {lesion}?",
        "What are the patient’s symptoms that are relevant for {lesion}?",
        "What are the relevant clinical signs for the etiological diagnosis of {lesion}?",
        "What are the relevant laboratory data for the etiological diagnosis of {lesion}?",
        "What are the relevant clinical characteristics for the etiological diagnosis of {lesion}",
        "What are the patient’s personal relevant history for the etiological diagnosis of {lesion}",
    )
    return [template.format(lesion=lesion) for template in templates]

In [3]:
lesion = "atelectasis"
## Adding prior knowledge from the lesion questions. get_questions_by_lesion currently
## returns 7 questions; the original first one ("What is {lesion}?") is commented out there.
# questions = get_questions_by_lesion(lesion)
# documents = request_documents(lesion)
# responses = neo4jvector_get_responses(questions, documents) # let's try to get the responses from other LLMs to predict the keywords.
# keywords = responses_to_keywords(lesion, responses)
# keywords

In [4]:
# Prior-knowledge answers for "atelectasis", keyed by question text. The keys are exactly
# the strings get_questions_by_lesion("atelectasis") produces, including the last two,
# which have no trailing "?". This dict is passed to get_possible_values as `responses=`.
# NOTE(review): presumably a pasted copy of the neo4jvector_get_responses output from the
# commented-out pipeline in the previous cell — confirm.
responses = {
    "What are the symptoms associated with atelectasis?": "The symptoms associated with atelectasis may include cough (not prominent), chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate. However, atelectasis can also be asymptomatic. Fever is not a symptom of atelectasis.",
    "What can cause atelectasis?": "Atelectasis can be caused by various medical conditions, including post-surgical complications, surfactant deficiency, and poor surfactant spreading during inspiration. It can also be caused by blockage of a bronchiole or bronchus, such as by a foreign body, mucus plug, tumor, or compression from the outside. Risk factors for atelectasis include certain types of surgery, muscle relaxation, obesity, high oxygen, lower lung segments, age, chronic obstructive pulmonary disease (COPD), asthma, and type of anesthetic.",
    "What are the patient’s symptoms that are relevant for atelectasis?": "The relevant symptoms for atelectasis include cough (not prominent), chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate.",
    "What are the relevant clinical signs for the etiological diagnosis of atelectasis?": "The relevant clinical signs for the etiological diagnosis of atelectasis may include cough, chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate. However, it is important to note that atelectasis may also be asymptomatic.",
    "What are the relevant laboratory data for the etiological diagnosis of atelectasis?": "The relevant laboratory data for the etiological diagnosis of atelectasis are not provided in the given information.",
    "What are the relevant clinical characteristics for the etiological diagnosis of atelectasis": "The relevant clinical characteristics for the etiological diagnosis of atelectasis include cough (not prominent), chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate. It is important to note that atelectasis does not cause fever. The underlying causes of atelectasis can include adjacent compression, passive atelectasis, dependent atelectasis, and poor surfactant spreading. Risk factors for atelectasis include type of surgery, use of muscle relaxation, obesity, high oxygen, lower lung segments, age, presence of chronic obstructive pulmonary disease or asthma, and type of anesthetic. Diagnosis of atelectasis is generally confirmed through chest X-ray, which may show lung opacification and/or loss of lung volume. Additional imaging modalities such as CT chest or bronchoscopy may be necessary to determine the cause of atelectasis.",
    "What are the patient’s personal relevant history for the etiological diagnosis of atelectasis": "The patient's personal relevant history for the etiological diagnosis of atelectasis includes post-surgical atelectasis as a common cause, as well as pulmonary tuberculosis, smoking, and old age as risk factors. Other factors associated with the development of atelectasis include the presence of chronic obstructive pulmonary disease or asthma, and the type of anesthesia used. The diagnosis of atelectasis is generally confirmed through chest X-ray, which shows small volume linear shadows, usually peripherally or at the lung bases. CT chest or bronchoscopy may be necessary to determine the cause or confirm the absence of proximal obstruction.",
}

In [5]:
# identified_keywords = {
#     "boolean": [
#         "blockage of the airways",
#         "chest pain",
#         "chronic obstructive pulmonary disease (copd)",
#         "collapse or closure of a lung",
#         "coughing",
#         "foreign object",
#         "pneumonia",
#         "prolonged bed rest",
#         "rapid breathing",
#         "reduced or absent gas exchange",
#         "shortness of breath",
#     ],
#     "numerical": ["oxygen saturation (%)"],  # 'low oxygen levels',
#     # 'lung diseases',
#     # 'mucus',
#     # 'pneumonia',
#     # 'surgery',
#     # 'tumor'
# }

In [6]:
# identified_keywords = {  # networkx
#     "boolean": [
#         "abnormal chest x-ray findings",
#         "abnormal lung sounds",
#         "collapsed lung tissue",
#         "bluish skin color",
#         "chest pain",
#         "coughing",
#         "coughing up blood",
#         "crackling sounds in the lungs",
#         "decreased appetite",
#         "decreased breath sounds",
#         "decreased oxygen saturation",
#         "difficulty breathing",
#         "difficulty speaking",
#         "dyspnea",
#         "fatigue",
#         "history of being on mechanical ventilation",
#         "history of chest trauma or injury",
#         "history of chronic obstructive pulmonary disease (copd)",
#         "history of lung cancer",
#         "history of lung diseases",
#         "history of neuromuscular disorders",
#         "history of prolonged bed rest or immobility",
#         "history of recent surgery",
#         "history of respiratory infections",
#         "history of smoking for 20 years",
#         "advanced age",
#         "low oxygen levels",
#         "obesity",
#         "fungi in sputum culture",
#         "presence of bacteria",
#         "rapid breathing",
#         "shortness of breath",
#         "smoking",
#         "tachypnea",
#         "underlying lung diseases",
#         "viruses",
#         "wheezing",
#         "white-out appearance in the affected area of the lung",
#     ],
#     "numerical": ["levels of oxygen and carbon dioxide in the blood"],
#     #  'amount of air that can be inhaled and exhaled',
#     #  'use of accessory muscles',
#     #  'viruses',
# }

In [7]:
# Feature names handed to get_possible_values, grouped by the kind of value requested.
# These same names are later used as the new column names on sample_df.
identified_keywords = {  # Neo4jVector
    "boolean": [
        "anesthesia",
        "asthma",
        "asymptomatic",
        "breathing difficulty",
        "chest pain",
        "chronic obstructive pulmonary disease",
        "cough",
        "cyanosis",
        "fever",
        "pleural effusion",
        "pulmonary tuberculosis",
        "small volume linear shadows",
        # Keep the trailing comma: without it, any string literal added (or uncommented)
        # below would be silently concatenated onto "smoking".
        "smoking",
        # Response: Risk factors for atelectasis include certain types of surgery, muscle relaxation, obesity, high oxygen, and lower lung segments.
        # "high oxygen",  # A cause, not a symptom (has the patient been exposed to high oxygen?). Removed to avoid confusing the LLM.
    ],
    "numerical": [
        "heart rate (per minute)",  # "increased heart rate",
        "oxygen saturation (%)",  # "low oxygen saturation",
    ],
    # 'old age', # already provided in the report.
}

In [8]:
# NOTE(review): this import sits mid-notebook; consider moving it to the first cell with the other imports.
import pandas as pd
# df = pd.read_csv('./spreadsheets/reflacx_clinical.csv')
# Load the rows to annotate; the path is relative to the notebook's working directory.
# NOTE(review): the loaded frame shows "Unnamed: 0*" columns, i.e. the CSV contains index
# columns written by earlier saves — consider dropping them or re-exporting without the index.
sample_df = pd.read_csv("./spreadsheets/gender-age-balance.csv")

In [9]:
# One entry per row of sample_df, in iteration order, for each of the four values
# returned by get_possible_values.
all_prompt, all_res, results, all_reasons = [], [], [], []
for idx, data in sample_df.iterrows():
    # The loop variables are module-level names and stay bound after the loop finishes.
    prompt, res, result, reason = get_possible_values(
        data, identified_keywords, responses=responses
    )
    results.append(result)
    all_reasons.append(reason)
    all_res.append(res)
    all_prompt.append(prompt)

1. anesthesia: True, because anesthesia is listed as a risk factor for atelectasis.
2. asthma: False, because there is no mention of the patient having asthma.
3. asymptomatic: True, because the report states that there is no acute cardiopulmonary abnormality and no lesion found.
4. breathing difficulty: False, because the report states that the lungs are clear and there is no mention of the patient experiencing breathing difficulty.
5. chest pain: False, because the report states that there is no acute cardiopulmonary abnormality and no lesion found.
6. chronic obstructive pulmonary disease: False, because there is no mention of the patient having COPD.
7. cough: False, because there is no mention of the patient having a cough.
8. cyanosis: False, because there is no mention of the patient having cyanosis.
9. fever: False, because there is no mention of the patient having fever.
10. pleural effusion: False, because the report states that there is no pleural effusion.
11. pulmonary tub

In [10]:
# res_text = """
# 1. anesthesia: True. Anesthesia is mentioned as a risk factor for atelectasis in the prior knowledge. However, the patient's report does not provide information about whether anesthesia was used or not during the patient's fall or any subsequent treatment.

# 2. asthma: False. The patient's report does not mention any history or symptoms suggestive of asthma.

# 3. asymptomatic: True. The patient's report does not mention any specific symptoms, therefore suggesting that atelectasis may be asymptomatic in this case.

# 4. breathing difficulty: False. The patient's report does not mention any breathing difficulties.

# 5. chest pain: False. The patient's report does not mention chest pain as a symptom.

# 6. chronic obstructive pulmonary disease: False. The patient's report does not mention a history or symptoms suggestive of chronic obstructive pulmonary disease.

# 7. cough: False. The patient's report does not mention cough as a symptom.

# 8. cyanosis: False. The patient's report does not mention cyanosis as a symptom.

# 9. fever: False. The patient's report does not mention fever as a symptom.

# 10. pleural effusion: False. The patient's report specifically states that no pleural effusion is seen.

# 11. pulmonary tuberculosis: False. The patient's personal relevant history does not mention pulmonary tuberculosis.

# 12. small volume linear shadows: False. The patient's report does not describe any specific findings related to small volume linear shadows.

# 13. smoking: False. The patient's personal relevant history does not mention smoking as a risk factor.
# """

In [11]:
# all_ans = [ans for ans in res_text.split("\n") if len(ans) > 5]
# [ ans.split(".")[0].isdigit() for ans in all_ans]

In [12]:
# values = []
# reasons = []
# for ans in res_text.split("\n"):
#     if len(ans) > 5 and ans.split(".")[0].isdigit():
#             a = ans[3:]  # remove number
#             print(ans)
#             comma_idx = a.index(",")  if "," in a else float('inf')
#             period_idx = a.index(".")  if "." in a else float('inf')
#             idx_splitter = min(comma_idx, period_idx)
#             # idx_splitter = min(a.index(","), a.index("."))
#             boolean_ans = a[a.index(":") + 2 : idx_splitter]
#             reason = a[idx_splitter + 2 :]
#             values.append(boolean_ans.strip().lower() == "true")
#             reasons.append(reason)

In [13]:
# ans = "13. smoking: False, because there is no mention of smoking in the patient's report."
# a = ans[3:]
# idx_splitter = min(a.index(","), a.index("."))
# # boolean_ans = a[a.index(":") + 2 : a.index(",")]

In [14]:
# Show the boolean-feature prompt built for the last processed row. Read it from
# all_prompt instead of the leftover loop variable `prompt`, so the cell depends only
# on the collected results (same access pattern as the all_prompt[0] cells).
print(all_prompt[-1]['boolean'])

SYSTEM:

You are an experienced radiologist with more than 30 years of experience. You are the most respected radiologist in the world. 

You are examining a patient with the following report:

EXAMINATION:  PA AND LATERAL CHEST RADIOGRAPHS  INDICATION:  yearold male with weakness. Evaluate for pneumonia.  TECHNIQUE:  PA and lateral chest radiographs  COMPARISON:  Multiple prior chest radiographs most recent on .  FINDINGS:   Compared with the previous examination there is increased diffuse interstitial opacities with a linear consolidation in the right lower lung compatible with atelectasis. There is also a focal opacity in the left for lower lung and retrocardiac region with associated small pleural effusion better seen in the lateral view.  Calcified granulomas are redemonstrated more prominently in the left apex. Moderate cardiomegaly is stable. Rightward deviation of the trachea is also unchanged.  There is no pneumothorax.  IMPRESSION:   Patchy opacities in the left lower lung an

In [15]:
# Add one column per extracted feature, filled from the per-row result mappings.
for f in identified_keywords['numerical'] + identified_keywords['boolean']:
    # Assigning the list creates (or replaces) the column directly; no pre-fill is needed.
    sample_df[f] = [r[f] for r in results]

In [16]:
# Store the per-row explanations next to the extracted values; each entry is a dict
# keyed by feature name, as the displayed frame shows.
sample_df['reason'] = all_reasons

In [17]:
# Persist the annotated frame. index=False keeps the RangeIndex out of the file; writing
# it is what produces the accumulating "Unnamed: 0", "Unnamed: 0.1", ... columns when
# the CSV is read back without index_col.
# NOTE(review): the file name says "[NoPK]" but the annotation loop passes responses=responses
# (prior knowledge) — confirm the label matches this run.
sample_df.to_csv('[NoPK] neo4j-vector_v3-sys-2.csv', index=False)

In [18]:
# Mean 'oxygen saturation (%)' for rows where 'Atelectasis' is > 0, then for rows where it is <= 0.
tuple(
    sample_df.loc[group, 'oxygen saturation (%)'].mean()
    for group in (sample_df['Atelectasis'] > 0, sample_df['Atelectasis'] <= 0)
)

(93.2, 96.4)

In [19]:
# Mean of the 'chest pain' flag for rows where 'Atelectasis' is > 0, then for rows where it is <= 0.
tuple(
    sample_df.loc[group, 'chest pain'].mean()
    for group in (sample_df['Atelectasis'] > 0, sample_df['Atelectasis'] <= 0)
)

(0.06666666666666667, 0.16923076923076924)

In [20]:
# Mean of the 'fever' flag for rows where 'Atelectasis' is > 0, then for rows where it is <= 0.
tuple(
    sample_df.loc[group, 'fever'].mean()
    for group in (sample_df['Atelectasis'] > 0, sample_df['Atelectasis'] <= 0)
)


(0.26666666666666666, 0.03076923076923077)

In [21]:
# Boolean-feature prompt generated for the first processed row, printed for manual inspection.
print(all_prompt[0]['boolean'])

SYSTEM:

You are an experienced radiologist with more than 30 years of experience. You are the most respected radiologist in the world. 

You are examining a patient with the following report:

HISTORY:  Shortness of breath and cough.  TECHNIQUE:  PA and lateral views of the chest.  COMPARISON:  Chest CTA .  Chest radiograph .  FINDINGS:  The heart size is top normal.  The mediastinal and hilar contours are within normal limits.  The pulmonary vascularity is normal.  The lungs are clear.  No pleural effusion focal consolidation or pneumothorax is present.  No acute osseous abnormalities present.  IMPRESSION:  No acute cardiopulmonary abnormality. LESIONS: No lesion found. AGE: 23. GENDER: Female. 

According to your prior knowledge:

The symptoms associated with atelectasis may include cough (not prominent), chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate. However

In [22]:
# Numerical-feature prompt generated for the first processed row, printed for manual inspection.
print(all_prompt[0]['numerical'])

**Prior Knowledge:**
Question: What are the symptoms associated with atelectasis?
Answer: The symptoms associated with atelectasis may include cough (not prominent), chest pain (not common), breathing difficulty (fast and shallow), low oxygen saturation, pleural effusion (transudate type), cyanosis (late sign), and increased heart rate. However, atelectasis can also be asymptomatic. Fever is not a symptom of atelectasis.
Question: What can cause atelectasis?
Answer: Atelectasis can be caused by various medical conditions, including post-surgical complications, surfactant deficiency, and poor surfactant spreading during inspiration. It can also be caused by blockage of a bronchiole or bronchus, such as by a foreign body, mucus plug, tumor, or compression from the outside. Risk factors for atelectasis include certain types of surgery, muscle relaxation, obesity, high oxygen, lower lung segments, age, chronic obstructive pulmonary disease (COPD), asthma, and type of anesthetic.
Question: 

In [23]:
# Display the annotated frame; pandas elides the middle rows and columns in the rendering.
sample_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id,dicom_id,subject_id,stay_id,study_id,split,eye_tracking_data_discarded,gender,...,chest pain,chronic obstructive pulmonary disease,cough,cyanosis,fever,pleural effusion,pulmonary tuberculosis,small volume linear shadows,smoking,reason
0,432,487,P300R901530,005aff0f-0c236062-06df954a-25ad1874-bcdffcb0,14825395,31814406.0,55805945,train,False,F,...,False,False,False,False,False,False,False,False,False,{'anesthesia': 'because anesthesia is listed a...
1,568,641,P300R134725,24c9417f-d3972f1e-32aabb7a-8123fcd5-a4b15676,16611781,39028206.0,55803695,val,False,F,...,True,False,False,False,False,False,False,False,False,{'anesthesia': 'There is no mention of anesthe...
2,385,433,P300R595769,7932e4ef-4c3c88d1-c495bc63-5f1b75be-1d771705,14254429,31943698.0,54176216,val,False,F,...,True,False,False,False,False,False,False,False,False,{'anesthesia': 'because there is no mention of...
3,99,99,P231R575633,f0358e42-f36d9497-d5b3e082-52837e1e-07bcce84,19458616,32025907.0,54401894,train,False,F,...,False,False,False,False,False,False,False,True,False,{'anesthesia': 'It is possible that the patien...
4,363,408,P300R243430,6734422f-c3e84ec8-1af0cb63-f360c146-1fca2c1e,13880916,36731968.0,59469360,test,False,F,...,False,False,False,False,False,False,False,False,False,{'anesthesia': 'because the patient's report d...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,482,544,P300R394105,4a04164c-bf7a47b2-39273bf3-6f841e34-278431eb,15378103,39748092.0,51347031,train,False,M,...,False,True,False,False,False,True,False,True,False,{'anesthesia': 'because the patient has a hist...
76,582,657,P300R964502,c065eceb-35cfd7ec-585b80e8-40d92614-b9fe29a8,16825821,34630858.0,57002974,train,False,M,...,False,False,False,False,False,False,False,True,True,{'anesthesia': 'because the report does not me...
77,710,796,P300R129481,04d8b146-8f27fd48-e07afc43-464529fc-57350e1b,18615099,32493581.0,59480739,test,False,M,...,False,False,True,False,False,True,False,True,False,"{'anesthesia': 'The patient is post-surgery, s..."
78,576,649,P300R301672,640c7188-935fa336-0f838197-7bfea86d-c34cd19a,16672810,32193400.0,51502011,val,False,M,...,False,False,False,False,False,True,False,True,True,{'anesthesia': 'because the report mentions a ...


In [24]:
# List every column name currently on the frame.
# NOTE(review): the saved output starts at 'Unnamed: 0.1', while the frame display in the
# previous cell shows an 'Unnamed: 0.2' column — the two outputs appear to come from
# different runs; restart and re-run to confirm.
sample_df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'id', 'dicom_id', 'subject_id', 'stay_id',
       'study_id', 'split', 'eye_tracking_data_discarded', 'gender',
       'anchor_age', 'anchor_year', 'dod', 'StudyDate', 'StudyDateTime',
       'StudyTime', 'in_eye_gaze', 'in_reflacx', 'image', 'image_size_x',
       'image_size_y', 'ViewPosition', 'Airway wall thickening', 'Atelectasis',
       'Consolidation', 'Enlarged cardiac silhouette', 'Fibrosis', 'Fracture',
       'Groundglass opacity', 'Other', 'Pneumothorax', 'Pulmonary edema',
       'Quality issue', 'Support devices', 'Wide mediastinum',
       'Abnormal mediastinal contour', 'Acute fracture', 'Enlarged hilum',
       'Hiatal hernia', 'High lung volume / emphysema',
       'Interstitial lung disease', 'Lung nodule or mass',
       'Pleural abnormality', 'Atelectasis_negbio', 'Cardiomegaly_negbio',
       'Consolidation_negbio', 'Edema_negbio',
       'Enlarged Cardiomediastinum_negbio', 'Fracture_negbio',
       'Lung Lesion_negbio', 'Lun