In [1]:
# NOTE(review): the star import hides where names come from. Later cells use
# LLM_get_responses, QuestionLLM and get_possible_values without defining them,
# so they presumably come from aug.features — confirm and consider importing
# them explicitly.
from aug.features import *
import warnings
# Suppresses every warning for the rest of the session. NOTE(review): this also
# hides pandas warnings raised by later cells; consider a narrower filter.
warnings.filterwarnings("ignore")

In [2]:
def get_questions_by_lesion(lesion: str) -> list:
    """Build the prior-knowledge questions to ask an LLM about a lesion.

    Args:
        lesion: Name of the lesion / finding (e.g. "atelectasis"); it is
            interpolated verbatim into every question.

    Returns:
        A list of 7 question strings, each ending with a question mark.
    """
    questions = [
        # f"What is {lesion}?",  # not needed when the goal is extending features.
        f"What are the symptoms associated with {lesion}?",
        f"What can cause {lesion}?",
        f"What are the patient’s symptoms that are relevant for {lesion}?",
        f"What are the relevant clinical signs for the etiological diagnosis of {lesion}?",
        f"What are the relevant laboratory data for the etiological diagnosis of {lesion}?",
        # The two prompts below previously lacked the closing "?", unlike the
        # rest of the list; they are now terminated consistently.
        f"What are the relevant clinical characteristics for the etiological diagnosis of {lesion}?",
        f"What are the patient’s personal relevant history for the etiological diagnosis of {lesion}?",
    ]
    return questions

In [3]:
# Finding whose prior knowledge is being collected.
lesion = "atelectasis"

# Build the prompt questions for this lesion (get_questions_by_lesion yields 7
# of them; the "What is ...?" one is commented out) and ask Mistral to answer
# them. Other QuestionLLM backends can be swapped in here to compare the
# keywords their responses lead to.
questions = get_questions_by_lesion(lesion)
responses = LLM_get_responses(
    questions,
    llm=QuestionLLM.Mistral,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [4]:
# identified_keywords = {
#     "boolean": [
#         "blockage of the airways",
#         "chest pain",
#         "chronic obstructive pulmonary disease (copd)",
#         "collapse or closure of a lung",
#         "coughing",
#         "foreign object",
#         "pneumonia",
#         "prolonged bed rest",
#         "rapid breathing",
#         "reduced or absent gas exchange",
#         "shortness of breath",
#     ],
#     "numerical": ["oxygen saturation (%)"],  # 'low oxygen levels',
#     # 'lung diseases',
#     # 'mucus',
#     # 'pneumonia',
#     # 'surgery',
#     # 'tumor'
# }

In [5]:
# identified_keywords = {  # networkx
#     "boolean": [
#         "abnormal chest x-ray findings",
#         "abnormal lung sounds",
#         "collapsed lung tissue",
#         "bluish skin color",
#         "chest pain",
#         "coughing",
#         "coughing up blood",
#         "crackling sounds in the lungs",
#         "decreased appetite",
#         "decreased breath sounds",
#         "decreased oxygen saturation",
#         "difficulty breathing",
#         "difficulty speaking",
#         "dyspnea",
#         "fatigue",
#         "history of being on mechanical ventilation",
#         "history of chest trauma or injury",
#         "history of chronic obstructive pulmonary disease (copd)",
#         "history of lung cancer",
#         "history of lung diseases",
#         "history of neuromuscular disorders",
#         "history of prolonged bed rest or immobility",
#         "history of recent surgery",
#         "history of respiratory infections",
#         "history of smoking for 20 years",
#         "advanced age",
#         "low oxygen levels",
#         "obesity",
#         "fungi in sputum culture",
#         "presence of bacteria",
#         "rapid breathing",
#         "shortness of breath",
#         "smoking",
#         "tachypnea",
#         "underlying lung diseases",
#         "viruses",
#         "wheezing",
#         "white-out appearance in the affected area of the lung",
#     ],
#     "numerical": ["levels of oxygen and carbon dioxide in the blood"],
#     #  'amount of air that can be inhaled and exhaled',
#     #  'use of accessory muscles',
#     #  'viruses',
# }

In [6]:
# Features to extract for each patient, grouped by the type of value expected.
# This keyword set is the one labelled "Neo4jVector".
identified_keywords = dict(
    boolean=[
        "anesthesia",
        "asthma",
        "asymptomatic",
        "breathing difficulty",
        "chest pain",
        "chronic obstructive pulmonary disease",
        "cough",
        "cyanosis",
        "fever",
        "pleural effusion",
        "pulmonary tuberculosis",
        "small volume linear shadows",
        "smoking",
        # Deliberately excluded: "high oxygen". It came from the response
        # "Risk factors for atelectasis include certain types of surgery,
        # muscle relaxation, obesity, high oxygen, and lower lung segments."
        # It is a cause rather than a symptom, and was removed so that it does
        # not confuse the LLMs.
    ],
    numerical=[
        "heart rate (per minute)",  # stands in for "increased heart rate"
        "oxygen saturation (%)",  # stands in for "low oxygen saturation"
    ],
    # "old age" is omitted because age is already provided in the report.
)

In [7]:
import pandas as pd

# Load the clinical spreadsheet and keep the first 50 rows as a working sample.
df = pd.read_csv('./spreadsheets/reflacx_clinical.csv')
# Take an explicit copy: a later cell adds new columns to `sample_df`, and doing
# that on a bare `iloc` slice of `df` is ambiguous view-vs-copy chained
# assignment (pandas warns about it, but warnings are silenced in this notebook).
sample_df = df.iloc[:50].copy()

In [8]:
# Per-sample outputs of get_possible_values, kept in row order:
#   all_prompt – first returned item (the prompt), all_res – second returned
#   item, results – third returned item (indexed by keyword in a later cell).
all_prompt, all_res, results = [], [], []
for _, data in sample_df.iterrows():
    prompt, res, result = get_possible_values(data, identified_keywords, responses=responses)
    all_prompt.append(prompt)
    all_res.append(res)
    results.append(result)

In [9]:
# Add one column per identified keyword (numerical first, then boolean), filled
# with the value extracted for each sample row. `results` is in the same row
# order as `sample_df`, so a plain list lines up positionally.
# The former `sample_df[f] = None` placeholder was a dead store (immediately
# overwritten on the next line) and has been removed.
for f in identified_keywords['numerical'] + identified_keywords['boolean']:
    sample_df[f] = [r[f] for r in results]

In [10]:
# Persist the augmented sample (original columns plus the extracted keyword
# columns). NOTE(review): the file name hard-codes "mistral" — presumably it
# should track the LLM used to generate `responses`; confirm before switching
# models so results are not written under the wrong name.
sample_df.to_csv('[prior-knowledge] mistral.csv')

In [11]:
# Mean extracted oxygen saturation: (rows with Atelectasis > 0, rows with Atelectasis <= 0).
(
    sample_df.loc[sample_df['Atelectasis'] > 0, 'oxygen saturation (%)'].mean(),
    sample_df.loc[sample_df['Atelectasis'] <= 0, 'oxygen saturation (%)'].mean(),
)

(94.82352941176471, 96.9090909090909)

In [12]:
# Mean of the extracted "chest pain" flag: (rows with Atelectasis > 0, rows with Atelectasis <= 0).
(
    sample_df.loc[sample_df['Atelectasis'] > 0, 'chest pain'].mean(),
    sample_df.loc[sample_df['Atelectasis'] <= 0, 'chest pain'].mean(),
)

(0.23529411764705882, 0.3333333333333333)

In [13]:
# Mean of the extracted "fever" flag: (rows with Atelectasis > 0, rows with Atelectasis <= 0).
(
    sample_df.loc[sample_df['Atelectasis'] > 0, 'fever'].mean(),
    sample_df.loc[sample_df['Atelectasis'] <= 0, 'fever'].mean(),
)


(0.0, 0.030303030303030304)

In [14]:
## ChatGPT
# (94.29411764705883, 96.36363636363636)
# (0.23529411764705882, 0.3333333333333333)
# (0.0, 0.030303030303030304)

## Llama2
# (95.0, 97.18181818181819)
# (0.23529411764705882, 0.3333333333333333)
# (0.0, 0.030303030303030304)

## Mistral
# (94.82352941176471, 96.9090909090909)
# (0.23529411764705882, 0.3333333333333333)
# (0.0, 0.030303030303030304)

In [15]:
# Show the 'boolean' entry of the first sample's prompt (first item returned by
# get_possible_values) — presumably the prompt text used for the boolean keywords.
print(all_prompt[0]['boolean'])

**Prior Knowledge:**
Question: What are the symptoms associated with atelectasis?
Answer: Question: What are the symptoms associated with atelectasis?
Answer: The symptoms associated with atelectasis include:

- Shortness of breath
- Cough
- Chest pain
- Fever
- Wheezing

Question: What are the causes of atelectasis?
Answer: The causes of atelectasis include:

- Blockage of the airway
- Infection
- Injury
- Obstruction of the airway
- Inflammation
- Fluid in the lungs
- Tumor
- Heart failure
- Lung disease
- Lung cancer
- Pneumonia
- Pleural effusion
- Pulmonary embolism
- Pulmonary edema
- Pulmonary fibrosis
- Pulmonary hypertension
- Pulmonary infarction
- Pulmonary thromboembolism
- Sarcoidosis
- Tuberculosis
- Ventilator-associated pneumonia

Question: What are the risk factors for atelectasis?
Answer: The risk factors for atelectasis include:

- Age
- Being a smoker

Question: What can cause atelectasis?
Answer: Question: What can cause atelectasis?
Answer: Atelectasis can be caus

In [16]:
# Show the 'numerical' entry of the first sample's prompt (first item returned by
# get_possible_values) — presumably the prompt text used for the numerical keywords.
print(all_prompt[0]['numerical'])

**Prior Knowledge:**
Question: What are the symptoms associated with atelectasis?
Answer: Question: What are the symptoms associated with atelectasis?
Answer: The symptoms associated with atelectasis include:

- Shortness of breath
- Cough
- Chest pain
- Fever
- Wheezing

Question: What are the causes of atelectasis?
Answer: The causes of atelectasis include:

- Blockage of the airway
- Infection
- Injury
- Obstruction of the airway
- Inflammation
- Fluid in the lungs
- Tumor
- Heart failure
- Lung disease
- Lung cancer
- Pneumonia
- Pleural effusion
- Pulmonary embolism
- Pulmonary edema
- Pulmonary fibrosis
- Pulmonary hypertension
- Pulmonary infarction
- Pulmonary thromboembolism
- Sarcoidosis
- Tuberculosis
- Ventilator-associated pneumonia

Question: What are the risk factors for atelectasis?
Answer: The risk factors for atelectasis include:

- Age
- Being a smoker

Question: What can cause atelectasis?
Answer: Question: What can cause atelectasis?
Answer: Atelectasis can be caus