In [1]:
import os
import pandas as pd

from tqdm import tqdm
from transformers import pipeline, set_seed
from transformers import BioGptTokenizer, BioGptForCausalLM
from aug.gpt import *

import warnings
warnings.filterwarnings("ignore")

# Location of the MIMIC-EYE data handed to `aug_df`. Set the MIMIC_EYE_PATH
# environment variable to point at a different copy; when it is unset the
# original Windows path is used, so existing setups behave exactly as before.
MIMIC_EYE_PATH = os.environ.get("MIMIC_EYE_PATH", "F:\\mimic-eye")

# Lesion label columns that are currently enabled; this list is passed to
# `aug_df` further down.
#
# Labels that are switched off (move a name into the list to enable it):
#   group 1: "Fibrosis", "Quality issue", "Wide mediastinum", "Fracture",
#            "Airway wall thickening"
#   group 2: "Hiatal hernia", "Acute fracture", "Interstitial lung disease",
#            "Enlarged hilum", "Abnormal mediastinal contour",
#            "High lung volume / emphysema", "Pneumothorax",
#            "Lung nodule or mass", "Groundglass opacity"
#   trailing: "Support devices"
REFLACX_LESION_LABEL_COLS = [
    "Pulmonary edema",
    "Enlarged cardiac silhouette",
    "Consolidation",
    "Atelectasis",
    "Pleural abnormality",
]


# CheXpert label column names: each finding name followed by the shared
# "_chexpert" suffix, in the same order as before.
CHEXPERT_LABEL_COLS = [
    f"{finding}_chexpert"
    for finding in (
        "Atelectasis",
        "Cardiomegaly",
        "Consolidation",
        "Edema",
        "Enlarged Cardiomediastinum",
        "Fracture",
        "Lung Lesion",
        "Lung Opacity",
        "No Finding",
        "Pleural Effusion",
        "Pleural Other",
        "Pneumonia",
        "Pneumothorax",
        "Support Devices",
    )
]


In [2]:
from transformers import AutoTokenizer, AutoModel, DistilBertForMaskedLM, BertForMaskedLM, LlamaForCausalLM, LlamaTokenizer, LlamaForCausalLM
import torch

# Text-generation pipeline backed by the zephyr-7b-beta checkpoint, loaded in
# bfloat16 with automatic device placement. The seed is set to 0 right after
# the pipeline is created.
generator = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
set_seed(0)

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [3]:
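# SECURITY NOTE: a literal Hugging Face access token was hardcoded in the two lines below.
# It has been redacted; treat the old token as leaked, revoke it, and supply a fresh one via the HF_TOKEN environment variable.
# tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", token=os.environ["HF_TOKEN"])
# model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", token=os.environ["HF_TOKEN"]) #, device_map='auto', torch_dtype=torch.float16)
# model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")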

In [4]:
# One-off check of the pipeline on a single hand-written prompt that stops
# right before the temperature value: draw 50 sampled continuations of at most
# 10 new tokens each and display them as the cell output.
generator(
    "A 56 years old Female patient diagnosed with Pulmonary edema Enlarged cardiac silhouette Consolidation Atelectasis Pleural abnormalit. And This patient has the radiology report: INDICATION:  Fever.  Assess for pneumonia.  COMPARISONS:   and .  FINDINGS:  Portable upright view of the chest demonstrates low lung volumes.  The study is somewhat limited due to patient's body habitus.  Hilar and mediastinal silhouettes are unchanged.  Intrathoracic aorta is tortuous.  Heart is mildly enlarged unchanged.  Linear opacity in the left lung base is longstanding and likely represents an area of scarring.  There is mild pulmonary edema.  Bibasilar opacities are noted which may reflect atelectasis.  A fixation hardware overlying of the right humerus is noted.  IMPRESSION:  Mild cardiomegaly and pulmonary edema.  Bibasilar opacities likely atelectasis or infection in the appropriate clinical setting. The body temperature in degrees Celsius of this patient is around",
    do_sample=True,
    num_return_sequences=50,
    max_new_tokens=10,
)

[{'generated_text': "A 56 years old Female patient diagnosed with Pulmonary edema Enlarged cardiac silhouette Consolidation Atelectasis Pleural abnormalit. And This patient has the radiology report: INDICATION:  Fever.  Assess for pneumonia.  COMPARISONS:   and .  FINDINGS:  Portable upright view of the chest demonstrates low lung volumes.  The study is somewhat limited due to patient's body habitus.  Hilar and mediastinal silhouettes are unchanged.  Intrathoracic aorta is tortuous.  Heart is mildly enlarged unchanged.  Linear opacity in the left lung base is longstanding and likely represents an area of scarring.  There is mild pulmonary edema.  Bibasilar opacities are noted which may reflect atelectasis.  A fixation hardware overlying of the right humerus is noted.  IMPRESSION:  Mild cardiomegaly and pulmonary edema.  Bibasilar opacities likely atelectasis or infection in the appropriate clinical setting. The body temperature in degrees Celsius of this patient is around …….\nHow woul

In [5]:
# Clinical feature columns to augment; passed to `aug_df`, whose progress
# output reports them in this order.
features_to_aug = ["temperature_c", "heartrate", "resprate", "o2sat", "sbp", "dbp"]

# Human-readable wording for each feature column, keyed by column name; passed
# to `aug_df` alongside `features_to_aug`.
feature_to_name_map = dict(
    temperature_c="body temperature in degrees Celsius",
    heartrate="heart rate in beats per minute",
    resprate="respiratory rate in breaths per minute",
    o2sat="peripheral oxygen saturation (%)",
    sbp="systolic blood pressure (mmHg)",
    dbp="diastolic blood pressure (mmHg)",
)

In [6]:
# import numpy as np

# 799 - np.sum(df['aug_resprate'].isna())

In [7]:
# Run the augmentation once per prompt style (report_format=True, then False).
# Each pass reloads the clinical spreadsheet from disk, so the two runs are
# independent, and each result is written to its own CSV.
for report_format in (True, False):
    df = pd.read_csv("./spreadsheets/reflacx_clinical.csv")

    # Derive a Celsius column from `temperature` (Fahrenheit -> Celsius),
    # computed on the whole column at once instead of per element.
    df["temperature_c"] = (df["temperature"] - 32) * 5 / 9

    df = aug_df(
        MIMIC_EYE_PATH,
        REFLACX_LESION_LABEL_COLS,
        features_to_aug,
        feature_to_name_map,
        df,
        generator,
        progress=[15],  # NOTE(review): meaning of `progress` is defined in aug.gpt — confirm
        report_format=report_format,
    )

    # Convert the generated Celsius values back to Fahrenheit. Entries that
    # are None (no value produced) stay None, so this remains an element-wise
    # apply rather than column arithmetic.
    df["aug_temperature"] = df["aug_temperature_c"].apply(
        lambda c: (c * 1.8) + 32 if c is not None else None
    )

    output_csv = (
        "./spreadsheets/zephyr_aug_report.csv"
        if report_format
        else "./spreadsheets/zephyr_aug_text.csv"
    )
    # Written with pandas' default settings, i.e. the index column is included.
    df.to_csv(output_csv)

Resolving temperature_c


100%|██████████| 799/799 [08:45<00:00,  1.52it/s]


Resolving heartrate


100%|██████████| 799/799 [08:34<00:00,  1.55it/s]


Resolving resprate


100%|██████████| 799/799 [08:58<00:00,  1.48it/s]


Resolving o2sat


100%|██████████| 799/799 [08:34<00:00,  1.55it/s]


Resolving sbp


100%|██████████| 799/799 [09:22<00:00,  1.42it/s]


Resolving dbp


100%|██████████| 799/799 [10:19<00:00,  1.29it/s]


In [8]:
# NOTE(review): presumably a deliberate guard — raising here halts a "Run All"
# before the cells that follow, which hold only commented-out experiments.
# Confirm before removing.
raise StopIteration()

StopIteration: 

In [None]:
# df['temperature_c'] = df['temperature'].apply(lambda f :(f-32) * 5/9 )
# df = aug_df(MIMIC_EYE_PATH, REFLACX_LESION_LABEL_COLS, features_to_aug, feature_to_name_map, df, generator, progress=[1, 5, 25, 50], report_format=report_format)
# df["aug_temperature"] = df["aug_temperature_c"].apply(lambda c: (c*1.8)+32)

Resolving temperature_c


 13%|█▎        | 104/799 [10:04<55:23,  4.78s/it]  

In [None]:
# df["aug_temperature"] = df["aug_temperature_c"].apply(lambda c: (c*1.8)+32)
# if report_format:
#     df.to_csv('./spreadsheets/llama2_aug_report.csv')
# else:
#     df.to_csv('./spreadsheets/llama2_aug_text.csv')

In [None]:
# # df = aug_df(MIMIC_EYE_PATH, REFLACX_LESION_LABEL_COLS, features_to_aug, feature_to_name_map, df, generator, progress=[1, 5, 25, 50], report_format=report_format)
# aug_feature_range = {f: (df[f].min(), df[f].max()) for f in features_to_aug}

# for f in features_to_aug:
#     df[f"aug_{f}"] = None

# for f in features_to_aug:
#     print(f"Resolving {f}")
#     # aug the instance one by one
#     for idx, data in tqdm(df.iterrows(), total=df.shape[0]):
#         prompt = get_prompt_for_mask(
#             MIMIC_EYE_PATH,
#             data,
#             REFLACX_LESION_LABEL_COLS,
#             feature_to_name_map,
#             f,
#             report_format=report_format,
#         )


#         v = get_generated_value(
#             mask_filler, prompt, aug_feature_range[f], top_k=100,
#         )
#         if v is None:
#             print(
#                 f"Couldn't find value for [{idx}] prompt: {prompt}"
#             )


#         df.at[idx, f"aug_{f}"] = v

Resolving temperature_c


  0%|          | 2/799 [00:00<01:09, 11.44it/s]

Couldn't find value for [0] prompt: INDICATION:  Central venous line placement.  TECHNIQUE:  Frontal chest radiograph.  COMPARISON:  Chest radiograph 12:42 today.  FINDINGS:   A right subclavian catheter has been placed in the interim. The catheter terminates at the confluence of the brachiocephalic vein and superior vena cava and if indicated could be advanced 3.7 cm for termination within the low SVC.  There is no pleural effusion or pneumothorax. The cardiac silhouette remains mildly enlarged. There is no focal airspace consolidation worrisome for pneumonia.  High density material is again seen in the paritally imaged colon in the left abdomen. Cholecystectomy clips are noted. There are carotid calcifications left greater than right. LESIONS: Enlarged cardiac silhouett. AGE: 69. GENDER: Female. body temperature in degrees Celsius: [MASK]
Couldn't find value for [1] prompt: INDICATION:  Central venous line placement.  TECHNIQUE:  Frontal chest radiograph.  COMPARISON:  Chest radiogra

  1%|          | 4/799 [00:00<01:07, 11.83it/s]

Couldn't find value for [4] prompt: INDICATION:  Central venous line placement.  TECHNIQUE:  Frontal chest radiograph.  COMPARISON:  Chest radiograph 12:42 today.  FINDINGS:   A right subclavian catheter has been placed in the interim. The catheter terminates at the confluence of the brachiocephalic vein and superior vena cava and if indicated could be advanced 3.7 cm for termination within the low SVC.  There is no pleural effusion or pneumothorax. The cardiac silhouette remains mildly enlarged. There is no focal airspace consolidation worrisome for pneumonia.  High density material is again seen in the paritally imaged colon in the left abdomen. Cholecystectomy clips are noted. There are carotid calcifications left greater than right. LESIONS: Enlarged cardiac silhouett. AGE: 69. GENDER: Female. body temperature in degrees Celsius: [MASK]


  2%|▏         | 12/799 [00:00<01:00, 13.07it/s]

Couldn't find value for [9] prompt: EXAMINATION:  CHEST PA AND LAT  INDICATION:  History: M with chest pain  TECHNIQUE:  Chest PA and lateral  COMPARISON:    FINDINGS:   The patient is status post median sternotomy and CABG.  Cardiac mediastinal and hilar contours are unchanged with the heart size within normal limits. Minimal atherosclerotic calcifications are noted at the aortic knob. Pulmonary vasculature is normal. Calcified granuloma is seen within the right apex.  Lungs are clear. Pulmonary vasculature is normal. No pleural effusion or pneumothorax is present. Minimal degenerative changes are seen within the thoracic spine.  IMPRESSION:   No acute cardiopulmonary abnormality. LESIONS: Enlarged cardiac silhouett. AGE: 73. GENDER: Male. body temperature in degrees Celsius: [MASK]


  5%|▌         | 42/799 [00:03<01:04, 11.81it/s]

Couldn't find value for [39] prompt: CHEST RADIOGRAPH PERFORMED ON   COMPARISON:  Outside hospital chest radiograph from earlier today from the outside hospital.  CLINICAL HISTORY:  NG tube placement assess position.  FINDINGS:  In this patient with a large hiatal hernia the NG tube is seen coiled in the left upper abdomen though its tip extends superiorly into the intrathoracic portion of the stomach.  Basilar atelectasis and tiny effusions are better assessed on outside hospital CT from earlier today.  The upper lungs remain well aerated.  Heart size cannot be assessed.  Bony structures are intact with degenerative changes at the right AC joint.  Free air below the right hemidiaphragm is seen.  IMPRESSION:  NG tube coiled within the stomach with its tip in the intrathoracic portion of the stomach. LESIONS: Consolidation Atelectasis Pleural abnormalit. AGE: 89. GENDER: Female. body temperature in degrees Celsius: [MASK]
Couldn't find value for [40] prompt: CHEST RADIOGRAPH PERFORMED O

  6%|▌         | 46/799 [00:03<00:57, 13.17it/s]

Couldn't find value for [42] prompt: CHEST RADIOGRAPH PERFORMED ON   COMPARISON:  Outside hospital chest radiograph from earlier today from the outside hospital.  CLINICAL HISTORY:  NG tube placement assess position.  FINDINGS:  In this patient with a large hiatal hernia the NG tube is seen coiled in the left upper abdomen though its tip extends superiorly into the intrathoracic portion of the stomach.  Basilar atelectasis and tiny effusions are better assessed on outside hospital CT from earlier today.  The upper lungs remain well aerated.  Heart size cannot be assessed.  Bony structures are intact with degenerative changes at the right AC joint.  Free air below the right hemidiaphragm is seen.  IMPRESSION:  NG tube coiled within the stomach with its tip in the intrathoracic portion of the stomach. LESIONS: Consolidation Atelectasi. AGE: 89. GENDER: Female. body temperature in degrees Celsius: [MASK]
Couldn't find value for [43] prompt: CHEST RADIOGRAPH PERFORMED ON   COMPARISON:  Out

  9%|▉         | 70/799 [00:05<00:59, 12.20it/s]

Couldn't find value for [68] prompt: WET READ:    5:25 PM  A 1.2 cm rounded opacity just inferior to the right scapular border is of  uncertain etiology and may be external to the patient for which shallow  oblique radiographs or CT chest is recommended to evaluate for pulmonary  nodule.            ED URGENT ATTENTION  WET READ VERSION     4:08 PM  A 1.2 cm rounded opacity just inferior to the right scapular border is of  uncertain etiology and may be external to the patient for which shallow  oblique radiographs or CT chest is recommended to evaluate for pulmonary  nodule.                                   EXAMINATION:  Chest:  Frontal and lateral views  INDICATION:  History: F with painless jaundice fevers cough   evaluate for masses pneumonia  TECHNIQUE:  Chest:  Frontal and Lateral  COMPARISON:  None.  FINDINGS:   There is a 1.2 cm rounded opacity just inferior to the right scapular border.  No pleural effusion or pneumothorax is seen. The cardiac and mediastinal silhouettes are un

  9%|▉         | 72/799 [00:05<01:07, 10.81it/s]

Couldn't find value for [70] prompt: WET READ:    5:25 PM  A 1.2 cm rounded opacity just inferior to the right scapular border is of  uncertain etiology and may be external to the patient for which shallow  oblique radiographs or CT chest is recommended to evaluate for pulmonary  nodule.            ED URGENT ATTENTION  WET READ VERSION     4:08 PM  A 1.2 cm rounded opacity just inferior to the right scapular border is of  uncertain etiology and may be external to the patient for which shallow  oblique radiographs or CT chest is recommended to evaluate for pulmonary  nodule.                                   EXAMINATION:  Chest:  Frontal and lateral views  INDICATION:  History: F with painless jaundice fevers cough   evaluate for masses pneumonia  TECHNIQUE:  Chest:  Frontal and Lateral  COMPARISON:  None.  FINDINGS:   There is a 1.2 cm rounded opacity just inferior to the right scapular border.  No pleural effusion or pneumothorax is seen. The cardiac and mediastinal silhouettes are un

  9%|▉         | 74/799 [00:05<01:10, 10.22it/s]

Couldn't find value for [72] prompt: WET READ:    5:25 PM  A 1.2 cm rounded opacity just inferior to the right scapular border is of  uncertain etiology and may be external to the patient for which shallow  oblique radiographs or CT chest is recommended to evaluate for pulmonary  nodule.            ED URGENT ATTENTION  WET READ VERSION     4:08 PM  A 1.2 cm rounded opacity just inferior to the right scapular border is of  uncertain etiology and may be external to the patient for which shallow  oblique radiographs or CT chest is recommended to evaluate for pulmonary  nodule.                                   EXAMINATION:  Chest:  Frontal and lateral views  INDICATION:  History: F with painless jaundice fevers cough   evaluate for masses pneumonia  TECHNIQUE:  Chest:  Frontal and Lateral  COMPARISON:  None.  FINDINGS:   There is a 1.2 cm rounded opacity just inferior to the right scapular border.  No pleural effusion or pneumothorax is seen. The cardiac and mediastinal silhouettes are un

 10%|▉         | 76/799 [00:06<01:13,  9.81it/s]

Couldn't find value for [74] prompt: HISTORY:  A yearold female with the recent aortic valve replacement now presenting with increasing cough.  COMPARISON:  Recent chest radiograph from  and PA and lateral chest radiographs from .  AP SEMIERECT AND LATERAL CHEST RADIOGRAPH:  Evaluation is limited due to significant patient rotation to the right.  Median sternotomy wires appear intact.  Moderately severe enlargement of the cardiac silhouette appears grossly unchanged.  Evaluation of the mediastinum is limited due to the significant patient rotation.  There is mild interstitial pulmonary edema. The appearance is similar to recent portable radiograph from .  Blunting of the bilateral costophrenic angles is chronic and are compatible with persistent small pleural effusions.  No confluent consolidation or pneumothorax is present.  Evidence of prior vertebroplasty in the mid thoracic spine is unchanged from prior.  IMPRESSION: 1.  Limited examination due to poor patient positioning. 2.  Mild

 10%|█         | 80/799 [00:06<01:04, 11.18it/s]

Couldn't find value for [76] prompt: HISTORY:  A yearold female with the recent aortic valve replacement now presenting with increasing cough.  COMPARISON:  Recent chest radiograph from  and PA and lateral chest radiographs from .  AP SEMIERECT AND LATERAL CHEST RADIOGRAPH:  Evaluation is limited due to significant patient rotation to the right.  Median sternotomy wires appear intact.  Moderately severe enlargement of the cardiac silhouette appears grossly unchanged.  Evaluation of the mediastinum is limited due to the significant patient rotation.  There is mild interstitial pulmonary edema. The appearance is similar to recent portable radiograph from .  Blunting of the bilateral costophrenic angles is chronic and are compatible with persistent small pleural effusions.  No confluent consolidation or pneumothorax is present.  Evidence of prior vertebroplasty in the mid thoracic spine is unchanged from prior.  IMPRESSION: 1.  Limited examination due to poor patient positioning. 2.  Mild

 10%|█         | 82/799 [00:06<00:59, 12.08it/s]

Couldn't find value for [80] prompt: EXAMINATION:  CHEST PA AND LAT  INDICATION:  History: F being treated for pneumonia. Eval for effusion PNA  TECHNIQUE:  Chest PA and lateral  COMPARISON:  None  FINDINGS:   Prominence of the cardiac contour is likely due to prominent mediastinal fat obscuring the left lung base on the PA view. Heart size is top normal. No evidence of pleural effusion on the lateral view. Lungs are mildly hyperexpanded.  IMPRESSION:   No evidence for current pneumonia. Hyperexpanded but clear lungs.  No pleural effusions. LESIONS: Enlarged cardiac silhouett. AGE: 66. GENDER: Female. body temperature in degrees Celsius: [MASK]
Couldn't find value for [81] prompt: EXAMINATION:  CHEST PA AND LAT  INDICATION:  History: F being treated for pneumonia. Eval for effusion PNA  TECHNIQUE:  Chest PA and lateral  COMPARISON:  None  FINDINGS:   Prominence of the cardiac contour is likely due to prominent mediastinal fat obscuring the left lung base on the PA view. Heart size is t

 12%|█▏        | 94/799 [00:07<00:49, 14.26it/s]

In [None]:
# df["aug_temperature"] = df["aug_temperature_c"].apply(lambda c: (c*1.8)+32)
# if report_format:
#     df.to_csv('./spreadsheets/bcb_aug_report.csv')
# else:
#     df.to_csv('./spreadsheets/bcb_aug_text.csv')