In [1]:
import os
import pandas as pd

from tqdm import tqdm
from transformers import pipeline, set_seed
from transformers import BioGptTokenizer, BioGptForCausalLM
from aug.reflacx import *

# Location of the MIMIC-EYE data on disk (presumably the dataset root — confirm
# against aug_df, which receives this value as its first argument).
# The MIMIC_EYE_PATH environment variable overrides the default so the notebook
# is not tied to one machine's drive layout; when it is unset the original
# Windows path is used unchanged.
MIMIC_EYE_PATH = os.environ.get("MIMIC_EYE_PATH", "F:\\mimic-eye")

# REFLACX lesion labels that are passed to aug_df in the augmentation cell.
# Only the uncommented entries are active; the commented-out names are labels
# that are currently excluded and are kept here so they can be re-enabled.
# NOTE(review): the meaning of the "######" grouping is not visible in this
# notebook — confirm with the author before relying on it.
REFLACX_LESION_LABEL_COLS = [
    # "Fibrosis",
    # "Quality issue",
    # "Wide mediastinum",
    # "Fracture",
    # "Airway wall thickening",

    ######################
    # "Hiatal hernia",
    # "Acute fracture",
    # "Interstitial lung disease",
    # "Enlarged hilum",
    # "Abnormal mediastinal contour",
    # "High lung volume / emphysema",
    # "Pneumothorax",
    # "Lung nodule or mass",
    # "Groundglass opacity",
    ######################
    "Pulmonary edema",
    "Enlarged cardiac silhouette",
    "Consolidation",
    "Atelectasis",
    "Pleural abnormality",
    # "Support devices",
]


# CheXpert label names, each carrying the shared "_chexpert" suffix.
# The list is not referenced by the cells visible in this notebook
# (presumably these are column names of the clinical spreadsheet — confirm).
CHEXPERT_LABEL_COLS = [
    f"{finding}_chexpert"
    for finding in (
        "Atelectasis",
        "Cardiomegaly",
        "Consolidation",
        "Edema",
        "Enlarged Cardiomediastinum",
        "Fracture",
        "Lung Lesion",
        "Lung Opacity",
        "No Finding",
        "Pleural Effusion",
        "Pleural Other",
        "Pneumonia",
        "Pneumothorax",
        "Support Devices",
    )
]


In [2]:
# Load the pretrained "microsoft/biogpt" tokenizer and causal language model,
# then wrap both in a Hugging Face text-generation pipeline.
tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
model = BioGptForCausalLM.from_pretrained("microsoft/biogpt")
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Fix the random seed so that sampling-based generation is repeatable.
set_seed(0)

In [11]:
# Exploratory prompt: sample ten continuations from BioGPT.
# The seed is re-applied inside this cell so that re-running the cell on its
# own reproduces the same samples; under Restart & Run All this is identical
# to the single set_seed(0) call made right after the pipeline is built.
# NOTE(review): the prompt contains a grammatical slip ("radiologists is") and
# ends with a trailing space. It is left byte-for-byte unchanged because any
# edit to the prompt changes what the model generates.
set_seed(0)
outputs = generator(
    "When radiologists is reading the x-ray image and making diagnosis, they need some clinical features about the patients, including ",
    max_length=1024,  # upper bound on total length (prompt + generated tokens)
    num_return_sequences=10,
    do_sample=True,  # stochastic sampling, hence the explicit seed above
)

In [10]:
# Display the generated sequences returned by the pipeline call.
# NOTE(review): the saved output below does not start with the prompt used in
# the generation cell and the execution counts are out of order
# (In [11] before In [10]), so it looks like output from an earlier prompt —
# re-run both cells in order before relying on it.
outputs

[{'generated_text': "what are the clinical featuress, like gender and age, and blood presssure, radiologits need while they're reading x-ray image and making diagnosis?"},
 {'generated_text': "what are the clinical featuress, like gender and age, and blood presssure, radiologits need while they're reading x-ray image and making diagnosis?"},
 {'generated_text': "what are the clinical featuress, like gender and age, and blood presssure, radiologits need while they're reading x-ray image and making diagnosis?"},
 {'generated_text': "what are the clinical featuress, like gender and age, and blood presssure, radiologits need while they're reading x-ray image and making diagnosis?"},
 {'generated_text': "what are the clinical featuress, like gender and age, and blood presssure, radiologits need while they're reading x-ray image and making diagnosis?"},
 {'generated_text': 'what are the clinical featuress, like gender and age, and blood presssure, radiologits need while they\'re reading x-ra

In [None]:
# Load the REFLACX clinical spreadsheet that the augmentation cell works on.
df = pd.read_csv(
    filepath_or_buffer="./spreadsheets/reflacx_clinical.csv",
)

In [None]:
# Descriptive phrase for each clinical feature column, keyed by column name.
# Both this mapping and the list below are passed to aug_df.
feature_to_name_map = {
    "temperature_c": "body temperature in degrees Celsius",
    "heartrate": "heart rate in beats per minute",
    "resprate": "respiratory rate in breaths per minute",
    "o2sat": "peripheral oxygen saturation (%)",
    "sbp": "systolic blood pressure (mmHg)",
    "dbp": "diastolic blood pressure (mmHg)",
}

# Columns to augment: exactly the keys of the mapping, in insertion order.
features_to_aug = list(feature_to_name_map)

In [None]:
# Switch forwarded to aug_df as `report_format`; it also decides which output
# CSV is written at the end (…_aug_report.csv when True, …_aug_text.csv otherwise).
report_format = True

In [None]:
# Derive a Celsius column from `temperature` using the Fahrenheit -> Celsius
# formula. Vectorized Series arithmetic replaces the row-wise .apply(lambda ...)
# and yields the same values.
df["temperature_c"] = (df["temperature"] - 32) * 5 / 9

# Run the augmentation over the selected clinical features using the BioGPT
# pipeline. aug_df comes from aug.reflacx; its internals are not visible here.
# NOTE(review): the meaning of progress=[1, 5, 25, 50] is defined by aug_df —
# confirm before changing.
df = aug_df(
    MIMIC_EYE_PATH,
    REFLACX_LESION_LABEL_COLS,
    features_to_aug,
    feature_to_name_map,
    df,
    generator,
    progress=[1, 5, 25, 50],
    report_format=report_format,
)

# Convert the augmented Celsius values back with the inverse formula.
# `aug_temperature_c` is presumably added by aug_df — confirm.
df["aug_temperature"] = df["aug_temperature_c"] * 1.8 + 32

In [None]:
# Write the augmented frame to the spreadsheet matching the chosen format:
# the "report" file when report_format is truthy, the "text" file otherwise.
df.to_csv(
    './spreadsheets/reflacx_clinical_aug_report.csv'
    if report_format
    else './spreadsheets/reflacx_clinical_aug_text.csv'
)