In [None]:
import os
import json
import openai


In [2]:
# Dataset tag; it is interpolated into the name of the JSON file written by the last cell.
dataset = "mimic"
# Read the OpenAI key from the environment rather than pasting it into the notebook,
# so the secret is never saved with the file. Falls back to '' (the previous
# placeholder value) when OPENAI_API_KEY is not set.
openapi_key = os.environ.get("OPENAI_API_KEY", "")

In [3]:
# Register the key configured above on the openai module before any request is made.
openai.api_key = openapi_key

In [24]:
# Text file holding the class labels, one label per line.
cls_file = 'mimic_classes.txt'
with open(cls_file, "r") as f:
    # splitlines() copes with both "\n" and "\r\n" endings, and the filter drops
    # blank lines (including the empty entry a trailing newline used to produce),
    # so no empty label is ever sent to the model.
    classes = [line.strip() for line in f.read().splitlines() if line.strip()]

In [29]:
# Maps the long class names used when prompting to the short label names
# used as keys in the saved feature dictionary.
mapping_classes = {
    'pulmonary edema': 'edema',
    'peripherally inserted central catheter': 'picc',
    'crowding of the bronchovascular structures': 'crowd',
    # Misspelled variant kept on purpose so a class list containing the typo still maps to 'chf'.
    'congestive hear failure': 'chf',
    'rare diseases': 'tail_abnorm_obs',
    'medical hardware': 'hardware',
    'cancer': 'mass',
    'pleural effusion': 'effusion',
    # Was written with a comma instead of a colon, which is a SyntaxError in a dict literal.
    'congestive heart failure': 'chf',
}

In [68]:
# Prompt template for the completion endpoint; it is later filled with
# base_prompt.format(label). A context manager is used so the file handle is
# closed instead of being left open.
with open('visual_prompts.txt', 'r') as prompt_file:
    base_prompt = prompt_file.read()
base_prompt

In [None]:
## GPT-4

# Query the chat endpoint once per class label and collect the bullet-point
# features from the reply into feature_dict[label].

feature_dict = {}
gpt_model = "gpt-4"
#gpt_model = "gpt-3"

# System message sent with every request.
system_prompt = "You are a radiologist describing recognizable visual features of medical findings to non-medical people."

# Few-shot examples (pneumothorax, emphysema, thicken) that precede the actual question.
few_shot_examples = "List the most important features in the chest X-ray for identifying \"pneumothorax\":\n\n-opacity: reduced opacity and increased transparency, often appearing as a dark or black area.\n-shape and structure: altered lung's structure with partial or complete lung collapse.\nlocation: often located near the lung's edges or at the lung's apex.\n-contrast and border clarity: distinct border separating the collapsed lung from the chest wall.\n-textures: lucent, dark, mottled or hazy appearances.\n-fluid: no fluid or effusion accumulation.\n-others: asymmetry between affected and unaffected chest's sides, sharper contrast between the lung and the ribs, and reduced lung volume.\n\nList the most important features in the chest X-ray for identifying \"emphysema\":\n\n-opacity: increased transparency in the lung due to damaged air sacs.\n-shape and structure: larger, irregularly shaped air spaces, creating an open or flattened appearance.\n-location: often located at lung's upper lobes.\n-contrast and border clarity: indistinct borders or hazy edges between normal and damaged lung tissue.\n-textures: holes or \"bullae\" disrupts the lung's normal texture.\n-fluid:no fluid accumulation.\n-others: bullous and over-inflated appearance of the lungs.\n\nList the most important features in the chest X-ray for identifying \"thicken\":\n\n-opacity:increased opacity and reduced transparency.\n-shape and structure: irregular shapes or bulging areas due to thickening or enlargement of structures.\n-location: vary based on the involved structure.\n-contrast and border clarity: vary with well-defined or blurred borders.\n-textures: linear or irregular patterns disrupting normal texture in the vicinity.\n-fluid: no fluid accumulation or effusion.\n-others: specific descriptors, e.g., vascular wall thickening, mucosal thickening, provides context about the affected structures.\n\n"

# Final question; "{}" is replaced by the class currently being processed.
# Previously the question was hard-coded to "pneumonia", so every class
# received pneumonia features regardless of its label.
question_template = "List the most important features in the chest X-ray for identifying \"{}\":"

for i, label in enumerate(classes):
    feature_dict[label] = set()
    print("\n", i, label)
    # Number of completions sampled per class; results are merged into one set.
    for _ in range(1):
        response = openai.ChatCompletion.create(
          # Use the model selected above rather than a second hard-coded name.
          model=gpt_model,
          messages=[
            {
              "role": "system",
              "content": system_prompt
            },
            {
              "role": "user",
              "content": few_shot_examples + question_template.format(label)
            }
          ],
          temperature=0.7,
          max_tokens=256,
          top_p=1,
          frequency_penalty=0,
          presence_penalty=0.0,
          stop=[" Human:", " AI:"]
        )
        #clean up responses
        features = response["choices"][0]['message']['content']
        # Each feature is expected on its own line starting with "-".
        features = features.split("\n-")
        features = [feat.replace("\n", "").strip() for feat in features]
        # Splitting on "\n-" leaves the dash on the first bullet only. Remove it
        # only when it really is a leading dash, so a hyphen inside the text
        # (e.g. "well-defined") is not deleted by mistake.
        if features[0].startswith("-"):
            features[0] = features[0][1:].strip()
        features = set(feat for feat in features if len(feat) > 0)
        feature_dict[label].update(features)
    # Sorted for a deterministic order, matching the GPT-3.5 cell.
    feature_dict[label] = sorted(feature_dict[label])

In [10]:
# GPT-3.5

# Completion-endpoint variant: fill the prompt template with each class label,
# request one completion, and store the de-duplicated, sorted bullet points
# under that label in feature_dict.

# Other model names that were tried:
#gpt_model = "gpt-4"
#gpt_model = "gpt-3.5-turbo-0613"
gpt_model = "text-davinci-003"
feature_dict = {}
for i, label in enumerate(classes):
    collected = set()
    feature_dict[label] = collected
    print("\n", i, label)
    for _ in range(1):
        response = openai.Completion.create(
            model=gpt_model,
            prompt=base_prompt.format(label),
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        # Clean up the reply: one feature per "\n-" separated chunk, with inner
        # newlines and surrounding whitespace removed and empty chunks dropped.
        completion_text = response["choices"][0]["text"]
        features = {
            chunk.replace("\n", "").strip()
            for chunk in completion_text.split("\n-")
        }
        features.discard("")
        collected.update(features)
    feature_dict[label] = sorted(collected)


 0 covid-19


In [38]:
# Rename keys that have a short form in mapping_classes; all other keys are kept as they are.
feature_dict = {
    mapping_classes.get(name, name): feats
    for name, feats in feature_dict.items()
}


In [8]:
# Show the collected features as the cell output for a quick visual check.
feature_dict

{'Covid-19': ['contrast and border clarity: indistinct borders or hazy edges',
  'fluid: may be present in some cases',
  'location: often in the lower lobes, but can be present in different lobes',
  'opacity: increased opacity and reduced transparency',
  'others: air bronchograms, vascular thickening, pleural effusions or thickening, and other findings that may suggest Covid-19 infection',
  'shape and structure: diffuse ground-glass opacities, often in peripheral and lower lung regions',
  'textures: reticulations, nodules, and/or consolidation']}

In [39]:
# Serialize the feature dictionary and write it to gpt_<dataset>.json.
json_object = json.dumps(feature_dict, indent=4)
# The format string has a single placeholder, so only `dataset` is passed.
# The extra `prompt_type` argument was never defined in this notebook and
# raised a NameError before anything was written.
with open("gpt_{}.json".format(dataset), "w") as outfile:
    outfile.write(json_object)