In [None]:
# Mount Google Drive at /content/drive; later cells read and write descriptor
# CSV files under a "drive/MyDrive/..." path.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import ast
import os
import re

import pandas as pd
import requests

In [None]:
def call_openai_chat_api(prompt, n=1, model="gpt-4", api_key=None, timeout=120):
    """Send a single-turn chat completion request to the OpenAI REST API.

    Args:
        prompt: Text sent as the content of the user message.
        n: Number of completions requested (the payload's ``n`` field).
        model: Model identifier placed in the payload.
        api_key: OpenAI API key. When ``None`` (the default) the key is read
            from the ``OPENAI_API_KEY`` environment variable so that no
            credential has to be stored in the notebook.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If no API key was passed and ``OPENAI_API_KEY`` is unset
            or empty.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    if api_key is None:
        api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise ValueError(
            "No OpenAI API key provided: pass api_key=... or set the "
            "OPENAI_API_KEY environment variable."
        )

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": model,
        "n": n,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    }

    # A timeout keeps a stalled connection from blocking the cell forever.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on HTTP errors instead of returning the error payload as if
    # it were a completion (callers index into response["choices"]).
    response.raise_for_status()
    return response.json()


In [None]:
# Dataset key -> ordered list of class-label phrases.
# The position of a phrase in its list is what the descriptor-generation cell
# stores as `label_index`, so the order within each list must be preserved.
all_dataset_class_labels = {
    "pcam": [
        "lymph node",
        "lymph node containing metastatic tumor tissue",
    ],
    "nck": [
        "adipose",
        "debris",
        "lymphocytes",
        "mucus",
        "smooth muscle",
        "normal colon mucosa",
        "cancer-associated stroma",
        "colorectal adenocarcinoma epithelium",
    ],
    "lc25000_lung": [
        "benign lung",
        "lung adenocarcinoma",
        "lung squamous cell carcinoma",
    ],
    "lc25000_colon": [
        "colon adenocarcinoma",
        "benign colonic tissue",
    ],
    "mhist": [
        "hyperplastic polyp",
        "sessile serrated adenoma",
    ],
    "sicap": [
        "benign glands",
        "atrophic dense glands",
        "cribriform ill-formed fused papillary patterns",
        "isolated nest cells without lumen roseting patterns",
    ],
    "idc_grade": [
        "well differentiated bloom richardson grade one",
        "moderately differentiated bloom richardson grade two",
        "poorly differentiated grade three",
    ],
    "osteo": [
        "non-tumor",
        "non-viable necrotic osteosarcoma tumor",
        "viable osteosarcoma tumor",
    ],
    "bach": [
        "breast non-malignant benign tissue",
        "breast malignant in-situ carcinoma",
        "breast malignant invasive carcinoma",
        "breast normal breast tissue",
    ],
    "renal_cell": [
        "red blood cells",
        "renal cancer",
        "normal renal tissue",
        "torn adipose necrotic tissue",
        "muscle fibrous stroma blood vessels",
    ],
    "skin": [
        "necrosis",
        "skeletal muscle",
        "eccrine sweat glands",
        "vessels",
        "elastosis",
        "chondral tissue",
        "hair follicle",
        "epidermis",
        "nerves",
        "subcutis",
        "dermis",
        "sebaceous glands",
        "squamous-cell carcinoma",
        "melanoma in-situ",
        "basal-cell carcinoma",
        "naevus",
    ],
    "skin_tumor": [
        "squamous-cell carcinoma",
        "melanoma in-situ",
        "basal-cell carcinoma",
        "naevus",
    ],
}

In [None]:
# Query the chat API once per class label of every dataset and store the raw
# responses in one CSV per dataset.
desc_type = "feature"

# Prompt template per descriptor type; "{}" is filled with the class label.
prompt_templates = {
    "sentence": "Give a 1 sentence, detailed but generic caption for a histopathological image showing {}.",
    "feature": "Give a list of 5-6 short (1-3 word) descriptions of features you would expect to see in a histopathological image showing {}.",
}

# Validate once, before any (billable) API call is made.
if desc_type not in prompt_templates:
    raise ValueError(
        "Unknown desc_type {!r}; expected one of {}".format(desc_type, sorted(prompt_templates))
    )
prompt_template = prompt_templates[desc_type]

# NOTE(review): relative path; Drive is mounted at /content/drive, so this
# presumably relies on the working directory being /content - confirm.
save_dir = "drive/MyDrive/CV2_project/code/med_vlm_cal/descriptors/{}".format(desc_type)
# Create the output folder up front so df.to_csv cannot fail on a missing
# directory after the API responses for a dataset have been collected.
os.makedirs(save_dir, exist_ok=True)

for dataset, class_labels in all_dataset_class_labels.items():

    print(dataset, class_labels)
    save_path = "{}/{}.csv".format(save_dir, dataset)
    print(save_path)

    rows = []
    for idx, label in enumerate(class_labels):
        prompt = prompt_template.format(label)
        print(prompt)

        response = call_openai_chat_api(prompt)
        # The whole response dict is stored; pandas writes it to the CSV as
        # its Python string representation.
        rows.append([idx, dataset, label, response])

    df = pd.DataFrame(rows, columns=["label_index", "dataset", "label", "response"])
    display(df)
    df.to_csv(save_path, index=False)

    print("---------------------------")


In [None]:
# Leading list marker: "1.", "2)", "-", "*" or a bullet character.
_LIST_MARKER = re.compile(r"^\s*(?:\d+\s*[.)]\s*|[-*\u2022]\s+)")


def _parse_feature_list(content):
    """Split a model reply into its individual list items.

    Lines that start with a list marker have the marker removed. Blank lines
    are ignored. If at least one marked line exists, unmarked lines (e.g. an
    introductory sentence) are dropped; if none exists, every non-blank line
    is treated as an item. Surrounding whitespace and periods are stripped.

    Args:
        content: Text of the assistant message.

    Returns:
        List of item strings in their original order.
    """
    marked, unmarked = [], []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        match = _LIST_MARKER.match(line)
        if match:
            marked.append(line[match.end():].strip().strip(".").strip())
        else:
            unmarked.append(line.strip(".").strip())
    items = marked if marked else unmarked
    return [item for item in items if item]


# Read back the stored "feature" responses and print the parsed descriptor
# list for every class label.
for dataset, class_labels in all_dataset_class_labels.items():

    save_path = "drive/MyDrive/CV2_project/code/med_vlm_cal/descriptors/feature/{}.csv".format(dataset)
    df = pd.read_csv(save_path)

    print(dataset)
    for label in class_labels:
        row = df[df["label"] == label].iloc[0]
        # The "response" column holds the string form of a Python dict.
        # ast.literal_eval parses such literals without executing arbitrary
        # code, unlike the eval() call it replaces.
        response = ast.literal_eval(row["response"])
        c = response["choices"][0]["message"]["content"].strip()
        texts = _parse_feature_list(c)
        print(label, texts)
    print("---------------")