In [1]:
import json
import pandas as pd
import os
from typing import Literal

In [2]:
def format_decimal(x: float, digits: int = 3) -> str:
    """Format ``x`` with ``digits`` decimals, dropping a leading zero.

    Values whose fixed-point rendering starts with ``0.`` are shortened in
    the style used for score tables (``0.054`` -> ``.054``). Anything else
    is returned in full, so no significant character is ever discarded.

    Args:
        x: Number to format.
        digits: Number of digits after the decimal point.

    Returns:
        The formatted string, e.g. ``".054"``, ``"-.500"`` or ``"1.000"``.
    """
    text = f'{x:.{digits}f}'
    # Only strip a genuine leading zero. Blindly slicing off the first
    # character would corrupt values >= 1 (including ones that round up to
    # 1.000), drop the sign of negatives, and mangle "nan"/"inf".
    if text.startswith('0.'):
        return text[1:]
    if text.startswith('-0.'):
        return '-' + text[2:]
    return text

In [3]:
METRICS_DIR = "/data1/workspace/bih1122/test_metrics"

def get_metric(model: str,
               dataset: str,
               metric: Literal["bleu", "rouge", "meteor", "bertscore", "f1radgraph",
                               "f1chexbert_macro_f1_14", "f1chexbert_micro_f1_14",
                               "f1chexbert_macro_f1_5", "f1chexbert_micro_f1_5",
                               "sembscore", "green", "ratescore"],
               metrics_dir: str = METRICS_DIR) -> float:
    """Look up one evaluation metric from a model's test-metrics JSON file.

    The file read is ``{metrics_dir}/{model}/{dataset}_test_outputs_metrics.json``.
    Top-level metrics are read directly from the JSON object; the
    ``f1chexbert_*`` metrics and ``sembscore`` are read from its nested
    ``"chexbert"`` object (the ``f1chexbert_`` prefix is stripped to form
    the nested key).

    Args:
        model: Name of the model sub-directory under ``metrics_dir``.
        dataset: Dataset prefix of the metrics file name.
        metric: Name of the metric to return.
        metrics_dir: Root directory holding the per-model metric folders.

    Returns:
        The stored value for ``metric`` (annotated as a float; the value is
        returned exactly as stored in the JSON file).

    Raises:
        ValueError: If ``metric`` is not a supported name. This includes
            ``"green"``, which is listed in the type hint but not implemented.
        OSError: If the metrics file cannot be opened.
        KeyError: If the metrics file lacks the requested entry.
    """
    top_level_metrics = ("bleu", "rouge", "meteor", "bertscore", "f1radgraph", "ratescore")
    chexbert_f1_metrics = ("f1chexbert_macro_f1_14", "f1chexbert_micro_f1_14",
                           "f1chexbert_macro_f1_5", "f1chexbert_micro_f1_5")

    # Validate before touching the filesystem so a misspelled metric name is
    # reported as such instead of triggering a pointless (or failing) read.
    # NOTE: "green" is intentionally rejected. The previous, disabled code
    # averaged the "green_score" column of "{dataset}_green_scores.csv".
    if metric not in top_level_metrics + chexbert_f1_metrics + ("sembscore",):
        raise ValueError(f"Unknown metric: {metric}")

    metrics_path = os.path.join(metrics_dir, f"{model}/{dataset}_test_outputs_metrics.json")
    # Context manager guarantees the handle is closed; the bare
    # json.load(open(...)) form left one open file per call.
    with open(metrics_path, encoding="utf-8") as metrics_file:
        metrics = json.load(metrics_file)

    if metric in top_level_metrics:
        return metrics[metric]
    if metric == "sembscore":
        return metrics["chexbert"]["sembscore"]
    return metrics["chexbert"][metric.replace("f1chexbert_", "")]

In [4]:
# Metrics to report, in the order they are printed.
USING_METRICS = [
    "bleu",
    "f1chexbert_macro_f1_14",
    "f1chexbert_micro_f1_14",
    "f1radgraph",
    "sembscore",
    "ratescore",
]

In [5]:
model = "v3.6-CHEXGPT-RRIEF-lora-3422"

# Print each selected metric for every test set, with a blank line between
# sets. get_metric reads the metrics JSON itself, so the extra json.load that
# used to sit in this loop (unused result, unclosed file, hard-coded path that
# bypassed METRICS_DIR) has been dropped.
for dataset in ["mimic", "openi"]:
    for metric in USING_METRICS:
        value = get_metric(model, dataset, metric)
        print(f"{dataset} {metric}: {format_decimal(value)}")
    print("")

mimic bleu: .054
mimic f1chexbert_macro_f1_14: .298
mimic f1chexbert_micro_f1_14: .436
mimic f1radgraph: .166
mimic sembscore: .357
mimic ratescore: .490

openi bleu: .106
openi f1chexbert_macro_f1_14: .276
openi f1chexbert_micro_f1_14: .671
openi f1radgraph: .274
openi sembscore: .666
openi ratescore: .678



In [7]:
# Load the train and test inputs from the llava_input/v3.6 directory.
# Context managers close each file as soon as it has been parsed; the bare
# json.load(open(...)) form left both handles open for the kernel's lifetime.
with open("/BARO_Cluster/data/data/llava_input/v3.6/mimic+chexpert-train.json") as train_file:
    x = json.load(train_file)
with open("/BARO_Cluster/data/data/llava_input/v3.6/mimic-test.json") as test_file:
    y = json.load(test_file)

In [8]:
# Display the first record of x (loaded from mimic+chexpert-train.json).
x[0]

{'id': '02aa804e-bde0afdd-112c0b34-7bc16630-4e384014',
 'image': 'public/mimic/preprocessed/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.png',
 'conversations': [{'from': 'human',
   'value': 'Image: <image>\nView Position: PA\nPatient Age: 52\nPatient Gender: Female\nPatient Indication: F with new onset ascites // eval for infection\nDiagnostic Probabilites:\n- Atelectasis: 0.082\n- Consolidation: 0.009\n- Effusion: 0.041\n- Fracture: 0.870\n- Hyperinflation: 0.076\n- Lung Opacity: 0.220\n- Nodule: 0.107\n- Pleural Lesion: 0.046\n- Pneumothorax: 0.009\n- Pulmonary Edema: 0.007\n- Subcutaneous Emphysema: 0.005\n- Subdiaphragmatic Gas: 0.001\n- Widened Mediastinal Silhouette: 0.008\n- No Abnormality: 0.054'},
  {'from': 'gpt',
   'value': 'No focal consolidation is observed. No pleural effusion is observed. No pneumothorax is observed. Bilateral nodular opacities are present. The nodular opacities most likely represent nipple shadows. The cardiomediastinal silhouette is normal. Clips pr

In [9]:
# Display the first record of y (loaded from mimic-test.json).
y[0]

{'id': 'abea5eb9-b7c32823-3a14c5ca-77868030-69c83139',
 'image': 'public/mimic/preprocessed/abea5eb9-b7c32823-3a14c5ca-77868030-69c83139.png',
 'conversations': [{'from': 'human',
   'value': 'Image: <image>\nView Position: AP\nPatient Age: 68\nPatient Gender: Male\nPatient Indication: -year-old male with history of metastatic melanoma, presenting with confusion and somnolence. Evaluate for acute cardiopulmonary process.\nDiagnostic Probabilites:\n- Atelectasis: 0.522\n- Consolidation: 0.011\n- Effusion: 0.118\n- Fracture: 0.058\n- Hyperinflation: 0.003\n- Lung Opacity: 0.156\n- Nodule: 0.041\n- Pleural Lesion: 0.044\n- Pneumothorax: 0.004\n- Pulmonary Edema: 0.009\n- Subcutaneous Emphysema: 0.002\n- Subdiaphragmatic Gas: 0.002\n- Widened Mediastinal Silhouette: 0.408\n- No Abnormality: 0.199'},
  {'from': 'gpt',
   'value': 'Lateral view somewhat limited due to overlying motion artifact. The lungs are low in volume. There is no focal airspace consolidation to suggest pneumonia. A 1.2-