In [1]:
import pandas as pd
import numpy as np
import json
import re
import os
import sys
from sklearn.metrics import f1_score

In [2]:
# Add the report-generation checkout to the import path; the CXRMetric import in
# the next cell presumably resolves from here — confirm if the layout changes.
sys.path.append('/data/dangnguyen/report_generation/report-generation/')

In [4]:
from CXRMetric.CheXbert.src.label import label

In [43]:
# Condition names in the positional order assumed by labels_to_eng.
cxr_labels = [
        'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
        'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity',
        'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia',
        'Pneumothorax', 'Support Devices']

# The same 14 conditions in a second ordering; used further down as the column
# names of the label-difference tables.
cxr_labels_2 = [
        'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity',
        'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis',
        'Pneumothorax', 'Pleural Effusion', 'Pleural Other', 'Fracture',
        'Support Devices', 'No Finding']


def labels_to_eng(labels):
    """Convert a label vector to English.

    Args:
        labels: sequence of per-condition values (list, NumPy array or pandas
            Series), positionally aligned with ``cxr_labels``.

    Returns:
        str: the name of every condition whose value equals 1, each followed by
        ``', '``. A non-empty result therefore ends with a trailing ``', '``
        (callers strip it with ``[:-2]``); the result is ``''`` when no value
        equals 1.

    Raises:
        IndexError: if ``labels`` has more entries than ``cxr_labels``.
    """
    # Materialise the values first. Indexing a label-indexed pandas Series with
    # integer positions (labels[i]) relies on a deprecated positional fallback,
    # whereas iterating a Series always yields its values in order.
    values = list(labels)

    diag = ''
    for position, value in enumerate(values):
        cond = cxr_labels[position]
        if value == 1:
            diag += cond + ', '
    return diag

In [None]:
# Load the training indications and rename the text column 'report' -> 'indication'.
df_ind = (
    pd.read_csv('/data/dangnguyen/report_generation/mimic_data/train_indications.csv')
    .rename(columns={'report': 'indication'})
)
df_ind

In [None]:
# Load the ground-truth impressions with their CheXbert label columns and
# rename the text column 'Report Impression' -> 'impression'.
df_imp_chexb = (
    pd.read_csv('/data/dangnguyen/report_generation/mimic_data/train_gt_imp_chexbert.csv')
    .rename(columns={'Report Impression': 'impression'})
)
df_imp_chexb

In [None]:
# Put the indication text next to the impression/label columns; concatenating
# along the column axis aligns rows by index.
# NOTE(review): assumes both CSVs list the same studies in the same row order — confirm.
indication_col = df_ind['indication']
df_ind_imp = pd.concat([indication_col, df_imp_chexb], axis='columns')
df_ind_imp

In [None]:
# Build instruction-tuning samples (instruction / input / output) from the
# first 100 indication-impression pairs.
instruction = 'Write a radiology report responding to the indication. Include all given positive labels.'
finetune_data = []

for _, row in df_ind_imp[:100].iterrows():
    # labels_to_eng ends its result with ', '; the slice drops that separator
    labels = labels_to_eng(row[cxr_labels])[:-2]

    # missing text is represented as an empty string in the sample
    ind = '' if pd.isna(row['indication']) else row['indication']
    imp = '' if pd.isna(row['impression']) else row['impression']

    inp = 'Indication: {}. Positive labels: {}'.format(ind, labels)
    sample = {'instruction': instruction, 'input': inp, 'output': imp}
    finetune_data.append(sample)

In [None]:
# Persist the fine-tuning samples as a single JSON document.
outpath = '/data/dangnguyen/report_generation/mimic_data/finetune_llm/finetune_imp.json'

with open(outpath, 'w') as out_handle:
    json.dump(finetune_data, out_handle)

In [None]:
# Cleaning radiology reports using decomposed rules

from transformers import T5Tokenizer, T5ForConditionalGeneration
import json
import gzip
import pandas as pd

In [None]:
# Sentences to clean (first 100 rows of the CSV only) and the JSONL file the
# model outputs are written to.
test_file = "/data/dangnguyen/report_generation/mimic_data/mimic_train_impressions_sentence.csv"
# output_file = "generated_sentence_t5_xxl.jsonl"
output_file = "remove1_t5_xxl.jsonl"

data = pd.read_csv(test_file, nrows=100)
input_list = data["report"].tolist()

In [None]:
# Prompt material for the cleaning rule: the instruction template and the
# few-shot examples that get substituted into it.
instruct_path = '/data/dangnguyen/report_generation/XrayGPT/prompts/mimic/report_clean_rules/remove1_instructions.txt'
examples_path = '/data/dangnguyen/report_generation/XrayGPT/prompts/mimic/report_clean_rules/remove1_sen_fewshot.txt'

# Read inside `with` blocks so both file handles are closed deterministically
# instead of being left open until garbage collection.
with open(instruct_path) as instruct_file:
    instructions = instruct_file.read()
with open(examples_path) as examples_file:
    examples = examples_file.read()

In [None]:
# Load the tokenizer and the conditional-generation model for the checkpoint
# named by model_name.
# NOTE(review): device_map="auto" is forwarded to from_pretrained — presumably it
# lets the library place the weights on the available devices; confirm against
# the transformers documentation.
model_name = "google/flan-t5-XXL"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")

In [None]:
# Run every input sentence through the prompted model and write one JSON line
# per sentence ({"input": ..., "output": ...}) to output_file.
with open(output_file, "w") as fout:
    for input_sent in input_list:
        print(input_sent)
        # substitute the EXAMPLES and INPUT_QUERY fields of the instruction template
        input_text = instructions.format(EXAMPLES=examples, INPUT_QUERY=input_sent)
        # NOTE(review): the input ids are moved to "cuda" explicitly while the model was
        # loaded with device_map="auto" — confirm this is the device the model expects.
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
        
        # NOTE(review): bos_token_id=0 is passed explicitly — confirm it is intended for this model.
        outputs = model.generate(input_ids, max_length=200, bos_token_id=0)
        # only the first returned sequence is decoded
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        fout.write(json.dumps({"input": input_sent, "output": result}) + "\n")

In [31]:
# Evaluating cleaned reports

def exact_match(gt, pred):
    """Compute the exact-match accuracy of the generated reports.

    Each text has every character that is neither a word character nor
    whitespace removed, is lower-cased and split on whitespace. A pair counts
    as a match when the two resulting word sequences are identical.

    Args:
        gt: iterable of ground-truth report strings.
        pred: iterable of predicted report strings, paired with ``gt`` by position.

    Returns:
        (em_acc, matches): the fraction of matching pairs, and a boolean NumPy
        array holding one flag per compared pair.
    """
    def _words(text):
        return re.sub(r"[^\w\s]", "", text).lower().split() # removes special characters

    matches = np.array([_words(gt_rp) == _words(pred_rp) for gt_rp, pred_rp in zip(gt, pred)])
    em_acc = np.sum(matches) / len(matches)
    return em_acc, matches

def _chexbert_pos_neg(df, csv_path, chexbert_path):
    """Label one set of reports and split the result into positive/negative indicator matrices."""
    # label() is given a file path, so the reports are staged in a scratch CSV
    # named after csv_path (extension replaced by '_pre-chexbert.csv').
    pre_chexb = os.path.splitext(csv_path)[0] + '_pre-chexbert.csv'
    try:
        df.to_csv(pre_chexb, index=False)
        y = np.array(label(chexbert_path, pre_chexb)).T
    finally:
        # Always remove the scratch file, even when labeling fails. os.remove
        # (unlike a shell `rm`) is safe for paths with spaces or metacharacters.
        if os.path.exists(pre_chexb):
            os.remove(pre_chexb)

    # Note on labels:
    # 0: unmentioned ; 1: positive ; 2: negative ; 3: uncertain
    y_neg = y.copy()
    y_neg[(y_neg == 1) | (y_neg == 3)] = 0
    y_neg[y_neg == 2] = 1

    y[(y == 2) | (y == 3)] = 0
    return y, y_neg


def compute_f1(df_gt, df_pred, gt_path, pred_path):
    """Compute the positive and negative macro F1 between two sets of reports.

    Both frames are labeled with ``label`` (CheXbert); labels 1 form the
    "positive" indicator matrix and labels 2 the "negative" one, with all other
    label values mapped to 0.

    Args:
        df_gt: DataFrame of ground-truth reports; needs a column called "report".
        df_pred: DataFrame of predicted reports; needs a column called "report".
        gt_path: path used only to derive the name of the temporary CSV written
            for the ground-truth reports.
        pred_path: same, for the predicted reports.

    Returns:
        (pos_f1, neg_f1, prag_f1, y_gt_neg, y_gt, y_pred_neg, y_pred): the two
        macro F1 scores, their mean, and the label matrices (returned for
        debugging). The label columns follow the 2nd ordering (see run_eval.py).

    Raises:
        AssertionError: if the two label matrices differ in shape.
    """
    CHEXBERT_PATH = '/data/dangnguyen/report_generation/models/chexbert.pth'

    y_gt, y_gt_neg = _chexbert_pos_neg(df_gt, gt_path, CHEXBERT_PATH)
    y_pred, y_pred_neg = _chexbert_pos_neg(df_pred, pred_path, CHEXBERT_PATH)

    assert y_gt.shape == y_pred.shape

    pos_f1 = f1_score(y_gt, y_pred, average='macro')
    neg_f1 = f1_score(y_gt_neg, y_pred_neg, average='macro')
    prag_f1 = np.mean([pos_f1, neg_f1])

    # also returning the labels matrices for debugging
    return pos_f1, neg_f1, prag_f1, y_gt_neg, y_gt, y_pred_neg, y_pred

In [8]:
# Load the evaluation sentences; missing cells and cells equal to 'REMOVED'
# both become empty strings.
clean_rp = (
    pd.read_csv('/data/dangnguyen/report_generation/mimic_data/finetune_llm/test_cleaned.csv')
    .fillna('')
    .replace('REMOVED', '')
)
clean_rp

Unnamed: 0,report,cleaned,llm_rewritten
0,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.
1,"Compared to prior chest radiographs since ___,...",,"Compared to prior chest radiographs since ___,..."
2,"In comparison with the study of ___, the incre...",No opacification at the right base.,The increased opacification at the right base ...
3,In comparison to previous radiograph of 1 day ...,Support and monitoring devices are in position.,Support and monitoring devices are in position.
4,"Compared to the most recent study, there is im...",There is mild pulmonary edema and small left p...,Mild pulmonary edema and small left pleural ef...
...,...,...,...
95,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...
96,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...
97,"Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp..."
98,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.


In [None]:
# llm_rewritten = clean_rp['llm_rewritten']
# ['' if 'REMOVED' in rp else rp for rp in llm_rewritten]

In [None]:
# Preview the first 20 rows.
clean_rp.head(20)

In [None]:
# Inspect one example: original sentence, reference cleaning, LLM rewrite.
row = clean_rp.loc[51]
# The original sentence is stored in the 'report' column; the frame has no
# 'original' column, so row.original would raise AttributeError.
print('{}\n\n{}\n\n{}'.format(row['report'], row['cleaned'], row['llm_rewritten']))

In [9]:
# getting results from CXR-PRO
train_imp_sen = pd.read_csv('/data/dangnguyen/report_generation/mimic_data/mimic_train_impressions_sentence.csv')
# keep one row per distinct (study_id, sentence_id, report) combination
train_imp_sen_uniq = (
    train_imp_sen
    .loc[:, ['study_id', 'sentence_id', 'report']]
    .drop_duplicates()
)

In [10]:
# Attach study_id / sentence_id to each evaluation sentence by matching on the
# sentence text (default inner join).
clean_rp_merge = pd.merge(clean_rp, train_imp_sen_uniq, on='report')
clean_rp_merge

Unnamed: 0,report,cleaned,llm_rewritten,study_id,sentence_id
0,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,55350604,0
1,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,57552246,0
2,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,50285724,0
3,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,58718226,0
4,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,50113144,0
...,...,...,...,...,...
44165,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,53929430,0
44166,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,53170506,1
44167,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,51580320,0
44168,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,51076093,0


In [11]:
# The same sentence text can occur in many studies (see the merge output above);
# keep only the first merged row for each distinct 'report' text.
clean_rp_merge = clean_rp_merge.groupby('report').head(1)
clean_rp_merge

Unnamed: 0,report,cleaned,llm_rewritten,study_id,sentence_id
0,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,55350604,0
992,"Compared to prior chest radiographs since ___,...",,"Compared to prior chest radiographs since ___,...",50712381,0
1009,"In comparison with the study of ___, the incre...",No opacification at the right base.,The increased opacification at the right base ...,50558581,0
1010,In comparison to previous radiograph of 1 day ...,Support and monitoring devices are in position.,Support and monitoring devices are in position.,59305618,0
1012,"Compared to the most recent study, there is im...",There is mild pulmonary edema and small left p...,Mild pulmonary edema and small left pleural ef...,56836542,0
...,...,...,...,...,...
44156,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,50700834,2
44157,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,50700834,3
44158,"Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...",56553359,2
44159,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,53473341,0


In [12]:
# CXR-PRO impressions: keep the study id and report text, one row per distinct pair.
cxr_pro = (
    pd.read_csv('/data/mimic_data/cxr-pro/mimic_train_impressions.csv')
    .loc[:, ['study_id', 'report']]
    .drop_duplicates()
)
cxr_pro

Unnamed: 0,study_id,report
0,50414267,No acute cardiopulmonary process.
2,53189527,No acute cardiopulmonary abnormality.
4,53911762,No acute intrathoracic process.
6,56699142,No acute cardiopulmonary process.
7,57375967,"Focal consolidation at the left lung base, pos..."
...,...,...
371944,58708861,
371945,57132437,No acute cardiothoracic process.
371948,55368167,
371949,58621812,Appropriately positioned et and ng tubes. Biba...


In [13]:
# Index by study_id so reports can be looked up with .loc in the next cell.
cxr_pro = cxr_pro.set_index('study_id')

In [14]:
# Look up the CXR-PRO report for the study_id of every selected sentence
# (cxr_pro is indexed by study_id at this point), then turn study_id back into a column.
test_cxr_pro = cxr_pro.loc[clean_rp_merge['study_id']].reset_index()
# restore study_id as an ordinary column on cxr_pro as well
cxr_pro = cxr_pro.reset_index()

test_cxr_pro

Unnamed: 0,study_id,report
0,55350604,##graphs _ _ _ through _ _ _. Heart size top -...
1,50712381,"_ _, most _ _ _ and _ _ _. Mild pulmonary edem..."
2,50558581,Opacification at the right base cleared. Mild ...
3,59305618,", support and monitoring devices position. Pul..."
4,56836542,The mild pulmonary edema the small left pleura...
...,...,...
95,50700834,The lateral view is suboptimal due to patient ...
96,50700834,The lateral view is suboptimal due to patient ...
97,56553359,Small to moderate right pleural effusion incre...
98,53473341,Low lung volumes and mild bibasilar atelectasis.


In [15]:
# Split every selected CXR-PRO report into sentences, recorded as
# {'study_id', 'sentence_id', 'report'} dicts in cxr_pro_sen.
cxr_pro_sen = []

for _, row in test_cxr_pro.iterrows():
    study_id = row['study_id']
    report = row['report']
    # A report may be missing (NaN); treat it as empty text instead of
    # crashing on float.lower().
    if pd.isna(report):
        report = ''
    # drop the periods of these abbreviations so they are not taken as sentence ends
    report = report.lower().replace('dr.', 'dr').replace('a.m.', 'am').replace('p.m.', 'pm')

    sentences = [sen for sen in report.split('.') if len(sen) > 2] + [''] # add dummy sentences to 
    # avoid KeyError because cxr-pro removed some sentences

    for i, sentence in enumerate(sentences):
        cxr_pro_sen.append({
            'study_id': study_id,
            'sentence_id': i,
            'report': sentence
        })

In [16]:
# One row per (study_id, sentence_id, report) record collected in cxr_pro_sen.
df_pro_sen = pd.DataFrame(cxr_pro_sen)
df_pro_sen

Unnamed: 0,study_id,sentence_id,report
0,55350604,0,##graphs _ _ _ through _ _ _
1,55350604,1,heart size top - normal
2,55350604,2,lungs grossly clear
3,55350604,3,no pleural abnormality or evidence of central...
4,55350604,4,
...,...,...,...
495,55360873,0,", the endotracheal tube removed"
496,55360873,1,there is extensive pulmonary scarring with br...
497,55360873,2,lucency along the right lateral chest wall wi...
498,55360873,3,opacification at the bases could reflect bila...


In [17]:
# Index by (study_id, sentence_id) so individual sentences can be selected with .loc.
df_pro_sen = df_pro_sen.set_index(['study_id','sentence_id'])
df_pro_sen

Unnamed: 0_level_0,Unnamed: 1_level_0,report
study_id,sentence_id,Unnamed: 2_level_1
55350604,0,##graphs _ _ _ through _ _ _
55350604,1,heart size top - normal
55350604,2,lungs grossly clear
55350604,3,no pleural abnormality or evidence of central...
55350604,4,
...,...,...
55360873,0,", the endotracheal tube removed"
55360873,1,there is extensive pulmonary scarring with br...
55360873,2,lucency along the right lateral chest wall wi...
55360873,3,opacification at the bases could reflect bila...


In [18]:
# complex query: querying using study_id and sentence_id, which uniquely identifies a sentence
# The keys are taken from clean_rp_merge in row order; drop_duplicates() then
# removes repeated (study_id, sentence_id, report) rows.
# NOTE(review): a later cell attaches this 'report' column to clean_rp by position —
# confirm the row count still equals len(clean_rp) after de-duplication.
df_pro_sen = df_pro_sen.loc[list(zip(clean_rp_merge['study_id'], clean_rp_merge['sentence_id']))].reset_index().drop_duplicates()
df_pro_sen

Unnamed: 0,study_id,sentence_id,report
0,55350604,0,##graphs _ _ _ through _ _ _
1,50712381,0,"_ _, most _ _ _ and _ _ _"
3,50558581,0,opacification at the right base cleared
4,59305618,0,", support and monitoring devices position"
5,56836542,0,the mild pulmonary edema the small left pleura...
...,...,...,...
105,50700834,2,there is prominence indistinctness of the cen...
107,50700834,3,the cardiac silhouette is not assessed but ap...
109,56553359,2,"bibasilar atelectasis, though infection or as..."
110,53473341,0,low lung volumes and mild bibasilar atelectasis


In [None]:
# df_pro_sen.to_csv('/data/dangnguyen/report_generation/mimic_data/finetune_llm/cxr_pro_sen.csv', index=False)

In [None]:
# Reload the cached sentence table. An empty sentence is written to CSV as an
# empty field and read back as NaN, which would break the string-based
# exact_match comparison later on, so restore '' in the text column.
df_pro_sen = pd.read_csv(
    '/data/dangnguyen/report_generation/mimic_data/finetune_llm/cxr_pro_sen.csv'
).fillna({'report': ''})

In [19]:
# Attach the CXR-PRO sentences as a new column. The values are passed as a plain
# list, so they are matched to clean_rp rows by position, not by index.
clean_rp['cxr_pro'] = df_pro_sen['report'].tolist()
clean_rp

Unnamed: 0,report,cleaned,llm_rewritten,cxr_pro
0,Compared to chest radiographs ___ through ___.,,Compared to chest radiographs ___ through ___.,##graphs _ _ _ through _ _ _
1,"Compared to prior chest radiographs since ___,...",,"Compared to prior chest radiographs since ___,...","_ _, most _ _ _ and _ _ _"
2,"In comparison with the study of ___, the incre...",No opacification at the right base.,The increased opacification at the right base ...,opacification at the right base cleared
3,In comparison to previous radiograph of 1 day ...,Support and monitoring devices are in position.,Support and monitoring devices are in position.,", support and monitoring devices position"
4,"Compared to the most recent study, there is im...",There is mild pulmonary edema and small left p...,Mild pulmonary edema and small left pleural ef...,the mild pulmonary edema the small left pleura...
...,...,...,...,...
95,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,there is prominence indistinctness of the cen...
96,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,the cardiac silhouette is not assessed but ap...
97,"Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...","bibasilar atelectasis, though infection or as..."
98,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,low lung volumes and mild bibasilar atelectasis


In [20]:
# Plain Python lists of the three text columns that get compared.
gt_clean = clean_rp['cleaned'].tolist()
llm_clean = clean_rp['llm_rewritten'].tolist()
pro_clean = clean_rp['cxr_pro'].tolist()

In [21]:
# Exact-match accuracy of the LLM rewrites against the reference cleanings.
acc, matches = exact_match(gt_clean, llm_clean)
acc

0.66

In [22]:
# Exact-match accuracy of the CXR-PRO sentences against the reference cleanings.
pro_acc, pro_matches = exact_match(gt_clean, pro_clean)
pro_acc

0.31

In [None]:
# Rows whose CXR-PRO sentence did not exactly match the reference cleaning
# (pro_matches is the boolean array returned by exact_match).
pro_mismatch = clean_rp.loc[~pro_matches]

In [None]:
# Show mismatching rows at positions 60-79.
pro_mismatch.iloc[60:80]

In [23]:
# calculating macro F1
# compute_f1 derives the names of its temporary '_pre-chexbert.csv' files from these paths.
gt_path = '/data/dangnguyen/report_generation/mimic_data/finetune_llm/gt_reports.csv'
llm_path = '/data/dangnguyen/report_generation/mimic_data/finetune_llm/llm_reports.csv'
pro_path = '/data/dangnguyen/report_generation/mimic_data/finetune_llm/pro_reports.csv'

In [24]:
# Replace empty strings with '_' before the reports are written to CSV for labeling.
# NOTE(review): presumably so empty reports are not read back as missing values — confirm.
clean_rp = clean_rp.replace('', '_')
clean_rp

Unnamed: 0,report,cleaned,llm_rewritten,cxr_pro
0,Compared to chest radiographs ___ through ___.,_,Compared to chest radiographs ___ through ___.,##graphs _ _ _ through _ _ _
1,"Compared to prior chest radiographs since ___,...",_,"Compared to prior chest radiographs since ___,...","_ _, most _ _ _ and _ _ _"
2,"In comparison with the study of ___, the incre...",No opacification at the right base.,The increased opacification at the right base ...,opacification at the right base cleared
3,In comparison to previous radiograph of 1 day ...,Support and monitoring devices are in position.,Support and monitoring devices are in position.,", support and monitoring devices position"
4,"Compared to the most recent study, there is im...",There is mild pulmonary edema and small left p...,Mild pulmonary edema and small left pleural ef...,the mild pulmonary edema the small left pleura...
...,...,...,...,...
95,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,There is prominence indistinctness of the cent...,there is prominence indistinctness of the cen...
96,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,The cardiac silhouette is not well assessed bu...,the cardiac silhouette is not assessed but ap...
97,"Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...","Bibasilar atelectasis, though infection or asp...","bibasilar atelectasis, though infection or as..."
98,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,Low lung volumes and mild bibasilar atelectasis.,low lung volumes and mild bibasilar atelectasis


In [32]:
# flan-t5 results
# compute_f1 expects single-column frames whose column is named 'report'
gt_reports = clean_rp[['cleaned']].rename(columns={'cleaned':'report'})
llm_reports = clean_rp[['llm_rewritten']].rename(columns={'llm_rewritten':'report'})
pos_f1, neg_f1, prag_f1, gt_neg, gt, pred_neg, pred = compute_f1(
    gt_reports, llm_reports, gt_path, llm_path)

100%|██████████| 100/100 [00:00<00:00, 2109.33it/s]


Tokenizing report impressions. All reports are cut off at 512 tokens.





Using 4 GPUs!


  0%|          | 0/6 [00:00<?, ?it/s]


Begin report impression labeling. The progress bar counts the # of batches completed:
The batch size is 18


100%|██████████| 6/6 [00:00<00:00,  7.14it/s]
100%|██████████| 100/100 [00:00<00:00, 3010.12it/s]


Tokenizing report impressions. All reports are cut off at 512 tokens.





Using 4 GPUs!


  0%|          | 0/6 [00:00<?, ?it/s]


Begin report impression labeling. The progress bar counts the # of batches completed:
The batch size is 18


100%|██████████| 6/6 [00:00<00:00,  7.29it/s]


In [36]:
# positive F1, negative F1 and their mean, one per line
print(f'{pos_f1}\n{neg_f1}\n{prag_f1}')

0.8969386805699598
0.3587301587301587
0.6278344196500593


In [39]:
# looking into failure cases
# element-wise disagreement between reference and predicted positive labels
pos_diff = np.logical_xor(gt, pred)
# True for rows that disagree on at least one condition
pos_diff_agg = pos_diff.any(axis=1)

llm_pos_diff = clean_rp[pos_diff_agg].reset_index(drop=True)
llm_pos_diff

Unnamed: 0,report,cleaned,llm_rewritten,cxr_pro
0,"New left upper lobe opacity, probably due to a...","Left upper lobe opacity, probably due to atele...","Left upper lobe opacity, probably due to atele...","left upper lobe opacity, probably due to atele..."
1,There is no change in multifocal especially pe...,There is multifocal especially perihilar opaci...,There is interval development of left lower lo...,multifocal especially perihilar opacities lef...
2,What was probably concurrent pulmonary edema i...,Concurrent pulmonary edema in the left lung.,Pulmonary edema in the left lung is negative.,": the widespread, asymmetric pulmonary opacif..."
3,Improvement of the ventilation at the right ba...,Ventilation at the right base with pleural eff...,Improvement of the ventilation at the right ba...,the ventilation at the right base with reduced...
4,AP chest compared to ___ through ___: Left pl...,Left pleural effusion is small.,Left pleural effusion is small and has not rec...,ap chest through _ _ _ : left pleural effusion...
5,Interval resolution of interstitial edema.,No interstitial edema.,Interstitial edema.,of interstitial edema
6,Previous borderline cardiomegaly and pulmonary...,No cardiomegaly and pulmonary vascular congest...,_,cardiomegaly and pulmonary vascular congestion...
7,Previously identified left apical pneumothorax...,No left apical pneumothorax with small amount ...,Left apical pneumothorax now appears to have r...,identified left apical pneumothorax with smal...
8,"Since ___, large regions of heterogeneous opac...",No large regions of heterogeneous opacificatio...,Large regions of heterogeneous opacification a...,_ large regions of heterogeneous opacificatio...


In [58]:
# Print the reference and the LLM rewrite of one positive-label failure case.
idx = 8
pos_example = llm_pos_diff.loc[idx]
print('GT: {}\n\npred: {}'.format(pos_example.cleaned, pos_example.llm_rewritten))

GT: No large regions of heterogeneous opacification in the lingula and lower lobe.

pred: Large regions of heterogeneous opacification are present in the lingula and lower lobe.


In [49]:
# Per-condition view of the rows with disagreeing positive labels. Columns use
# cxr_labels_2, the second ordering that compute_f1 notes the label matrices follow.
pd.DataFrame(pos_diff[pos_diff_agg], columns=cxr_labels_2)

Unnamed: 0,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,No Finding
0,False,False,False,False,False,False,False,True,False,False,False,False,False,False
1,False,False,True,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,False,False,False,True
3,False,False,False,False,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,False,False,True,False
5,False,False,False,False,True,False,False,False,False,False,False,False,False,True
6,False,False,False,False,False,False,False,False,False,False,False,False,False,True
7,False,False,False,False,False,False,False,False,False,True,False,False,False,True
8,False,False,True,False,False,False,False,False,False,False,False,False,False,True


In [60]:
# element-wise disagreement between reference and predicted negative labels
neg_diff = np.logical_xor(gt_neg, pred_neg)
# True for rows that disagree on at least one condition
neg_diff_agg = neg_diff.any(axis=1)

llm_neg_diff = clean_rp[neg_diff_agg].reset_index(drop=True)
llm_neg_diff

Unnamed: 0,report,cleaned,llm_rewritten,cxr_pro
0,"In comparison with the study of ___, the incre...",No opacification at the right base.,The increased opacification at the right base ...,opacification at the right base cleared
1,What was probably concurrent pulmonary edema i...,Concurrent pulmonary edema in the left lung.,Pulmonary edema in the left lung is negative.,": the widespread, asymmetric pulmonary opacif..."
2,Interval resolution of interstitial edema.,No interstitial edema.,Interstitial edema.,of interstitial edema
3,Resolution of left perihilar pneumonia.,No left perihilar pneumonia.,_,resolution of left perihilar pneumonia
4,Previous borderline cardiomegaly and pulmonary...,No cardiomegaly and pulmonary vascular congest...,_,cardiomegaly and pulmonary vascular congestion...
5,Resolved pulmonary edema.,No pulmonary edema.,_,pulmonary edema
6,"In comparison with the study of ___, the bilat...",No bilateral pulmonary opacification.,The bilateral pulmonary opacification is negat...,the bilateral pulmonary opacification is cleared
7,"Since ___, large regions of heterogeneous opac...",No large regions of heterogeneous opacificatio...,Large regions of heterogeneous opacification a...,_ large regions of heterogeneous opacificatio...
8,Lungs are essentially clear with resolution of...,Lungs are essentially clear with no right pleu...,Lungs are essentially clear.,lungs right pleural effusion and right basal ...


In [67]:
# Print the reference and the LLM rewrite of one negative-label failure case.
idx = 8
neg_example = llm_neg_diff.loc[idx]
print('GT: {}\n\npred: {}'.format(neg_example.cleaned, neg_example.llm_rewritten))

GT: Lungs are essentially clear with no right pleural effusion and right basal atelectasis.

pred: Lungs are essentially clear.


In [61]:
# Per-condition view of the rows with disagreeing negative labels. Columns use
# cxr_labels_2, the second ordering that compute_f1 notes the label matrices follow.
pd.DataFrame(neg_diff[neg_diff_agg], columns=cxr_labels_2)

Unnamed: 0,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,No Finding
0,False,False,True,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,True,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,True,False,False,False,False,False,False,False
4,False,True,False,False,True,False,False,False,False,False,False,False,False,False
5,False,False,False,False,True,False,False,False,False,False,False,False,False,False
6,False,False,True,False,False,False,False,False,False,False,False,False,False,False
7,False,False,True,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,True,False,True,False,False,False,False


In [68]:
# cxr-pro results
# compute_f1 expects single-column frames whose column is named 'report'
gt_reports = clean_rp[['cleaned']].rename(columns={'cleaned':'report'})
pro_reports = clean_rp[['cxr_pro']].rename(columns={'cxr_pro':'report'})
pos_f1, neg_f1, prag_f1, gt_neg, gt, pred_neg, pred = compute_f1(
    gt_reports, pro_reports, gt_path, pro_path)

100%|██████████| 100/100 [00:00<00:00, 3188.61it/s]


Tokenizing report impressions. All reports are cut off at 512 tokens.





Using 4 GPUs!


  0%|          | 0/6 [00:00<?, ?it/s]


Begin report impression labeling. The progress bar counts the # of batches completed:
The batch size is 18


100%|██████████| 6/6 [00:00<00:00,  6.07it/s]
100%|██████████| 100/100 [00:00<00:00, 2722.20it/s]


Tokenizing report impressions. All reports are cut off at 512 tokens.





Using 4 GPUs!


  0%|          | 0/6 [00:00<?, ?it/s]


Begin report impression labeling. The progress bar counts the # of batches completed:
The batch size is 18


100%|██████████| 6/6 [00:00<00:00,  7.45it/s]
  average, "true nor predicted", 'F-score is', len(true_sum)


In [69]:
# positive F1, negative F1 and their mean, one per line
print(f'{pos_f1}\n{neg_f1}\n{prag_f1}')

0.7118988092342827
0.27936507936507937
0.49563194429968105
