# Import libraries

In [1]:
import os
import re
import copy

import pandas as pd
import numpy as np
from sklearn import metrics

# Load and align all dataframes

In [2]:
chexpert_categories = [
    "No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly",
    "Lung Lesion", "Lung Opacity", "Edema", "Consolidation",
    "Pneumonia", "Atelectasis", "Pneumothorax", "Pleural Effusion",
    "Pleural Other", "Fracture", "Support Devices",
]

# reports: the CSV has no header row, so name the two columns explicitly
val = pd.read_csv('../data_msc_project/cheXpert/input_chexpert.csv', header=None)
val.columns = ['id', 'text']


def _load_labeler_output(path, text_column):
    """
    Load one labeler's CSV and return its labels indexed by report id.

    The labeler output is inner-merged with ``val`` by matching
    ``text_column`` against ``val['text']``, which attaches the report id
    to every labelled row. Fully duplicated rows are dropped, leading 's'
    characters are stripped from the id, 'Airspace Opacity' is renamed to
    'Lung Opacity', and only the ``chexpert_categories`` columns are kept
    (in that order).

    Parameters
    ----------
    path : str
        Location of the labeler's CSV output.
    text_column : str
        Name of the column in that CSV holding the report text.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``chexpert_categories``, indexed by 'id'.
    """
    labels = pd.read_csv(path)
    labels = labels.merge(val, how='inner', left_on=text_column, right_on='text')
    labels = labels.drop_duplicates()
    # Drop the 's' in the beginning of the id (lstrip removes every leading 's')
    labels['id'] = labels['id'].astype(str).str.lstrip('s')
    labels = labels.set_index('id')
    labels = labels.rename(columns={'Airspace Opacity': 'Lung Opacity'})
    return labels[chexpert_categories]


# chexbert
df_chexbert = _load_labeler_output(
    '../data_msc_project/cheXbert/chexbert_labeled_1.csv', 'Report Impression'
)

# chexpert
df_chexpert = _load_labeler_output(
    '../data_msc_project/cheXpert/chexpert_labeled_1.csv', 'Reports'
)

# visualchexbert
df_visualchexbert = _load_labeler_output(
    '../data_msc_project/VisualCheXbert/visualchexbert_labeled_1.csv', 'Report Impression'
)

# ground truth
gs = pd.read_csv('../data_msc_project/physionet.org/files/mimic-cxr-jpg/2.1.0/mimic-cxr-2.1.0-test-set-labeled.csv', header=0, index_col=0)
gs.index.name = 'id'
gs.rename(columns={'Airspace Opacity': 'Lung Opacity'}, inplace=True)
gs = gs[chexpert_categories]

print(gs.shape)

# ensure all dataframes are aligned
gs.sort_index(inplace=True)
# the labeler frames use string ids, so compare like with like
gs.index = gs.index.astype(str)
# Keep only ids present in the ground truth AND in every labeler output.
# Intersecting with a single labeler would make the .loc lookups below raise
# KeyError as soon as another labeler is missing one of those ids.
common_indices = gs.index
for labeled in (df_chexpert, df_chexbert, df_visualchexbert):
    common_indices = common_indices.intersection(labeled.index)
gs = gs.loc[common_indices]
df_chexpert = df_chexpert.loc[common_indices]
df_chexbert = df_chexbert.loc[common_indices]
df_visualchexbert = df_visualchexbert.loc[common_indices]
gs.head()

(687, 14)


Unnamed: 0_level_0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Lesion,Lung Opacity,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
50008596,,,,,,,,,1.0,,1.0,1.0,,
50020371,1.0,,,,,,,,,,,,,
50022945,,1.0,,,,0.0,,,1.0,0.0,1.0,,,
50031776,,,,,1.0,,1.0,-1.0,-1.0,,,,,
50037292,,,,,,-1.0,1.0,1.0,,,,,,


# Evaluation functions

* Helper functions to score labeler outputs against the ground truth in four settings: (1) mention, (2) uncertainty, (3) absence, and (4) presence.

In [3]:
def evaluate_label(tar, pred, ignore_nan=False):
    """
    Score a single binary label.

    Parameters: `tar` holds the reference labels and `pred` the predicted
    labels for one category; when `ignore_nan` is true, positions where
    either array is NaN are removed before scoring.

    Returns a dict with the keys 'precision', 'recall', 'f1' and
    'positives' (the integer sum of the reference labels). The three
    scores are NaN when there are no positive reference labels.
    """
    if ignore_nan:
        # keep only the positions where both arrays hold a value
        keep = ~np.isnan(tar) & ~np.isnan(pred)
        tar, pred = tar[keep], pred[keep]

    n_positive = int(tar.sum())
    if n_positive == 0:
        # nothing to detect: the scores are reported as NaN
        return {
            'precision': np.nan,
            'recall': np.nan,
            'f1': np.nan,
            'positives': n_positive,
        }

    precision = metrics.precision_score(tar, pred, zero_division=0)
    recall = metrics.recall_score(tar, pred)

    # F1 is the harmonic mean; report 0 when both components are 0
    # instead of dividing by zero
    total = precision + recall
    f1 = 0.0 if total == 0 else 2 * (precision * recall) / total

    return {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'positives': n_positive,
    }
    

def get_scores(target, prediction, categories, ignore_nan=False):
    """
    Score every category and collect the results in one table.

    Parameters
    ----------
    target, prediction : 2-D arrays
        Column ``i`` of each array is scored as category ``categories[i]``.
    categories : sequence of str
        Category names; they become the index of the returned frame.
    ignore_nan : bool
        Forwarded to ``evaluate_label`` for every category. Previously this
        argument was accepted but never passed on, so it had no effect.

    Returns
    -------
    pandas.DataFrame
        One row per category, one column per key returned by
        ``evaluate_label``.
    """
    results = {
        c: evaluate_label(target[:, i], prediction[:, i], ignore_nan=ignore_nan)
        for i, c in enumerate(categories)
    }

    # one row per category, one column per metric
    return pd.DataFrame.from_dict(results, orient='index')

def evaluate_labels(df_truth, df_label, method='mention'):
    """
    Binarise truth and labeler output for one task and score each category.

    `method` selects which label value counts as the positive class:
      - 'mention':   any of -1, 0 or 1 becomes 1; NaN becomes 0
      - 'absence':   0 becomes 1, everything else becomes 0
      - 'presence':  1 becomes 1, everything else becomes 0
      - 'uncertain': -1 becomes 1, everything else becomes 0

    The columns of `df_truth` name the categories; the two frames are
    compared column by column, by position. Returns the frame produced
    by `get_scores`. Raises ValueError for any other `method`.
    """
    categories = list(df_truth.columns)

    # work on copies so the callers' dataframes are left untouched
    preds = copy.copy(df_label.values)
    targets = copy.copy(df_truth.values)

    # label value treated as the positive class for each single-value task
    positive_value = {'absence': 0, 'presence': 1, 'uncertain': -1}

    def _one_vs_rest(arr, value):
        # in place: entries equal to `value` become 1, all others become 0
        hit = arr == value
        arr[~hit] = 0
        arr[hit] = 1

    if method == 'mention':
        for arr in (preds, targets):
            # any mention is a 1
            arr[np.isin(arr, [-1, 0, 1])] = 1
            # no mention is a 0
            arr[np.isnan(arr)] = 0

        # do not ignore NaN (which we have set to 0 anyway)
        ignore_nan = False
    elif method in positive_value:
        _one_vs_rest(preds, positive_value[method])
        _one_vs_rest(targets, positive_value[method])

        # ignore NaN values
        # NOTE(review): NaN never equals the positive value, so it has been
        # written as 0 above and no NaN is left in either array at this
        # point -- confirm whether "ignore NaN" is still the intent.
        ignore_nan = True
    else:
        raise ValueError(f'Unrecognized method {method}')

    return get_scores(targets, preds, categories, ignore_nan=ignore_nan)

# Mention 
* If the clinical finding has been mentioned in the radiology report, i.e. [1:Positive or 0:Negative or -1:Uncertain]

In [4]:
# Score each labeler on the "mention" task and tag its metric columns
# with the labeler name as an outer column level.
df = evaluate_labels(gs, df_chexbert, method='mention')
df.columns = pd.MultiIndex.from_product([['CheXbert'], df.columns])

cx = evaluate_labels(gs, df_chexpert, method='mention')
cx.columns = pd.MultiIndex.from_product([['CheXpert'], cx.columns])

# side-by-side on the category index, then put the metric name on the
# outer column level and the labeler on the inner one
df = df.join(cx, how='inner')
df = df.swaplevel(0, 1, axis=1)

# group the columns metric by metric
df = df[['precision', 'recall', 'f1', 'positives']]

# round the float columns to three decimals (integer counts are unaffected)
df = df.round(3)

# The 'positives' columns are kept for every labeler.
# LaTeX export to 'mention.tex' is currently disabled.
df.index.name = 'Mention'

df

Unnamed: 0_level_0,precision,precision,recall,recall,f1,f1,positives,positives
Unnamed: 0_level_1,CheXbert,CheXpert,CheXbert,CheXpert,CheXbert,CheXpert,CheXbert,CheXpert
Mention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
No Finding,0.391,0.397,0.833,0.833,0.532,0.538,30,30
Enlarged Cardiomediastinum,0.928,0.761,0.904,0.728,0.916,0.744,114,114
Cardiomegaly,0.982,0.985,0.989,0.921,0.986,0.952,279,279
Lung Lesion,0.984,0.862,0.955,0.848,0.969,0.855,66,66
Lung Opacity,0.947,0.714,0.932,0.911,0.94,0.801,192,192
Edema,0.996,0.996,0.996,0.993,0.996,0.995,283,283
Consolidation,1.0,1.0,1.0,0.981,1.0,0.99,106,106
Pneumonia,0.991,0.97,0.996,0.987,0.994,0.979,233,233
Atelectasis,1.0,0.996,0.996,0.996,0.998,0.996,242,242
Pneumothorax,1.0,1.0,0.987,0.983,0.994,0.992,238,238


# Uncertain

* If the clinical finding is uncertain, i.e. [-1:Uncertain]

In [5]:
# Score each labeler on the "uncertain" task and tag its metric columns
# with the labeler name as an outer column level.
df = evaluate_labels(gs, df_chexbert, method='uncertain')
df.columns = pd.MultiIndex.from_product([['CheXbert'], df.columns])

cx = evaluate_labels(gs, df_chexpert, method='uncertain')
cx.columns = pd.MultiIndex.from_product([['CheXpert'], cx.columns])

# side-by-side on the category index, then put the metric name on the
# outer column level and the labeler on the inner one
df = df.join(cx, how='inner')
df = df.swaplevel(0, 1, axis=1)

# group the columns metric by metric
df = df[['precision', 'recall', 'f1', 'positives']]

# round the float columns to three decimals (integer counts are unaffected)
df = df.round(3)

# The 'positives' columns are kept for every labeler.
# LaTeX export to 'uncertainty.tex' is currently disabled.
df.index.name = 'Uncertainty'

df

Unnamed: 0_level_0,precision,precision,recall,recall,f1,f1,positives,positives
Unnamed: 0_level_1,CheXbert,CheXpert,CheXbert,CheXpert,CheXbert,CheXpert,CheXbert,CheXpert
Uncertainty,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
No Finding,,,,,,,0,0
Enlarged Cardiomediastinum,0.897,0.815,0.921,0.579,0.909,0.677,38,38
Cardiomegaly,0.923,0.6,0.923,0.231,0.923,0.333,39,39
Lung Lesion,1.0,0.2,0.778,0.111,0.875,0.143,9,9
Lung Opacity,,,,,,,0,0
Edema,1.0,0.688,0.976,0.805,0.988,0.742,41,41
Consolidation,1.0,0.818,0.962,0.346,0.98,0.486,26,26
Pneumonia,0.934,0.674,0.966,0.659,0.95,0.667,88,88
Atelectasis,0.95,0.865,0.95,0.8,0.95,0.831,40,40
Pneumothorax,0.9,0.5,0.9,0.2,0.9,0.286,10,10


# Absence

* If the clinical finding is absent in the radiology report, i.e. [0:Negative]

In [6]:
# Score each labeler on the "absence" task and tag its metric columns
# with the labeler name as an outer column level.
df = evaluate_labels(gs, df_chexbert, method='absence')
df.columns = pd.MultiIndex.from_product([['CheXbert'], df.columns])

cx = evaluate_labels(gs, df_chexpert, method='absence')
cx.columns = pd.MultiIndex.from_product([['CheXpert'], cx.columns])

# side-by-side on the category index, then put the metric name on the
# outer column level and the labeler on the inner one
df = df.join(cx, how='inner')
df = df.swaplevel(0, 1, axis=1)

# group the columns metric by metric
df = df[['precision', 'recall', 'f1', 'positives']]

# round the float columns to three decimals (integer counts are unaffected)
df = df.round(3)

# The 'positives' columns are kept for every labeler.
# LaTeX export to 'absence.tex' is currently disabled.
df.index.name = 'Absence'

df

Unnamed: 0_level_0,precision,precision,recall,recall,f1,f1,positives,positives
Unnamed: 0_level_1,CheXbert,CheXpert,CheXbert,CheXpert,CheXbert,CheXpert,CheXbert,CheXpert
Absence,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
No Finding,,,,,,,0,0
Enlarged Cardiomediastinum,1.0,0.708,0.867,0.567,0.929,0.63,30,30
Cardiomegaly,0.956,0.955,0.966,0.719,0.961,0.821,89,89
Lung Lesion,1.0,0.5,0.75,0.5,0.857,0.5,4,4
Lung Opacity,0.909,0.533,0.87,0.348,0.889,0.421,23,23
Edema,0.99,0.918,0.972,0.832,0.981,0.873,107,107
Consolidation,1.0,0.958,1.0,0.958,1.0,0.958,24,24
Pneumonia,1.0,0.912,0.989,0.689,0.994,0.785,90,90
Atelectasis,1.0,0.333,0.8,0.6,0.889,0.429,5,5
Pneumothorax,0.995,0.988,0.974,0.895,0.984,0.939,190,190


# Presence

* If the clinical finding is present in the radiology report, i.e. [1:Positive]

In [7]:
# Score each labeler on the "presence" task and tag its metric columns
# with the labeler name as an outer column level.
df = evaluate_labels(gs, df_chexbert, method='presence')
df.columns = pd.MultiIndex.from_product([['CheXbert'], df.columns])

cx = evaluate_labels(gs, df_chexpert, method='presence')
cx.columns = pd.MultiIndex.from_product([['CheXpert'], cx.columns])

vx = evaluate_labels(gs, df_visualchexbert, method='presence')
vx.columns = pd.MultiIndex.from_product([['VisualCheXbert'], vx.columns])

# side-by-side on the category index, then put the metric name on the
# outer column level and the labeler on the inner one
df = df.join(cx, how='inner')
df = df.join(vx, how='inner')
df = df.swaplevel(0, 1, axis=1)

# group the columns metric by metric
df = df[['precision', 'recall', 'f1', 'positives']]

# round the float columns to three decimals (integer counts are unaffected)
df = df.round(3)

# The 'positives' columns are kept for every labeler.
# LaTeX export to 'presence.tex' is currently disabled.
df.index.name = 'Presence'

df

Unnamed: 0_level_0,precision,precision,precision,recall,recall,recall,f1,f1,f1,positives,positives,positives
Unnamed: 0_level_1,CheXbert,CheXpert,VisualCheXbert,CheXbert,CheXpert,VisualCheXbert,CheXbert,CheXpert,VisualCheXbert,CheXbert,CheXpert,VisualCheXbert
Presence,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
No Finding,0.391,0.397,0.225,0.833,0.833,0.833,0.532,0.538,0.355,30,30,30
Enlarged Cardiomediastinum,0.848,0.448,0.071,0.848,0.565,0.652,0.848,0.5,0.129,46,46,46
Cardiomegaly,0.98,0.771,0.406,0.987,0.914,0.934,0.983,0.836,0.566,151,151,151
Lung Lesion,0.944,0.768,0.237,0.962,0.811,0.604,0.953,0.789,0.34,53,53,53
Lung Opacity,0.952,0.721,0.278,0.941,0.917,0.787,0.946,0.807,0.41,169,169,169
Edema,0.971,0.883,0.446,0.993,0.896,0.941,0.982,0.89,0.605,135,135,135
Consolidation,0.982,0.754,0.141,1.0,0.929,0.857,0.991,0.832,0.242,56,56,56
Pneumonia,0.963,0.446,0.202,0.945,0.673,0.782,0.954,0.536,0.321,55,55,55
Atelectasis,0.985,0.949,0.429,0.985,0.944,0.792,0.985,0.947,0.556,197,197,197
Pneumothorax,0.949,0.586,0.456,0.974,0.895,0.816,0.961,0.708,0.585,38,38,38
