In [1]:
# Re-import edited modules automatically before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

#DOWNLOAD THE DATA

from google.colab import drive
import zipfile

#MOUNT GDRIVE: DOWNLOAD DATA AND FILTERED LABELS
drive.mount('/content/gdrive',force_remount=True)

# UNZIP ZIP
# The archive is opened in a `with` block so the file handle is released even
# when extraction is interrupted or fails part-way through.
print ("Uncompressing zip file")
with zipfile.ZipFile('/content/gdrive/My Drive/CheXpert-v1.0-small.zip', 'r') as zip_ref:
    # No target directory is given, so files land in the current working directory.
    zip_ref.extractall()
print("downloaded files")

Mounted at /content/gdrive
Uncompressing zip file


KeyboardInterrupt: ignored

In [2]:
# Install the pinned torchvision / torch versions used by the rest of the notebook.
!pip install torchvision==0.2.2
!pip install torch==1.0.1 -f https://download.pytorch.org/whl/cu100/stable # CUDA 10.0 build


Looking in links: https://download.pytorch.org/whl/cu100/stable


In [0]:
from sklearn.metrics import roc_auc_score

def validation_eval(learn):
    """Print study-level validation AUCs next to the AUCs reported in the CheXpert paper.

    Image-level predictions from ``learn`` are written into a copy of the
    module-level ``full_valid_df`` and reduced to one row per
    ``(patient, study)`` pair: ground-truth labels with ``max`` and
    predictions with ``mean``. ROC AUC is then computed for each of the five
    CheXpert targets and printed together with the paper's value and the
    difference, followed by the averages.

    Args:
        learn: learner exposing ``data.classes`` and
            ``get_preds(ds_type=DatasetType.Valid)``. Element ``[0]`` of the
            value returned by ``get_preds`` is indexed as ``[:, i]`` where
            ``i`` follows the order of ``learn.data.classes``.

    Returns:
        None. Results are printed.

    Raises:
        KeyError: if one of the CheXpert targets is not in ``learn.data.classes``.

    Note:
        Relies on ``full_valid_df``, ``DatasetType`` and ``roc_auc_score``
        being defined at module level before the call.
        NOTE(review): assumes the rows of ``full_valid_df`` are in the same
        order as the validation predictions -- confirm against the data setup.
    """
    chexpert_targets = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Pleural Effusion']
    classes = list(learn.data.classes)

    # Fail early with a clear message instead of a late lookup failure while printing.
    missing = [t for t in chexpert_targets if t not in classes]
    if missing:
        raise KeyError(f'learn.data.classes is missing CheXpert targets: {missing}')

    study_keys = ['patient', 'study']
    acts = full_valid_df.groupby(study_keys)[classes].max().values

    valid_preds = learn.get_preds(ds_type=DatasetType.Valid)
    preds = valid_preds[0]
    preds_df = full_valid_df.copy()

    for i, c in enumerate(classes):
        preds_df[c] = preds[:, i]

    # Both group-bys use the same keys, so rows of `acts` and `preds` line up.
    preds = preds_df.groupby(study_keys)[classes].mean().values

    # Columns follow the order of `classes`, so each target's column is looked
    # up by name rather than assuming the targets are the first five classes.
    auc_scores = {}
    for target in chexpert_targets:
        col = classes.index(target)
        auc_scores[target] = roc_auc_score(acts[:, col], preds[:, col])

    #average results reported in the associated paper
    chexpert_auc_scores = {'Atelectasis':      0.858,
                           'Cardiomegaly':     0.854,
                           'Consolidation':    0.939,
                           'Edema':            0.941,
                           'Pleural Effusion': 0.936}

    max_feat_len = max(map(len, chexpert_targets))

    avg_chexpert_auc = sum(chexpert_auc_scores.values())/len(chexpert_auc_scores)
    avg_auc          = sum(auc_scores.values())/len(auc_scores)

    for k in chexpert_targets:
        print(f'{k: <{max_feat_len}}\t auc: {auc_scores[k]:.3}\t chexpert auc: {chexpert_auc_scores[k]:.3}\t difference:'
              f'    {(chexpert_auc_scores[k]-auc_scores[k]):.3}')

    print(f'\nAverage auc: {avg_auc:.3} \t CheXpert average auc {avg_chexpert_auc:.3}\t Difference {(avg_chexpert_auc-avg_auc):.3}')



In [4]:
import cv2
import sys
import pandas as pd
from joblib import Parallel, delayed
from fastai.vision import *
from torchvision.models import *
from sklearn.metrics import roc_auc_score


def vaka(infile='CheXpert-v1.0-small/valid.csv', model_file='chexpert_densenet.pkl', batch_size=8):
    """Score an exported learner on a CheXpert CSV and print study-level AUCs.

    The CSV is read, ``patient`` and ``study`` columns are derived from the
    ``Path`` column, the exported learner is loaded and run on the listed
    images as a test set, and image-level predictions are reduced to one row
    per ``(patient, study)`` pair (labels with ``max``, predictions with
    ``mean``). ROC AUC for each of the five CheXpert targets is printed next
    to the value reported in the paper, followed by the averages.

    Args:
        infile: CSV with a ``Path`` column plus one label column per class of
            the loaded learner.
        model_file: file name of the exported learner handed to
            ``load_learner`` together with an empty base path.
        batch_size: batch size set on the learner's data before predicting.

    Returns:
        None. Results are printed.

    Raises:
        KeyError: if one of the CheXpert targets is not among the learner's classes.

    Note:
        NOTE(review): ``load_learner`` reads a ``.pkl`` export, i.e. pickled
        data -- only point ``model_file`` at files you trust.
    """
    chexpert_targets = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Pleural Effusion']

    test_df = pd.read_csv(infile)
    # Paths look like CheXpert-v1.0/{valid,test}/<PATIENT>/<STUDY>/...;
    # `n` and `expand` are passed by keyword because newer pandas rejects them positionally.
    path_parts = test_df.Path.str.split('/', n=4, expand=True)
    test_df['patient'] = path_parts[2]
    test_df['study'] = path_parts[3]

    chexpert_learn = load_learner('', model_file)
    test_data_src = ImageList.from_df(test_df, path="")
    chexpert_learn.data.add_test(test_data_src)
    chexpert_learn.data.batch_size = batch_size

    test_preds = chexpert_learn.get_preds(ds_type=DatasetType.Test)[0]
    classes = list(chexpert_learn.data.classes)

    # Fail early with a clear message instead of a late lookup failure while printing.
    missing = [t for t in chexpert_targets if t not in classes]
    if missing:
        raise KeyError(f'learner classes are missing CheXpert targets: {missing}')

    study_keys = ['patient', 'study']
    acts = test_df.groupby(study_keys)[classes].max().values

    preds_df = test_df.copy()
    for i, c in enumerate(classes):
        preds_df[c] = test_preds[:, i]

    # Both group-bys use the same keys, so rows of `acts` and `preds` line up.
    preds = preds_df.groupby(study_keys)[classes].mean().values

    print(chexpert_learn.data.classes)

    # Columns follow the order of `classes`, so each target's column is looked
    # up by name rather than assuming the targets are the first five classes.
    auc_scores = {}
    for target in chexpert_targets:
        col = classes.index(target)
        auc_scores[target] = roc_auc_score(acts[:, col], preds[:, col])

    #average results reported in the associated paper
    chexpert_auc_scores = {'Atelectasis':      0.858,
                           'Cardiomegaly':     0.854,
                           'Consolidation':    0.939,
                           'Edema':            0.941,
                           'Pleural Effusion': 0.936}

    max_feat_len = max(map(len, chexpert_targets))

    avg_chexpert_auc = sum(chexpert_auc_scores.values())/len(chexpert_auc_scores)
    avg_auc          = sum(auc_scores.values())/len(auc_scores)

    for k in chexpert_targets:
        print(f'{k: <{max_feat_len}}\t auc: {auc_scores[k]:.3}\t chexpert auc: {chexpert_auc_scores[k]:.3}\t difference:'
              f'    {(chexpert_auc_scores[k]-auc_scores[k]):.3}')

    print(f'\nAverage auc: {avg_auc:.3} \t CheXpert average auc {avg_chexpert_auc:.3}\t Difference {(avg_chexpert_auc-avg_auc):.3}')

    
# Run the evaluation; the per-target AUC table is printed below.
vaka()

['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Pleural Effusion']
Atelectasis     	 auc: 0.815	 chexpert auc: 0.858	 difference:    0.0426
Cardiomegaly    	 auc: 0.819	 chexpert auc: 0.854	 difference:    0.0353
Consolidation   	 auc: 0.917	 chexpert auc: 0.939	 difference:    0.0223
Edema           	 auc: 0.926	 chexpert auc: 0.941	 difference:    0.0153
Pleural Effusion	 auc: 0.933	 chexpert auc: 0.936	 difference:    0.00298

Average auc: 0.882 	 CheXpert average auc 0.906	 Difference 0.0237
