## Data preprocessing

##### Copyright (C) Microsoft Corporation.  
see license file for details 

In [1]:
# Allow multiple displays per cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# AZUREML_NATIVE_SHARE_DIRECTORY mapping to host dir is set by _nativeSharedDirectory_ in .compute file 

import os
try:
    amlWBSharedDir = os.environ['AZUREML_NATIVE_SHARE_DIRECTORY']    
except:
    amlWBSharedDir = ''
    print('not using aml services?')
    
amlWBSharedDir

'/azureml-share/'

In [3]:
# Use the Azure Machine Learning data collector to log various metrics
from azureml.logging import get_azureml_logger
logger = get_azureml_logger()

In [4]:
# Use Azure Machine Learning history magic to control history collection
# History is off by default, options are "on", "off", or "show"
# %azureml history on

In [5]:
# import utlity functions

import sys, os
paths_to_append = [os.path.join(os.getcwd(), os.path.join(*(['Code',  'src'])))]
def add_path_to_sys_path(path_to_append):
    if not (any(path_to_append in paths for paths in sys.path)):
        sys.path.append(path_to_append)

[add_path_to_sys_path(crt_path) for crt_path in paths_to_append]

import azure_chestxray_utils

[None]

#### Path variables

In [6]:
# create base directories for the file path variables 
# paths are tipically container level dirs mapped to a host dir for data persistence.

prj_consts = azure_chestxray_utils.chestxray_consts()

data_base_input_dir=os.path.join(amlWBSharedDir, os.path.join(*(prj_consts.BASE_INPUT_DIR_list)))
data_base_output_dir=os.path.join(amlWBSharedDir, os.path.join(*(prj_consts.BASE_OUTPUT_DIR_list)))  

data_base_input_dir
data_base_output_dir


'/azureml-share/chestxray/data/ChestX-ray8'

'/azureml-share/chestxray/output'

In [7]:
# chest xray images are in nih_chest_xray_data_dir
nih_chest_xray_data_dir=os.path.join(data_base_input_dir, 
                                     os.path.join(*(prj_consts.ChestXray_IMAGES_DIR_list)))
nih_chest_xray_data_dir

# check if we have all 112120 images in nih_chest_xray_data_dir
orig_images_no = !find $nih_chest_xray_data_dir -type f | wc -l
print("orig images number:{} ".format(orig_images_no))

'/azureml-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC'

orig images number:['112120'] 


In [8]:
# check if we have patients file list Data_Entry_2017.csv and BBox_List_2017.csv (https://nihcc.app.box.com/v/ChestXray-NIHCC)
# blacklist.csv is genrated by data scientists with no medical background

other_data_dir=os.path.join(data_base_input_dir, os.path.join(*(prj_consts.ChestXray_OTHER_DATA_DIR_list)))
other_data_dir
# !mkdir -p {other_data_dir}

import shutil
shutil.copyfile(os.path.join(os.getcwd(), os.path.join(*(['Code','src','finding_lungs','blacklist_non_PA_AP_view.csv']))),
                os.path.join(other_data_dir,'blacklist.csv'))

!ls $other_data_dir

# data is split into train/test/validation partitions
data_partitions_dir=os.path.join(data_base_output_dir, os.path.join(*(prj_consts.DATA_PARTITIONS_DIR_list)))  
!mkdir -p {data_partitions_dir}

'/azureml-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC_other'

'/azureml-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC_other/blacklist.csv'

BBox_List_2017.csv  Data_Entry_2017.csv  blacklist.csv


In [9]:
import pickle
import random
import re
import tqdm

import cv2
import numpy as np
import pandas as pd
import sklearn.model_selection 

#### Train/Validation/Test Data partitioning 
 - remove the images in the blacklist.csv where the image has low quality. 
 - remove the NIH bounding box patients since we will save those patients for later validation use. 
 - We will also divide data into train/valid/test dataset using a 7:1:2 ratio.

In [10]:
# remove NIH manually annotated data (groung truth with heavy pathologies, no healthy patients) 
# exclude what visusally looks like bad images to data scientists with no medical background
# todo
# This should prob be a generic function


total_patient_number = 30805
NIH_annotated_file = 'BBox_List_2017.csv' # exclude from train pathology annotated by radiologists 
manually_selected_bad_images_file = 'blacklist.csv'# exclude what viusally looks like bad images

patient_id_original = [i for i in range(1,total_patient_number + 1)]

# ignored images list is used later, since this is not a patient ID level issue
ignored_images_set = set()
with open(os.path.join(other_data_dir, manually_selected_bad_images_file), 'r') as f:
    for line in f:
        # delete the last char which is \n
        ignored_images_set.add(line[:-1])
        if int(line[:-9]) >= 30805:
            print(line[:-1])

bbox_df = pd.read_csv(os.path.join(other_data_dir, NIH_annotated_file))
bbox_patient_index_df = bbox_df['Image Index'].str.slice(3, 8)

bbox_patient_index_list = []
for index, item in bbox_patient_index_df.iteritems():
    bbox_patient_index_list.append(int(item))

patient_id = list(set(patient_id_original) - set(bbox_patient_index_list))
print("len of original patient id is", len(patient_id_original))
print("len of cleaned patient id is", len(patient_id))
print("len of unique patient id with annotated data", 
      len(list(set(bbox_patient_index_list))))
print("len of patient id with annotated data",bbox_df.shape[0])


len of original patient id is 30805
len of cleaned patient id is 30079
len of unique patient id with annotated data 726
len of patient id with annotated data 984


In [11]:
# move/copy all bbox images in test folder since these are not used for training anyway

test_images_dir=os.path.join(data_base_input_dir, 'test_images')
!mkdir -p {test_images_dir}

bbox_images_list = list(bbox_df['Image Index'])
for crt_file in tqdm.tqdm(bbox_images_list):
        source_file = os.path.join(nih_chest_xray_data_dir,crt_file)
        dest_file = os.path.join(test_images_dir,crt_file)
        if os.path.exists(dest_file):
            print('WARNING: image file '+crt_file+' already exists in '+test_images_dir)
#         print('crt_file')

        try:
            shutil.copyfile(source_file,dest_file)
        except IOError:
            print('WARNING: File '+ src_file_path + ' does not exist when copying to '+test_images_dir )

  0%|          | 0/984 [00:00<?, ?it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013118_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014716_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029817_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014687_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017877_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003148_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012515_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022098_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014198_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021007_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030674_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003945_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000808_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006621_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000865_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028452_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007557_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000181_061.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009669_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025368_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000468_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010770_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016972_019.png'

  2%|▏         | 23/984 [00:00<00:04, 222.34it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030635_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021481_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019124_045.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022883_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028173_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027094_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012123_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020113_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004968_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028012_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029464_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001170_046.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016267_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018412_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020673_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027474_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007124_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027866_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000149_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030434_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019271_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020408_037.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023176_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016191_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010478_012.png'

  5%|▍         | 48/984 [00:00<00:04, 228.58it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025228_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021796_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008005_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021495_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018496_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029088_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014607_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004968_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019271_064.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010936_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000211_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017255_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007676_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005089_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020857_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012291_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018762_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000468_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014253_042.png'

  7%|▋         | 67/984 [00:00<00:04, 213.37it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026136_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013685_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003787_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030106_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030260_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015262_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010625_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006851_034.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019495_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019089_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017151_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020349_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006851_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000744_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013685_028.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010481_021.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026886_004.png'

  9%|▊         | 84/984 [00:00<00:04, 197.28it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016403_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010575_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011857_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022155_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014870_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025221_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019373_036.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017500_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016009_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013111_069.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023156_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013508_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008554_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017582_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009619_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009437_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013992_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001836_082.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021024_022.png'

 11%|█         | 104/984 [00:00<00:04, 197.32it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029940_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011827_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019154_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003440_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009256_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014447_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012834_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016987_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020393_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029502_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000457_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027556_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005089_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007710_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012637_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000147_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018427_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014663_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014795_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014253_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028625_000.png'

 13%|█▎        | 125/984 [00:00<00:04, 200.06it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019271_065.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014095_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023116_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009779_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012829_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006948_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029200_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022707_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022416_049.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008716_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000072_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002350_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004296_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029631_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000643_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015831_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020429_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015300_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002856_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028620_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019706_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021377_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000830_000.png'

 15%|█▌        | 148/984 [00:00<00:04, 206.38it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016490_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001673_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015440_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025270_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012636_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027697_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021862_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016987_022.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018419_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029843_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007120_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030260_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020810_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005066_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009608_024.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000661_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019018_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000211_041.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015770_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011463_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021364_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003394_006.png'

 17%|█▋        | 170/984 [00:00<00:03, 210.00it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016990_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027797_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013670_151.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011322_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018387_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007037_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017448_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029808_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013249_031.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014706_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007551_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021009_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000377_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019426_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005066_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004630_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028509_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002435_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026087_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021409_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012288_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018187_034.png'

 20%|█▉        | 192/984 [00:00<00:03, 210.43it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006912_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005532_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020986_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016624_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017511_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016564_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012741_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028018_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017178_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029906_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000732_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012670_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026338_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013062_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002763_031.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013346_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014626_028.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026889_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000211_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023325_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011502_001.png'

 22%|██▏       | 213/984 [00:01<00:03, 200.79it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018233_057.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012261_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014706_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028873_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004578_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023093_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011402_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017893_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028873_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004461_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008365_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004381_021.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002704_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012364_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022215_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017524_028.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005827_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001534_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004344_022.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020819_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005532_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004342_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022215_011.png'

 24%|██▍       | 236/984 [00:01<00:03, 207.86it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015563_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018686_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011263_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013249_052.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016606_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025747_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027464_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022416_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004534_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004344_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027685_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009745_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029391_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000740_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011322_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023176_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009705_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004344_046.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021845_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013249_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000845_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014223_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000398_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021443_000.png'

 26%|██▋       | 260/984 [00:01<00:03, 213.84it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008339_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017514_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015400_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013125_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007735_040.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025732_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018693_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015425_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013635_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007043_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001369_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015799_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002059_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020438_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014574_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012793_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012094_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004533_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019187_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001373_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016414_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030279_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010381_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013615_052.png'

 29%|██▉       | 284/984 [00:01<00:03, 218.91it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004344_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019861_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022706_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004344_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015064_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011557_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029647_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025969_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018187_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004342_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004342_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015719_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028607_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004893_085.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001373_039.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000756_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008522_032.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005532_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030206_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008399_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012686_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004822_051.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015304_001.png'

 31%|███       | 307/984 [00:01<00:03, 221.15it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010172_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008814_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010007_168.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005066_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010828_039.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027479_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_059.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029579_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021703_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013993_077.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018102_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022369_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022611_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000344_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020482_032.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029464_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028383_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015078_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020000_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010277_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030634_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018427_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016837_002.png'

 34%|███▎      | 330/984 [00:01<00:03, 211.66it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013285_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019767_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018366_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003803_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002395_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027028_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020213_078.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021840_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021711_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015090_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025686_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017188_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018387_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028518_021.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023058_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028628_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026810_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012045_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025252_053.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021972_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016291_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021381_013.png'

 36%|███▌      | 352/984 [00:01<00:02, 213.13it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021181_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014015_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016964_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008841_025.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007882_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015058_024.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008547_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020274_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014976_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029596_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030408_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007830_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030412_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025252_040.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029532_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019766_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013750_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017138_037.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002583_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028640_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019924_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020405_041.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011237_094.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_054.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021967_000.png'

 38%|███▊      | 377/984 [00:01<00:02, 222.31it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030039_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012834_122.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025521_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026753_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012174_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020751_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002533_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012505_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014346_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025228_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018980_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012834_034.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023283_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027357_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016417_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025787_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027441_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014738_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013993_083.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022572_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022572_087.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017403_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023093_007.png'

 41%|████      | 400/984 [00:01<00:02, 218.77it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016291_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026555_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016522_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012973_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025664_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010575_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014149_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011857_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028774_047.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029894_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022572_063.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009218_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022899_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027817_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022416_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018657_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003072_028.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014626_035.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011269_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007034_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017500_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020277_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013977_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014398_031.png'

 43%|████▎     | 425/984 [00:01<00:02, 226.59it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029469_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028509_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017582_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027875_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013337_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013031_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009437_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009683_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001558_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005089_040.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028974_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011827_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025529_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027631_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017124_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020564_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015895_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014447_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012834_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029588_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021860_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020393_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027103_001.png'

 46%|████▌     | 448/984 [00:02<00:02, 222.14it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029502_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017714_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009342_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005089_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016972_025.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027577_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019399_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001437_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021132_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018427_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018360_035.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030128_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020259_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008814_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012021_081.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005066_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010828_039.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027479_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019018_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_059.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030636_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021703_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022369_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022727_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020482_032.png'

 48%|████▊     | 473/984 [00:02<00:02, 227.47it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028383_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010277_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028518_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028208_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018393_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029861_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006836_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011366_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029431_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027927_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022416_048.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021840_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013670_151.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010120_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014251_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022470_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010805_049.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011925_072.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019625_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015069_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017544_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013391_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021201_010.png'

 50%|█████     | 496/984 [00:02<00:02, 227.31it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029039_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016705_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019750_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013310_057.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028285_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021009_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023026_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012376_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010092_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012021_054.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016191_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030573_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017714_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020184_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014617_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016094_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009431_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001039_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029617_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028640_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017083_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003973_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020113_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003149_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006304_060.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017972_032.png'

 53%|█████▎    | 522/984 [00:02<00:01, 233.84it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021710_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000211_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026911_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029404_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019363_043.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006096_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003333_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011269_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016786_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013106_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019706_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015831_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020482_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016732_040.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021926_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017981_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000468_041.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026538_034.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027113_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010230_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007124_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013922_022.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022899_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030323_028.png'

 55%|█████▌    | 546/984 [00:02<00:01, 232.29it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001900_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000032_037.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019150_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026865_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013922_021.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019634_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026098_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030408_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029532_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018721_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019646_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019750_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017138_037.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013471_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002224_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028640_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011023_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020405_041.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014203_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011237_094.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_054.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011124_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026194_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019769_014.png'

 58%|█████▊    | 570/984 [00:02<00:01, 218.20it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025521_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018921_038.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012505_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015732_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016786_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012364_045.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027357_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016417_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025787_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026538_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027441_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014738_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013993_083.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023075_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029579_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013659_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010815_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026695_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010125_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026261_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010277_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002106_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019313_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018623_001.png'

 60%|██████    | 594/984 [00:02<00:01, 224.10it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020124_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026196_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028698_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016184_040.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017214_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020065_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011925_072.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021201_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013911_021.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021179_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006821_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011151_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027833_022.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012376_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020274_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028265_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028027_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018762_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018366_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022837_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008008_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001787_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017257_001.png'

 63%|██████▎   | 617/984 [00:02<00:01, 222.33it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029105_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011269_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012376_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013508_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012975_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011157_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001836_082.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023168_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016568_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014731_028.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025529_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003440_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019651_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027556_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011291_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019499_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005532_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018427_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030323_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014014_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011925_077.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014177_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028628_020.png'

 65%|██████▌   | 640/984 [00:02<00:01, 221.54it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021420_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018101_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012892_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017670_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019373_058.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014004_038.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028330_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015649_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021420_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015300_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022192_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022237_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022726_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000830_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015440_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011925_076.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017611_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029469_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014551_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027697_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009229_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012270_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013991_000.png'

 67%|██████▋   | 663/984 [00:03<00:01, 206.01it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012048_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001075_024.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012175_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011814_031.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000902_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001688_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004547_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023078_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023068_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013911_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027927_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014116_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019058_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016487_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015794_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012374_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003948_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018366_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014778_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026221_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015792_005.png'

 70%|██████▉   | 684/984 [00:03<00:01, 198.47it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030394_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001555_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028876_060.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014280_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006736_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022977_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018984_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026319_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008386_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00006751_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001836_041.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020274_021.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026983_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026132_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013673_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004911_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026769_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013051_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020332_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017243_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025662_006.png'

 72%|███████▏  | 705/984 [00:03<00:01, 175.40it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027470_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019013_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015141_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026398_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021374_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017199_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015583_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017098_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019177_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030162_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026285_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002578_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013951_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020393_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015018_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011832_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021420_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017346_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010959_010.png'

 74%|███████▎  | 724/984 [00:03<00:01, 164.94it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013885_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020774_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028876_035.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017028_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009403_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021772_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019124_090.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016587_069.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009403_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012415_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011450_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018814_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026911_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025707_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012045_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002290_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012592_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027937_004.png'

 75%|███████▌  | 742/984 [00:03<00:01, 169.08it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013674_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013751_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010103_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011576_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030413_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011136_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020259_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012021_081.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013993_077.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013659_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010277_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022290_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028208_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018393_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017710_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003894_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026196_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029861_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021670_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011366_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029431_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011514_015.png'

 78%|███████▊  | 764/984 [00:03<00:01, 180.28it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015090_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010120_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013625_069.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015069_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013391_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005869_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029808_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000193_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016705_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016732_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028285_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012045_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000506_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003064_035.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018032_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005532_022.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023283_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008291_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022084_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016291_020.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002980_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021458_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011237_006.png'

 80%|███████▉  | 787/984 [00:03<00:01, 192.63it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013461_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010828_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007728_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012576_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018319_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017952_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021489_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014956_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008291_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022961_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021772_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003789_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012261_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025962_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014125_042.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011402_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014839_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009608_037.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011583_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021303_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022178_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003528_024.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007629_001.png'

 82%|████████▏ | 810/984 [00:03<00:00, 201.17it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010610_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013721_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019863_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014294_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012094_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005567_025.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000150_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013992_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015605_051.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005353_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022215_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026848_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004893_070.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022021_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027093_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028454_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007735_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011831_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016291_012.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019765_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023138_009.png'

 84%|████████▍ | 831/984 [00:03<00:00, 199.13it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013272_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004344_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014197_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003028_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027837_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010652_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009863_058.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020845_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019157_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009229_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007444_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001933_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002711_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009107_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027758_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013249_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023089_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021860_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013993_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010447_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008727_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017236_075.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023178_002.png'

 87%|████████▋ | 854/984 [00:04<00:00, 207.01it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021818_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016064_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005567_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018063_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014177_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004808_090.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014022_054.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012834_113.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012141_013.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022572_073.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003391_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009507_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029469_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019892_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030162_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013659_019.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026586_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020000_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002106_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00003400_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010767_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017972_026.png'

 89%|████████▉ | 876/984 [00:04<00:00, 208.85it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018865_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029909_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018366_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029579_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020318_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013807_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010767_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009368_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021748_000.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010767_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014083_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005869_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025252_053.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023078_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010071_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013471_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012094_040.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008841_025.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012376_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026886_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010936_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025252_054.png'

 91%|█████████▏| 898/984 [00:04<00:00, 204.43it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019124_104.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027213_044.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00015530_147.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001946_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017137_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014822_039.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00023162_025.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026451_068.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013209_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013310_059.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030111_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025252_032.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025787_050.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009889_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022141_030.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020146_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016429_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026392_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00022141_023.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017747_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000732_005.png'

 93%|█████████▎| 919/984 [00:04<00:00, 205.43it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018496_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00005140_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001248_038.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018055_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012094_047.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008841_044.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028876_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002176_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029259_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025368_018.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021321_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027278_007.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011831_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00002176_005.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029807_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00009166_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025368_033.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020408_058.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025954_025.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021782_028.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016568_026.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029075_013.png'

 96%|█████████▌| 941/984 [00:04<00:00, 207.51it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012622_016.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00004342_050.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026221_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028861_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021700_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020318_022.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00007471_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028628_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011355_011.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030606_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00027652_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021896_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00011355_027.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00017243_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00028924_009.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025787_039.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018055_038.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016937_014.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019706_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020671_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030162_029.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00014626_017.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019124_006.png'

 98%|█████████▊| 964/984 [00:04<00:00, 212.90it/s]



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018055_045.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00008814_010.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00000583_008.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010828_039.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00019917_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00018253_059.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00030636_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001153_004.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00012299_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00021703_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029464_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00010815_006.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00001320_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020482_032.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00013187_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00029464_015.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00025769_001.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00016837_002.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00020124_003.png'



'/azureml-share/chestxray/data/ChestX-ray8/test_images/00026920_000.png'

100%|██████████| 984/984 [00:04<00:00, 209.97it/s]


In [12]:
random.seed(0)
random.shuffle(patient_id)

print("first ten patient ids are", patient_id[:10])

# training:valid:test=7:1:2
patient_id_train = patient_id[:int(total_patient_number * 0.7)]
patient_id_valid = patient_id[int(total_patient_number * 0.7):int(total_patient_number * 0.8)]
# get the rest of the patient_id as the test set
patient_id_test = patient_id[int(total_patient_number * 0.8):]
patient_id_test.extend(bbox_patient_index_list)
patient_id_test = list(set(patient_id_test))


print("train:{} valid:{} test:{}".format(len(patient_id_train), len(patient_id_valid), len(patient_id_test)))

# test_set = test_set+left_out_patient_id
# print("train:{} valid:{} test:{}".format(len(train_set), len(valid_set), len(test_set)))

first ten patient ids are [24303, 16035, 4967, 28624, 5378, 20335, 17069, 12271, 16975, 4469]
train:21563 valid:3081 test:6161


In [13]:
# Add a few more project constants

pathologies_name_list = prj_consts.DISEASE_list
NIH_patients_and_labels_file = 'Data_Entry_2017.csv'

#### Finally do preprocessing
Save labels and partitions

In [14]:
labels_df = pd.read_csv(os.path.join(other_data_dir, NIH_patients_and_labels_file))

In [15]:
#show the label distribution

# Unique IDs frequencies can be computed using list comprehension or collections lib
# [[x,(list(crtData['fullID2'])).count(x)] for x in set(crtData['fullID2'])]
# for tallying, collections lib is faster than list comprehension
from collections import Counter
pathology_distribution = Counter(list(labels_df['Finding Labels']))

# Sort it by ID frequency (dict value)
sorted_by_freq = sorted(pathology_distribution.items(), key=lambda x: x[1], reverse=True)
len(sorted_by_freq)
sorted_by_freq[:20]
sorted_by_freq[-10:]

836

[('No Finding', 60361),
 ('Infiltration', 9547),
 ('Atelectasis', 4215),
 ('Effusion', 3955),
 ('Nodule', 2705),
 ('Pneumothorax', 2194),
 ('Mass', 2139),
 ('Effusion|Infiltration', 1603),
 ('Atelectasis|Infiltration', 1350),
 ('Consolidation', 1310),
 ('Atelectasis|Effusion', 1165),
 ('Pleural_Thickening', 1126),
 ('Cardiomegaly', 1093),
 ('Emphysema', 892),
 ('Infiltration|Nodule', 829),
 ('Atelectasis|Effusion|Infiltration', 737),
 ('Fibrosis', 727),
 ('Edema', 628),
 ('Cardiomegaly|Effusion', 484),
 ('Consolidation|Infiltration', 441)]

[('Atelectasis|Cardiomegaly|Consolidation|Effusion|Pneumothorax', 1),
 ('Effusion|Emphysema|Infiltration|Mass|Nodule', 1),
 ('Atelectasis|Edema|Effusion|Infiltration|Pleural_Thickening', 1),
 ('Atelectasis|Cardiomegaly|Consolidation|Effusion|Infiltration|Pneumonia', 1),
 ('Cardiomegaly|Effusion|Infiltration|Pneumonia', 1),
 ('Atelectasis|Consolidation|Effusion|Fibrosis|Pleural_Thickening', 1),
 ('Effusion|Nodule|Pneumothorax|Mass', 1),
 ('Effusion|Fibrosis|Nodule', 1),
 ('Consolidation|Effusion|Mass|Nodule|Pneumothorax', 1),
 ('Atelectasis|Emphysema|Fibrosis|Nodule|Pleural_Thickening|Pneumothorax', 1)]

In [16]:
# now label distribution after spliting

labels_df['Finding Labels'].str.split( '|', expand=False).str.join(sep='*').str.get_dummies(sep='*').sum()

Atelectasis           11559
Cardiomegaly           2776
Consolidation          4667
Edema                  2303
Effusion              13317
Emphysema              2516
Fibrosis               1686
Hernia                  227
Infiltration          19894
Mass                   5782
No Finding            60361
Nodule                 6331
Pleural_Thickening     3385
Pneumonia              1431
Pneumothorax           5302
dtype: int64

In [17]:
def process_data(current_df, patient_ids):
    image_name_index = []
    image_labels = {}
    for individual_patient in tqdm.tqdm(patient_ids):
        for _, row in current_df[current_df['Patient ID'] == individual_patient].iterrows():
            processed_image_name = row['Image Index']
            if processed_image_name in ignored_images_set:
                pass
            else:
                image_name_index.append(processed_image_name)
                image_labels[processed_image_name] = np.zeros(14, dtype=np.uint8)
                for disease_index, ele in enumerate(pathologies_name_list):
                    if re.search(ele, row['Finding Labels'], re.IGNORECASE):
                        image_labels[processed_image_name][disease_index] = 1
                    else:
                        # redundant code but just to make it more readable
                        image_labels[processed_image_name][disease_index] = 0
                # print("processed", row['Image Index'])
    return image_name_index, image_labels


In [18]:
# # create and save train/test/validation partitions list

train_data_index, train_labels = process_data(labels_df, patient_id_train)
valid_data_index, valid_labels = process_data(labels_df, patient_id_valid)
test_data_index, test_labels = process_data(labels_df, patient_id_test)

print("train, valid, test image number is:", len(train_data_index), len(valid_data_index), len(test_data_index))

# save the data
labels_all = {}
labels_all.update(train_labels)
labels_all.update(valid_labels)
labels_all.update(test_labels)

partition_dict = {'train': train_data_index, 'test': test_data_index, 'valid': valid_data_index}

with open(os.path.join(data_partitions_dir,'labels14_unormalized_cleaned.pickle'), 'wb') as f:
    pickle.dump(labels_all, f)

with open(os.path.join(data_partitions_dir,'partition14_unormalized_cleaned.pickle'), 'wb') as f:
    pickle.dump(partition_dict, f)
    
# also save the patient id partitions for pytorch training    
with open(os.path.join(data_partitions_dir,'train_test_valid_data_partitions.pickle'), 'wb') as f:
    pickle.dump([patient_id_train,patient_id_valid,
                 patient_id_test,
                list(set(bbox_patient_index_list))], f)    


100%|██████████| 21563/21563 [00:36<00:00, 592.48it/s]
100%|██████████| 3081/3081 [00:05<00:00, 595.83it/s]
100%|██████████| 6161/6161 [00:14<00:00, 436.44it/s]


train, valid, test image number is: 69190 9594 33281


In [19]:
# sanity check, see train labels

type(train_labels)
{k: train_labels[k] for k in list(train_labels)[:5]}

dict

{'00018080_007.png': array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8),
 '00021033_000.png': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8),
 '00021362_000.png': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8),
 '00024930_000.png': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8),
 '00030379_002.png': array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)}

In [20]:
# jupyter nbconvert --to html .\Code\02_Model\000_preprocess.ipynb