In [1]:
# define logging and working directory
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
from src.utils.notebook_imports import *
from pyforest import *
%matplotlib inline
%reload_ext autoreload
%autoreload 2

from src.data.Dataset import create_3d_volumes_from_4d_files, create_4d_volumes_from_4d_files
from src.utils.utils_io import Console_and_file_logger, ensure_dir
# set up logging for this notebook under the name 'find_patients_with_AX_and_SAX'
# (the recorded cell output reports ./logs/find_patients_with_AX_and_SAX.log as log file)
Console_and_file_logger('find_patients_with_AX_and_SAX')

search for root_dir and set working directory
Working directory set to: /mnt/data/git/cardio


Using TensorFlow backend.
2020-07-20 10:45:02,755 INFO -------------------- Start --------------------
2020-07-20 10:45:02,756 INFO Working directory: /mnt/data/git/cardio.
2020-07-20 10:45:02,757 INFO Log file: ./logs/find_patients_with_AX_and_SAX.log
2020-07-20 10:45:02,757 INFO Log level for console: INFO


<src.utils.utils_io.Console_and_file_logger at 0x7f103f9117b8>

1. get a list of all AX volumes
2. get a list of all SAX volumes
3. map all SAX and AX volumes by their patient id and study year

In [2]:
# collect the cleaned volume files of both orientations, ordered by file name
ax_volumes = glob.glob('data/raw/gcn_05_2020/AX/*clean.nrrd')
ax_volumes.sort()
sax_volumes = glob.glob('data/raw/gcn_05_2020/SAx/*clean.nrrd')
sax_volumes.sort()

# report how many files were found per orientation
print('AX volumes: {}'.format(len(ax_volumes)))
print('SAX volumes: {}'.format(len(sax_volumes)))

AX volumes: 92
SAX volumes: 279


In [3]:
# extract the patient id and year from both lists
def extract_patient_year(file_name):
    """
    Return the second '-'-separated token of the file's base name.

    For a name such as '0000-0HQQW4ZN_2007-05-23_volume_clean.nrrd' this is
    '0HQQW4ZN_2007', i.e. the patient id joined with the study year.
    file_name: file name or full path as string
    """
    base_name = os.path.basename(file_name)
    tokens = base_name.split('-')
    return tokens[1]
# unique patient/year keys per orientation
ax_patients = {extract_patient_year(volume) for volume in ax_volumes}
sax_patients = {extract_patient_year(volume) for volume in sax_volumes}
print('AX patients: {}, SAX patients: {}'.format(len(ax_patients), len(sax_patients)))

AX patients: 92, SAX patients: 279


In [4]:
# patients (id + year) that are present in both the AX and the SAX set
intersection = ax_patients & sax_patients
print('mapped patients: {}'.format(len(intersection)))
# show the AX keys that have no SAX counterpart
print('AX patients that cant be mapped to a sax patient: \n{}'.format(ax_patients.difference(intersection)))

mapped patients: 86
AX patients that cant be mapped to a sax patient: 
{'264V7142_2008', 'D4PXE75F_2008', '4N4ZDJQ2_2007', 'KW4MJ3XX_2007', 'FV9UL3AC_2006', 'GL9QL330_2005'}


In [13]:
# predicate used to restrict ax_volumes and sax_volumes to the mapped patients
def filter_by_patient_intersection(file_name):
    """
    Return True if the patient key returned by extract_patient_year(file_name)
    is a member of the module-level intersection set, otherwise False.
    """
    patient_key = extract_patient_year(file_name)
    return patient_key in intersection
# keep only the volumes whose patient passes the filter
ax_volumes_filtered = [volume for volume in ax_volumes if filter_by_patient_intersection(volume)]
sax_volumes_filtered = [volume for volume in sax_volumes if filter_by_patient_intersection(volume)]
print(len(ax_volumes_filtered), len(sax_volumes_filtered))

86 86


In [14]:
# derive the mask file names by swapping 'clean' for 'mask' in each volume path
ax_masks_filtered = [volume.replace('clean', 'mask') for volume in ax_volumes_filtered]
sax_masks_filtered = [volume.replace('clean', 'mask') for volume in sax_volumes_filtered]

In [15]:
# helper to copy the 4D images/masks into a new directory
def copy_volumes(files, destination_path):
    """
    Copy every file listed in files into destination_path.

    files: list of full file paths as strings
    destination_path: root destination folder as string; ensure_dir is called
        on it before the first copy
    Each copy is logged; afterwards the number of *.nrrd files found in
    destination_path is logged. Nothing is returned.
    """
    from shutil import copy2

    logging.info('Start to copy: {} files'.format(len(files)))
    # make sure the target folder is there before copying
    ensure_dir(destination_path)
    for src_path in files:
        logging.info('copy file: {} to {}'.format(src_path, destination_path))
        copy2(src_path, destination_path)
    # count all nrrd files that are present in the destination directory
    nrrd_pattern = os.path.join(destination_path, '*.nrrd')
    n_found = len(glob.glob(nrrd_pattern))
    logging.info('Copy finished, found {} files in destination path {}'.format(n_found, destination_path))

# export volumes and masks of the mapped patients, one sub folder per orientation
export_path = 'data/raw/gcn_05_2020_ax_sax_86'
copy_volumes(files=ax_volumes_filtered,
             destination_path=os.path.join(export_path, 'AX'))
copy_volumes(files=ax_masks_filtered,
             destination_path=os.path.join(export_path, 'AX'))
copy_volumes(files=sax_volumes_filtered,
             destination_path=os.path.join(export_path, 'SAX'))
copy_volumes(files=sax_masks_filtered,
             destination_path=os.path.join(export_path, 'SAX'))

2020-06-09 10:36:17,858 INFO Start to copy: 86 files
2020-06-09 10:36:17,858 INFO copy file: data/raw/gcn_05_2020/AX/0000-0HQQW4ZN_2007-05-23_volume_clean.nrrd to data/raw/gcn_05_2020_ax_sax_86/AX
2020-06-09 10:36:17,933 INFO copy file: data/raw/gcn_05_2020/AX/0000-0PTV75MP_2005-06-27_volume_clean.nrrd to data/raw/gcn_05_2020_ax_sax_86/AX
2020-06-09 10:36:17,965 INFO copy file: data/raw/gcn_05_2020/AX/0000-0RPELLU8_2007-02-13_volume_clean.nrrd to data/raw/gcn_05_2020_ax_sax_86/AX
2020-06-09 10:36:18,044 INFO copy file: data/raw/gcn_05_2020/AX/0000-11YU3CUF_2007-12-10_volume_clean.nrrd to data/raw/gcn_05_2020_ax_sax_86/AX
2020-06-09 10:36:18,122 INFO copy file: data/raw/gcn_05_2020/AX/0000-13JLP3HN_2007-05-22_volume_clean.nrrd to data/raw/gcn_05_2020_ax_sax_86/AX
2020-06-09 10:36:18,252 INFO copy file: data/raw/gcn_05_2020/AX/0000-1E4PF7MR_2006-07-06_volume_clean.nrrd to data/raw/gcn_05_2020_ax_sax_86/AX
2020-06-09 10:36:18,303 INFO copy file: data/raw/gcn_05_2020/AX/0000-1E8MCV9L_2005-

# Create a subfolder with the SAX images that do not occur in the AX dataset

In [8]:
# get patients without a mapped AX stack to train an independent unet
# patients (id + year) that exist in both the AX and the SAX set
intersection = ax_patients.intersection(sax_patients)
# SAX patients for which no AX stack exists
sax_exclusive_ax = sax_patients - intersection
print('mapped patients: {}'.format(len(intersection)))
# the printed number counts SAX patients without an AX counterpart, so the label says so
print('SAX patients that cant be mapped to an AX patient: \n{}'.format(len(sax_exclusive_ax)))

mapped patients: 86
AX patients that cant be mapped to a sax patient: 
193


In [9]:
# predicate used to restrict the SAX file list to patients without an AX stack
def filter_by_patient_intersection(file_name):
    """
    Return True if the patient key returned by extract_patient_year(file_name)
    is a member of the module-level sax_exclusive_ax set, otherwise False.

    NOTE(review): this rebinds the name of the earlier helper that checks
    membership in the intersection set; after this cell has run, the name
    refers to the SAX-exclusive check.
    """
    patient_key = extract_patient_year(file_name)
    return patient_key in sax_exclusive_ax
# keep only the SAX volumes whose patient passes the filter
sax_volumes_filtered = [volume for volume in sax_volumes if filter_by_patient_intersection(volume)]
print(len(sax_volumes_filtered))

193


In [10]:
# derive the mask file names by swapping 'clean' for 'mask' in each volume path
sax_masks_filtered = [volume.replace('clean', 'mask') for volume in sax_volumes_filtered]

In [11]:
# helper to copy the 4D images/masks into a new directory
def copy_volumes(files, destination_path):
    """
    Copy every file listed in files into destination_path.

    files: list of full file paths as strings
    destination_path: root destination folder as string; ensure_dir is called
        on it before the first copy
    Each copy is logged; afterwards the number of *.nrrd files found in
    destination_path is logged. Nothing is returned.
    """
    from shutil import copy2

    logging.info('Start to copy: {} files'.format(len(files)))
    # make sure the target folder is there before copying
    ensure_dir(destination_path)
    for src_path in files:
        logging.info('copy file: {} to {}'.format(src_path, destination_path))
        copy2(src_path, destination_path)
    # count all nrrd files that are present in the destination directory
    nrrd_pattern = os.path.join(destination_path, '*.nrrd')
    n_found = len(glob.glob(nrrd_pattern))
    logging.info('Copy finished, found {} files in destination path {}'.format(n_found, destination_path))

# export volumes and masks of the SAX-only patients into one SAX sub folder
export_path = 'data/raw/gcn_05_2020_sax_excl_ax_patients'
copy_volumes(files=sax_volumes_filtered,
             destination_path=os.path.join(export_path, 'SAX'))
copy_volumes(files=sax_masks_filtered,
             destination_path=os.path.join(export_path, 'SAX'))

2020-07-20 10:58:19,903 INFO Start to copy: 193 files
2020-07-20 10:58:19,908 INFO copy file: data/raw/gcn_05_2020/SAx/0000-02Z1L14N_2006-11-09_volume_clean.nrrd to data/raw/gcn_05_2020_sax_excl_ax_patients/SAX
2020-07-20 10:58:19,937 INFO copy file: data/raw/gcn_05_2020/SAx/0000-04NEJQUZ_2007-03-13_volume_clean.nrrd to data/raw/gcn_05_2020_sax_excl_ax_patients/SAX
2020-07-20 10:58:20,029 INFO copy file: data/raw/gcn_05_2020/SAx/0000-0AE4R74L_1900-01-01_volume_clean.nrrd to data/raw/gcn_05_2020_sax_excl_ax_patients/SAX
2020-07-20 10:58:20,046 INFO copy file: data/raw/gcn_05_2020/SAx/0000-1DRHF745_2005-09-01_volume_clean.nrrd to data/raw/gcn_05_2020_sax_excl_ax_patients/SAX
2020-07-20 10:58:20,113 INFO copy file: data/raw/gcn_05_2020/SAx/0000-1FTR75DW_2006-06-09_volume_clean.nrrd to data/raw/gcn_05_2020_sax_excl_ax_patients/SAX
2020-07-20 10:58:20,175 INFO copy file: data/raw/gcn_05_2020/SAx/0000-1KLZMGEG_2007-01-31_volume_clean.nrrd to data/raw/gcn_05_2020_sax_excl_ax_patients/SAX
2020