# This notebook can be used to download and pre-process the ACDC RVIP data and labels and to slice the 3D volumes into 2D slices

In [1]:
# ------------------------------------------ set the working directory to the project root
# NOTE(review): presumably this has to run before the src.* imports below so that they resolve - confirm
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
# NOTE(review): choose_gpu_by_id is not called in the cells visible here - confirm that it is still needed
from src.utils.Tensorflow_helper import choose_gpu_by_id

# ------------------------------------------jupyter magic config
# inline plots, and automatic reload of edited modules before each cell execution
%matplotlib inline
%reload_ext autoreload
%autoreload 2
# ------------------------------------------ import helpers
# Notebook_imports should import glob, os, and many other standard libs
# (the cells below rely on os, glob and display coming from this star import)
from src.utils.Notebook_imports import *
# load helper function to slice 3d_volumes into 2d_slices
from src.data.Dataset import ensure_dir, create_2d_slices_from_3d_volume_files
# folder picker widget, used below to select the data root and the original ACDC folder
from ipyfilechooser import FileChooser

search for root_dir and set working directory
Working directory set to: /mnt/ssd/git/rvip_landmark_detection


# Load CMR and mask file names

In [2]:
# Download and unpack the raw data
# small helper
def clean_import(dir_path):
    """
    Reset a temporary import directory: delete it with all its content and
    hand the path to ensure_dir() afterwards.

    Parameters
    ----------
    dir_path : str
        Path of the directory to wipe. Everything below it is deleted,
        never pass a folder that holds data you want to keep.

    Returns
    -------
    None

    Notes
    -----
    A missing directory is the normal first-run case and is only reported as
    an info message. Any other OSError (e.g. permission denied, or dir_path
    is a regular file) is reported as a warning, because in that case old
    files may survive and could be picked up by a later glob.
    The function stays best-effort and does not re-raise these errors.
    """
    import shutil
    try:
        shutil.rmtree(dir_path)
    except FileNotFoundError as e:
        # nothing to delete, this is expected on the first run
        print("Info: %s : %s" % (dir_path, e.strerror))
        print('Dont worry, Directory will be created.')
    except OSError as e:
        # strerror may be None for some OSErrors, fall back to the exception itself
        print("Warning: could not remove %s : %s" % (dir_path, e.strerror or e))
        print('Old files may still be present, please check this directory manually.')
    # ensure_dir is imported from src.data.Dataset; presumably it creates the directory if it is missing
    ensure_dir(dir_path)

In [3]:
# Please change only the data root here, e.g. data/import
# Folder picker for the data root, created with 'data' as start path.
# The choice is read via dataroot_path.selected in a later cell,
# so select a folder in the widget before running that cell.
dataroot_path = FileChooser('data')
dataroot_path.title = '<b>Choose a data root, data will be downloaded and extracted in this dir</b>'
display(dataroot_path)

FileChooser(path='data', filename='', title='HTML(value='<b>Choose a data root, data will be downloaded and ex…

In [4]:
# Folder picker for the original ACDC download, created with 'data' as start path.
# The choice is read via path_to_original_acdc_files.selected when the 3D CMR files are collected,
# so select the folder in the widget before running that cell.
path_to_original_acdc_files = FileChooser('data')
path_to_original_acdc_files.title = '<b>Choose the path to the original "acdc" root-folder with the patient sub-folders</b>'
display(path_to_original_acdc_files)

FileChooser(path='data', filename='', title='HTML(value='<b>Choose the path to the original "acdc" root-folder…

In [5]:
# define a folder for the acdc cmr and masks, make sure not to use an existing folder
data_root = dataroot_path.selected
# .selected is None as long as no folder has been confirmed in the FileChooser widget,
# fail early with a readable message instead of a TypeError raised inside os.path.join
if data_root is None:
    raise ValueError('No data root selected, please choose a folder in the FileChooser above and re-run this cell.')
# temporary download folder below the data root, clean_import() deletes it with all its content
import_path = os.path.join(data_root, 'import')
ensure_dir(data_root)
clean_import(import_path)

Info: data/import : No such file or directory
Dont worry, Directory will be created.


In [6]:
# download cleaned rvip 3D cmr and masks
!wget https://heibox.uni-heidelberg.de/f/aa4baab97b78481a9bac/?dl=1 -P {import_path}
print('downloaded')
# unzip and replace
zip_file = glob.glob(os.path.join(import_path,'index.html?dl=*'))[0]
!unzip -o {zip_file} -d {data_root}
# clean temp import older
clean_import(import_path)

--2022-01-28 17:59:29--  https://heibox.uni-heidelberg.de/f/aa4baab97b78481a9bac/?dl=1
Resolving heibox.uni-heidelberg.de (heibox.uni-heidelberg.de)... 129.206.7.113
Connecting to heibox.uni-heidelberg.de (heibox.uni-heidelberg.de)|129.206.7.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://heibox.uni-heidelberg.de/seafhttp/files/899b0d56-44dc-4259-be1a-abc2517e9fa7/rvips_public.zip [following]
--2022-01-28 17:59:29--  https://heibox.uni-heidelberg.de/seafhttp/files/899b0d56-44dc-4259-be1a-abc2517e9fa7/rvips_public.zip
Reusing existing connection to heibox.uni-heidelberg.de:443.
HTTP request sent, awaiting response... 200 OK
Length: 258932 (253K) [application/zip]
Saving to: ‘data/import/index.html?dl=1’


2022-01-28 17:59:29 (4,68 MB/s) - ‘data/import/index.html?dl=1’ saved [258932/258932]

downloaded
Archive:  data/import/index.html?dl=1
   creating: data/rvips/
   creating: data/rvips/pp/
  inflating: data/rvips/pp/patient001_frame01_rvip.nrr

In [7]:
# remove old and download new cv-dataframe
!rm {data_root}df_kfold.csv -f
!wget https://heibox.uni-heidelberg.de/f/03f57e89dc8b46668144/?dl=1 -P {import_path}
print('downloaded')
# unzip and replace
zip_file = glob.glob(os.path.join(import_path,'index.html?dl=*'))[0]
!unzip -o {zip_file} -d {data_root}
# clean temp import older
clean_import(import_path)

--2022-01-28 17:59:38--  https://heibox.uni-heidelberg.de/f/03f57e89dc8b46668144/?dl=1
Resolving heibox.uni-heidelberg.de (heibox.uni-heidelberg.de)... 129.206.7.113
Connecting to heibox.uni-heidelberg.de (heibox.uni-heidelberg.de)|129.206.7.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://heibox.uni-heidelberg.de/seafhttp/files/1e6af05d-a4c0-4aea-9ce9-ab0b3d6f71b5/df_kfold.zip [following]
--2022-01-28 17:59:38--  https://heibox.uni-heidelberg.de/seafhttp/files/1e6af05d-a4c0-4aea-9ce9-ab0b3d6f71b5/df_kfold.zip
Reusing existing connection to heibox.uni-heidelberg.de:443.
HTTP request sent, awaiting response... 200 OK
Length: 20549 (20K) [application/zip]
Saving to: ‘data/import/index.html?dl=1’


2022-01-28 17:59:38 (374 KB/s) - ‘data/import/index.html?dl=1’ saved [20549/20549]

downloaded
Archive:  data/import/index.html?dl=1
  inflating: data/df_kfold.csv       


# Check the folder structure in data_root

In [8]:
# list the first directory level of data_root
# original note, spelling cleaned up: pp == 100 patients x phases x rvip/cmr = 200 files
# NOTE(review): presumably "pp" is the rvips/pp sub-folder (not shown at this tree depth) - confirm
!tree -L 1 {data_root}

[01;34mdata/[00m
├── df_kfold.csv
├── [01;34mimport[00m
└── [01;34mrvips[00m

2 directories, 1 file


# Collect the original 3D CMR files from an existing ACDC-data directory
[link to original data](https://acdc.creatis.insa-lyon.fr/description/databases.html)


--> Use the 3D CMR files from the original ACDC download folder

In [10]:
path_to_acdc_original = path_to_original_acdc_files.selected
# .selected is None as long as no folder has been confirmed in the FileChooser widget,
# fail early with a readable message instead of a TypeError raised inside os.path.join
if path_to_acdc_original is None:
    raise ValueError('No ACDC folder selected, please choose the original "acdc" root-folder in the FileChooser above and re-run this cell.')
print('collect 3D CMR from: {}'.format(path_to_acdc_original))
# searches in all patient folders for any 3D CMR (2 frames per patient) as nifti
# the pattern requires the name to end with frameXX.nii.gz, files with an extra suffix after the frame number are skipped
images = sorted(glob.glob(os.path.join(path_to_acdc_original, '*/*frame[0-9][0-9].nii.gz')))
print('images: {}'.format(len(images)))

collect 3D CMR from: /mnt/ssd/data/acdc/orig_save/all/
images: 200


# Collect 3D CMR and labels/masks in sorted order

--> make sure both lists are of equal length

In [14]:
# quality check of the image and mask names, find files with wrong names
# give input and output path here
input_path = os.path.join(data_root, 'rvips', 'pp')
export_path = os.path.join(data_root, '2D')
# searches directly in input_path for any rvip mask as nrrd
masks = sorted(glob.glob(os.path.join(input_path, '*frame[0-9][0-9]_rvip.nrrd')))
print('images: {}'.format(len(images)))
print('masks: {}'.format(len(masks)))
assert len(images) == len(masks), 'len(images)-> {} != len(masks)-> {} '.format(len(images), len(masks))
# in the optimal case there should be as many images as masks. If not, some of the annotations might have been saved with a wrong name.

# Equal counts do not prove a correct pairing: images and masks are later combined by position,
# so one missing plus one surplus file would shift all following pairs unnoticed.
# Compare the file name stems (name without '.nii.gz' / '_rvip.nrrd', both suffixes are fixed by the glob patterns)
# and only warn, so that data sets with a different naming scheme can still be processed.
mismatched_pairs = [
    (img, msk) for img, msk in zip(images, masks)
    if os.path.basename(img)[:-len('.nii.gz')] != os.path.basename(msk)[:-len('_rvip.nrrd')]
]
if len(mismatched_pairs) > 0:
    print('WARNING: {} image/mask pairs do not share the same file name stem, first mismatch: {}'.format(
        len(mismatched_pairs), mismatched_pairs[0]))

images: 200
masks: 200


In [15]:
# Slice each 3D volume into 2D slices
# images and masks are combined by position, both lists are sorted and were checked for equal length before
ensure_dir(export_path)
# the list is left as last expression, so the notebook shows the helper's return value for every pair
[
    create_2d_slices_from_3d_volume_files(img_f=cmr_file, mask_f=rvip_file, export_path=export_path)
    for cmr_file, rvip_file in zip(images, masks)
]

[['01', [10, 256, 216]],
 ['12', [10, 256, 216]],
 ['01', [10, 256, 232]],
 ['12', [10, 256, 232]],
 ['01', [10, 256, 256]],
 ['15', [10, 256, 256]],
 ['01', [10, 256, 232]],
 ['15', [10, 256, 232]],
 ['01', [10, 216, 256]],
 ['13', [10, 216, 256]],
 ['01', [11, 256, 232]],
 ['16', [11, 256, 232]],
 ['01', [10, 224, 222]],
 ['07', [10, 224, 222]],
 ['01', [10, 256, 200]],
 ['13', [10, 256, 200]],
 ['01', [10, 256, 208]],
 ['13', [10, 256, 208]],
 ['01', [10, 256, 208]],
 ['13', [10, 256, 208]],
 ['01', [9, 256, 216]],
 ['08', [9, 256, 216]],
 ['01', [10, 256, 184]],
 ['13', [10, 256, 184]],
 ['01', [10, 256, 216]],
 ['14', [10, 256, 216]],
 ['01', [10, 216, 256]],
 ['13', [10, 216, 256]],
 ['01', [9, 216, 256]],
 ['10', [9, 216, 256]],
 ['01', [10, 256, 192]],
 ['12', [10, 256, 192]],
 ['01', [9, 256, 216]],
 ['09', [9, 256, 216]],
 ['01', [8, 256, 216]],
 ['10', [8, 256, 216]],
 ['01', [11, 256, 216]],
 ['11', [11, 256, 216]],
 ['01', [8, 256, 208]],
 ['11', [8, 256, 208]],
 ['01', [1

# Finally check the folder structure in data_root again

In [17]:
# folder 2D should contain the 2D rvip masks and the corresponding CMR slices
# (2D is the export_path that was handed to create_2d_slices_from_3d_volume_files)
# only the first directory level of data_root is listed
!tree -L 1 {data_root}

[01;34mdata/[00m
├── [01;34m2D[00m
├── df_kfold.csv
├── [01;34mimport[00m
└── [01;34mrvips[00m

3 directories, 1 file
