# Use the model in a inference behaviour

1. Load a cv config with all experiment parameters
2. Load the corresponding data, 
3. create a train and validation generator with the given parameters, exclusive the augmentation parameters
4. reconstruct the model with the given parameters, (we have custom loss functions, simple model.load() will not work)
5. load and apply the corresponding weights (with respect to the distributed training strategy)
6. predict the target vectors with the train and val generators (make sure that we change the batchsize to 1, and avoid shuffle so that we get all files)
7. write the gt and predictions as numpy into the corresponding experiment folder

In [1]:
# ------------------------------------------define logging and working directory
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
from src.utils.Tensorflow_helper import choose_gpu_by_id
# ------------------------------------------define GPU id/s to use
GPU_IDS = '0,1'
GPUS = choose_gpu_by_id(GPU_IDS)
print(GPUS)
# ------------------------------------------jupyter magic config
%matplotlib inline
%reload_ext autoreload
%autoreload 2
# ------------------------------------------ import helpers
# this should import glob, os, and some other standard libs to keep this cell clean
# local imports
from src.utils.Notebook_imports import *
from src.utils.Utils_io import Console_and_file_logger, init_config

# import external libs
from tensorflow.python.client import device_lib
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
import cv2
import pandas as pd
import numpy as np
import SimpleITK as sitk
from ipyfilechooser import FileChooser
from src.data.Generators import DataGenerator


search for root_dir and set working directory
Working directory set to: /mnt/ssd/git/wft21_septum_landmark_detection
['/gpu:0', '/gpu:1']


# Load a config into the global namespace

In [2]:
# Select config of training that you trained on. If Experiment Root was changed change the input-string of the FileChooser aswell.
exp_config_chooser = FileChooser(os.path.join(os.getcwd(),'exp/'), '')
display(exp_config_chooser)
@interact_manual
def load_config():

    global exp_config_chooser, config
    """
    load an experiment config
    """
    if 'exp_config_chooser' in globals():
        config_file  = exp_config_chooser.selected
    else:
        print('no config chooser found')

    # load the experiment config
    with open(config_file, encoding='utf-8') as data_file:
        config = json.loads(data_file.read())
    globals().update(config)
    Console_and_file_logger(EXPERIMENT, logging.INFO)
    logging.info('Loaded config for experiment: {}'.format(config['EXPERIMENT']))

FileChooser(path='/mnt/ssd/git/wft21_septum_landmark_detection/exp', filename='', title='HTML(value='', layout…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

# Load Dataframe based on .csv with fold information. 
## Manually input desired fold!

In [7]:
df = pd.read_csv(DF_FOLDS)
# df.head() # shows dataframe with all files and all folds


# Input fold on which you are working. Only the validation data of each fold will be predicted. 
# Load dataframe with all patients of the corresponding fold
df = df[df['fold']==0] 
df.head() 


# useful for troubleshooting , showcases entire dataframe:
# pd.set_option("display.max_rows", None, "display.max_columns", None) 
# print(df)

Unnamed: 0,fold,modality,pathology,patient,x_path,y_path
1344,0,train,RV,patient083,data/raw/ACDC/2D/train/patient083__t01_z0_img....,data/raw/ACDC/2D/train/patient083__t01_z0_msk....
1345,0,train,RV,patient083,data/raw/ACDC/2D/train/patient083__t01_z1_img....,data/raw/ACDC/2D/train/patient083__t01_z1_msk....
1346,0,train,RV,patient083,data/raw/ACDC/2D/train/patient083__t01_z2_img....,data/raw/ACDC/2D/train/patient083__t01_z2_msk....
1347,0,train,RV,patient083,data/raw/ACDC/2D/train/patient083__t01_z3_img....,data/raw/ACDC/2D/train/patient083__t01_z3_msk....
1348,0,train,RV,patient083,data/raw/ACDC/2D/train/patient083__t01_z4_img....,data/raw/ACDC/2D/train/patient083__t01_z4_msk....


# Pandas Dataframe

# Load the corresponding file names for this fold

In [9]:
# Load SAX volumes
from src.data.Dataset import get_trainings_files
x_train_sax, y_train_sax, x_val_sax, y_val_sax = get_trainings_files(data_path=DATA_PATH_SAX,
                                                                     path_to_folds_df=DF_FOLDS,
                                                                     fold=FOLD)
logging.info('SAX train CMR: {}, SAX train masks: {}'.format(len(x_train_sax), len(y_train_sax)))
logging.info('SAX val CMR: {}, SAX val masks: {}'.format(len(x_val_sax), len(y_val_sax)))

2021-07-11 14:34:58,604 INFO Found 1902 images/masks in /mnt/ssd/data/WFT_MRT21/Export_2021-05-26_16_03
2021-07-11 14:34:58,604 INFO Patients train: 75
2021-07-11 14:34:58,664 INFO Selected 1406 of 1902 files with 75 of 100 patients for training fold 3
2021-07-11 14:34:58,665 INFO SAX train CMR: 1406, SAX train masks: 1406
2021-07-11 14:34:58,665 INFO SAX val CMR: 496, SAX val masks: 496


# Load the model, load and set the corresponding weights

In [6]:
# Load Neural Network based on config.
import src.models.Unets as modelmanager
# create a model
logging.info('Create model')
model = modelmanager.create_unet(config)
model.load_weights(os.path.join(config['MODEL_PATH'],'model.h5'))
logging.info('loaded model weights as h5 file')
model.summary()

2021-07-11 14:34:11,679 INFO Create model


using tensorflow, need to monkey patch
tf.python.backend.slice overwritten by monkey patch
(None, 128, 128, 1)


2021-07-11 14:34:13,288 INFO loaded model weights as h5 file


Model: "unet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 128, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 128, 128, 32) 320         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 128, 128, 32) 128         conv2d[0][0]                     
__________________________________________________________________________________________________
dropout (Dropout)               (None, 128, 128, 32) 0           batch_normalization[0][0]        
_______________________________________________________________________________________________

# Save gt and pred into experiment folder
saves groundtruth based on np.stack with gts and predictions as one numpy stack under the pred_filename as a .npy file. The numpy stack is later used to create the images based on the arrays


# fast hack which aligns the patients

In [10]:
# get a list of patients for fast testing
patients = [p for p in sorted(df['patient'].unique())]
#patients

Workaround to align data from generators with dataframe based on .csv. 
The idea is to create one generator per patient.

This means that the original CMR picture and ground-truth are written out in new dimensions aswell, so the predictions can be compared and evaluated using our evaluation script and MITK. This shortcut enables us to skip undoing the generator steps. 

Each patient has two 3D-Volumes, corresponding to the end diastolic (ed) and end systolic (es) phase of the cardiac cycle. 
The amount of slices remain the same across volumes of the same patient, allowing us to sort the files and half them to reconstruct the corresponding two 3d-volumes.

In [11]:
# in short: create one generator per patient
# read config and initialize DataGenerator with given parameters, reading from config. Shuffle and Augmentation have to be off, Batchsize of 1 is required
# logging.getLogger().setLevel(logging.INFO)
from src.data.Generators import DataGenerator
from logging import info
config['SHUFFLE'] = False
config['AUGMENT'] = False
config['AUGMENT_GRID'] = False# make sure no augmentation will be applied to the validation data
config['HIST_MATCHING'] = False
config['BATCHSIZE'] = 1
# by this we can have a different set of parameters for both generators
val_config = config.copy()

# change export_root if needed
export_root = '/mnt/ssd/git/wft21_septum_landmark_detection/data/temp_predictions/k-fold_fold3'
ensure_dir(export_root)

# only chose data from the test split of each cross-validation-fold
df_fold = df[df['modality']=='test']
#print(df_fold)

# filter a list of filenames by a patient id, this is necessary as the filepath in our df differs from the real filenames
def filter_by_patient_id(p_id, f_names):
    return [elem for elem in f_names if p_id in elem]


# show only data on 'unique' patients to sum up folds and slices
for p in sorted(df_fold['patient'].unique()): # for each patient
    info(p) # shows which patient we are at
    # load files and masks for given patient
    files_ = filter_by_patient_id(p, x_val_sax)
    masks_ = filter_by_patient_id(p, y_val_sax)
    info(len(files_)) # shows amount of slices for each patient
    # collect all files for this patient
    # split in ED and ES, using the fact that both have the same amount of slices and the data is sorted.
    ed_f = files_[:len(files_)//2]
    es_f = files_[len(files_)//2:]
    ed_m = masks_[:len(masks_)//2]
    es_m = masks_[len(masks_)//2:]
    f_ = [ed_f, es_f]
    m_ = [ed_m, es_m]
    phases = ['ED', 'ES']
    assert(len(ed_m)==len(ed_f)), 'number of images and masks should be the same, something went wrong'
    info('length of ed_f ' + str(len(ed_f)))
    info('length of es_f ' + str(len(es_f)))
    #print('this is ed_f ' + ed_f)
    #print('this is es_f ' + es_f)
    
    # the following is looped twice so both phases, ED and ES are processed.
    for p_ in range(2):
        phase_cmr_files = f_[p_] 
        phase_mask_files = m_[p_]
        current_phase = phases[p_]
        info('patient: {}, phase: {}, files: {}'.format(p, current_phase, len(phase_cmr_files)))
        
        # create validation generator just for the given patient and fold. This means that each patient requires two generators. The work around allows us to align the info from the .csv with the data from the generators. 
        validation_generator = DataGenerator(phase_cmr_files, phase_mask_files, config=val_config)

        # get cmr mask and save in a numpy.stack
        gts = np.stack([np.squeeze(y) for x, y in validation_generator])
        logging.info('groundtruth shape' + str(gts.shape))
        #get cmr image and save in a numpy stack
        gts_cmr = np.stack([np.squeeze(x) for x, y in validation_generator])
        logging.info('original cmr shape' + str(gts_cmr.shape))

        # predict on the validation generator
        preds = model.predict(validation_generator)
        logging.info(preds.shape)

        # upper_RVIP = 1, lower_RVIP == 2. Corresponds to annotation guide. 
        # transform to int representation (one-hot-encoded)
        # create data based on ground-truth
        gts_flat = np.zeros((gts.shape[:-1]))
        gts_flat[gts[...,0]>0.5] = 1
        gts_flat[gts[...,1]>0.5] = 2
        
        # create data based on predictions
        preds_flat = np.zeros((gts.shape[:-1]))
        preds_flat[preds[...,0]>0.5] = 1
        preds_flat[preds[...,1]>0.5] = 2
        
        info(gts_flat.shape)
        info(preds_flat.shape)
        info(gts_cmr.shape)
        
        # Read image data from Array using sitk library
        gt_sitks = sitk.GetImageFromArray(gts_flat.astype(np.uint8))
        pred_sitks = sitk.GetImageFromArray(preds_flat.astype(np.uint8))
        gt_cmr_sitks = sitk.GetImageFromArray(np.stack(gts_cmr, axis = 0))
        
     
        # Writing images to storage from previously loaded images. 
        sitk.WriteImage(gt_sitks, os.path.join(export_root, '{}_{}_gt.nrrd'.format(p, current_phase)))
        sitk.WriteImage(pred_sitks, os.path.join(export_root, '{}_{}_pred.nrrd'.format(p, current_phase)))
        sitk.WriteImage(gt_cmr_sitks, os.path.join(export_root, '{}_{}_cmr.nrrd'.format(p, current_phase)))
        #sitk.WriteImage(sitk.GetImageFromArray(np.stack(gts_cmr[:10], axis=0)), '/mnt/ssd/git/wft21_septum_landmark_detection/data/temp/3d_new_temp_cmr.nrrd')

logging.info('done! Check the ' + export_root + 'folder for files')

2021-07-11 14:35:08,166 INFO patient007
2021-07-11 14:35:08,166 INFO 20
2021-07-11 14:35:08,167 INFO length of ed_f 10
2021-07-11 14:35:08,167 INFO length of es_f 10
2021-07-11 14:35:08,167 INFO patient: patient007, phase: ED, files: 10
2021-07-11 14:35:08,168 INFO Create DataGenerator
2021-07-11 14:35:08,168 INFO Datagenerator created with: 
 shape: [128, 128]
 spacing: [1.8, 1.8]
 batchsize: 1
 Scaler: MinMax
 Images: 10 
 Augment: False 
 Thread workers: 8
2021-07-11 14:35:08,169 INFO No augmentation
2021-07-11 14:35:08,236 INFO groundtruth shape(10, 128, 128, 2)
2021-07-11 14:35:08,270 INFO original cmr shape(10, 128, 128)
2021-07-11 14:35:09,926 INFO (10, 128, 128, 2)
2021-07-11 14:35:09,928 INFO (10, 128, 128)
2021-07-11 14:35:09,929 INFO (10, 128, 128)
2021-07-11 14:35:09,929 INFO (10, 128, 128)
2021-07-11 14:35:09,932 INFO patient: patient007, phase: ES, files: 10
2021-07-11 14:35:09,933 INFO Create DataGenerator
2021-07-11 14:35:09,933 INFO Datagenerator created with: 
 shape:

# Undo the generator steps

- if you want to plot the predictions onto the original CMR images, you need to reverse the generator steps. This is currently not implemented nor needed.

- If we have masks, undo the cropping/padding, resampling etc. so that our masks have the same size/spacing and name as our input volumes
- If we have regression coordinates, make sure that they could be applied on the input volumes