# Post Processing

**In this notebook:**

* Test DBSCAN for post processing

## Dependencies
Install, load, and initialize all required dependencies for this experiment.

### Install Dependencies

In [None]:
# It should be possible to run this notebook independently of anything else.
# If a dependency cannot be installed via pip, either:
# - download & install it via %%bash
# - at least mention those dependencies in this section

import sys
!{sys.executable} -m pip install -q -e ../../utils/

^C
[31mERROR: Operation cancelled by user[0m
You should consider upgrading via the '/opt/jupyterhub/bin/python3 -m pip install --upgrade pip' command.[0m


# System libraries

In [25]:
from __future__ import absolute_import, division, print_function
import logging, os, sys

# Enable logging
logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.INFO, stream=sys.stdout)

# Re-import packages if they change
%load_ext autoreload
%autoreload 2

# Recursion Depth
import sys
sys.setrecursionlimit(10000)

# Initialize tqdm to always use the notebook progress bar.
# `tqdm.tqdm_notebook` is a deprecated wrapper function scheduled for removal;
# `tqdm.notebook.tqdm` is the progress-bar class it forwards to, so bind that
# class directly.
import tqdm
import tqdm.notebook
tqdm.tqdm = tqdm.notebook.tqdm

# Third-party libraries
import comet_ml
import numpy as np
import pandas as pd
import nilearn.plotting as nip
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import collections
%matplotlib inline
plt.rcParams["figure.figsize"] = (12,6)
%config InlineBackend.figure_format='retina'  # adapt plots for retina displays
import git


# Project utils

import aneurysm_utils
from aneurysm_utils import evaluation, training


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
# Choose the project root from the directory the notebook is running in.
if "workspace" in os.getcwd():
    ROOT = "/workspace"
elif "/group/cake" in os.getcwd():
    ROOT = "/group/cake"
else:
    # Without this branch ROOT would stay undefined and the next cell would
    # fail with an unrelated-looking NameError.
    raise RuntimeError(
        "Cannot determine ROOT: working directory %r contains neither "
        "'workspace' nor '/group/cake'." % os.getcwd()
    )

### Initialize Environment

In [27]:
env = aneurysm_utils.Environment(project="our-git-project", root_folder=ROOT)

# The Comet API key is a credential and must not be stored in the notebook.
# It is read from the COMET_API_KEY environment variable instead.
# NOTE(review): the key that was previously hard-coded in this cell is part of
# the file history and should be revoked / rotated.
comet_key = os.environ.get("COMET_API_KEY")
if not comet_key:
    raise RuntimeError(
        "COMET_API_KEY is not set; export it before starting the notebook server."
    )
env.cached_data["comet_key"] = comet_key
env.print_info()

Environment Info:

Library Version: 0.1.0
Configured Project: our-git-project

Folder Structure: 
- Root folder: /group/cake
 - Project folder: /group/cake/our-git-project
 - Datasets folder: /data/training
 - Models folder: /group/cake/our-git-project/models
 - Experiments folder: /group/cake/our-git-project/experiments


## Load Data
Download, explore, and prepare all required data for the experiment in this section.

In [28]:
# Options read by the dataset loading and splitting cells of this notebook.
dataset_params = {
    "prediction": "labeled",
    "mri_data_selection": "",
    "balance_data": False,
    "seed": 1,
    "resample_voxel_dim": (1, 1, 1),
}

# Preprocessing switches (not referenced elsewhere in the visible part of this
# notebook).
preprocessing_params = {
    "min_max_normalize": None,
    "mean_std_normalize": False,
    "smooth_img": False,  # can contain a number: smoothing factor
}


### Load Meta Data

In [29]:
from aneurysm_utils.data_collection import load_aneurysm_dataset

# Load the metadata table; the data selection and the random seed both come
# from `dataset_params`.
df = load_aneurysm_dataset(
    env,
    random_state=dataset_params["seed"],
    mri_data_selection=dataset_params["mri_data_selection"],
)
df.head()

Unnamed: 0,Aneurysm Geometry,Angiography Data,Vessel Geometry,Labeled Mask Index,Location,Age,Sex,Rupture Status,Age Bin,Aneurysm Count,Case,Path Orig,Path Mask,Path Vessel,Path Labeled Mask
0,A001.stl,A001_orig.nii.gz,A001_vessel.stl,1,Acom,48,m,1.0,"(40, 50]",1,A001,/data/training/A001_orig.nii.gz,/data/training/A001_masks.nii.gz,/data/training/A001_vessel.nii.gz,/data/training/A001_labeledMasks.nii.gz
1,A003.stl,A003_orig.nii.gz,A003_vessel.stl,1,Pcom,58,f,0.0,"(50, 60]",1,A003,/data/training/A003_orig.nii.gz,/data/training/A003_masks.nii.gz,/data/training/A003_vessel.nii.gz,/data/training/A003_labeledMasks.nii.gz
2,A005.stl,A005_orig.nii.gz,A005_vessel.stl,1,PICA,45,m,1.0,"(40, 50]",1,A005,/data/training/A005_orig.nii.gz,/data/training/A005_masks.nii.gz,/data/training/A005_vessel.nii.gz,/data/training/A005_labeledMasks.nii.gz
3,A006.stl,A006_orig.nii.gz,A006_vessel.stl,1,ACom,46,f,1.0,"(40, 50]",1,A006,/data/training/A006_orig.nii.gz,/data/training/A006_masks.nii.gz,/data/training/A006_vessel.nii.gz,/data/training/A006_labeledMasks.nii.gz
4,A008.stl,A008_orig.nii.gz,A008_vessel.stl,1,ACA,72,f,0.0,"(70, 80]",1,A008,/data/training/A008_orig.nii.gz,/data/training/A008_masks.nii.gz,/data/training/A008_vessel.nii.gz,/data/training/A008_labeledMasks.nii.gz


In [30]:
from aneurysm_utils.data_collection import split_mri_images

# Keyword arguments shared by both split calls below; only `prediction` differs.
split_options = {
    "encode_labels": False,
    "random_state": dataset_params["seed"],
    "balance_data": dataset_params["balance_data"],
    "resample_voxel_dim": dataset_params["resample_voxel_dim"],
}

# First split: labels for the prediction target configured in dataset_params.
train_data, test_data, val_data, _ = split_mri_images(
    env, df, prediction=dataset_params["prediction"], **split_options
)
mri_imgs_test, labels_test = test_data

# Second split with prediction="mask". This rebinds train_data / test_data /
# val_data and mri_imgs_test from the first split.
# NOTE(review): `invidual_labels_test` is taken from the "mask" split while the
# "labeled" split feeds `labels_test` -- confirm the two are not swapped.
train_data, test_data, val_data, _ = split_mri_images(
    env, df, prediction="mask", **split_options
)
mri_imgs_test, invidual_labels_test = test_data

109
98
         Images
-----  --------
All         109
Train        87
Val          11
Test         11



  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

109
98
         Images
-----  --------
All         109
Train        87
Val          11
Test         11



  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

!free -m

from aneurysm_utils import data_collection, postprocessing

# Cases to load. This assignment now precedes every use: the "vessel" load
# below used to reference `case_list` before it was defined, which raises
# NameError when the notebook is run top to bottom on a fresh kernel.
case_list = ["A001", "A003", "A005", "A006", "A008", "A009", "A010", "A012", "A014", "A015", "A016", "A017_L_", "A018", "A019"]

# Each call rebinds `mir_images` and `patients`; only the label arrays differ.
mir_images, vessel_labels, patients = data_collection.load_mri_images(env, df, prediction="vessel", case_list=case_list, resample_voxel_dim=None)

mir_images, aneurysm_labels, patients = data_collection.load_mri_images(env, df, prediction="mask", case_list=case_list)
mir_images, invidiual_aneurysm_labels, patients = data_collection.load_mri_images(env, df, prediction="labeled", case_list=case_list)


In [102]:
# Load the stored predictions and keep element 0 of every entry.
# Note: this rebinds `aneurysm_labels`.
new = np.load("../markus/predictions.npy")
aneurysm_labels = [entry[0] for entry in new]
len(aneurysm_labels)

162

In [103]:
# Print the distinct values contained in each prediction, one line per entry.
for image in aneurysm_labels:
    distinct_values = np.unique(image)
    print(distinct_values)

[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]


In [84]:
def _patch_span(index, count, patch_size, overlap):
    """Return (start, stop) of patch `index` along one axis.

    Only the last patch of an axis (index == count - 1) is shifted back by
    `overlap` so that it ends exactly at the volume border.
    """
    shift = overlap if index == count - 1 else 0
    return index * patch_size - shift, (index + 1) * patch_size - shift


def unify_alternativ(image, patch_size, most_common_shape):
    """
    Reassemble a single volume from its cubic patches.

    Patches are consumed in the order of three nested loops over the x, y and
    z patch index (z varies fastest). Along each axis the last patch is
    shifted back so that it ends at the volume border; it therefore overlaps
    its predecessor, and overlapping voxels receive the element-wise maximum.

    Parameters
    ----------
    image: numpy.array
          shape is (number_of_patches, h, w, d): number of patches, height,
          width, depth of one patch
    patch_size: int
          edge length of a patch, e.g. 64
    most_common_shape: tuple
          shape of the reassembled volume, for example (139, 139, 120)
          (each axis is assumed to be at least `patch_size` long)

    Returns
    -------
    unified_image: np.array
          the unpatched volume, shape `most_common_shape`. The volume is
          initialised with zeros before the maximum is taken, so patch values
          are assumed to be non-negative (negative values would be clipped to
          0).

    Raises
    ------
    ValueError
          if `image` is not 4-dimensional or `most_common_shape` does not have
          exactly three entries.
    """
    if image.ndim != 4:
        raise ValueError(
            "image must have shape (number_of_patches, h, w, d), got %s" % (image.shape,)
        )

    dim = np.array(most_common_shape)  # size of the volume
    # Number of patches per axis needed to cover every voxel at least once,
    # e.g. [3, 3, 2].
    n_patches = np.ceil(dim / patch_size).astype(int)
    # Number of voxels per axis by which the last patch overlaps its
    # predecessor, e.g. 3 * 64 % 139 = 53.
    rest = (n_patches * patch_size) % dim

    h, w, d = most_common_shape

    # Must be zeros, not np.empty: the buffer is read by np.maximum below, and
    # np.empty would feed uninitialised memory into the result.
    unified_image = np.zeros([h, w, d])

    counter = 0  # index of the next patch to place
    for i in range(n_patches[0]):
        start_x, stop_x = _patch_span(i, n_patches[0], patch_size, rest[0])

        for j in range(n_patches[1]):
            start_y, stop_y = _patch_span(j, n_patches[1], patch_size, rest[1])

            for k in range(n_patches[2]):
                start_z, stop_z = _patch_span(k, n_patches[2], patch_size, rest[2])

                block = (
                    slice(start_x, stop_x),
                    slice(start_y, stop_y),
                    slice(start_z, stop_z),
                )
                # Maximum handles the overlapping border patches.
                unified_image[block] = np.maximum(unified_image[block], image[counter])

                # No early exit on a fixed patch count: every grid position
                # consumes exactly one patch, whatever the grid size is.
                counter += 1

    return unified_image

In [85]:
def patch_unifier_alternativ(list_patches,size_test_set,most_common_shape,patch_size):
    """
    Rebuild every test volume from the flat list of predicted patches.

    list_patches: list
                  predictions from the evaluation; its length has to equal
                  (patches per image) x size_test_set, each element of the
                  form (h,w,d)
    size_test_set: int
                  number of images the patches belong to
    most_common_shape: tuple
                  shape of the input images before they were cut into patches
    patch_size: int
                  edge length of one patch

    Return:

    unified_images: list
                  the unpatched images, one per test image, each of shape
                  most_common_shape
    """
    # Patches needed along each axis, and therefore per image.
    patches_per_axis = np.ceil(np.array(most_common_shape) / patch_size).astype(int)
    patches_per_image = np.prod(patches_per_axis)

    # Unpacking fails unless the shape has exactly three entries.
    h, w, d = most_common_shape

    assert len(list_patches) / patches_per_image == size_test_set

    # One chunk of shape (patches_per_image, h, w, d) per test image.
    chunks = np.split(np.array(list_patches), size_test_set)

    unified_images = [
        unify_alternativ(chunks[n], patch_size, most_common_shape)
        for n in range(size_test_set)
    ]
    assert len(unified_images) == size_test_set
    return unified_images

In [104]:
aneurysm_labels_new=patch_unifier_alternativ(aneurysm_labels,9,(139,139,120),64)


0
0
0
0
0
0
0
0
0


In [105]:
len(aneurysm_labels_new)

9

In [137]:

# Options passed to postprocessing.postprocess in a later cell.
postprocess_dict = {
    "dbscan": True,
    "remove_border_candidates": True,
    "resample": True,
    "evaluate_dbscan": True,
    "invidual_aneurysm_labels": invidual_labels_test,
    "size": 10,
}

In [112]:
# Print the distinct values contained in each reassembled volume.
for image in aneurysm_labels_new:
    distinct_values = np.unique(image)
    print(distinct_values)

[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]


In [128]:
# Print the distinct values contained in each test label volume.
for image in invidual_labels_test:
    distinct_values = np.unique(image)
    print(distinct_values)

[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]


In [147]:
from aneurysm_utils import postprocessing
mri_imgs= postprocessing.postprocess(env,aneurysm_labels_new,postprocess_dict)

[INFO] Postprocessing: DBSCAN...
labels=[-1  0  1  2  3  4  5  6  7  8  9]
[0.]
labels=[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0. 10. 19. 20.]
labels=[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
[ 0. 17.]
labels=[-1  0  1  2  3  4  5  6  7  8]
[0. 7.]
labels=[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
[0.]
labels=[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
[0. 6.]
labels=[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
 23]
[ 0. 18. 22. 24.]
labels=[-1  0  1  2  3  4  5  6  7  8  9]
[ 0. 10.]
labels=[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17]
[ 0. 11.]
[0.]


TypeError: 'int' object is not callable

In [120]:
bounding_boxes =postprocessing.bounding_boxes(mri_imgs,patients)

[0. 1.]
[-0.  1.]
[0. 1.]
[0. 1.]
[0. 1.]
[0. 1.]
[-0.  1.]
[0. 1.]
[0. 1.]
[-0.  1.]
[0. 1.]
[0. 1.]
[0. 1.]


In [124]:
print(vessel_labels[9].shape)
mri_imgs[9].shape

(256, 256, 220)


(256, 256, 220)

In [1]:
evaluation.draw_bounding_box(bounding_boxes[9]["candidates"],aneurysm_array=mri_imgs[9],vessel_array=vessel_labels[9])

NameError: name 'evaluation' is not defined

In [154]:
from aneurysm_utils import evaluation

(256, 256, 220)


In [None]:
# Compute the task-2 and task-1 score dictionaries via the evaluation module.
# NOTE(review): `mir_imgs` and `invidual_aneurysm_labels` are not assigned
# anywhere in the visible notebook (the closest assigned names are
# `mir_images` / `mri_imgs` and `invidiual_aneurysm_labels`), so these calls
# would raise NameError on a fresh kernel -- confirm the intended variables.
# NOTE(review): `aneurysm_labels` is passed as both the first and the second
# argument of calc_scores_task_2 -- confirm this is intentional.
score_dict2=evaluation.calc_scores_task_2(aneurysm_labels,aneurysm_labels,mir_imgs,invidual_aneurysm_labels)
score_dict1=evaluation.calc_scores_task_1(mri_imgs,invidual_aneurysm_labels,bounding_boxes)

In [None]:
final_score =evaluation.calc_total_segmentation_score(score_dict2)