## Test IoU script

In [1]:
import os
import random

import cv2 as cv
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

from typing import Tuple
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K

In [2]:
os.chdir('..')

In [3]:
from model.metric_utils import calculate_iou
from preprocessing.get_ct_scan_information import build_train_test_df
from model.img_generator import DataGenerator2D

In [4]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct')

train_images_dir = os.path.join(data_path_source_dir, 'imagesTr')

train_images_label_dir = os.path.join(data_path_source_dir, 'labelsTr')

train_images_path_list = [os.path.join(train_images_dir, filename) for filename in os.listdir(train_images_dir) if filename != '.DS_Store' and '._' not in filename]
train_images_labels_path_list = [os.path.join(train_images_label_dir, filename) for filename in os.listdir(train_images_label_dir) if filename != '.DS_Store' and '._' not in filename]

### Let's check if the IoU base function works

In [5]:
a = nib.load(train_images_labels_path_list[0]).get_data()


* deprecated from version: 3.0
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 5.0
  """Entry point for launching an IPython kernel.


In [6]:
a.shape

(512, 512, 60)

In [7]:
calculate_iou(target=a, prediction=cv.GaussianBlur(a, (5,5), 0) > 0.5)

0.9988795518207283

In [8]:
calculate_iou(target=a, prediction=cv.flip(a, 0) > 0.5)

0.0

In [9]:
calculate_iou(target=a, prediction=cv.flip(cv.flip(a, 0), 0) > 0.5)

1.0

# Let's check the behavior of the augmentation operations of the generator

In [10]:
random.seed(123)
np.random.seed(123)

Let's choose a set of images that have cancer labeled in them 

In [11]:
tr_df, x_ts_df = build_train_test_df(data_path_source_dir)

In [12]:
cancer_pixels_df = pd.read_pickle('cancer_pixels_df')
cancer_pixels_df.reset_index(inplace=True)

In [13]:
cancer_pixels_df['index'] = cancer_pixels_df.image_name.map(lambda str_: str_.split('.nii.gz')[0].split('colon_')[1])

In [14]:
tr_df_only_cancer = cancer_pixels_df.set_index(['index', 'depth_i'])[['cancer_pixel_area']].join(tr_df, how='inner')

### Identity case

In [15]:
def calculate_iou_df(df_: pd.DataFrame, img_dims: Tuple, model_,
                     pixel_threshold: float = 0.5, prediction_batch_size: int = 32) \
        -> Tuple[pd.DataFrame, list, list]:
    """

    :param df_:
    :param img_dims:
    :param model_:
    :param pixel_threshold:
    :param prediction_batch_size:
    :return:
    """

    iou_list = list()
    y_pred_list = list()
    y_list = list()

    for img_dx, df_ in df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col=None,
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        label_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                            batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                            resize_dim=None)

        # Predict for a group of cuts of the same image
        for i, ((X_cut_i, _), (_, y_cut_i)) in enumerate(zip(img_i_generator, label_i_generator)):

            # y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = y_cut_i

            # Resize prediction to match label mask dimensions and restack
            #  the predictions so that hey are channel last
            for j, depth_i in enumerate(range(X_cut_i.shape[0])):
                y_cut_i_predict_resized_j = cv.resize(
                    y_cut_i_predict[j, :, :], y_cut_i.shape[1:],
                    interpolation=cv.INTER_CUBIC)  # INTER_LINEAR is faster but INTER_CUBIC is better

                # Add extra dim at the end
                y_cut_i_predict_resized_j = y_cut_i_predict_resized_j.reshape(y_cut_i_predict_resized_j.shape + (1,))
                y_cut_i_j = y_cut_i[j, :, :].reshape(y_cut_i[j, :, :].shape + (1,))

                if j == 0:
                    y_cut_i_predict_resized = y_cut_i_predict_resized_j
                    y_cut_i_restacked = y_cut_i_j

                else:
                    y_cut_i_predict_resized = np.concatenate([y_cut_i_predict_resized, y_cut_i_predict_resized_j],
                                                             axis=2)
                    y_cut_i_restacked = np.concatenate([y_cut_i_restacked, y_cut_i_j], axis=2)

            # When there is only one image in the minibatch it adds an extra dimension
            if len(y_cut_i_restacked.shape) > 3:
                y_cut_i_restacked = np.squeeze(y_cut_i_restacked, axis=3)

            # Now stack the minibatches along the 3rd axis to complete the 3D image
            if i == 0:
                y_i_predict_3d = y_cut_i_predict_resized
                y_i_3d = y_cut_i_restacked

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict_resized], axis=2)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i_restacked], axis=2)

        y_pred_list.append(y_i_predict_3d)
        y_list.append(y_i_3d)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=(y_i_predict_3d > pixel_threshold) * 1)})

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df, y_list, y_pred_list

In [16]:
sample_imgs_indx = tr_df.reset_index(level=0)['index'].sample(10)

In [17]:
sample_tr_df = tr_df.loc[pd.IndexSlice[sample_imgs_indx, :], :]

In [18]:
iou_df, _, _ = calculate_iou_df(df_=sample_tr_df, img_dims=(512, 512), model_=None,
                          pixel_threshold= 0.5, prediction_batch_size= 32)

In [19]:
iou_df.iou.isna().sum()

0

In [20]:
iou_df.iou.mean()

1.0

In [21]:
iou_df

Unnamed: 0_level_0,iou
index,Unnamed: 1_level_1
11,1.0
28,1.0
31,1.0
88,1.0
111,1.0
124,1.0
162,1.0
164,1.0


### Use Gaussian blur to reduce concordancy

In [22]:
def calculate_iou_df(df_: pd.DataFrame, img_dims: Tuple, model_,
                     pixel_threshold: float = 0.5, prediction_batch_size: int = 32) \
        -> Tuple[pd.DataFrame, list, list]:
    """

    :param df_:
    :param img_dims:
    :param model_:
    :param pixel_threshold:
    :param prediction_batch_size:
    :return:
    """

    iou_list = list()
    y_pred_list = list()
    y_list = list()

    for img_dx, df_ in df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col=None,
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        label_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                            batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                            resize_dim=None)

        # Predict for a group of cuts of the same image
        for i, ((X_cut_i, _), (_, y_cut_i)) in enumerate(zip(img_i_generator, label_i_generator)):

            #y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = cv.GaussianBlur(y_cut_i, (11,11), 0)

            # Resize prediction to match label mask dimensions and restack
            #  the predictions so that hey are channel last
            for j, depth_i in enumerate(range(X_cut_i.shape[0])):
                y_cut_i_predict_resized_j = cv.resize(
                    y_cut_i_predict[j, :, :], y_cut_i.shape[1:],
                    interpolation=cv.INTER_CUBIC)  # INTER_LINEAR is faster but INTER_CUBIC is better

                # Add extra dim at the end
                y_cut_i_predict_resized_j = y_cut_i_predict_resized_j.reshape(y_cut_i_predict_resized_j.shape + (1,))
                y_cut_i_j = y_cut_i[j, :, :].reshape(y_cut_i[j, :, :].shape + (1,))

                if j == 0:
                    y_cut_i_predict_resized = y_cut_i_predict_resized_j
                    y_cut_i_restacked = y_cut_i_j

                else:
                    y_cut_i_predict_resized = np.concatenate([y_cut_i_predict_resized, y_cut_i_predict_resized_j],
                                                             axis=2)
                    y_cut_i_restacked = np.concatenate([y_cut_i_restacked, y_cut_i_j], axis=2)

            # When there is only one image in the minibatch it adds an extra dimension
            if len(y_cut_i_restacked.shape) > 3:
                y_cut_i_restacked = np.squeeze(y_cut_i_restacked, axis=3)

            # Now stack the minibatches along the 3rd axis to complete the 3D image
            if i == 0:
                y_i_predict_3d = y_cut_i_predict_resized
                y_i_3d = y_cut_i_restacked

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict_resized], axis=2)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i_restacked], axis=2)

        y_pred_list.append(y_i_predict_3d)
        y_list.append(y_i_3d)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=(y_i_predict_3d > pixel_threshold) * 1)})

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df, y_list, y_pred_list


In [23]:
iou_df, _, _ = calculate_iou_df(df_=sample_tr_df, img_dims=(512, 512), model_=None,
                          pixel_threshold= 0.5, prediction_batch_size= 32)

In [24]:
iou_df.iou.isna().sum()

0

In [25]:
iou_df.iou.mean()

0.5708457330442347

In [26]:
iou_df

Unnamed: 0_level_0,iou
index,Unnamed: 1_level_1
11,0.56379
28,0.431753
31,0.703364
88,0.48227
111,0.831689
124,0.741255
162,0.402155
164,0.41049


### Use flipping

In [27]:
def calculate_iou_df(df_: pd.DataFrame, img_dims: Tuple, model_,
                     pixel_threshold: float = 0.5, prediction_batch_size: int = 32) \
        -> Tuple[pd.DataFrame, list, list]:
    """

    :param df_:
    :param img_dims:
    :param model_:
    :param pixel_threshold:
    :param prediction_batch_size:
    :return:
    """

    iou_list = list()
    y_pred_list = list()
    y_list = list()

    for img_dx, df_ in df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col=None,
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        label_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                            batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                            resize_dim=None)

        # Predict for a group of cuts of the same image
        for i, ((X_cut_i, _), (_, y_cut_i)) in enumerate(zip(img_i_generator, label_i_generator)):

            #y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = cv.flip(y_cut_i, 0)

            # Resize prediction to match label mask dimensions and restack
            #  the predictions so that hey are channel last
            for j, depth_i in enumerate(range(X_cut_i.shape[0])):
                y_cut_i_predict_resized_j = cv.resize(
                    y_cut_i_predict[j, :, :], y_cut_i.shape[1:],
                    interpolation=cv.INTER_CUBIC)  # INTER_LINEAR is faster but INTER_CUBIC is better

                # Add extra dim at the end
                y_cut_i_predict_resized_j = y_cut_i_predict_resized_j.reshape(y_cut_i_predict_resized_j.shape + (1,))
                y_cut_i_j = y_cut_i[j, :, :].reshape(y_cut_i[j, :, :].shape + (1,))

                if j == 0:
                    y_cut_i_predict_resized = y_cut_i_predict_resized_j
                    y_cut_i_restacked = y_cut_i_j

                else:
                    y_cut_i_predict_resized = np.concatenate([y_cut_i_predict_resized, y_cut_i_predict_resized_j],
                                                             axis=2)
                    y_cut_i_restacked = np.concatenate([y_cut_i_restacked, y_cut_i_j], axis=2)

            # When there is only one image in the minibatch it adds an extra dimension
            if len(y_cut_i_restacked.shape) > 3:
                y_cut_i_restacked = np.squeeze(y_cut_i_restacked, axis=3)

            # Now stack the minibatches along the 3rd axis to complete the 3D image
            if i == 0:
                y_i_predict_3d = y_cut_i_predict_resized
                y_i_3d = y_cut_i_restacked

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict_resized], axis=2)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i_restacked], axis=2)

        y_pred_list.append(y_i_predict_3d)
        y_list.append(y_i_3d)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=(y_i_predict_3d > pixel_threshold) * 1)})

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df, y_list, y_pred_list


In [28]:
iou_df, _, _ = calculate_iou_df(df_=sample_tr_df, img_dims=(512, 512), model_=None,
                          pixel_threshold= 0.5, prediction_batch_size= 32)

In [29]:
iou_df.iou.isna().sum()

0

In [30]:
iou_df.iou.mean()

0.04927614325935405

In [31]:
iou_df

Unnamed: 0_level_0,iou
index,Unnamed: 1_level_1
11,0.0
28,0.0
31,0.0
88,0.213227
111,0.180982
124,0.0
162,0.0
164,0.0
