## Test IoU script

In [1]:
import os
import random

import cv2 as cv
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

from typing import Tuple
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K

from model_utils import calculate_iou_holdout_set, calculate_iou
from img_generator import build_train_test_df, DataGenerator2D

In [2]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct', 'ml4h_proj1_colon_cancer_ct')

train_images_dir = os.path.join(data_path_source_dir, 'imagesTr')

train_images_label_dir = os.path.join(data_path_source_dir, 'labelsTr')

train_images_path_list = [os.path.join(train_images_dir, filename) for filename in os.listdir(train_images_dir) if filename != '.DS_Store' and '._' not in filename]
train_images_labels_path_list = [os.path.join(train_images_label_dir, filename) for filename in os.listdir(train_images_label_dir) if filename != '.DS_Store' and '._' not in filename]

### Let's check if the IoU base function works

In [3]:
a = nib.load(train_images_labels_path_list[0]).get_data()


* deprecated from version: 3.0
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 5.0
  """Entry point for launching an IPython kernel.


In [4]:
a.shape

(512, 512, 60)

In [5]:
calculate_iou(target=a, prediction=cv.GaussianBlur(a, (5,5), 0) > 0.5)

0.9988795518207283

In [6]:
calculate_iou(target=a, prediction=cv.flip(a, 0) > 0.5)

0.0

In [7]:
calculate_iou(target=a, prediction=cv.flip(cv.flip(a, 0), 0) > 0.5)

1.0

### Let's check if the function that uses the models prediction works well

In [8]:
from img_generator import build_train_test_df, DataGenerator2D

In [9]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct', 'ml4h_proj1_colon_cancer_ct')

# Let's check the behavior of the augmentation operations of the generator

In [10]:
random.seed(123)
np.random.seed(123)

Let's choose a set of images that have cancer labeled in them 

In [11]:
tr_df, x_ts_df = build_train_test_df(data_path_source_dir)

In [12]:
cancer_pixels_df = pd.read_pickle('cancer_pixels_df')
cancer_pixels_df.reset_index(inplace=True)

In [13]:
cancer_pixels_df['index'] = cancer_pixels_df.image_name.map(lambda str_: str_.split('.nii.gz')[0].split('colon_')[1])

In [14]:
tr_df_only_cancer = cancer_pixels_df.set_index(['index', 'depth_i'])[['cancer_pixel_area']].join(tr_df, how='inner')

### Identity case

In [87]:
def calculate_iou_holdout_set(holdout_df_: pd.DataFrame, img_dims: Tuple, model_,
                              pixel_threshold: float = 0.5, prediction_batch_size: int = 32) -> pd.DataFrame:
    iou_list = list()

    for img_dx, df_ in holdout_df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        # Predict for a group of cuts of the same image
        for i, (X_cut_i, y_cut_i) in enumerate(img_i_generator):
            #y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = y_cut_i
            
            if len(y_cut_i_predict.shape) > 3:
                y_cut_i_predict = np.squeeze(y_cut_i_predict, axis=3)

            if i == 0:
                y_i_predict_3d = y_cut_i_predict
                y_i_3d = y_cut_i

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict], axis=0)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i], axis=0)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=y_i_predict_3d > pixel_threshold)})

        if (y_i_predict_3d > 0).any():
            print(f'Predicted cancer for at least one pixel in image {img_dx}')

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df

In [None]:
iou_df = calculate_iou_holdout_set(holdout_df_=tr_df, img_dims=(512, 512), model_=None,
                              pixel_threshold= 0.5, prediction_batch_size= 32)

Predicted cancer for at least one pixel in image 001
Predicted cancer for at least one pixel in image 005
Predicted cancer for at least one pixel in image 006
Predicted cancer for at least one pixel in image 007
Predicted cancer for at least one pixel in image 008
Predicted cancer for at least one pixel in image 009
Predicted cancer for at least one pixel in image 011
Predicted cancer for at least one pixel in image 012
Predicted cancer for at least one pixel in image 015
Predicted cancer for at least one pixel in image 022
Predicted cancer for at least one pixel in image 024
Predicted cancer for at least one pixel in image 025
Predicted cancer for at least one pixel in image 026
Predicted cancer for at least one pixel in image 027
Predicted cancer for at least one pixel in image 028
Predicted cancer for at least one pixel in image 029
Predicted cancer for at least one pixel in image 030
Predicted cancer for at least one pixel in image 031
Predicted cancer for at least one pixel in ima

In [None]:
iou_df.iou.isna().sum()

In [None]:
iou_df.iou.mean()

In [None]:
iou_df

### Use Gaussian blur to reduce concordancy

In [82]:
def calculate_iou_holdout_set(holdout_df_: pd.DataFrame, img_dims: Tuple, model_,
                              pixel_threshold: float = 0.5, prediction_batch_size: int = 32) -> pd.DataFrame:
    iou_list = list()

    for img_dx, df_ in holdout_df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        # Predict for a group of cuts of the same image
        for i, (X_cut_i, y_cut_i) in enumerate(img_i_generator):
            #y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = cv.GaussianBlur(y_cut_i, (11,11), 0)
            
            if len(y_cut_i_predict.shape) > 3:
                y_cut_i_predict = np.squeeze(y_cut_i_predict, axis=3)

            if i == 0:
                y_i_predict_3d = y_cut_i_predict
                y_i_3d = y_cut_i

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict], axis=0)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i], axis=0)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=y_i_predict_3d > pixel_threshold)})

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df

In [83]:
iou_df = calculate_iou_holdout_set(holdout_df_=tr_df, img_dims=(512, 512), model_=None,
                              pixel_threshold= 0.5, prediction_batch_size= 32)

Predicted cancer for at least one pixel in image 001
Predicted cancer for at least one pixel in image 005
Predicted cancer for at least one pixel in image 006
Predicted cancer for at least one pixel in image 007
Predicted cancer for at least one pixel in image 008
Predicted cancer for at least one pixel in image 009
Predicted cancer for at least one pixel in image 011
Predicted cancer for at least one pixel in image 012
Predicted cancer for at least one pixel in image 015
Predicted cancer for at least one pixel in image 022
Predicted cancer for at least one pixel in image 024
Predicted cancer for at least one pixel in image 025
Predicted cancer for at least one pixel in image 026
Predicted cancer for at least one pixel in image 027
Predicted cancer for at least one pixel in image 028
Predicted cancer for at least one pixel in image 029
Predicted cancer for at least one pixel in image 030
Predicted cancer for at least one pixel in image 031
Predicted cancer for at least one pixel in ima

In [84]:
iou_df.iou.isna().sum()

0

In [85]:
iou_df.iou.mean()

0.5930992058319379

In [86]:
iou_df

Unnamed: 0_level_0,iou
index,Unnamed: 1_level_1
001,0.091980
005,0.699205
006,0.000000
007,0.524362
008,0.571458
...,...
164,0.410490
165,0.349081
166,0.770992
168,0.302119


### Use flipping

In [None]:
def calculate_iou_holdout_set(holdout_df_: pd.DataFrame, img_dims: Tuple, model_,
                              pixel_threshold: float = 0.5, prediction_batch_size: int = 32) -> pd.DataFrame:
    iou_list = list()

    for img_dx, df_ in holdout_df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        # Predict for a group of cuts of the same image
        for i, (X_cut_i, y_cut_i) in enumerate(img_i_generator):
            #y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = cv.flip(y_cut_i, 0)
            
            if len(y_cut_i_predict.shape) > 3:
                y_cut_i_predict = np.squeeze(y_cut_i_predict, axis=3)

            if i == 0:
                y_i_predict_3d = y_cut_i_predict
                y_i_3d = y_cut_i

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict], axis=0)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i], axis=0)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=y_i_predict_3d > pixel_threshold)})

        if (y_i_predict_3d > 0).any():
            print(f'Predicted cancer for at least one pixel in image {img_dx}')

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df

In [None]:
iou_df = calculate_iou_holdout_set(holdout_df_=tr_df, img_dims=(512, 512), model_=None,
                              pixel_threshold= 0.5, prediction_batch_size= 32)

In [None]:
iou_df.iou.isna().sum()

In [None]:
iou_df.iou.mean()

In [None]:
iou_df