## Test IoU script

In [1]:
import os
import random

import cv2 as cv
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

from typing import Tuple
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K

from model_utils import calculate_iou_holdout_set, calculate_iou
from img_generator import build_train_test_df, DataGenerator2D

In [2]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct', 'ml4h_proj1_colon_cancer_ct')

train_images_dir = os.path.join(data_path_source_dir, 'imagesTr')

train_images_label_dir = os.path.join(data_path_source_dir, 'labelsTr')

train_images_path_list = [os.path.join(train_images_dir, filename) for filename in os.listdir(train_images_dir) if filename != '.DS_Store' and '._' not in filename]
train_images_labels_path_list = [os.path.join(train_images_label_dir, filename) for filename in os.listdir(train_images_label_dir) if filename != '.DS_Store' and '._' not in filename]

### Let's check if the IoU base function works

In [3]:
a = nib.load(train_images_labels_path_list[0]).get_data()


* deprecated from version: 3.0
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 5.0
  """Entry point for launching an IPython kernel.


In [4]:
a.shape

(512, 512, 60)

In [5]:
calculate_iou(target=a, prediction=cv.GaussianBlur(a, (5,5), 0) > 0.5)

0.9988795518207283

In [6]:
calculate_iou(target=a, prediction=cv.flip(a, 0) > 0.5)

0.0

In [7]:
calculate_iou(target=a, prediction=cv.flip(cv.flip(a, 0), 0) > 0.5)

1.0

### Let's check if the function that uses the models prediction works well

In [8]:
from img_generator import build_train_test_df, DataGenerator2D

In [9]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct', 'ml4h_proj1_colon_cancer_ct')

# Let's check the behavior of the augmentation operations of the generator

In [10]:
random.seed(123)
np.random.seed(123)

Let's choose a set of images that have cancer labeled in them 

In [11]:
tr_df, x_ts_df = build_train_test_df(data_path_source_dir)

In [12]:
cancer_pixels_df = pd.read_pickle('cancer_pixels_df')
cancer_pixels_df.reset_index(inplace=True)

In [13]:
cancer_pixels_df['index'] = cancer_pixels_df.image_name.map(lambda str_: str_.split('.nii.gz')[0].split('colon_')[1])

In [14]:
tr_df_only_cancer = cancer_pixels_df.set_index(['index', 'depth_i'])[['cancer_pixel_area']].join(tr_df, how='inner')

In [15]:
def calculate_iou(target: np.ndarray, prediction: np.ndarray) -> float:
    intersection = np.logical_and(target, prediction)
    union = np.logical_or(target, prediction)
    iou_score = np.sum(intersection.astype(np.float64)) / np.sum(union.astype(np.float64))

    return iou_score

In [64]:
def calculate_iou_holdout_set(holdout_df_: pd.DataFrame, img_dims: Tuple, model_,
                              pixel_threshold: float = 0.5, prediction_batch_size: int = 32) -> pd.DataFrame:
    iou_list = list()

    for img_dx, df_ in holdout_df_.groupby(level=0):
        print(f'{img_dx}, print(df_.shape): {df_.shape}')
        #print('print(df_.shape)')
        #print(df_.shape)
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        # Predict for a group of cuts of the same image
        for i, (X_cut_i, y_cut_i) in enumerate(img_i_generator):
            print(i)
            print(y_cut_i.shape)
            try:
                # y_cut_i_predict = model_.predict(X_cut_i)
                y_cut_i_predict = y_cut_i
                if len(y_cut_i_predict.shape) > 3:
                    y_cut_i_predict = np.squeeze(y_cut_i_predict, axis=3)

                if i == 0:
                    y_i_predict_3d = y_cut_i_predict
                    y_i_3d = y_cut_i

                else:
                    y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict], axis=0)
                    y_i_3d = np.concatenate([y_i_3d, y_cut_i], axis=0)

            except Exception as e:
                print(e)
                print(y_i_predict_3d.shape)
                print(y_cut_i_predict.shape)
                raise(e)
                    
        print(y_i_3d.shape)
        print((y_i_predict_3d > pixel_threshold).shape)
        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=y_i_predict_3d > pixel_threshold)})

        #if (y_i_predict_3d > 0).any():
        #    print(f'Predicted cancer for at least one pixel in image {img_dx}')

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df

In [71]:
def calculate_iou_holdout_set(holdout_df_: pd.DataFrame, img_dims: Tuple, model_,
                              pixel_threshold: float = 0.5, prediction_batch_size: int = 32) -> pd.DataFrame:
    iou_list = list()

    for img_dx, df_ in holdout_df_.groupby(level=0):
        img_i_generator = DataGenerator2D(df=df_, x_col='x_tr_img_path', y_col='y_tr_img_path',
                                          batch_size=prediction_batch_size, num_classes=None, shuffle=False,
                                          resize_dim=img_dims)

        # Predict for a group of cuts of the same image
        for i, (X_cut_i, y_cut_i) in enumerate(img_i_generator):
            #y_cut_i_predict = model_.predict(X_cut_i)
            y_cut_i_predict = y_cut_i
            
            if len(y_cut_i_predict.shape) > 3:
                y_cut_i_predict = np.squeeze(y_cut_i_predict, axis=3)

            if i == 0:
                y_i_predict_3d = y_cut_i_predict
                y_i_3d = y_cut_i

            else:
                y_i_predict_3d = np.concatenate([y_i_predict_3d, y_cut_i_predict], axis=0)
                y_i_3d = np.concatenate([y_i_3d, y_cut_i], axis=0)

        # Measure IoU over entire 3D image after concatenating all of the cuts
        iou_list.append({'index': img_dx,
                         'iou': calculate_iou(target=y_i_3d, prediction=y_i_predict_3d > pixel_threshold)})

        if (y_i_predict_3d > 0).any():
            print(f'Predicted cancer for at least one pixel in image {img_dx}')

    # Let's convert the iou to a pandas dataframe
    iou_df = pd.DataFrame(iou_list).set_index('index')

    return iou_df

### Identity case

In [28]:
tr_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [35]:
a = tr_df.loc[pd.IndexSlice[['036'], :], :]

In [36]:
a

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
36,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,...,...,...,...
36,92,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,93,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,94,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
36,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,97,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [40]:
index=1
batch_size = 50
img_index = '036'

In [38]:
a[[]].iloc[pd.IndexSlice[index * batch_size: (index + 1) * batch_size, :]]

index,depth_i
36,50
36,51
36,52
36,53
36,54
36,55
36,56
36,57
36,58
36,59


In [22]:
tr_df.loc[pd.IndexSlice[[img_index], :], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,5,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,6,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,7,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,8,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
1,9,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [41]:
datagen_1_img = DataGenerator2D(df=tr_df.loc[pd.IndexSlice[[img_index], :], :], x_col='x_tr_img_path', y_col='y_tr_img_path',
                batch_size=16, num_classes=None, shuffle=False,
                resize_dim=None)

In [42]:
len(datagen_1_img)

7

In [43]:
for i, (X_cut_i, y_cut_i) in enumerate(datagen_1_img):
    print(X_cut_i.shape)
    print(i)

(16, 512, 512)
0
(16, 512, 512)
1
(16, 512, 512)
2
(16, 512, 512)
3
(16, 512, 512)
4
(16, 512, 512)
5
(1, 512, 512)
6


In [57]:
np.squeeze(np.ones((1, 512, 512, 1)), axis=3).shape

(1, 512, 512)

In [63]:
calculate_iou_holdout_set(holdout_df_=tr_df.loc[pd.IndexSlice[[img_index], :], :], img_dims=(512, 512), model_=None,
                              pixel_threshold= 0.5, prediction_batch_size= 32)

036, print(df_.shape): (97, 3)
0
(32, 512, 512)
1
(32, 512, 512)
2
(32, 512, 512)
3
(1, 512, 512)
(97, 512, 512)
(97, 512, 512)


Unnamed: 0_level_0,iou
index,Unnamed: 1_level_1
36,1.0


In [72]:
iou_df = calculate_iou_holdout_set(holdout_df_=tr_df, img_dims=(512, 512), model_=None,
                              pixel_threshold= 0.5, prediction_batch_size= 32)

Predicted cancer for at least one pixel in image 001
Predicted cancer for at least one pixel in image 005
Predicted cancer for at least one pixel in image 006
Predicted cancer for at least one pixel in image 007
Predicted cancer for at least one pixel in image 008
Predicted cancer for at least one pixel in image 009
Predicted cancer for at least one pixel in image 011
Predicted cancer for at least one pixel in image 012
Predicted cancer for at least one pixel in image 015
Predicted cancer for at least one pixel in image 022
Predicted cancer for at least one pixel in image 024
Predicted cancer for at least one pixel in image 025
Predicted cancer for at least one pixel in image 026
Predicted cancer for at least one pixel in image 027
Predicted cancer for at least one pixel in image 028
Predicted cancer for at least one pixel in image 029
Predicted cancer for at least one pixel in image 030
Predicted cancer for at least one pixel in image 031
Predicted cancer for at least one pixel in ima

In [73]:
iou_df.iou.isna().sum()

0

In [74]:
iou_df.iou.mean()

1.0