# The purpose of this code is to gather the predicted images, load them into their original volume and calculate the dice
This will save the results as a csv file for later

In [1]:
from keras.models import Sequential, load_model
import os
import numpy as np
import nibabel as nib
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage import measure
from skimage.transform import resize
from keras_unet.metrics import dice_coef
from keras_unet.models import custom_unet
from keras_unet.losses import jaccard_distance
from sklearn.model_selection import train_test_split
from PIL import Image
from PIL import ImageOps
import fnmatch
import nibabel as nib
import shutil
import re
import pandas as pd
%matplotlib inline

-----------------------------------------
keras-unet init: TF version is >= 2.0.0 - using `tf.keras` instead of `Keras`
-----------------------------------------


In [2]:
#define our keras accuracy metrics - these are common formatting gathered from the github and tensorflow community
import numpy as np
from keras import backend as K
smooth = 1

def jaccard_distance_loss(y_true, y_pred, smooth=100): 
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(K.abs(y_true_f * y_pred_f)) 
    sum_ = K.sum(K.abs(y_true_f) + K.abs(y_pred_f)) 
    jac = (intersection + smooth) / (sum_ - intersection + smooth) 
    return (1 - jac) * smooth 

def mean_length_error(y_true, y_pred):
    y_true_f = K.sum(K.round(K.flatten(y_true)))
    y_pred_f = K.sum(K.round(K.flatten(y_pred)))
    delta = (y_pred_f - y_true_f)
    return K.mean(K.tanh(delta))

def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_coef_loss(y_true, y_pred):
    return -dice_coef(y_true, y_pred)

def np_dice_coef(y_true, y_pred):
    tr = y_true.flatten()
    pr = y_pred.flatten()
    return (2. * np.sum(tr * pr) + smooth) / (np.sum(tr) + np.sum(pr) + smooth)

In [3]:
#gather the images and their path based on a phrase
def gather_set(data_path, phrase):
    set_of = []
    path = data_path + '\\'
    for f in os.listdir(data_path):
      if phrase in f:
        set_of.append(f)
      else:
        continue
    set_of = np.array(set_of)

    indices = np.array(range(len(set_of))) # we will use this in the next step.

    return set_of


In [51]:
#define where the predictions are, where the tensor will be saved, and what the model used for prediction was

filepath_predictions = r"D:\EKS-predicted\KU\UB-cyst"
filepath_tensors = r"D:\EKS-predicted\Tensors\KU\Tensors"
filepath_data = filepath_predictions
images = gather_set(filepath_predictions, 'UAB_cyst_35ep')
model_name = 'UAB_cyst_35ep'

In [52]:
#this snippet loaded in test patients  - gather a list of images to stack into tensors
image_list = []

#Emory
#phrase_list = ['283935R','290336L','290336R','295106L','295106R']
#Mayo
#phrase_list = ['380166R','383193L','383193R','385151L','385151R']
#UAB
#phrase_list = ['457036L','457036R']
#KU
phrase_list = ['113994L','139486R','157925R','183417R','186714L','186714R','187456L']

for i in range(len(phrase_list)):
    phrase1 = phrase_list[i][:-2]
    phrase2 = phrase_list[i][-1]
    print(phrase1, phrase2)
    for z in range(len(images)):
        name = images[z]
        if phrase1 in name:
            if phrase2 in name:
                image_list.append(name)
            else:
                continue
        else:
            continue

11399 L
13948 R
15792 R
18341 R
18671 L
18671 R
18745 L


In [53]:
#make sure we have the patient name/number correct from above in a list to pull from
id_list = []   
for i in range(len(image_list)):
    image_name = image_list[i]
    unique_id =  image_name[0:17]
    id_list.append(unique_id)
unique_ids = list(set(id_list))

In [54]:
print(unique_ids)

['KU_186714_2_96_R_', 'KU_186714_0_78_R_', 'KU_113994_2_99_L_', 'KU_187456_2_120_L', 'KU_113994_3_108_L', 'KU_157925_0_126_R', 'KU_157925_2_144_R', 'KU_139486_1_99_R_', 'KU_139486_0_126_R', 'KU_157925_1_141_R', 'KU_139486_3_111_R', 'KU_183417_0_129_R', 'KU_186714_2_96_L_', 'KU_113994_0_87_L_', 'KU_186714_3_96_L_', 'KU_186714_3_96_R_', 'KU_183417_3_144_R', 'KU_186714_1_93_R_', 'KU_183417_1_144_R', 'KU_187456_1_120_L', 'KU_186714_1_93_L_', 'KU_187456_3_120_L', 'KU_139486_2_99_R_', 'KU_183417_2_144_R', 'KU_157925_3_144_R', 'KU_187456_0_87_L_', 'KU_186714_0_78_L_', 'KU_113994_1_99_L_']


In [55]:
#remove the extra _
for i in range(len(unique_ids)):
    name = unique_ids[i]
    if not name.endswith('_'):
        name = name+'_'
        unique_ids[i]=name

In [56]:
print(unique_ids)

['KU_186714_2_96_R_', 'KU_186714_0_78_R_', 'KU_113994_2_99_L_', 'KU_187456_2_120_L_', 'KU_113994_3_108_L_', 'KU_157925_0_126_R_', 'KU_157925_2_144_R_', 'KU_139486_1_99_R_', 'KU_139486_0_126_R_', 'KU_157925_1_141_R_', 'KU_139486_3_111_R_', 'KU_183417_0_129_R_', 'KU_186714_2_96_L_', 'KU_113994_0_87_L_', 'KU_186714_3_96_L_', 'KU_186714_3_96_R_', 'KU_183417_3_144_R_', 'KU_186714_1_93_R_', 'KU_183417_1_144_R_', 'KU_187456_1_120_L_', 'KU_186714_1_93_L_', 'KU_187456_3_120_L_', 'KU_139486_2_99_R_', 'KU_183417_2_144_R_', 'KU_157925_3_144_R_', 'KU_187456_0_87_L_', 'KU_186714_0_78_L_', 'KU_113994_1_99_L_']


## Stack original and predicted images into a tensor for metric calculations

Once the image is reconstructed into a 3D tensor we can run accuracy calculations, These only need to be generated once

In [None]:
#we will stack our 2D predictions into a 3D tensor for accuracy calculation
#this is for the kidney prediction
for i in range(len(unique_ids)):
    pt_info = unique_ids[i]
    pt_num, yr_num, num_slices = re.findall(r'\d+', pt_info)
    tensor = np.zeros((512,512,int(num_slices)))
    for x in range(int(num_slices)):
        img_name = unique_ids[i]+str(x)+'_K.npy'
        image = np.load(filepath_data + '\\' + img_name)
        img_slice = image
        tensor[:,:,x] = img_slice
        x = x+1
    new_fname = unique_ids[i]+'K.npy'
    np.save(os.path.join(filepath_tensors, new_fname), tensor)

In [57]:

#stack the cyst predictions
for i in range(len(unique_ids)):
    pt_info = unique_ids[i]
    pt_num, yr_num, num_slices = re.findall(r'\d+', pt_info)
    tensor = np.zeros((512,512,int(num_slices)))
    for x in range(int(num_slices)):
        img_name = unique_ids[i]+str(x)+ '_' + model_name +'_C.npy'
        image = np.load(filepath_predictions + '\\' + img_name)
        img_slice = image[:,:,1]
        tensor[:,:,x] = img_slice
        x = x+1
    new_fname = unique_ids[i]+ model_name +'_Cpred.npy'
    np.save(os.path.join(filepath_tensors, new_fname), tensor)

# Gather prediction tensors and calculate stat

Gather the predictions based on the model name, this will work for both kidneys and cysts


In [7]:
#define where the tensor is located for the ground truth and which model was used
filepath_tensors = r'D:\EKS-predicted\Emory\Tensors'
pred_list = gather_set(filepath_tensors, 'INSTITUTION_80-10_35ep')
true_list = gather_set(filepath_tensors, '_K.')
#print(pred_list)
#print(true_list)

In [12]:
#double check that the items we are looking for make sense and will pull the correct file
name =pred_list[3]
print(name[-36:-10])
test = pred_list[3][:-36]+'C.npy'
print(test)

ALL_INSTITUTION_80-10_35ep
EM_283935_3_114_R_C.npy


In [13]:
#here we use the dice coef we defined above to compare the prediction to the ground truth and save that metric as a dataframe
results = []
for i in range(len(pred_list)):
    prediction = np.load(filepath_tensors + '\\'+ pred_list[i])
    true = np.load(filepath_tensors + '\\'+pred_list[i][:-23]+'C.npy')
    dice_calc = dice_coef(true,prediction)
    model = pred_list[i][-23:-10]
    patient = pred_list[i][:-23]
    new_calc = [patient, model, dice_calc.numpy()]
    results.append(new_calc)
    

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\EKS-predicted\\Emory\\Tensors\\EM_283935_0_135_R_ALL_INSTITUTIC.npy'

In [61]:
print(results)

[['KU_113994_0_87_L_', 'UAB_cyst_35ep', 0.7387246021537239], ['KU_113994_1_99_L_', 'UAB_cyst_35ep', 0.7485357092776166], ['KU_113994_2_99_L_', 'UAB_cyst_35ep', 0.7109652364938729], ['KU_113994_3_108_L_', 'UAB_cyst_35ep', 0.7366829706578635], ['KU_139486_0_126_R_', 'UAB_cyst_35ep', 0.8032065047860396], ['KU_139486_1_99_R_', 'UAB_cyst_35ep', 0.7958216703453889], ['KU_139486_2_99_R_', 'UAB_cyst_35ep', 0.8306714329618426], ['KU_139486_3_111_R_', 'UAB_cyst_35ep', 0.7820194874271506], ['KU_157925_0_126_R_', 'UAB_cyst_35ep', 0.7841782541768734], ['KU_157925_1_141_R_', 'UAB_cyst_35ep', 0.6846033186670086], ['KU_157925_2_144_R_', 'UAB_cyst_35ep', 0.6720328638497652], ['KU_157925_3_144_R_', 'UAB_cyst_35ep', 0.6904262656279653], ['KU_183417_0_129_R_', 'UAB_cyst_35ep', 0.7367050832705145], ['KU_183417_1_144_R_', 'UAB_cyst_35ep', 0.6671403010209451], ['KU_183417_2_144_R_', 'UAB_cyst_35ep', 0.7143693730997196], ['KU_183417_3_144_R_', 'UAB_cyst_35ep', 0.6198035052662254], ['KU_186714_0_78_L_', 'UAB_c

## Save the results into a excel file for manipulation

In [62]:
#save the results calculated into an excel file for anlaysis later
df = pd.DataFrame(results)
df.columns =['Patient Number', 'Model', 'Dice Score']
filepath = r"C:\Users\UAB\data\excel results\KU-uab-model-cyst-results.xlsx"
df.to_excel(filepath, index=False)

## Additional Metric Calculations

In [19]:
# Import packages
import medpy
import medpy.metric
import numpy as np
import seg_metrics.seg_metrics as sg
import SimpleITK as sitk
import matplotlib.pyplot as plt
import copy
from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import time
#import gdown
import pandas as pd

In [309]:
#define where the tensor is located for the ground truth and which model was used
filepath_tensors = r'D:\EKS-predicted\UAB\Tensors'
pred_list = gather_set(filepath_tensors, 'Kansas_cyst_35ep')
true_list = gather_set(filepath_tensors, '_C.')
#print(pred_list)
#print(true_list)

In [310]:
#double check that the items we are looking for make sense and will pull the correct file
name =pred_list[3]
print(name[-26:-10])
test = pred_list[3][:-26]+'C.npy'
print(test)

Kansas_cyst_35ep
UB_457036_1_105_R_C.npy


In [311]:
results = []
labels = [1]
spacing = np.array([1,1,1]) 
df = pd.DataFrame()
for i in range(len(pred_list)):
    prediction = np.load(filepath_tensors + '\\'+ pred_list[i])
    true = np.load(filepath_tensors + '\\'+pred_list[i][:-26]+'C.npy')
    #dice_calc = dice_coef(true,prediction)
    metrics = sg.write_metrics(labels=labels,  # exclude background if needed
                  gdth_img=true,
                  pred_img=prediction,
                  #csv_file=csv_file,  # save results to the csv_file 
                  spacing=spacing,   # assign spacing
                  metrics=['hd', 'hd95', 'msd', 'dice', 'jaccard'])
    model = pred_list[i][-26:-10]
    patient = pred_list[i][:-26]
    new_calc = [patient, model]
    results.append(new_calc)
    met = pd.DataFrame(metrics[0])
    label = pd.DataFrame(new_calc).T
    #df = df.append(pd.concat([label, met], axis=1))
    #new_row = pd.concat([label, met], axis=1)
    df = pd.concat([df, pd.DataFrame(metrics[0])], ignore_index=True)


1it [00:01,  1.70s/it]
1it [00:01,  1.67s/it]
1it [00:01,  1.74s/it]
1it [00:01,  1.73s/it]
1it [00:01,  1.99s/it]
1it [00:01,  1.82s/it]
1it [00:01,  1.91s/it]
1it [00:01,  1.83s/it]


In [312]:
data = pd.DataFrame(results)
data.columns =['Patient Number', 'Model']
df1 = pd.concat([data, df], axis=1)
filepath = r"D:\EKS-predicted\uab-ku-model-cyst-results.xlsx"
df1.to_excel(filepath, index=False)

  df1.to_excel(filepath, index=False)
