In [3]:
import glob
import os
import pandas as pd
# Numpy for numpy.arrays
import numpy as np

# Include ITK for DICOM reading.
import itk

# Include pydicom_seg for DICOM SEG objects
import pydicom
import pydicom_seg

# for downloading data from TCIA
from tcia_utils import nbia

# This is the most common import command for itkWidgets.
#   The view() function opens an interactive viewer for 2D and 3D
#   data in a variety of formats.

import SimpleITK as sitk
import nibabel as nib
from tqdm import tqdm
import matplotlib.pyplot as plt
import pickle





In [4]:
#import the pirads file and change the rownames:
pirads = pd.read_excel('Pirads_updated_age_PSA.xlsx')
pirads.rename(columns={'Unnamed: 0': 'lesion_name'},inplace=True)
pirads.index = list(pirads['lesion_name'])
pirads.drop('lesion_name',axis=1,inplace=True)

In [5]:
translation = pd.read_excel('C:\\Users\\Joel Fischer\\Documents\\Masterarbeit\\Studie\\Projekt\\Auswertungsdaten\\Benchmark\\translation.xlsx')
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
translation.drop(8,inplace=True) #remove the bell_0027 case as its metadata is missing. 


# Import all the lesion segmentations:

In [6]:
#Get all lesion filenames:
lesion_filenames = []
directory = 'E:\\ksa_study_data'
for path, dirs, filenames in os.walk(directory):
    for f in filenames:
        if f.endswith('.nii.gz'):
            #get only the files of the 67 Cases we have biopsied totally:
            name = f.split('_')[0].split('-')[0] + '_' + f.split('_')[0].split('-')[1]
            #we need to manual add the bell_0045 Case as it is missing in the translation:
            if name in list(translation['Case']) or name == 'bell_0045':
                #we only want lesions of t2w images, not full prostate segmentations or t2w images:
                if f.split('.')[0].split('_')[-1] != 'none':
                    if f.split('_')[1] == 't2w':
                        if f.split('_')[2] != 'pro':
                            lesion_filenames.append(f)

# Calculate the IoU,DSC,Fscore for pooled segmentations for each reader:

In [30]:

def IoU_of_two_segmentations(segA,segB):
    # segmentation_A: lesion mask of a lesion
    # segmentation_A: lesion mask of a different lesion
    intersection = np.sum(segA*segB)
    fsum = np.sum(segA)
    ssum = np.sum(segB)
    union = fsum + ssum - intersection
    iou = (intersection) / (union)
    iou = round(iou, 3)
    return iou

def DICE_COE(segmentation_A, segmentation_B):
    # segmentation_A: lesion mask of a lesion
    # segmentation_A: lesion mask of a different lesion
    intersection = np.sum(segmentation_A*segmentation_B)
    fsum = np.sum(segmentation_A)
    ssum = np.sum(segmentation_B)
    dice = (2 * intersection) / (fsum + ssum)
    dice = round(dice, 3)
    return (dice)

def fbeta_score(segA,segB,beta):
    '''We want to limit the impact of the oversensitivity of the FUSE-AI solution by penalizing its False Negatives more. 
    This way the super large volumne is ignored '''
    intersection = np.sum(segA*segB)
    fsum = np.sum(segA)
    ssum = np.sum(segB)
    fn = ssum - intersection
    fp = fsum - intersection
    fscore = ((1+beta**2)*intersection)/(((1+beta**2)*intersection)+(beta**2 * fn) + fp)
    return round(fscore,3)

def get_segmentation_array(filename):
    seg = nib.load(filename)
    return seg.get_fdata()

def get_list_of_segmentations(filename_list):
    segmentation_list = []
    for filename in range(len(filename_list)):
        Case = filename_list[filename].split('-')[0]
        Case_name = filename_list[filename].split('_')[0]
        seg = get_segmentation_array(f"E:/ksa_study_data/{Case}/{Case_name}/{filename_list[filename]}")
        segmentation_list.append(seg)
    return segmentation_list

def get_filenames_of_Case(Case_segmentations):
    n_pcai100_auto = []
    n_rad1_rad = []
    n_rad1_ass = []
    n_rad2_rad = []
    n_rad2_ass = []
    n_rad3_rad = []
    n_rad3_ass = []

    for segmentation in Case_segmentations:
        if segmentation.split('.')[-3].split('_')[-1] == 'auto' and segmentation.split('.')[-3].split('_')[-2] == 'pcai100':
            n_pcai100_auto.append(segmentation)
        if segmentation.split('.')[-3].split('_')[-1] == 'rad' and segmentation.split('.')[-3].split('_')[-2] == 'rad1':
            n_rad1_rad.append(segmentation)
        if segmentation.split('.')[-3].split('_')[-1] == 'ass' and segmentation.split('.')[-3].split('_')[-2] == 'rad1':
            n_rad1_ass.append(segmentation)
        if segmentation.split('.')[-3].split('_')[-1] == 'rad' and segmentation.split('.')[-3].split('_')[-2] == 'rad2':
            n_rad2_rad.append(segmentation)
        if segmentation.split('.')[-3].split('_')[-1] == 'ass' and segmentation.split('.')[-3].split('_')[-2] == 'rad2':
            n_rad2_ass.append(segmentation)
        if segmentation.split('.')[-3].split('_')[-1] == 'rad' and segmentation.split('.')[-3].split('_')[-2] == 'rad3':
            n_rad3_rad.append(segmentation)
        if segmentation.split('.')[-3].split('_')[-1] == 'ass' and segmentation.split('.')[-3].split('_')[-2] == 'rad3':
            n_rad3_ass.append(segmentation)
    return(n_pcai100_auto,n_rad1_rad,n_rad1_ass,n_rad2_rad,n_rad2_ass,n_rad3_rad,n_rad3_ass)

def make_binary_array(seg_array):
    for i in range(seg_array.shape[0]):
        for j in range(seg_array.shape[1]):
            for k in range(seg_array.shape[2]):
                #if a voxel only belongs to array1:
                if seg_array[i,j,k] > 1:
                    seg_array[i,j,k] = 1
    return seg_array


def summed_segmentations(segmentation_list):

    while len(segmentation_list) > 1: #only add them up, if there are at least 2 segmentations left:

        summed_seg = np.where(segmentation_list[0] != 1, 0, 1) + np.where(segmentation_list[1] != 0, 1, 0)
        summed_seg_binary = make_binary_array(summed_seg)
        del segmentation_list[1]
        segmentation_list[0] = summed_seg_binary
    return segmentation_list[0]
        


 Here a comparison bewteen FUSE-AI prediction and the Reader 1-3 prediction is made. Both, the FUSE-AI and the Reader lesions segmentations are
 pooled togheter, so that we can then look at the DSC/IoU of the AI vs Reader1_man and AI vs Reader1_ass and see if there is a difference. 

In [58]:
#For each Case, Make an individual Dataframe to store the IoU values:

#Get all the Cases:
Cases = list(translation['Case'])
Cases.append('bell_0045')

#For each of the 67 Casees, create a dataframe in a list:
IoU_Case_dataframe_list = {Case + '_IoU': pd.DataFrame() for Case in Cases}
DSC_Case_dataframe_list = {Case + '_IoU': pd.DataFrame() for Case in Cases}
betascore_Case_dataframe_list = {Case + '_IoU': pd.DataFrame() for Case in Cases}


for Case in tqdm(Cases):
    #Get the Case name with the '-':
    Case_name = Case.split('_')[0] + '-' + Case.split('_')[1]
    #Get only the filenames for the specific Case:
    Case_segmentations = [i for i in lesion_filenames if i.startswith(Case_name)]
    #For these segmentations create a into the dataframe list with the dimensions of NxN, N beeing the number of segmentations of this Case:
    dataframe_name = Case + '_' + 'IoU'
    #create the row and column names:
    columns = [Case +'_fuseai',Case +'_rad1_rad',Case +'_rad1_ass',Case +'_rad2_rad',Case +'_rad2_ass',Case +'_rad3_rad',Case +'_rad3_ass']
    #Create the dataframe table:
    IoU_Case_dataframe_list[dataframe_name] = pd.DataFrame(columns=columns, index=columns)
    DSC_Case_dataframe_list[dataframe_name] = pd.DataFrame(columns=columns, index=columns)
    betascore_Case_dataframe_list[dataframe_name] = pd.DataFrame(columns=columns, index=columns)
    #If there is more than one predicted lesion for the FUSE-AI, we pool the prediction. 
    filenames_of_segs_to_get_pooled = get_filenames_of_Case(Case_segmentations)
    
    #For each Entry of this dataframe, calculate the IoU:
    for i, rowname in enumerate(IoU_Case_dataframe_list[dataframe_name].index):
        #Get the combined segmentation for this Reader (row):
        try:
            row_segmentation = summed_segmentations(get_list_of_segmentations(filenames_of_segs_to_get_pooled[i]))
            
        #if the lesion does not exist: make the entire row to 'no comparison':
        except:
            IoU_Case_dataframe_list[dataframe_name].loc[rowname] = 'no_comparison'
            DSC_Case_dataframe_list[dataframe_name].loc[rowname] = 'no_comparison'
            betascore_Case_dataframe_list[dataframe_name].loc[rowname] = 'no_comparison'
            continue
        for j,colname in enumerate(IoU_Case_dataframe_list[dataframe_name].columns):
            #Get the combined segmentation for this Reader (column):
            try:
                col_segmentation = summed_segmentations(get_list_of_segmentations(filenames_of_segs_to_get_pooled[j]))
                IoU_Case_dataframe_list[dataframe_name].loc[rowname,colname] = IoU_of_two_segmentations(row_segmentation,col_segmentation)
                DSC_Case_dataframe_list[dataframe_name].loc[rowname,colname] = DICE_COE(row_segmentation,col_segmentation)
                betascore_Case_dataframe_list[dataframe_name].loc[rowname,colname] = fbeta_score(row_segmentation,col_segmentation,2)


            except:
                IoU_Case_dataframe_list[dataframe_name].loc[rowname,colname] = 'no_comparison'
                DSC_Case_dataframe_list[dataframe_name].loc[rowname,colname] = 'no_comparison'
                betascore_Case_dataframe_list[dataframe_name].loc[rowname,colname] = 'no_comparison'


100%|██████████| 67/67 [1:14:59<00:00, 67.16s/it] 


In [62]:
#save it as a pickle file:
# with open(f"C:/Users/Joel Fischer/Documents/Masterarbeit/Studie/Projekt/Auswertungsdaten/Benchmark/Lesion_wise_analysis/IoU_combined_segmentations.pickle","wb") as f:
#     pickle.dump(IoU_Case_dataframe_list,f)

In [60]:
#save it as a pickle file:
# with open(f"C:/Users/Joel Fischer/Documents/Masterarbeit/Studie/Projekt/Auswertungsdaten/Benchmark/Lesion_wise_analysis/DSC_combined_segmentations.pickle","wb") as f:
#     pickle.dump(DSC_Case_dataframe_list,f)

In [61]:
 #save it as a pickle file:
# with open(f"C:/Users/Joel Fischer/Documents/Masterarbeit/Studie/Projekt/Auswertungsdaten/Benchmark/Lesion_wise_analysis/betascore_combined_segmentations.pickle","wb") as f:
#     pickle.dump(betascore_Case_dataframe_list,f)

# Part which calculates the iou for every lesion individually:

In [143]:
#For each Case, Make an individual Dataframe to store the IoU values:


#Get all the Cases:
Cases = list(translation['Case'])
Cases.append('bell_0045')

#For each of the 67 Casees, create a dataframe in a list:
IoU_Case_dataframe_list = {Case + '_IoU': pd.DataFrame() for Case in Cases}

for Case in ['bell_0001']:
    #Get the Case name with the '-':
    Case_name = Case.split('_')[0] + '-' + Case.split('_')[1]
    #Get only the filenames for the specific Case:
    Case_segmentations = [i for i in lesion_filenames if i.startswith(Case_name)]
    #For these segmentations create a into the dataframe list with the dimensions of NxN, N beeing the number of segmentations of this Case:
    dataframe_name = Case + '_' + 'IoU'
    IoU_Case_dataframe_list[dataframe_name] = pd.DataFrame(columns=Case_segmentations, index=Case_segmentations)

    #For each Entry of this dataframe, calculate the IoU:
    for rowname in IoU_Case_dataframe_list[dataframe_name].index:
        for colname in IoU_Case_dataframe_list[dataframe_name].columns:
            filename_SegA = f"E:/ksa_study_data/{Case.split('_')[0]}/{Case_name}/{rowname}"
            filename_SegB = f"E:/ksa_study_data/{Case.split('_')[0]}/{Case_name}/{colname}"
            IoU_Case_dataframe_list[dataframe_name].loc[rowname,colname] = IoU_of_two_segmentations(filename_SegA,filename_SegB)
