# Utils

In [1]:
# Mount drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# KAREN
# this is the script utils.py from the website
!pip install simpleitk

import csv
import os
import sys
import numpy as np
import SimpleITK as sitk
from scipy.ndimage import zoom

def readCsv(csvfname):
    # read csv to list of lists
    with open(csvfname, 'r') as csvf:
        reader = csv.reader(csvf)
        csvlines = list(reader)
    return csvlines

def writeCsv(csfname,rows):
    # write csv from list of lists
    with open(csfname, 'w', newline='') as csvf:
        filewriter = csv.writer(csvf)
        filewriter.writerows(rows)
        
def readMhd(filename):
    # read mhd/raw image
    itkimage = sitk.ReadImage(filename)
    scan = sitk.GetArrayFromImage(itkimage) #3D image
    spacing = itkimage.GetSpacing() #voxelsize
    origin = itkimage.GetOrigin() #world coordinates of origin
    transfmat = itkimage.GetDirection() #3D rotation matrix
    return scan,spacing,origin,transfmat

def getImgWorldTransfMats(spacing,transfmat):
    # calc image to world to image transformation matrixes
    transfmat = np.array([transfmat[0:3],transfmat[3:6],transfmat[6:9]])
    for d in range(3):
        transfmat[0:3,d] = transfmat[0:3,d]*spacing[d]
    transfmat_toworld = transfmat #image to world coordinates conversion matrix
    transfmat_toimg = np.linalg.inv(transfmat) #world to image coordinates conversion matrix
    
    return transfmat_toimg,transfmat_toworld

def convertToWorldCoord(xyz,origin,transfmat_toworld):
    # convert image to world coordinates
    xyz = np.matmul(transfmat_toworld,xyz)
    xyz = xyz + origin
    return xyz



# Code

In [0]:
# convert the centerpoint coordinates to world coordinates
def img_to_world_coords(center, origin, spacing, transfmat):
    _,transfmat_toworld = getImgWorldTransfMats(spacing,transfmat)
    return convertToWorldCoord(center, origin, transfmat_toworld)

In [0]:
# calculate confidence intervals
# these are given to me

In [0]:
# compare predicted centerpoint to ground truth centerpoint

# calculate the diameter of the ground truth nodule
def get_diameter(volume):
    return (6 * (volume / np.pi)) ** (1/3)

# calculate the euclidean distance between the centerpoint of the predicted
# nodule and the centerpoint of the ground truth nodule
def euclidean_dist(pred_x, pred_y, gt_x, gt_y):
    a = (gt_x - pred_x) ** 2
    b = (gt_y - pred_y) ** 2
    return np.sqrt(a + b)

# is the predicted nodule a match to the ground truth nodule?
# pred_nodule -> [x, y]
# gt_nodule -> [x, y, volume]
def is_match(pred_nodule, gt_nodule):
    isMatch = False
    diameter = get_diameter(gt_nodule[2])
    if diameter < 3:
        diameter = 3
    distance = euclidean_dist(pred_nodule[0], pred_nodule[1], gt_nodule[0], gt_nodule[1])
    if distance <= diameter:
        isMatch = True
    return isMatch

In [0]:
# get the caseid
import re
def get_caseid_and_index(str):
    return re.findall(r'\d+', str)

In [0]:
# read in IndexNodules.csv
indexNodules = readCsv('/content/drive/My Drive/IndexNodules.csv')
indexNodules.pop(0)
# read in predicted_nodules.csv
preds = readCsv('/content/drive/My Drive/predicted_nodules_70_MAX.csv')
# preds = readCsv('/content/drive/My Drive/predicted_nodules_70_1000.csv')
# preds = readCsv('/content/drive/My Drive/predicted_nodules_95_R50_DC5_1x.csv')
preds.pop(0)
for row in range(len(preds)):
    ids = get_caseid_and_index(preds[row][0])
    preds[row] = [int(ids[0])] + preds[row]
    preds[row][1] = int(ids[1])
# read in trainNodules_gt.csv (get volume)
ground_truths = readCsv('/content/drive/My Drive/LNDb/trainset_csv/trainNodules_gt.csv')
ground_truths.pop(0)

# true positives [agree_lvl 0, agree_lvl 1, agree_lvl 2, agree_lvl 3, total]
tp = [0, 0, 0, 0, 0]
# false positives [agree_lvl 0, agree_lvl 1, agree_lvl 2, agree_lvl 3, total]
fp = [0, 0, 0, 0, 0]
# false negatives [agree_lvl 0, agree_lvl 1, agree_lvl 2, agree_lvl 3, total]
fn = [0, 0, 0, 0, 0]
# true negatives [agree_lvl 0, agree_lvl 1, agree_lvl 2, agree_lvl 3, total]
tn = [0, 0, 0, 0, 0]

old_preds = []

for i in range(len(indexNodules)):
    gt_caseID = int(indexNodules[i][0])
    gt_index = int(indexNodules[i][1])
    agree_lvl = int(ground_truths[i][7])
    # is there an entry in predictions for that caseID?
    # if the top element in predictions has the same caseID (not greater than) (always top element because we are popping rows we have inspected already from predictions)
    pred_caseID = int(preds[0][0])
    pred_index = int(preds[0][1])
    if pred_caseID == gt_caseID:
        # if gt_index is LESS than pred_index, then a nodule was missed -> false negative...DO NOT remove the row from preds (could have a match later)
        if gt_index < pred_index:
            if int(ground_truths[i][8]) == 0: # a non-nodule
                tn[4] += 1
                tn[agree_lvl] += 1
            else: # missed an actual nodule
                # nodule missed
                fn[4] += 1
                fn[agree_lvl] += 1
        # if gt_index is GREATER than pred_index, then a nodule was predicted where there was none -> false positive...DO remove the row from preds
        elif gt_index > pred_index:
            # nodule was predicted on a slice where there was no nodule (!!!! IS THIS EVER POSSIBLE????)
            fp[4] += 1
            fp[agree_lvl] += 1
            old_preds.append(preds.pop(0))
        # if gt_index EQUALS pred_index, then check if nodule is a match
        else:
            pred_x = float(preds[0][2])
            pred_y = float(preds[0][3])
            gt_x = float(indexNodules[i][2])
            gt_y = float(indexNodules[i][3])
            gt_volume = float(ground_truths[i][9])
            # check if prediction is a match to the ground truth
            if is_match([pred_x, pred_y], [gt_x, gt_y, gt_volume]):
                if ground_truths[i][8] == 0:
                    # matched a non-nodule
                    fp[4] += 1
                    fp[agree_lvl] += 1
                else:
                    # matched a true nodule
                    tp[4] += 1
                    tp[agree_lvl] += 1
            else:
                # nodule is a miss
                fp[4] += 1
                fp[agree_lvl] += 1
                fn[4] += 1
                fn[agree_lvl] += 1
            pred = preds.pop(0)
            pred.append(int(indexNodules[i][4])) # append the z-value so that we can convert to world coordinates after
            old_preds.append(pred)
    else:
        fn[4] += 1
        fn[agree_lvl] += 1
    if len(preds) == 0:
        break

In [8]:
print(tp)
print(fp)
print(fn)
print(tn)

[172, 257, 126, 35, 590]
[167, 173, 40, 4, 384]
[224, 290, 51, 9, 574]
[54, 0, 0, 0, 54]


In [9]:
# fix up the list of predictions
preds1 = []
for i in range(len(old_preds)):
    old_preds[i][2] = float(old_preds[i][2])
    old_preds[i][3] = float(old_preds[i][3])
    tmp = float(old_preds[i][4])
    old_preds[i][4] = old_preds[i][5]
    old_preds[i][5] = tmp
    old_preds[i].pop(1)
    preds1.append(old_preds[i])
print(preds1)


[[1, 423.40997314453125, 271.452392578125, 252, 0.7469912767410278], [2, 362.42041015625, 352.7093505859375, 170, 0.8199936747550964], [2, 166.93450927734375, 300.2872314453125, 182, 0.7470126152038574], [2, 333.701416015625, 394.01751708984375, 218, 0.8617516160011292], [2, 301.83428955078125, 354.5638122558594, 236, 0.8237185478210449], [2, 404.8975830078125, 358.13275146484375, 90, 0.896274745464325], [3, 121.4024658203125, 289.7017822265625, 185, 0.9150320887565613], [3, 119.96788024902344, 315.1512451171875, 198, 0.9590526223182678], [3, 385.770751953125, 328.81781005859375, 200, 0.7276769280433655], [3, 322.27783203125, 243.60116577148438, 189, 0.9301328659057617], [3, 131.9813995361328, 280.89984130859375, 240, 0.759917676448822], [4, 374.71173095703125, 194.89865112304688, 162, 0.7056125998497009], [5, 125.7776107788086, 369.5992431640625, 154, 0.7880538702011108], [7, 136.19830322265625, 175.6374053955078, 65, 0.8291102051734924], [8, 83.39476013183594, 312.27728271484375, 85,

In [0]:
# read in the case information file
caseInfo = readCsv('/content/drive/My Drive/CaseInformation.csv')
caseInfo.pop(0)
case_info = caseInfo

In [11]:
preds = preds1
caseInfo = case_info
world_preds = []
transfmat = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]

for i in range(len(preds)):
    case = caseInfo[0]
    caseID = int(case[0])
    pred = preds[i]
    predCaseID = int(pred[0])
    if predCaseID > caseID:
        caseInfo.pop(0)
        case = caseInfo[0]
        caseID = int(case[0])
    if caseID == predCaseID:
        tmp = pred
        pred_ctr = [float(pred[1]), float(pred[2]), float(pred[3])]
        case_origin = [float(case[4]), float(case[5]), float(case[6])]
        case_spacing = [float(case[1]), float(case[2]), float(case[3])]
        # convert to world coordinates
        wrld_xyz = img_to_world_coords(pred_ctr, case_origin, case_spacing, transfmat)
        tmp[1] = wrld_xyz[0]
        tmp[2] = wrld_xyz[1]
        tmp[3] = wrld_xyz[2]
        world_preds.append(tmp)
    if len(preds) == 0:
        break
print(world_preds)

[[1, 98.99219068115082, -144.31016782695923, -45.5, 0.7469912767410278], [2, 88.47553159056397, -124.56119321526796, -129.5, 0.8199936747550964], [2, -53.175384866612234, -162.54675220193178, -117.5, 0.7470126152038574], [2, 67.66547919569703, -94.62890850247652, -81.5, 0.8617516160011292], [2, 44.57426060496064, -123.21743290366442, -63.5, 0.8237185478210449], [2, 119.25488926230165, -120.63134609402925, -209.5, 0.896274745464325], [3, -113.04933211687623, -177.48226401690061, -101.5, 0.9150320887565613], [3, -114.11966744545518, -158.49457881334837, -88.5, 0.9590526223182678], [3, 84.19419386502685, -148.29804012659605, -86.5, 0.7276769280433655], [3, 36.82252314206542, -211.87764582518156, -97.5, 0.9301328659057617], [3, -105.15645578983842, -184.04933712366636, -46.5, 0.759917676448822], [4, 93.98492537678985, -219.20213348923414, -644.5, 0.7056125998497009], [5, -83.4509628093073, -91.44251991568296, -657.5, 0.7880538702011108], [7, -89.82571623209535, -202.09786442402418, -252.0,

In [0]:
headers = ['LNDbID', 'x', 'y', 'z', 'Nodule']

world_preds = [headers] + world_preds
writeCsv('/content/drive/My Drive/predictedNodulesA.csv', world_preds)