In [1]:
import json

with open('_annotations_origin_aug.json') as f:
    detection_test_data = json.load(f)

In [2]:
detection_test_data.keys()

dict_keys(['images', 'categories', 'annotations'])

In [23]:
detection_test_data['images'][0]

{'file_name': '0-(125)-[0]_0_1.3.6.1.4.1.14519.5.2.1.6279.6001.997611074084993415992563148335.png',
 'height': 640,
 'width': 640,
 'id': 0}

In [30]:
detection_test_data['images'][0]['file_name'].split('(')[1].split(')')[0]

'125'

In [12]:
series_uid = detection_test_data['images'][0]['file_name'].split('_')[-1].replace('.png', '')
i_idx = int(detection_test_data['images'][0]['file_name'].split('(')[1].split(')')[0])
isMal = detection_test_data['annotations'][0]['category_id']

In [28]:
series_uid

'1.3.6.1.4.1.14519.5.2.1.6279.6001.997611074084993415992563148335'

In [33]:
i_idx

125

In [29]:
isMal

1

In [68]:
# Ground Truth Input
import os
import collections
from collections import namedtuple
import glob
import SimpleITK as sitk
import csv
import functools
from util import XyzTuple, xyz2irc, logging, getCache
from pylidc_func import masks_build

import ast

In [78]:
CandidateInfoTuple = namedtuple('CandidateInfoTuple', 'isNodule_bool, hasAnnotation_bool, isMal_bool, diameter_mm, series_uid, center_xyz, classes')

def getCandidateInfoDict(requireOnDisk_bool=True):  #把candidateInfoList包成Dict
    candidateInfo_list = getCandidateInfoList(requireOnDisk_bool)
    candidateInfo_dict = {}

    for candidateInfo_tup in candidateInfo_list:
        candidateInfo_dict.setdefault(candidateInfo_tup.series_uid,
                                        []).append(candidateInfo_tup)

    return candidateInfo_dict

def getCandidateInfoList(requireOnDisk_bool=True):
    mhd_list = glob.glob('D:/IMProject/LunaData/subset*/*.mhd')
    # mhd_list = glob.glob('D:/IMProject/LunaData/subset0/*.mhd')
    presentOnDisk_set = {os.path.split(p)[-1][:-4] for p in mhd_list}

    candidateInfo_list = []
    with open('D:/IMProject/LunaData/annotations_with_malignancy.csv', "r") as f:
        for row in list(csv.reader(f))[1:]:
            series_uid = row[0]
            annotationCenter_xyz = tuple([float(x) for x in row[1:4]])
            annotationDiameter_mm = float(row[4])
            isMal_bool = {'False': False, 'True': True}[row[5]] #it record the malignancy or not
            classes = 0 if isMal_bool else 1

            if series_uid not in presentOnDisk_set and requireOnDisk_bool:
                continue

            candidateInfo_list.append(
                CandidateInfoTuple(
                    True,
                    True,
                    isMal_bool,
                    annotationDiameter_mm,
                    series_uid,
                    annotationCenter_xyz,
                    classes
                )
            )

    candidateInfo_list.sort(reverse=True)
    return candidateInfo_list

DetectionDataInfoTuple = namedtuple('DetectionDataInfoTuple', 'isNodule_bool, hasAnnotation_bool, isMal_bool, diameter_mm, series_uid, center_xyz, z_range, xy_ranges')

def getDetectionDataList(requireOnDisk_bool=True):
    mhd_list = glob.glob('D:/IMProject/LunaData/subset*/*.mhd')
    # mhd_list = glob.glob('D:/IMProject/LunaData/subset0/*.mhd')
    presentOnDisk_set = {os.path.split(p)[-1][:-4] for p in mhd_list}

    detectionDataInfo_list = []
    with open('D:/IMProject/annotations_object_detection_1127.csv', "r") as f:
        for row in list(csv.reader(f))[1:]:
            series_uid = row[0]
            annotationCenter_xyz = tuple([float(x) for x in row[1:4]])
            annotationDiameter_mm = float(row[4])
            isMal_bool = row[5] == 1.0
            z_range = ast.literal_eval(row[7])
            # print(cbbox, type(cbbox))
            xy_ranges = ast.literal_eval(row[8])
            # print(cbbox_detail, type(cbbox_detail)
            
#             if series_uid not in presentOnDisk_set and requireOnDisk_bool:
#                 continue

            detectionDataInfo_list.append(
                DetectionDataInfoTuple(
                    True,
                    True,
                    isMal_bool,
                    annotationDiameter_mm,
                    series_uid,
                    annotationCenter_xyz,
                    z_range,
                    xy_ranges
                )
            )

    detectionDataInfo_list.sort(reverse=True)
    return detectionDataInfo_list

In [147]:
class Ct:
    def __init__(self, series_uid):
        mhd_path = glob.glob('D:/IMProject/LunaData/subset*/{}.mhd'.format(series_uid))
        # mhd_path = glob.glob('D:/IMProject/LunaData/subset0/{}.mhd'.format(series_uid))
        # print(mhd_path)

        ct_mhd = sitk.ReadImage(mhd_path)
        if ct_mhd.GetDimension()==4 and ct_mhd.GetSize()[3]==1:
            ct_mhd = ct_mhd[...,0]
        self.hu_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)

        # CTs are natively expressed in https://en.wikipedia.org/wiki/Hounsfield_scale
        # HU are scaled oddly, with 0 g/cc (air, approximately) being -1000 and 1 g/cc (water) being 0.

        self.series_uid = series_uid
        self.origin_xyz = XyzTuple(*ct_mhd.GetOrigin())
        self.vxSize_xyz = XyzTuple(*ct_mhd.GetSpacing())
        self.direction_a = np.array(ct_mhd.GetDirection()).reshape(3, 3)

        self.mask = masks_build(series_uid, self.hu_a)
        
    def getRawCandidate(self, center_xyz, width_irc):
        center_irc = xyz2irc(center_xyz, self.origin_xyz, self.vxSize_xyz, self.direction_a)

        slice_list = []
        for axis, center_val in enumerate(center_irc):
            start_ndx = int(round(center_val - width_irc[axis]/2))
            end_ndx = int(start_ndx + width_irc[axis])

            assert center_val >= 0 and center_val < self.hu_a.shape[axis], repr([self.series_uid, center_xyz, self.origin_xyz, self.vxSize_xyz, center_irc, axis])

            if start_ndx < 0:
                # log.warning("Crop outside of CT array: {} {}, center:{} shape:{} width:{}".format(
                #     self.series_uid, center_xyz, center_irc, self.hu_a.shape, width_irc))
                start_ndx = 0
                end_ndx = int(width_irc[axis])

            if end_ndx > self.hu_a.shape[axis]:
                # log.warning("Crop outside of CT array: {} {}, center:{} shape:{} width:{}".format(
                #     self.series_uid, center_xyz, center_irc, self.hu_a.shape, width_irc))
                end_ndx = self.hu_a.shape[axis]
                start_ndx = int(self.hu_a.shape[axis] - width_irc[axis])

            slice_list.append(slice(start_ndx, end_ndx))

        ct_chunk = self.hu_a[tuple(slice_list)]
        mask_chunk = self.mask[tuple(slice_list)]

        return ct_chunk, center_irc, mask_chunk
    
    def getDetectionTestCandidate(self, center_irc, width_irc):
        slice_list = []
        for axis, center_val in enumerate(center_irc):
            start_ndx = int(round(center_val - width_irc[axis]/2))
            end_ndx = int(start_ndx + width_irc[axis])

            assert center_val >= 0 and center_val < self.hu_a.shape[axis], repr([self.series_uid, center_xyz, self.origin_xyz, self.vxSize_xyz, center_irc, axis])

            if start_ndx < 0:
                # log.warning("Crop outside of CT array: {} {}, center:{} shape:{} width:{}".format(
                #     self.series_uid, center_xyz, center_irc, self.hu_a.shape, width_irc))
                start_ndx = 0
                end_ndx = int(width_irc[axis])

            if end_ndx > self.hu_a.shape[axis]:
                # log.warning("Crop outside of CT array: {} {}, center:{} shape:{} width:{}".format(
                #     self.series_uid, center_xyz, center_irc, self.hu_a.shape, width_irc))
                end_ndx = self.hu_a.shape[axis]
                start_ndx = int(self.hu_a.shape[axis] - width_irc[axis])

            slice_list.append(slice(start_ndx, end_ndx))

        ct_chunk = self.hu_a[tuple(slice_list)]
        mask_chunk = self.mask[tuple(slice_list)]

        return ct_chunk, mask_chunk

In [80]:
candi_list = getDetectionDataList()
candi_list[0]

DetectionDataInfoTuple(isNodule_bool=True, hasAnnotation_bool=True, isMal_bool=False, diameter_mm=32.27003025, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.287966244644280690737019247886', center_xyz=(67.82725575, 85.37992457, -109.7467238), z_range=[85, 100], xy_ranges=[[0.6712328767, 0.6976516634, 0.0390625, 0.033203125], [0.6722113503, 0.7035225049, 0.060546875, 0.048828125], [0.6702544031, 0.7015655577, 0.068359375, 0.064453125], [0.6722113503, 0.7015655577, 0.080078125, 0.068359375], [0.6712328767, 0.7015655577, 0.0859375, 0.072265625], [0.6712328767, 0.7025440313, 0.08984375, 0.07421875], [0.6722113503, 0.7045009785, 0.087890625, 0.078125], [0.6692759295, 0.7054794521, 0.0859375, 0.080078125], [0.668297456, 0.7064579256, 0.083984375, 0.08203125], [0.6663405088, 0.7084148728, 0.080078125, 0.07421875], [0.6653620352, 0.7074363992, 0.07421875, 0.068359375], [0.6604696673, 0.7093933464, 0.056640625, 0.060546875], [0.6614481409, 0.7133072407, 0.04296875, 0.056640625], [0.6634050881, 

In [81]:
len(candi_list)

1182

In [109]:
matches = [x for x in candi_list if x.series_uid=='1.3.6.1.4.1.14519.5.2.1.6279.6001.603126300703296693942875967838']

In [110]:
matches[0]

DetectionDataInfoTuple(isNodule_bool=True, hasAnnotation_bool=True, isMal_bool=False, diameter_mm=10.16765351, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.603126300703296693942875967838', center_xyz=(-31.90828695, 27.18117285, -170.1513701), z_range=[63, 68], xy_ranges=[[0.469667319, 0.5694716243, 0.0234375, 0.02734375], [0.4706457926, 0.5694716243, 0.029296875, 0.03125], [0.4706457926, 0.5684931507, 0.029296875, 0.029296875], [0.469667319, 0.5704500978, 0.02734375, 0.029296875], [0.4686888454, 0.5733855186, 0.017578125, 0.015625]])

In [111]:
65 - matches[0].z_range[0]

2

In [113]:
matches[0].xy_ranges[65 - matches[0].z_range[0]][0]

0.4706457926

In [35]:
ct = Ct('1.3.6.1.4.1.14519.5.2.1.6279.6001.603126300703296693942875967838')

<__main__.Ct at 0x1cd00041a90>

In [38]:
for c in matches:
    c_irc = xyz2irc(c.center_xyz, ct.origin_xyz, ct.vxSize_xyz, ct.direction_a)
    print(c_irc)

IrcTuple(index=65, row=395, col=366)
IrcTuple(index=51, row=315, col=119)
IrcTuple(index=72, row=337, col=413)


In [85]:
for i, d in enumerate(detection_test_data['images']):
    s_uid = d['file_name'].split('_')[-1].replace('.png', '')
    i_idx = int(d['file_name'].split('(')[1].split(')')[0])
    isMal_B = {0: False, 1: True}[detection_test_data['annotations'][i]['category_id']]
    # print(s_uid)
    # print(i_idx)
    # print(isMal_B)
    matches = [x for x in candi_list if x.series_uid==s_uid and x.isMal_bool==isMal_B]
    
    if len(matches) > 1:
        print(f"{s_uid}, with {i_idx} has {len(matches)} matches")
        ct = Ct(s_uid)
        closest_irc = xyz2irc(matches[0].center_xyz, ct.origin_xyz, ct.vxSize_xyz, ct.direction_a)
        # print("matches[0] =", closest_irc)
        closest_dist = abs(closest_irc.index - i_idx)
        for c in matches[1:]:
            c_irc = xyz2irc(c.center_xyz, ct.origin_xyz, ct.vxSize_xyz, ct.direction_a)
            # print("cur_irc =", c_irc)
            cur_dist = abs(c_irc.index - i_idx)
            if cur_dist == closest_dist:
                print("OH NO! They are the same")
            elif cur_dist < closest_dist:
                closest_irc = c_irc
                closest_dist = cur_dist
        
        print(f"closest to i_idx = {i_idx} is {closest_irc}")
        
        # new_center_irc = (i_idx, closest_irc.row, closest_irc.col)

        break

1.3.6.1.4.1.14519.5.2.1.6279.6001.603126300703296693942875967838, with 52 has 4 matches
OH NO! They are the same
closest to i_idx = 52 is IrcTuple(index=51, row=315, col=119)


In [101]:
detection_test_data['annotations'][1]['bbox'][1] / 640

0.559375

In [152]:
import json

width_irc = (1, 64, 64)
with open('_annotations_origin_aug.json') as f:
    detection_test_data = json.load(f)
    size = len(detection_test_data['images'])
    for i, d in enumerate(detection_test_data['images']):
        s_uid = d['file_name'].split('_')[-1].replace('.png', '')
        i_idx = int(d['file_name'].split('(')[1].split(')')[0])
        isMal_B = {0: False, 1: True}[detection_test_data['annotations'][i]['category_id']]

        # print(s_uid)
        # print(i_idx)
        # print(isMal_B)
        # matches = [x for x in candi_list if x.series_uid==s_uid and x.isMal_bool==isMal_B and i_idx >= x.z_range[0] and i_idx < x.z_range[1]]

        # if len(matches) > 1:
            # print(f"{s_uid}, with {i_idx} has {len(matches)} matches")
        x_rate = (detection_test_data['annotations'][i]['bbox'][0] + 0.5 * detection_test_data['annotations'][i]['bbox'][2]) / 640
        y_rate = (detection_test_data['annotations'][i]['bbox'][1] + 0.5 * detection_test_data['annotations'][i]['bbox'][3]) / 640
            # Found = False
            # foundCand = None
            # for c in matches:
            #     # print(f"getting index {i_idx} in range {c.z_range} = {i_idx - c.z_range[0]} from {len(c.xy_ranges)}")
            #     if c.xy_ranges[i_idx - c.z_range[0]] == 'nan':
            #         # print("This is nan")
            #         continue
            #     # print(f"x = {x_rate}, y = {y_rate} with candidate x = {c.xy_ranges[i_idx - c.z_range[0]][0]}, y = {c.xy_ranges[i_idx - c.z_range[0]][1]}")
            #     # print(f"cur range = {c.z_range}")
            #     if math.isclose(x_rate, c.xy_ranges[i_idx - c.z_range[0]][0], abs_tol = 0.0025) and math.isclose(y_rate, c.xy_ranges[i_idx - c.z_range[0]][1], abs_tol = 0.0025):
            #         if Found:
            #             print(f"OH NO! Found {i_idx} in {foundCand} with currrent {c}")
            #         else:
            #             print("Found it!")
            #             Found = True
            #             foundCand = c
            # if not Found:
            #     print("OMG That is a big problem!!!!!")
        x_real = round(x_rate * 512)
        y_real = round(y_rate * 512)
        ct = Ct(s_uid)
            # center_irc = xyz2irc(foundCand.center_xyz, ct.origin_xyz, ct.vxSize_xyz, ct.direction_a)
            # print(f"Real z = {i_idx}, x = {x_real}, y = {y_real}. With center_irc = {center_irc}")
            # break
        new_center_irc = (i_idx, y_real, x_real)
            # break

        candidate_a, mask_a = ct.getDetectionTestCandidate(new_center_irc, width_irc)
        
        candidate_t = torch.from_numpy(candidate_a).to(torch.float32)
        mask_t = torch.from_numpy(mask_a).to(torch.float32)

        label_t = torch.tensor([False, False], dtype=torch.float32)

        if not isMal_B:
            label_t[0] = True
            index_t = 0
        else:
            label_t[1] = True
            index_t = 1

        torch.save(candidate_t, f'DetectionTest/{width_irc[0]}_{width_irc[1]}_{width_irc[2]}/{i}.pt')
        torch.save(label_t, f'DetectionTest/{width_irc[0]}_{width_irc[1]}_{width_irc[2]}/{i}_label.pt')
        torch.save(mask_t, f'DetectionTest/{width_irc[0]}_{width_irc[1]}_{width_irc[2]}/{i}_mask.pt')
        if i % 50 == 0:
            print(f"Saving Image [{i+1} / {size}] at LunaData/{width_irc[0]}_{width_irc[1]}_{width_irc[2]}")

Saving Image [1 / 789] at LunaData/1_64_64
Saving Image [51 / 789] at LunaData/1_64_64
Saving Image [101 / 789] at LunaData/1_64_64
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Saving Image [151 / 789] at LunaData/1_64_64
Saving Image [201 / 789] at LunaData/1_64_64
Saving Image [251 / 789] at LunaData/1_64_64
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually

In [None]:
def calculate_box(cmask, cbbox):
    #print(cbbox)
    
    cmask_reshape = np.transpose(np.array(cmask), (2, 1, 0))
    #print(cmask_reshape)
    ret = []
    
    for z_mask in cmask_reshape:
        min_x, max_x, min_y, max_y = 511, 0, 511, 0
        for i in range(len(z_mask)):
            for j in range(len(z_mask[i])):
                if z_mask[i][j]:
                    if i <= min_y:
                        min_y = i
                    if j <= min_x:
                        min_x = j
                    if i >= max_y:
                        max_y = i
                    if j >= max_x:
                        max_x = j
        #print("cbbox:", cbbox)
        #print("minx, maxx, miny, maxy: ", (min_x, max_x, min_y, max_y))
        if min_x == 511 and max_x == 0 and min_y == 511 and max_y == 0:
            ret.append('nan')
        else:
            center_x = round(((min_x + max_x) / 2 + cbbox[0].start) / 511, 10)
            center_y = round(((min_y + max_y) / 2 + cbbox[1].start) / 511, 10)
            width_x = round((max_x - min_x + 4) / 512, 10)
            height_y = round((max_y - min_y + 4) / 512, 10)
            #print("centerx, centery, width, height: ", center_x, center_y, width_x, height_y)
            ret.append([center_y, center_x, height_y, width_x])
        
        #print(ret)
        
    return ret

In [119]:
from sklearn.metrics import confusion_matrix

a = [0,0,0,1,1,1]
b = [0,0,1,1,1,1]

tn, fp, fn, tp = confusion_matrix(a, b).ravel()
(tn, fp, fn, tp)

(2, 1, 0, 3)

In [128]:
myRecall = tp / (tp + fn)
myRecall

1.0

In [120]:
recall_score(a, b)

1.0

In [145]:
rw = recall_score(a, b, average='weighted')
rw

0.8333333333333334

In [129]:
myPrecision = tp / (tp + fp)
myPrecision

0.75

In [147]:
precision_score(a, b)

0.75

In [148]:
pw = precision_score(a, b, average="weighted")
pw

0.875

In [133]:
myF1 = (2 * myPrecision * myRecall) / (myPrecision + myRecall)
myF1

0.8571428571428571

In [141]:
f1_score(a, b, average="binary")

0.8571428571428571

In [146]:
f1 = (2 * pw * rw) / (pw + rw)
f1

0.8536585365853658

In [142]:
f1_score(a, b, average='weighted')

0.8285714285714286

In [3]:
from PIL import Image, ImageDraw
def np2Png(np_arr, target_name):
    min_value = np.min(np_arr)
    max_value = np.max(np_arr)
    scaled_np_arr = np_arr * 255
    scaled_np_arr = scaled_np_arr.astype(np.uint8)
    slice_ori = Image.fromarray(scaled_np_arr, mode='L')
    slice_ori.save(target_name)

In [58]:
import glob
ct_path = glob.glob('D:/IMProject/DetectionData/Train/*_ct.pt')
mask_path = glob.glob('D:/IMProject/DetectionData/Train/*_mask.pt')
label_path = glob.glob('D:/IMProject/DetectionData/Train/*_label.pt')

In [59]:
len(mask_path)

37926

In [51]:
mask_path[0]

'D:/IMProject/DetectionData/Train\\0-(116)-[0]_0_1.3.6.1.4.1.14519.5.2.1.6279.6001.997611074084993415992563148335_mask.pt'

In [65]:
len(ct_path)

37926

In [64]:
len(label_path)

37914

In [46]:
# img = torch.load(f'DetectionTest/1_64_64/0_mask.pt').numpy()
img = torch.load(mask_path[4]).numpy()
img[0]

array([[ 42.,  52.,  38., ..., 124., 134., 135.],
       [ 35.,  61.,  56., ..., 120., 131., 138.],
       [ 35.,  45.,  51., ..., 117., 114., 128.],
       ...,
       [164., 179., 201., ..., 235., 240., 248.],
       [180., 181., 191., ..., 244., 244., 256.],
       [179., 188., 196., ..., 254., 258., 263.]], dtype=float32)

In [47]:
np2Png(img[0], "test.png")

In [76]:
ct_vpath = glob.glob('D:/IMProject/DetectionData/Valid/*_ct.pt')
mask_vpath = glob.glob('D:/IMProject/DetectionData/Valid/*_mask.pt')
label_vpath = glob.glob('D:/IMProject/DetectionData/Valid/*_label.pt')

In [77]:
len(ct_vpath)

789

In [78]:
len(mask_vpath)

789

In [79]:
len(label_vpath)

789

In [71]:
ct_tpath = glob.glob('D:/IMProject/DetectionData/Test/*_ct.pt')
mask_tpath = glob.glob('D:/IMProject/DetectionData/Test/*_mask.pt')
label_tpath = glob.glob('D:/IMProject/DetectionData/Test/*_label.pt')

In [75]:
len(ct_tpath)

7905

In [73]:
len(mask_tpath)

7905

In [74]:
len(label_tpath)

7905