In [1]:
### Code to convert segmentation data to coordinates.
### Characteristics:
### - only the biggest nodule is considered the real one!

In [2]:
import glob
import os

import matplotlib
import numpy as np
import pandas as pd
import SimpleITK as sitk
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from skimage import measure
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm

from utils import (
    load_itk, resample,
    gen_coordinate_and_masks
)

### HC dataset

In [3]:
# Glob pattern matching one mask file per exam folder of the HC dataset.
MASK_PATH = r'/data/HC_Masks_resample/*/*.nii.gz'
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the batch order (and therefore the row order of the exported CSV) stable
# across runs and machines.
mask_list = sorted(glob.glob(MASK_PATH))
print(len(mask_list))
# Preview the first match without crashing when the pattern matched nothing.
print(mask_list[0] if mask_list else 'no mask files matched ' + MASK_PATH)

101
/data/HC_Masks_resample/PL032303325116347/PL032303325116347.nii.gz


In [4]:
# mask_list = list(filter(lambda k: 'PL652297908013408' in k, mask_list))
# print(len(mask_list))

### MSD dataset

In [5]:
# MASK_PATH = r'/data/MSD_Masks_resample/*/lung*'
# mask_list = glob.glob(MASK_PATH)
# print(len(mask_list))
# print(mask_list[0])

### batch execution

In [6]:
# Batch step: for every mask file, compute the nodule bounding box, save a 3D
# preview PNG (nodule surface in red, bounding box in blue) and record the
# exam geometry and box limits in `nodule_coordinates`.

nodule_coordinates = []
dataset = 'HC'
if dataset not in ('HC', 'MSD'):
    # Fail before the long loop starts instead of on the first file name.
    raise ValueError('unknown dataset: ' + repr(dataset))

# Figures are only written to disk, so select the non-interactive backend once,
# before any figure is created, rather than re-selecting it on every iteration.
matplotlib.use('Agg')

# Directory receiving one preview image per exam; created if it is missing so
# the first savefig call cannot fail on a fresh machine.
preview_dir = os.path.join('/data', dataset + '-clean-nodules-coord-preview')
os.makedirs(preview_dir, exist_ok=True)


def _exam_name(mask_path, dataset):
    """Return the exam identifier, i.e. the name of the folder holding the mask.

    Uses the parent folder name instead of a fixed '/'-separated position, so
    the result does not depend on how deep the data root is.
    """
    folder_name = os.path.basename(os.path.dirname(mask_path))
    if dataset == 'MSD':
        # MSD names keep only the part before the first dot.
        return folder_name.split('.')[0]
    return folder_name


def _add_surface(ax, volume, face_color, alpha):
    """Draw the level-0 iso-surface of `volume` on the 3D axes `ax`."""
    verts, faces, normals, values = measure.marching_cubes(volume, 0)
    mesh = Poly3DCollection(verts[faces], alpha=alpha)
    mesh.set_facecolor(face_color)
    ax.add_collection3d(mesh)


for mask in tqdm(mask_list):
    img, origin, spacing = load_itk(mask)
    img = img.transpose(2, 1, 0)
    nodule_mask = img.copy()
    # nodule_mask, spacing = resample(img, spacing)

    coord_nodule_mask = gen_coordinate_and_masks(nodule_mask)
    bounding_box = coord_nodule_mask['nodule_bounding_box']

    fig = plt.figure(figsize=(10, 10))
    try:
        ax = fig.add_subplot(111, projection='3d')

        # The volumes are only read for meshing, so no extra full-size copies
        # are made just for plotting.
        _add_surface(ax, nodule_mask, [1, 0, 0, 0.7], alpha=0.7)        # red
        _add_surface(ax, bounding_box, [0.45, 0.45, 0.75], alpha=0.20)  # blue
        # Disabled: outlier voxels in black.
        # _add_surface(ax, coord_nodule_mask['outlier_mask'].transpose(2, 1, 0), [0, 0, 0], alpha=0.20)

        ax.set_xlim(0, bounding_box.shape[0])
        ax.set_ylim(0, bounding_box.shape[1])
        ax.set_zlim(0, bounding_box.shape[2])

        # ax.view_init(20, 95)

        raw_name = _exam_name(mask, dataset)
        fig.savefig(os.path.join(preview_dir, raw_name + '.png'))
    finally:
        # Always release the figure, even if meshing or saving failed, so a
        # long batch does not accumulate open figures.
        plt.close(fig)

    nodule_coordinates.append({
        "name": raw_name,
        "spacing": spacing,
        "origin": origin,
        "exam_refined": coord_nodule_mask["exam_refined"],
        "x_min": coord_nodule_mask["x_min"],
        "y_min": coord_nodule_mask["y_min"],
        "z_min": coord_nodule_mask["z_min"],
        "x_max": coord_nodule_mask["x_max"],
        "y_max": coord_nodule_mask["y_max"],
        "z_max": coord_nodule_mask["z_max"]
    })

100%|██████████| 101/101 [8:37:39<00:00, 307.52s/it]   


In [7]:
# Distinct values and their voxel counts in the bounding-box mask.
# `coord_nodule_mask` is whatever the last iteration of the batch loop left behind.
bounding_box_mask = coord_nodule_mask['nodule_bounding_box']
np.unique(bounding_box_mask, return_counts=True)

(array([0., 1.]), array([114426669,     40467]))

In [8]:
# Turn the per-exam records into a table, write it to disk without the index
# column, and preview the first rows.
nodule_coordinates = pd.DataFrame(data=nodule_coordinates)
nodule_coordinates.to_csv(
    path_or_buf='/data/output/hc_clean_nodules3.csv',
    index=False,
)
nodule_coordinates.head(n=5)

Unnamed: 0,name,spacing,origin,exam_refined,x_min,y_min,z_min,x_max,y_max,z_max
0,PL032303325116347,"[1.25, 0.703125, 0.703125]","[-239.75, 121.796875, 173.5968780517578]",False,175,171,62,201,200,76
1,PL061850842690412,"[1.25, 0.703125, 0.703125]","[-241.5, 390.023193359375, 255.02317810058594]",False,455,256,227,487,288,240
2,PL064775189851313,"[1.25, 0.703125, 0.703125]","[-13.5, 372.078125, 234.078125]",False,206,231,195,240,260,208
3,PL070153620449075,"[1.25, 0.703125, 0.703125]","[16.5, 369.023193359375, 246.02317810058594]",False,445,330,222,514,388,247
4,PL075999918976403,"[1.25, 0.703125, 0.703125]","[356.9999694824219, 308.0977783203125, 218.097...",False,359,208,181,419,266,216


In [9]:
# NOTE(review): int('aa') always raises ValueError. Presumably this is a
# deliberate stop so that "Run All" halts here, before the single-exam
# exploration cells that follow -- confirm, and consider an explicit,
# self-describing `raise` instead.
int('aa')

ValueError: invalid literal for int() with base 10: 'aa'

### unique exam

In [None]:
%%time
# Load the first mask and print a quick summary: distinct values with their
# counts, the array shape, and the origin/spacing returned by the loader.
first_mask_path = mask_list[0]
img, origin, spacing = load_itk(first_mask_path)

value_counts = np.unique(img, return_counts=True)
print(value_counts)
print(np.shape(img))
print(origin, spacing)

In [None]:
%%time
# Resample the loaded mask and print the same summary for the result.
# NOTE(review): the third argument is presumably the target voxel spacing -- confirm against utils.resample.
resample_arg = [1.25, 0.703125, 0.703125]
img2, spacing2 = resample(img, spacing, resample_arg)

resampled_value_counts = np.unique(img2, return_counts=True)
print(resampled_value_counts)
print(np.shape(img2))
print(origin, spacing2)

In [None]:
%%time
# Volume extents along each axis (reused later to build same-shaped volumes).
x, y, z = img2.shape

# Index triples (i, j, k) of every voxel equal to 1.
# np.argwhere scans in row-major order, which is exactly the order produced by
# three nested i/j/k loops, but it runs in compiled code instead of touching
# every voxel from Python. The result is converted back to a list of tuples of
# plain ints so later cells can index and unpack it as before.
coordinates = [tuple(voxel) for voxel in np.argwhere(img2 == 1).tolist()]

print(len(coordinates))

In [None]:
%%time
# For every nodule voxel, find its nearest neighbouring nodule voxels and the
# distances to them (each voxel is also returned as its own neighbour, since
# the query set is the fitted set).
X = np.array(coordinates)
# Ask for 100 neighbours, capped at the number of voxels: scikit-learn rejects
# a neighbour count larger than the number of fitted samples.
nbrs = NearestNeighbors(
    n_neighbors=min(100, len(X)),
    algorithm='ball_tree',
).fit(X)
distances, indices = nbrs.kneighbors(X)

In [None]:
# (number of voxels, neighbours per voxel)
np.shape(distances)

In [None]:
# Show which container type the neighbour query returned.
distances_type = type(distances)
print(distances_type)

In [None]:
# One record per voxel: its position in `coordinates` and the summed distance
# to its neighbours (large sums mean the voxel sits far from the others).
disto = [
    {"index": voxel_index, "distance": neighbour_distances.sum()}
    for voxel_index, neighbour_distances in enumerate(distances)
]

print(len(disto))

In [None]:
# Most isolated voxels first (largest summed neighbour distance on top).
sorted_disto = list(disto)
sorted_disto.sort(key=lambda record: record["distance"], reverse=True)

In [None]:
# Peek at the 20 most isolated voxels.
sorted_disto[:20]

In [None]:
# Voxels whose summed neighbour distance exceeds the cut-off are treated as outliers.
outlier_distance_cutoff = 1500
outliers = [
    record for record in sorted_disto
    if record['distance'] > outlier_distance_cutoff
]

print(outliers)

In [None]:
%%time
# Split the voxel coordinates into one list per axis.
xs = [voxel[0] for voxel in coordinates]
ys = [voxel[1] for voxel in coordinates]
zs = [voxel[2] for voxel in coordinates]

In [None]:
# Print the extent of the nodule: first the minimum along x, y, z, then the maximum.
for reduce_axis in (np.min, np.max):
    for axis_values in (xs, ys, zs):
        print(reduce_axis(axis_values))

In [None]:
%%time
# 3D preview of the single resampled exam: the nodule surface in translucent
# red, with the outlier voxels (if any) drawn on top in black.
p = img2.transpose(2, 1, 0)
verts, faces, normals, values = measure.marching_cubes(p, 0)

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1, projection='3d')

nodule_mesh = Poly3DCollection(verts[faces], alpha=0.05)
nodule_mesh.set_facecolor([1, 0, 0, 0.7])
ax.add_collection3d(nodule_mesh)

### bounding box (disabled)
# axes = [x, y, z]
# data = np.zeros(axes)

# for i in range(np.min(xs), np.max(xs)):
#     for j in range(np.min(ys), np.max(ys)):
#         for k in range(np.min(zs), np.max(zs)):
#             data[i][j][k] = 1

# p = data.transpose(2, 1, 0)
# verts, faces, normals, values = measure.marching_cubes(p, 0)

# mesh = Poly3DCollection(verts[faces], alpha=0.20)
# face_color = [0.45, 0.45, 0.75]
# mesh.set_facecolor(face_color)
# ax.add_collection3d(mesh)

### outlier voxels (this section was labelled "kmeans center"; the kmeans variant is disabled)
# Empty volume with the same shape as img2; only outlier voxels get set to 1.
data = np.zeros((x, y, z))

# for kmeans_clusters in kmeans_output:
#     data[int(kmeans_clusters[0])][int(kmeans_clusters[1])][int(kmeans_clusters[2])] = 1

if outliers:
    for record in outliers:
        # Each outlier record points back at its voxel in `coordinates`.
        data[tuple(coordinates[record["index"]])] = 1

    p = data.transpose(2, 1, 0)
    verts, faces, normals, values = measure.marching_cubes(p, 0)

    outlier_mesh = Poly3DCollection(verts[faces], alpha=0.95)
    outlier_mesh.set_facecolor([0, 0, 0])
    ax.add_collection3d(outlier_mesh)

# Axis limits follow the shape of the last volume that was meshed.
for set_limit, extent in zip((ax.set_xlim, ax.set_ylim, ax.set_zlim), p.shape):
    set_limit(0, extent)

# ax.view_init(90, 90)

plt.show()