# IMPORTS & DRIVE CONNECTIONS

In [None]:
!pip install SimpleITK


# Mount Google Drive under /content/drive; the shared-drive paths used by the
# later cells of this notebook all live below that mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import sys
import numpy as np
import os
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt

import SimpleITK as sitk

from google.colab.patches import cv2_imshow

In [None]:
import inspect

# Folder holding the project-local `luna_module.py`.
# Kept in its own name (not the generic `path`) because a later cell uses
# `path` for the dataset root; sharing one name made the data-loading cells
# depend on which of the two cells happened to run last.
luna_module_dir = "/content/drive/Shareddrives/IA DL_project/ML IA/IMAGE_PROCESSING_PIPELINE"

# Make the module importable without adding duplicate entries on re-runs.
if luna_module_dir not in sys.path:
  sys.path.append(luna_module_dir)

import luna_module
# NOTE(review): the star import is what brings helper names used further down
# (e.g. AIR_TH, measure, debugger, norm2uint8) into the notebook namespace.
from luna_module import *

# List all function names in the luna_module
function_names = [name for name, obj in inspect.getmembers(luna_module) if inspect.isfunction(obj)]
print(function_names)

['annotations_by_uid', 'binarize_lung', 'binarize_lung_3d', 'binary_closing', 'binary_dilation', 'binary_erosion', 'binary_fill_holes', 'binary_opening', 'center_of_mass', 'clear_border', 'convert_annotation_df', 'convert_annotation_df_with_uid', 'create_3d_mask', 'create_annotations_mask', 'create_annotations_mask_by_uid', 'create_patch', 'debugger', 'draw_ellipsoid', 'find_by_uid', 'find_neighborhood_indices', 'find_neighborhood_indices_more_precise', 'get_slice_candidates', 'get_slice_candidates_old', 'get_slices', 'get_uids', 'img_by_uid', 'masked_annotations_by_uid', 'masked_annotations_with_info_by_uid', 'meta_by_uid', 'norm2float', 'norm2uint16', 'norm2uint8', 'normalize_intensity', 'plot_slices', 'process_slice_candidates', 'process_slices', 'remove_non_central_objects', 'sensitivity_score', 'sensitivity_score_more_precise', 'show_3_images', 'subset_by_uid', 'unwanted_object_filter']


# LOAD IMAGE

In [None]:
# Root folder of the LUNA16 data on the shared drive.
path = "/content/drive/Shareddrives/IA DL_project/ML IA/LUNA16"

path_subsets = os.path.join(path, "subsets") # path for subsets folder
subsets = os.listdir(path_subsets) # subset folders present
# Annotation table read from <root>/annotations.csv; used below to select UIDs.
ANNOTATIONS_DF = pd.read_csv(os.path.join(path, "annotations.csv"))

In [None]:
read_in_subset = "subset0"
EXPANDED_ANNOTATIONS_DF = pd.read_csv(os.path.join(path, f"{read_in_subset}_annotations_expanded.csv"))

# File names in the subset folder with their extension stripped; the set
# collapses files that share a stem into a single UID.
all_uids_in_subset = {
    os.path.splitext(filename)[0]
    for filename in os.listdir(os.path.join(path_subsets, read_in_subset))
}

# Keep only UIDs that have at least one annotation row.
# Iterate in sorted order: iterating the raw set gives an order that changes
# between kernel sessions, which made "Case i out of N" non-reproducible.
read_in_uids = [
    uid for uid in sorted(all_uids_in_subset)
    if len(annotations_by_uid(uid, ANNOTATIONS_DF))
]

print(len(all_uids_in_subset))
print(len(read_in_uids))

89
67


# NODULE SEGMENTATION

## Candidates

In [None]:
def get_slice_candidates(img, z, nthreshold=-400, pthreshold=200, debug=False, debug_res=False,
                         max_area=1500, max_eccentricity=0.75, min_solidity=0.6):
    """
    Extract candidate regions from a single 2D slice.

    Steps: intensities outside [nthreshold, pthreshold] are replaced by AIR_TH,
    the slice is normalised, blurred and Otsu-thresholded, closed, restricted to
    the (closed) lung mask returned by `binarize_lung`, opened, turned into an
    L1 distance transform and thresholded into markers. Connected components of
    the markers are then filtered by area, eccentricity and solidity.

    Parameters:
    img (numpy.ndarray): 2D slice. Presumably in Hounsfield units, given the
        default thresholds - TODO confirm.
    z: Value stored in the 'z' column of the returned DataFrame (the caller
        passes the slice index).
    nthreshold: Pixels below this value are replaced by AIR_TH.
    pthreshold: Pixels above this value are replaced by AIR_TH.
    debug (bool): If True, pass every intermediate image to `debugger` and plot
        the label image.
    debug_res (bool): If True, show the normalised input, lung mask and final
        candidate mask side by side (and the mask after each accepted region).
    max_area: A region is kept only if its area is strictly below this value.
    max_eccentricity: A region is kept only if its eccentricity is strictly
        below this value.
    min_solidity: A region is kept only if its solidity is strictly above this
        value.

    Returns:
    pd.DataFrame: Columns 'x', 'y', 'z'; x/y are the rounded centroid column/row
        of each kept region.
    numpy.ndarray: Float mask with the shape of `img`, 1.0 on kept regions.
    """
    # Work on a copy so the caller's slice is not modified
    img_n = img.copy()
    if debug:
        debugger(img_n, "img_n")

    # Replace everything outside the intensity window by the air value
    img_n[img < nthreshold] = AIR_TH
    img_n[img > pthreshold] = AIR_TH
    if debug:
        debugger(img_n, "img_n")

    # Normalize the image
    inp = norm2uint16(img_n)
    if debug:
        debugger(inp, "inp")

    # Apply Gaussian blur to the normalized image
    blurred = cv.GaussianBlur(inp, (5, 5), 0)
    if debug:
        debugger(blurred, "blurred")

    # Convert the image to 8-bit (cv.threshold with THRESH_OTSU is fed 8-bit data)
    image_8bit = norm2uint8(blurred)
    if debug:
        debugger(image_8bit, "image_8bit")

    # Apply Otsu's thresholding
    _, otsu_img = cv.threshold(
        image_8bit, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
    otsu_img = norm2float(otsu_img)
    if debug:
        debugger(otsu_img, "otsu_img")

    # Binarize the original image
    lung_mask = binarize_lung(img).astype(float)
    if debug:
        debugger(lung_mask, "lung_mask")

    # Apply morphological closing to the lung mask
    lung_mask = cv.morphologyEx(
        lung_mask, cv.MORPH_CLOSE, cv.getStructuringElement(cv.MORPH_ELLIPSE, (35, 35)))
    if debug:
        debugger(lung_mask, "lung_mask_closed")

    # The eroded image does not feed the pipeline; it is a diagnostic view only,
    # so it is computed just when debugging.
    if debug:
        eroded = cv.morphologyEx(otsu_img, cv.MORPH_ERODE,
                                 cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3)))
        debugger(eroded, "eroded")

    # Apply morphological closing with a larger kernel
    kernel_size = 5
    kernel = cv.getStructuringElement(
        cv.MORPH_ELLIPSE, (kernel_size, kernel_size))
    closing_img = cv.morphologyEx(otsu_img, cv.MORPH_CLOSE, kernel)
    if debug:
        debugger(closing_img, "closing_img")

    # Combine the lung mask and the closed image to get the region of interest
    roi = lung_mask * closing_img
    if debug:
        debugger(roi, "roi")

    # Apply morphological opening to the region of interest
    roi_new = cv.morphologyEx(
        roi, cv.MORPH_OPEN, cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3)))
    if debug:
        debugger(roi_new, "roi_new_opened")

    # Compute the distance transform
    distance_transform = cv.distanceTransform(
        norm2uint8(roi_new), cv.DIST_L1, 3)
    if debug:
        debugger(distance_transform, "distance_transform")

    distance_transform_vis = norm2uint8(distance_transform)
    if debug:
        debugger(distance_transform_vis, "distance_transform_vis")

    # Apply morphological opening to the distance transform
    kernel2 = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))
    eroded_distance_transform = cv.morphologyEx(
        distance_transform_vis, cv.MORPH_OPEN, kernel2)
    if debug:
        debugger(eroded_distance_transform, "eroded_distance_transform")

    # Threshold the opened distance transform to get internal markers
    # (every pixel above 0 becomes 127)
    _, internal_markers = cv.threshold(
        eroded_distance_transform, 0, 127, cv.THRESH_BINARY)  # NOTE: HARDCODED TH!!!
    if debug:
        debugger(internal_markers, "internal_markers")

    # Label connected components of the internal markers
    labels = measure.label(internal_markers)
    properties = measure.regionprops(labels)

    if debug:
        debugger(labels)
        plt.imshow(labels)
        plt.show()

    # Keep only regions that pass ALL shape filters. Previously each filter
    # restarted from `properties`, so only the last (solidity) one had any effect.
    detected_objects = [
        obj for obj in properties
        if obj.area < max_area                      # area filtering
        and obj.eccentricity < max_eccentricity     # eccentricity filtering
        and obj.solidity > min_solidity             # solidity filtering
    ]

    cx = []
    cy = []
    candidate_mask = np.zeros_like(img, dtype=float)
    for obj in detected_objects:
        candidate_mask[labels == obj.label] = 1.
        if debug_res:
            debugger(candidate_mask)
        centroid = obj.centroid
        cy.append(centroid[0])  # y-coordinate (row)
        cx.append(centroid[1])  # x-coordinate (column)

    # DataFrame with the centroids
    coord = pd.DataFrame({'x': np.round(cx).astype(int), 'y': np.round(cy).astype(int), 'z': z})

    if debug:
        debugger(candidate_mask, "candidate_mask")

    if debug_res:
        show_3_images([inp, lung_mask, candidate_mask])

    return coord, candidate_mask


### Candidates results from z axis

In [None]:
def process_slice_candidates_z(img, verbose=False, debug=False):
    """
    Process each slice of a 3D image to identify candidate regions of interest.

    Parameters:
    img (numpy.ndarray): A 3D image represented as a numpy array; slices are
        taken along the first axis.
    verbose (bool): If True, print detailed progress information. Default is False.
    debug (bool): Forwarded to `get_slice_candidates` as `debug_res`. Default is False.

    Returns:
    pd.DataFrame: A dataframe containing candidate coordinates (x, y, z).
    list: A list of masks corresponding to the candidate regions in each slice.
    """
    n_slices = img.shape[0]

    # Per-slice results are collected and concatenated once at the end;
    # growing a DataFrame with pd.concat inside the loop is quadratic.
    slice_frames = []
    candidates_masks = []
    n_candidates = 0

    # Iterate over slices along the first axis (z-axis)
    for slice_i in range(n_slices):
        if verbose:
            print(f"[START] - processing slice #{slice_i}")

        # Identify candidates and their mask in the current slice
        slice_candidates, slice_candidates_mask = get_slice_candidates(
            img[slice_i, :, :], slice_i, debug_res=debug)

        if verbose:
            print(
                f"[DONE] - {len(slice_candidates)} candidates found for slice #{slice_i}")

        slice_frames.append(slice_candidates)
        candidates_masks.append(slice_candidates_mask)
        n_candidates += len(slice_candidates)

        if verbose:
            # slice_i has just been processed, hence the extra -1
            print(f"\n[STATUS] - {n_candidates} candidates found\n" +
                  f"[STATUS] - {n_slices - slice_i - 1} slices left\n")

    if slice_frames:
        candidates = pd.concat(slice_frames, ignore_index=True)
    else:
        # No slices at all: still return a frame with the expected columns
        candidates = pd.DataFrame(columns=["x", "y", "z"])

    return candidates, candidates_masks


## Filter out false candidates from y axis

In [None]:
def filter_slice_on_y_axis(y_slice, area_th=(48*48),  debug=False):
  """
  Keep only the connected regions of a 2D mask that look like candidates.

  Parameters:
  y_slice (numpy.ndarray): 2D mask slice; non-zero pixels are foreground.
  area_th: A region is kept only if its area is strictly below this value.
  debug (bool): If True, plot the input, the label image and the result, and
      print the properties of every kept region.

  Returns:
  numpy.ndarray: Boolean mask with the shape of `y_slice`, True on regions whose
      area is below `area_th` and whose solidity is above 0.65.
  """
  # Nothing to label in an empty slice: return early in every mode.
  # (The return used to sit inside the `if debug:` branch, so the shortcut
  # was only taken when debugging.)
  if not len(y_slice.nonzero()[0]):
    if debug:
      print(f"[SKIP] - empty mask at slice ")
    return y_slice.astype(bool)

  if debug:
    plt.imshow(y_slice)
    plt.show()

  # Label connected components of the mask
  labels = measure.label(y_slice)
  properties = measure.regionprops(labels)

  if debug:
    plt.imshow(labels)
    plt.show()

  # Keep regions smaller than the area threshold and sufficiently solid
  detected_objects = [obj for obj in properties if obj.area < area_th] # area filtering
  detected_objects = [obj for obj in detected_objects if obj.solidity > 0.65] # solidity filtering

  # Create an empty image to hold the result
  filtered_image = np.zeros_like(y_slice, dtype=bool)

  # Fill in the regions of the relevant objects
  for prop in detected_objects:
      if debug:
        print(f"label: {prop.label}")
        print(f"area: {prop.area}")
        print(f"eccentricity: {prop.eccentricity}")
        print(f"solidity: {prop.solidity}")
      filtered_image[labels == prop.label] = True

  if debug:
    plt.imshow(filtered_image)
    plt.show()

  return filtered_image

In [None]:
# raise Exception("STOP IT")

In [None]:
def process_slice_candidates_2_axes(img_3d, verbose=False, debug=False):
  """
  Extract candidates slice-by-slice along axis 0, then prune them along axis 1.

  Parameters:
  img_3d (numpy.ndarray): 3D image; axis 0 is sliced for candidate extraction.
  verbose (bool): Forwarded to `process_slice_candidates_z` for progress output.
  debug (bool): Accepted for interface compatibility; it is intentionally not
      forwarded, because the per-slice debug views would be shown for every slice.

  Returns:
  pd.DataFrame: Columns 'x', 'y', 'z' - rounded centres of mass of the 3D
      connected components that remain after the axis-1 filtering
      (x from axis 2, y from axis 1, z from axis 0). Empty if nothing remains.
  pd.DataFrame: Per-slice candidate centres as returned by
      `process_slice_candidates_z`.
  """
  # Candidate extraction from Z axis
  z_centers_df, masks = process_slice_candidates_z(img_3d, verbose=verbose)

  # Reconstruction of 3d mask from the per-slice masks
  mask_img_3d = np.array(masks)

  # Filtering out false candidates on Y axis
  y_filtered_mask_img_3d = process_slices(mask_img_3d, axis_index=1, func=filter_slice_on_y_axis,)

  # Identify connected components in 3D mask
  labeled_mask = measure.label(y_filtered_mask_img_3d, connectivity=1)

  num_features = labeled_mask.max()

  # Calculate the centers of mass for each object
  centers = center_of_mass(y_filtered_mask_img_3d, labeled_mask, range(1, num_features + 1))
  # Force an (n, 3) layout so that zero components give an empty (0, 3) array
  # instead of a 1-D array that cannot be column-indexed.
  centers = np.array(centers, dtype=float).reshape(-1, 3)

  y_centers_df = pd.DataFrame({
      "x": np.round(centers[:,2]).astype(int),
      "y": np.round(centers[:,1]).astype(int),
      "z": np.round(centers[:,0]).astype(int),
  })
  return y_centers_df, z_centers_df



In [None]:
# Display the UIDs that the candidate-extraction loop below iterates over.
read_in_uids

['1.3.6.1.4.1.14519.5.2.1.6279.6001.310548927038333190233889983845',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.979083010707182900091062408058',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.194440094986948071643661798326',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.417815314896088956784723476543',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.188376349804761988217597754952',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.293757615532132808762625441831',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.124154461048929153767743874565',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.640729228179368154416184318668',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.277445975068759205899107114231',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.716498695101447665580610403574',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.334517907433161353885866806005',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.213140617640021803112060161074',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.724251104254976962355686318345',
 '1.3.6.1.4.1.14519.5.2.1.6279.600

In [None]:
# Per-scan results, keyed by UID:
#   scores_[uid] -> sensitivity computed from the Y-filtered candidates
#   hits_[uid]   -> {"z": ..., "y": ...} dictionaries returned by
#                   find_neighborhood_indices_more_precise
scores_ = {}
hits_ = {}

save_path = f"/content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/candidates/solidity_on_z/{read_in_subset}"
save = False

# Output folders are only needed (and only touched) when saving is enabled.
if save:
  for axis_name in ("z", "y"):
    os.makedirs(os.path.join(save_path, axis_name), exist_ok=True)

for i, uid in enumerate(read_in_uids):
  print(f"Case {i+1} out of {len(read_in_uids)}")

  # Check `save` first so the drive is not queried at all when saving is off
  # (the folder may not even exist in that case).
  if save and os.path.isfile(os.path.join(save_path, "y", f"{uid}.csv")):
    print(f"[SKIP] - {uid} - Candidates already extracted")
    continue

  # Only read if there is missing information
  print(f"[START] -- READING IN: {read_in_subset}/{uid}")
  img = sitk.ReadImage(os.path.join(path_subsets, read_in_subset, f"{uid}.mhd"))

  img_3d = sitk.GetArrayFromImage(img)

  print(f"[CANDIDATES] - {uid}")
  centers_y_df, centers_z_df = process_slice_candidates_2_axes(img_3d, debug=True)

  if save:
    centers_z_df.to_csv(os.path.join(save_path, "z", f"{uid}.csv"))
    centers_y_df.to_csv(os.path.join(save_path, "y", f"{uid}.csv"))

  # The annotations of this scan are needed three times below; convert them once.
  uid_annotations = convert_annotation_df(annotations_by_uid(uid, EXPANDED_ANNOTATIONS_DF))

  print(f"[Z-axis] - Count: {len(centers_z_df)} ")
  tp_z_i , tp_z_dict = find_neighborhood_indices_more_precise(centers_z_df, uid_annotations)
  print(f"[TRUE POSITIVE, Z] - ")
  print(tp_z_dict)

  print(f"[Y-axis] - Count: {len(centers_y_df)}")
  tp_y_i , tp_y_dict = find_neighborhood_indices_more_precise(centers_y_df, uid_annotations)
  print(f"[TRUE POSITIVE, Y] - ")
  print(tp_y_dict)

  sensitivity  = sensitivity_score_more_precise(centers_y_df, uid_annotations)
  print(f"[SENSITIVITY] - {sensitivity}")
  print(f"[DONE] - {uid}\n")
  scores_[uid] = sensitivity
  hits_[uid] = {"z": tp_z_dict, "y": tp_y_dict}



Case 1 out of 67
[START] -- READING IN: subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.310548927038333190233889983845
[CANDIDATES] - 1.3.6.1.4.1.14519.5.2.1.6279.6001.310548927038333190233889983845
[Z-axis] - Count: 1218 
[TRUE POSITIVE, Z] - 
{63: [1175, 1193, 1203, 1210]}
[Y-axis] - Count: 662
[TRUE POSITIVE, Y] - 
{63: [633, 645]}
[SENSITIVITY] - 1.0
[DONE] - 1.3.6.1.4.1.14519.5.2.1.6279.6001.310548927038333190233889983845

Case 2 out of 67
[START] -- READING IN: subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371
[CANDIDATES] - 1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371
[Z-axis] - Count: 1167 
[TRUE POSITIVE, Z] - 
{103: [358, 371], 104: []}
[Y-axis] - Count: 731
[TRUE POSITIVE, Y] - 
{103: [229], 104: []}
[SENSITIVITY] - 0.5
[DONE] - 1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371

Case 3 out of 67
[START] -- READING IN: subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.979083010707182900091062408058
[CANDIDATES] - 1.3.6.1.4.1.14519.5

# Validate sensitivity

In [None]:
# Accumulate the per-scan sensitivities; note that `av_score` holds the running
# total, and the mean is the value displayed by the last line of the cell.
av_score = 0
for scan_score in scores_.values():
  av_score = av_score + scan_score

av_score / len(scores_)

0.9124378109452735

In [None]:
# Every entry of a scan's "z" dictionary counts once towards the total; an
# entry is a hit for an axis when its list for that axis is non-empty.
per_scan_hits = list(hits_.values())

count = sum(len(scan["z"]) for scan in per_scan_hits)
sen_z = sum(1 for scan in per_scan_hits for found in scan["z"].values() if len(found))
sen_y = sum(1 for scan in per_scan_hits for found in scan["y"].values() if len(found))

print(f"sensitivity z: {sen_z/count}")
print(f"sensitivity y: {sen_y/count}")



sensitivity z: 0.9196428571428571
sensitivity y: 0.9107142857142857
