# Imports

In [1]:
# Execution-environment switches: set exactly one to True, or leave both
# False to run on Colab with Google Drive mounted.
use_jupyter = True
use_kaggle = False

In [2]:
# Announce the selected environment and list its data location as a quick
# sanity check. `use_jupyter` is tested first, so it wins if both flags are True;
# with both flags False the Colab branch runs.
if use_jupyter:
    print("Using Jupyter")
    # IPython shell escape: Windows `dir` of the local data folder.
    !dir "C:/develop/python/Final Project/Data/"
elif use_kaggle:
    print("Using Kaggle")
    # IPython shell escape: list the Kaggle root folder.
    !ls "/kaggle/"
else:
    print("Using Colab with drive")
    # Imported here because google.colab is only needed in this branch.
    from google.colab import drive
    # Mount Google Drive under /content/drive before listing the course folder.
    drive.mount('/content/drive')
    !ls "/content/drive/My Drive/Colab Notebooks/ML course/"

Using Jupyter
 Volume in drive C has no label.
 Volume Serial Number is B0A9-3BF3

 Directory of C:\develop\python\Final Project\Data

09/04/2020  13:50    <DIR>          .
09/04/2020  13:50    <DIR>          ..
09/04/2020  11:29         9,955,043 Data.zip
07/04/2020  16:24    <DIR>          extracted-masks-images
07/04/2020  16:24         1,856,537 extracted_cells.csv
07/04/2020  15:43    <DIR>          masks-images
07/04/2020  15:43    <DIR>          resized-images
               2 File(s)     11,811,580 bytes
               5 Dir(s)  446,589,669,376 bytes free


In [3]:
import numpy as np
import pandas as pd
import uuid
import os
import csv
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import time
import cv2
from skimage import io
from skimage import data, color, img_as_ubyte
from skimage.feature import canny
from skimage.transform import hough_ellipse
from skimage.draw import ellipse_perimeter, ellipse

# Constants and Setup

In [4]:
# Choose the root folders for reading inputs and writing outputs.
# Jupyter takes precedence over Kaggle; Colab (Google Drive) is the fallback.
if use_jupyter:
    # Local run: inputs and outputs share one folder.
    root_input_dir = root_output_dir = 'C:/develop/python/Final Project/Data/'
elif use_kaggle:
    # Kaggle run: inputs and outputs live in separate folders.
    root_input_dir, root_output_dir = '/kaggle/input/', '/kaggle/working/output/'
else:
    # Colab run: inputs and outputs share one Drive folder.
    root_input_dir = root_output_dir = '/content/drive/My Drive/Colab Notebooks/ML course/Final Project - BioCell/'

In [11]:
# --- File naming ---
input_csv_filename = 'extracted_cells.csv'
output_csv_filename = 'filtered_cells.csv'
cell_masks_subdirectory = 'extracted-masks-images'
images_extension = ".png"

# --- CSV layout ---
csv_field_names = [
    'source_file', 'mask_uuid',
    'left_x', 'right_x', 'top_y', 'bottom_y',
    'id',
]
uuid_row_index = 1  # column of an input row that holds the mask uuid

# --- Filter thresholds ---
minimum_cover_percent = 89  # a mask is kept only when coverage is strictly above this
max_factor_size = 4         # largest upscale factor tried for a mask

# --- Range of input rows handled by this run (list indices, end inclusive) ---
start_line = 5001
end_line = 10000

In [6]:
# Use matplotlib's gray colormap by default and silence numpy's
# divide-by-zero / invalid-value warnings.
plt.gray()
np.seterr(divide='ignore', invalid='ignore')

# Input locations: the csv written by the extraction step and the mask images folder.
masks_images_path = f'{root_input_dir}{cell_masks_subdirectory}/'
input_csv_file_path = f'{root_input_dir}{input_csv_filename}'

# Row accumulators: everything read from the input csv, and the rows that pass the filter.
input_csv_rows_list, output_csv_rows_list = [], []

# The output file name is prefixed with the processed range: "<start>-<end>_<name>".
output_csv_file_path = f'{root_output_dir}{start_line}-{end_line}_{output_csv_filename}'

<Figure size 432x288 with 0 Axes>

# Function Definitions

In [7]:
def getMaskBestEllipse(mask_data):
    """Find the highest-scoring ellipse in a mask image.

    Runs Canny edge detection on `mask_data`, feeds the edges to the Hough
    ellipse transform, and picks the candidate with the largest `accumulator`
    value.

    Returns:
        (yc, xc, a, b, orientation): fields 1-4 of the winning record rounded
        to ints, followed by field 5 (the orientation) left as returned.

    Raises:
        IndexError: if the Hough transform yields no candidates.
    """
    edge_map = canny(mask_data, sigma=2.0, low_threshold=0.55, high_threshold=0.8)
    candidates = hough_ellipse(edge_map, accuracy=10, threshold=40, min_size=0, max_size=500)

    # Ascending sort on the vote count puts the strongest candidate last.
    candidates.sort(order='accumulator')
    winner = list(candidates[-1])

    yc, xc, a, b = (int(round(value)) for value in winner[1:5])
    return yc, xc, a, b, winner[5]

In [8]:
def resizeImage(image_data, scale_size):
    """Return `image_data` scaled by `scale_size` along both axes.

    Args:
        image_data: image array; `shape[0]` is read as the height and
            `shape[1]` as the width.
        scale_size: multiplicative factor applied to both dimensions.

    Returns:
        The resized image produced by `cv2.resize` with `cv2.INTER_AREA`
        interpolation.
    """
    # Size and pixels both come from the argument, not from a notebook-level
    # variable, so the function works for any image it is handed.
    width = int(image_data.shape[1] * scale_size)
    height = int(image_data.shape[0] * scale_size)

    # The target size is passed to cv2.resize as (width, height).
    return cv2.resize(image_data, (width, height), interpolation=cv2.INTER_AREA)

In [9]:
def getEllipseCoverPercent(mask_data, factor_size):
    """Return the percentage of mask pixels covered by the mask's best ellipse.

    The mask is first resized by `factor_size`. The best ellipse found on the
    resized mask is then drawn (perimeter and fill) in black on a copy, and
    the share of originally non-zero pixels that became zero is reported.

    Args:
        mask_data: mask image array.
        factor_size: scale factor applied before the ellipse search.

    Returns:
        Coverage in percent, or 0 when no ellipse could be found or drawn for
        this factor (a message is printed in that case).
    """
    # Resize the image first
    resized_image = resizeImage(mask_data, factor_size)
    mask_copy = resized_image.copy()

    try:
        # The ellipse search is inside the try block: a mask with no Hough
        # candidates counts as "no ellipse" instead of aborting the whole run.
        yc, xc, a, b, orientation = getMaskBestEllipse(resized_image)

        # Pixel indices of the ellipse outline and interior. The fill is drawn
        # with the negated angle, presumably because skimage's ellipse() and
        # ellipse_perimeter() use opposite rotation directions — TODO confirm.
        perimeter_y, perimeter_x = ellipse_perimeter(yc, xc, a, b, orientation)
        fill_y, fill_x = ellipse(yc, xc, a, b, rotation=-orientation)

        # NOTE(review): no `shape=` is passed, so coordinates outside the image
        # are not clipped; too-large indices raise and land in the handler
        # below, negative ones would wrap around — confirm this is intended.
        mask_copy[perimeter_y, perimeter_x] = 0
        mask_copy[fill_y, fill_x] = 0

        # Non-zero pixels left in the copy are mask pixels the ellipse missed.
        full_mask_nonzero = np.count_nonzero(resized_image)
        ellipse_non_zero = np.count_nonzero(mask_copy)

        return ((full_mask_nonzero - ellipse_non_zero) / full_mask_nonzero) * 100
    except Exception:
        # `Exception`, not a bare except, so Ctrl-C / kernel interrupt still
        # stops a long run. An empty mask (division by zero) also lands here.
        print(f'Could not get ellipse for factor size {factor_size}')
        return 0

# Filter Logic

In [12]:
# Filter the extracted masks.
# For every row of the input csv in [start_line, end_line]:
#   1. load the row's mask image (file name = mask uuid + images_extension),
#   2. try upscale factors 2..max_factor_size, and
#   3. keep the row as soon as the best ellipse covers more than
#      minimum_cover_percent of the mask.

# Start each run of this cell from an empty result list; otherwise re-running
# it (for example after changing start_line / end_line) would keep appending
# to the rows collected by the previous run.
output_csv_rows_list = []

# Load the extraction output. The file is closed before the slow loop starts.
with open(input_csv_file_path, newline='') as csv_file:
    input_csv_rows_list = list(csv.reader(csv_file, delimiter=','))

selected_rows = input_csv_rows_list[start_line:end_line + 1]
for counter, line in enumerate(selected_rows, start=start_line + 1):
    start = time.time()

    # Get the mask data
    current_mask_uuid = line[uuid_row_index]
    current_mask_filename = current_mask_uuid + images_extension
    current_mask_data = io.imread(masks_images_path + current_mask_filename, as_gray=True)

    # Stop at the first scale factor whose ellipse covers enough of the mask.
    for factor_size in range(2, max_factor_size + 1):
        current_cover_percent = getEllipseCoverPercent(current_mask_data, factor_size)

        # Coverage strictly above the wanted minimum means this is a good mask.
        if current_cover_percent > minimum_cover_percent:
            output_csv_rows_list.append({
                "source_file": line[0],
                "mask_uuid": line[1],
                "left_x": line[2],
                "right_x": line[3],
                "top_y": line[4],
                "bottom_y": line[5],
                "id": line[6]
            })
            break

    end = time.time()
    print(f'line number {counter} - time {end - start} seconds')

                    
# Rebuild the output path from the current range so the file name always
# matches start_line / end_line, even if those constants were changed without
# re-running the setup cell.
output_csv_file_path = root_output_dir + f'{start_line}-{end_line}_' + output_csv_filename

# Create the output folder if it does not exist yet.
output_directory = os.path.dirname(output_csv_file_path)
if output_directory:
    os.makedirs(output_directory, exist_ok=True)

# Write the rows that passed the filter. No header row is written.
# NOTE(review): confirm downstream readers expect a header-less csv.
with open(output_csv_file_path, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=csv_field_names)
    # Report the file that is actually written, including the range prefix.
    print(f'Writing {len(output_csv_rows_list)} filtered lines to {output_csv_file_path}')
    writer.writerows(output_csv_rows_list)

Writing 3118 filtered lines to filtered_cells.csv
