# Mount to drive and image loading

In [1]:
use_kaggle=False
use_jupyter=True

In [2]:
if use_jupyter:
    print("Using Jupyter")
    !dir "C:/develop/python/Final Project/Data/"
elif use_kaggle:
    print("Using Kaggle")
    !ls "/kaggle/"
else:
    print("Using Colab with drive")
    from google.colab import drive
    drive.mount('/content/drive')
    !ls "/content/drive/My Drive/Colab Notebooks/ML course/"

Using Jupyter
 Volume in drive C has no label.
 Volume Serial Number is B0A9-3BF3

 Directory of C:\develop\python\Final Project\Data

07/04/2020  16:21    <DIR>          .
07/04/2020  16:21    <DIR>          ..
07/04/2020  16:21    <DIR>          extracted-masks-images
07/04/2020  15:43    <DIR>          masks-images
07/04/2020  15:43    <DIR>          resized-images
               0 File(s)              0 bytes
               5 Dir(s)  446,754,246,656 bytes free


In [3]:
# Imports
import numpy as np
import pandas as pd
import uuid
import os
import csv
from skimage import io
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Consts and Setups

In [4]:
if use_jupyter:
    root_input_dir = 'C:/develop/python/Final Project/Data/'
    root_output_dir = 'C:/develop/python/Final Project/Data/'
elif use_kaggle:
    root_input_dir = '/kaggle/input/cells segmentation output/'
    root_output_dir = '/kaggle/working/output/'
else:
    root_input_dir = '/content/drive/My Drive/Colab Notebooks/ML course/Final Project - BioCell/'
    root_output_dir = '/content/drive/My Drive/Colab Notebooks/ML course/Final Project - BioCell/'

In [5]:
margin_size = 3
cells_subdir = 'resized-images'
masks_subdir = 'masks-images'
extracted_masks_subdir = 'extracted-masks-images'
csv_filename = 'extracted_cells.csv'
image_extension = '.png'
csv_field_names = ['source_file', 'mask_uuid', 'left_x', 'right_x', 'top_y', 'bottom_y', 'id']

In [6]:
plt.gray()

# Inputs
cells_images_path = root_input_dir + cells_subdir + '/'
masks_images_path = root_input_dir + masks_subdir + '/'

# Outputs
csv_rows_list = []
csv_file_path = root_output_dir + csv_filename
extracted_masks_images_path = root_output_dir + extracted_masks_subdir + '/'
if not os.path.exists(extracted_masks_images_path):
    os.makedirs(extracted_masks_images_path)

<Figure size 432x288 with 0 Axes>

# Function Definition

In [7]:
def get_cell_bbox_location(mask):
    mask_x_axis = np.where((mask!=0).argmax(axis=1) != 0)
    left_x = mask_x_axis[0][0]
    right_x = mask_x_axis[0][len(mask_x_axis[0]) - 1]
    mask_y_axis = np.where((mask!=0).argmax(axis=0) != 0)
    top_y = mask_y_axis[0][0]
    bottom_y = mask_y_axis[0][len(mask_y_axis[0]) - 1]

    return left_x, right_x, top_y, bottom_y

In [8]:
def save_image(data, image_path):
    plt.imsave(image_path, data)

# Extraction Logic

In [9]:
cell_files = os.listdir(cells_images_path)

for filename in cell_files:
    # Check if files exist
    if not os.path.exists(masks_images_path + filename):
        print ("File " + masks_images_path + filename + " does not exist")
        continue

    # Full original cells tiff
    cells_full_img = io.imread(cells_images_path + filename)

    # Mask built by CellProfiler application - each cell gets its own unique id and it's pixels are given that ID value
    masks_full_img = io.imread(masks_images_path + filename)

    last_id = np.amax(masks_full_img)

    for mask_id in range(1, last_id + 1):
        mask_uuid = str(uuid.uuid4())
        current_mask = masks_full_img.copy()
        current_mask = np.where(current_mask == mask_id, 1, 0)

        current_cell = cells_full_img.copy()
        current_cell = current_cell * current_mask

        left_x, right_x, top_y, bottom_y = get_cell_bbox_location(current_mask)

        if not (left_x <=3 or right_x <= 3 or top_y <= 3 or bottom_y <= 3):
            save_image(current_mask[left_x - margin_size:right_x + margin_size, top_y - margin_size:bottom_y + margin_size], extracted_masks_images_path + mask_uuid + image_extension)
            csv_rows_list.append({
              "source_file": filename,
              "mask_uuid": mask_uuid,
              "left_x": left_x,
              "right_x": right_x,
              "top_y": top_y,
              "bottom_y": bottom_y,
              "id": mask_id
            })

with open(csv_file_path, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=csv_field_names)
    print(f'Writing {len(csv_rows_list)} lines to {csv_filename}')
    writer.writerows(csv_rows_list)

    # for row in csv_rows_list[0:100]:
    #   writer.writerow(row)

Writing 21122 lines to extracted_cells.csv
