In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/micro_data.zip -d /content

In [166]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from torch.utils.data import random_split, DataLoader, Dataset
from PIL import Image
from glob import glob

HALF_SIDE_SCALE = 1.2
CLASSES = []


def get_cell_line_from(folder):
  split = folder.split("_")
  if len(split) == 4:
    return split[2]
  return None


folder_names = glob("/content/micro_data/*/*")
for folder_name in folder_names:
  cell_line = get_cell_line_from(folder_name)
  if cell_line != None:
    CLASSES.append(cell_line)

CLASSES = list(set(CLASSES))
print(CLASSES)

['LCLC', 'MCF7', 'H838', 'Hela', 'MDAMB231', 'HepG2']


In [174]:
def square_mask(image_shape, center, half_side):
  row_min = max(int(center[0] - half_side), 0)
  row_max = min(int(center[0] + half_side), image_shape[0])
  col_min = max(int(center[1] - half_side), 0)
  col_max = min(int(center[1] + half_side), image_shape[1])
  mask = np.zeros(image_shape, dtype=bool)
  mask[row_min:row_max, col_min:col_max] = True
  return mask


def get_cell_image(im_mic, im_markers, cell_id):
  im_marker = np.where(im_markers == cell_id, im_markers, 0)
  indices = np.indices(im_marker.shape)
  cell_indicies = np.where(im_marker != 0, indices, 0)

  centroid = (np.round(np.sum(cell_indicies[0]) / np.count_nonzero(im_marker)),
              np.round(np.sum(cell_indicies[1]) / np.count_nonzero(im_marker)))

  max_y_dist = np.max(np.abs(np.where(im_marker != 0, indices[0], centroid[0]) - centroid[0]))
  max_x_dist = np.max(np.abs(np.where(im_marker != 0, indices[1], centroid[1]) - centroid[1]))
  half_side = max(max_y_dist, max_x_dist)

  im_cell = cv2.cvtColor(im_mic, cv2.COLOR_BGR2GRAY)
  mask = square_mask(im_cell.shape, centroid, HALF_SIDE_SCALE * half_side)

  im_cell[mask == False] = 0
  contours, _ = cv2.findContours(im_cell, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  x, y, w, h = cv2.boundingRect(contours[0])

  im_cell = im_cell[y:y+h, x:x+w]
  return im_cell


out_dir = "/content/drive/MyDrive/cell_data"
if not os.path.exists(out_dir):
  os.makedirs(out_dir)

files = glob("/content/micro_data/**/*_seg.npz", recursive=True)

for file in files:
  cell_line = get_cell_line_from(os.path.dirname(file))
  if cell_line == None:
    continue

  seg = np.load(file)
  im_mic = seg["im_mic"]
  im_markers = seg["im_markers"]

  max_cell_id = np.max(im_markers)

  for cell_id in range(1, max_cell_id + 1):
    file_name = os.path.basename(file).split('/')[-1].split(".npz")[0]
    out_path = os.path.join(out_dir, f"{cell_line}_{file_name}_{cell_id}.png")
    if not os.path.exists(out_path):
      im_cell = get_cell_image(im_mic, im_markers, max_cell_id)
      cv2.imwrite(out_path, im_cell)

In [None]:
!zip -r /content/drive/MyDrive/cell_data.zip /content/drive/MyDrive/cell_data
!rm -rf /content/drive/MyDrive/cell_data