In [1]:
import numpy as np
import pandas as pd
from os.path import join
from skimage.measure import regionprops_table
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join

In [2]:
# Input locations of the tiled segmentations. Both trees are expected to share the
# same <age>/<aorta>/<file> sub-layout, because the analysis loop looks up every
# nuclei file under the same relative path inside the cell-size tree.
# Cell-size (cytoplasm) segmentation tiles:
cellSizePath = "segmentations/cellSize/tiles/tilesCellSize"
# Nuclei segmentation tiles:
nucleiPath = "segmentations/cellSize/tiles/tilesNuclei"

In [3]:
# Every directory below (and including) nucleiPath, in lexicographic order.
# os.walk yields (dirpath, dirnames, filenames); only dirpath is needed here.
nucFolders = sorted(dirPath for dirPath, _subDirs, _fileNames in os.walk(nucleiPath))

In [4]:
def nucleiCheck(cytoPath, nucleiPath, borderpixel, accuracy):
    """
    Remove cells that touch the image border or lack an acceptable nucleus.

    Both inputs are ``.npy`` files that hold a pickled dict. The nuclei file must
    provide ``'masks'``; the cytoplasm file must provide ``'masks'``, ``'outlines'``
    and ``'img'``. (This looks like Cellpose ``*_seg.npy`` output - TODO confirm.)
    Label 0 is skipped in both mask arrays, i.e. treated as background.

    A cell is erased (set to 0 in both the masks and the outlines array) when:
    - any of its pixels lies within ``borderpixel`` pixels of an image edge, or
    - the nuclei mask under the cell holds at most one distinct label (no nucleus
      at all, or the whole cell is covered by a single label), or
    - the nuclei mask under the cell holds more than three distinct labels
      (background 0 counts as a label when present), or
    - no nucleus touching the cell has at least ``accuracy`` of its own pixels
      inside the cell.

    Parameters:
    - cytoPath (str): Path to the cytoplasm segmentation file (npy format).
    - nucleiPath (str): Path to the nuclei segmentation file (npy format).
    - borderpixel (int): Width, in pixels, of the band along each image edge in
      which cells are rejected.
    - accuracy (float): Minimum fraction (0-1) of a nucleus' pixels that must lie
      inside the cell for the cell to be kept.

    Returns:
    - cytoOutlines (ndarray): Outline array with the rejected cells set to 0.
    - cytoMasks (ndarray): Label mask with the rejected cells set to 0.
    - imgCyto (ndarray): The ``'img'`` array from the cytoplasm file, unchanged.
    """
    # SECURITY NOTE: allow_pickle=True can execute arbitrary code while loading;
    # only use this on segmentation files from a trusted source.
    nucleiSegmentation = np.load(nucleiPath, allow_pickle=True).item()
    cytoSegmentation = np.load(cytoPath, allow_pickle=True).item()

    nucleiMasks = nucleiSegmentation['masks']
    cytoMasks = cytoSegmentation['masks']
    cytoOutlines = cytoSegmentation['outlines']
    imgCyto = cytoSegmentation['img']

    height, width = imgCyto.shape[0], imgCyto.shape[1]

    # The label list is taken once up front; erasing cells below must not affect
    # which labels are visited.
    for c in np.unique(cytoMasks):
        if c == 0:
            continue

        cellCoord = np.where(cytoMasks == c)
        rows, cols = cellCoord[0], cellCoord[1]

        # Pixel indices are 0-based, so the excluded band is [0, borderpixel - 1]
        # on the near side and [size - borderpixel, size - 1] on the far side.
        touchesBorder = (
            rows.min() < borderpixel
            or rows.max() >= height - borderpixel
            or cols.min() < borderpixel
            or cols.max() >= width - borderpixel
        )

        if touchesBorder or not _cellHasValidNucleus(
            c, rows, cols, cytoMasks, nucleiMasks, accuracy
        ):
            cytoMasks[rows, cols] = 0
            cytoOutlines[rows, cols] = 0

    return cytoOutlines, cytoMasks, imgCyto


def _cellHasValidNucleus(cellLabel, rows, cols, cytoMasks, nucleiMasks, accuracy):
    """Return True when cell `cellLabel` (pixels rows/cols) passes the nucleus criteria."""
    nucleiLabels = np.unique(nucleiMasks[rows, cols])

    # <= 1 distinct label: the cell holds no nucleus, or is entirely one label.
    # > 3 distinct labels: too many nuclei. Background 0 is part of this count,
    # so a cell with background pixels may hold at most two nuclei.
    # NOTE(review): the earlier comment spoke of "more than 3 nuclei" - confirm
    # whether counting the background label here is intended.
    if len(nucleiLabels) <= 1 or len(nucleiLabels) > 3:
        return False

    # Best fraction, over all nuclei touching the cell, of the nucleus' own
    # pixels that fall inside this cell.
    coverage = 0.0
    for n in nucleiLabels:
        if n == 0:
            continue
        nucleiCoord = np.where(nucleiMasks == n)
        cellUnderNucleus = cytoMasks[nucleiCoord[0], nucleiCoord[1]]
        fraction = np.count_nonzero(cellUnderNucleus == cellLabel) / len(cellUnderNucleus)
        coverage = max(coverage, fraction)

    return coverage >= accuracy

In [None]:
# Filter settings passed to nucleiCheck for every tile.
BORDER_PIXELS = 5             # cells within this many pixels of an image edge are dropped
MIN_NUCLEUS_COVERAGE = 0.75   # minimum fraction of a nucleus that must lie inside its cell

# One region-property table per analysed file; concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop).
resultFrames = []
# Nuclei files without a cell-size segmentation at the same relative path, kept for later review.
dicNoValue = {"age":[],"aorta":[],"file":[]}

# Iterate through folders containing nuclei segmentations.
# The working directory is deliberately left unchanged: nucFolder, cellSizePath and
# the output location may all be relative paths that must resolve against the same base.
for nucFolder in nucFolders:
    # Collect .npy files from the current folder, skipping macOS metadata files
    # (".DS_Store", "._*"). Sorted so the row order of the results is reproducible.
    files = sorted(
        f for f in listdir(nucFolder)
        if isfile(join(nucFolder, f))
        and ".DS_Store" not in f
        and ".npy" in f
        and "._" not in f
    )
    if not files:
        continue

    # Metadata comes from the folder structure: <...>/<age>/<aorta>
    folderParts = nucFolder.split("/")
    aorta = folderParts[-1]
    age = folderParts[-2]

    for file in files:
        # The matching cell-size segmentation lives under the same <age>/<aorta>/<file> path
        cellSizePathAna = join(cellSizePath, age, aorta, file)
        if os.path.exists(cellSizePathAna):
            cytoOutlines, cytoMasks, imgCyto = nucleiCheck(
                cellSizePathAna, join(nucFolder, file), BORDER_PIXELS, MIN_NUCLEUS_COVERAGE
            )
            # Shape descriptors of every cell that survived the nucleus check
            prop_dict = regionprops_table(cytoMasks, properties=['label','area', 'centroid', 'major_axis_length', 'minor_axis_length', "orientation"])
            dfHelp = pd.DataFrame(prop_dict)
            dfHelp['age'] = age
            dfHelp['aorta'] = aorta
            dfHelp['file'] = file
            resultFrames.append(dfHelp)
        else:
            # Store cases without corresponding cell size segmentations for later review
            dicNoValue["age"].append(age)
            dicNoValue["aorta"].append(aorta)
            dicNoValue["file"].append(file)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was analysed.
dfResults = pd.concat(resultFrames) if resultFrames else pd.DataFrame()

In [7]:
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output folder exists before writing the table.
os.makedirs("results", exist_ok=True)
dfResults.to_csv("results/cellShapesNew.csv")