# Downstream Analysis of cell segmentation
This code takes the numpy.files generated by cellpose and tabulates physical properties for each mask(major axis and minor axis length, area of mask , centroid, orientation, mean intensity in each channel). The values are saved in a csv file. 

In [1]:
import numpy as np
import pandas as pd
from skimage.measure import regionprops_table
import skimage.measure
import math
import matplotlib.pyplot as plt
import os
from os import listdir
from cellpose import plot, utils
import cellpose
import skimage.io
from scipy import stats

In [2]:
import os

# Root directory that contains output files
dirPath = '/home/mzo5929/Keerthana/MoiseEtAl/extractedData/segmentationResults/figure6/'
inputPath1 = []

# Walk through all directories and files in dirPath
for root, dirs, files in os.walk(dirPath):
    # Filter for .npy files
    for file in files:
        if file.endswith(".npy"):
            filePath = os.path.join(root, file)
            inputPath1.append(filePath)

# inputPath1 now contains paths to all .npy files under dirPath, including in deeply nested subdirectories


In [4]:
len(inputPath1)

Only for K40A static (to only consider images without the string spec)

In [None]:
spec = "flow"
inputPath1= [i for i in inputPath1 if not spec in i]

For specific input files (enter below)

In [4]:
inputPath1 = ['']

Regular- elongation/eccentricity (for most images)

In [6]:
properties = ('label','centroid', 'area', 'orientation', 'major_axis_length', 'minor_axis_length', 'bbox')
index = 0
for inputPath in inputPath1:
    print(index)
    index = index + 1
    data = np.load(inputPath, allow_pickle=True).item()
    outlines = data['outlines']
    masks = data['masks']
    image = data['img']
    uniqueCells = np.unique(masks)
    #Remove boundary conditions 
    excludeX = np.arange(image.shape[1] - 4 ,image.shape[1] +1)
    downValues = np.arange(0,5)
    excludeX = np.concatenate((downValues, excludeX))
    excludeY = np.arange(image.shape[0] - 4 ,image.shape[0] +1)
    excludeY = np.concatenate((downValues, excludeY))
    for c in uniqueCells:
        if c != 0:
            cellCoord = np.where(masks==c)
            checkY = np.isin(cellCoord[0],excludeY)
            checkX = np.isin(cellCoord[1],excludeX)
            if np.any(checkY):
              masks[cellCoord[0], cellCoord[1]] = 0
              outlines[cellCoord[0], cellCoord[1]] = 0
            elif np.any(checkX):
              masks[cellCoord[0], cellCoord[1]] = 0
              outlines[cellCoord[0], cellCoord[1]] = 0
    masks_array = np.asarray(masks)
    prop_dict = regionprops_table(masks_array, properties = properties)
    df = pd.DataFrame(prop_dict)
    def coordinates(row, coord):
        if coord == "xMajor1":
            return row ['centroid-1'] - math.sin(row['orientation']) * 0.5 * row['major_axis_length']
        elif coord == "yMajor1":
            return row ['centroid-0'] - math.cos(row['orientation']) * 0.5 * row['major_axis_length']
        elif coord == "xMinor1":
            return row ['centroid-1'] + math.cos(row['orientation']) * 0.5 * row['minor_axis_length']
        elif coord == "yMinor1":
            return row ['centroid-0'] - math.sin(row['orientation']) * 0.5 * row['minor_axis_length']


        elif coord == "xMajor2":
            return row ['centroid-1'] + math.sin(row['orientation']) * 0.5 * row['major_axis_length']
        elif coord == "yMajor2":
            return row ['centroid-0'] + math.cos(row['orientation']) * 0.5 * row['major_axis_length']
        elif coord == "xMinor2":
            return row ['centroid-1'] - math.cos(row['orientation']) * 0.5 * row['minor_axis_length']
        elif coord == "yMinor2":
            return row ['centroid-0'] + math.sin(row['orientation']) * 0.5 * row['minor_axis_length']
        else:
            return 0

    #Plot major and minor axis
    df['xMajor1'] = df.apply(lambda row: coordinates(row, "xMajor1"), axis=1)
    df['yMajor1'] = df.apply(lambda row: coordinates(row, "yMajor1"), axis=1)
    df['xMinor1'] = df.apply(lambda row: coordinates(row, "xMinor1"), axis=1)
    df['yMinor1'] = df.apply(lambda row: coordinates(row, "yMinor1"), axis=1)

    df['xMajor2'] = df.apply(lambda row: coordinates(row, "xMajor2"), axis=1)
    df['yMajor2'] = df.apply(lambda row: coordinates(row, "yMajor2"), axis=1)
    df['xMinor2'] = df.apply(lambda row: coordinates(row, "xMinor2"), axis=1)
    df['yMinor2'] = df.apply(lambda row: coordinates(row, "yMinor2"), axis=1)

    x0 = df['centroid-1']
    y0 = df['centroid-0']
    xMajor1 = df['xMajor1']
    xMajor2 = df['xMajor2']
    yMajor1 = df['yMajor1']
    yMajor2 = df['yMajor2']

    xMinor1 = df['xMinor1']
    xMinor2 = df['xMinor2']
    yMinor1 = df['yMinor1']
    yMinor2 = df['yMinor2']

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.imshow(image, cmap=plt.cm.gray)

    #ax.plot((x0, xMajor1), (y0, yMajor1), '-r', linewidth=1)
    #ax.plot((x0, xMinor1), (y0, yMinor1), '-r', linewidth=1)

    ax.plot((xMajor1, xMajor2), (yMajor1, yMajor2), '-r', linewidth=1)
    ax.plot((xMinor1, xMinor2), (yMinor1, yMinor2), '-g', linewidth=1)
    ax.plot(x0, y0, '.g', markersize=2)
    outputBasePath = inputPath.replace("segmentationResults", "parameterResults")
    outputBasePath = outputBasePath.replace('/Segmentation', '')
    # Derive the base directory for 'parameterResults' and append the specific subfolders 'image' and 'csv'
    outputImageDir = os.path.join(os.path.dirname(outputBasePath), "Images")
    outputCsvDir = os.path.join(os.path.dirname(outputBasePath), "Parameters")

    # Ensure the output directories exist
    os.makedirs(outputImageDir, exist_ok=True)
    os.makedirs(outputCsvDir, exist_ok=True)

    # Define the output file names, replacing '.npy' with the appropriate extension
    outputImageFile = os.path.basename(outputBasePath).replace(".npy", ".png")
    outputCsvFile = os.path.basename(outputBasePath).replace(".npy", ".csv")
    print(outputImageFile)
    # Construct the full paths for the output files
    outputImagePath = os.path.join(outputImageDir, outputImageFile)
    outputCsvPath = os.path.join(outputCsvDir, outputCsvFile)

    # Save the plot as a PNG image in the 'image' subfolder
    plt.savefig(outputImagePath, dpi = 300)
    # plt.show()

    # Save the DataFrame as a CSV file in the 'csv' subfolder
    # Assuming 'df' is your DataFrame
    df.to_csv(outputCsvPath)

###Till here

0
huvec_sm9_hdac6ko_flow_vecad__0031_zoverlay_seg.png
1
huvec_sm9_hdac6ko_flow_vecad__0029_zoverlay_seg.png
2
huvec_sm9_hdac6ko_flow_vecad__0009_zoverlay_seg.png
3
huvec_sm9_hdac6ko_flow_vecad__0041_zoverlay_seg.png
4
huvec_sm9_hdac6ko_flow_vecad__0008_zoverlay_seg.png
5
huvec_sm9_hdac6ko_flow_vecad__0001_zoverlay_seg.png
6
huvec_sm9_hdac6ko_flow_vecad__0015_zoverlay_seg.png
7
huvec_sm9_hdac6ko_flow_vecad__0036_zoverlay_seg.png
8
huvec_sm9_hdac6ko_flow_vecad__0038_zoverlay_seg.png
9
huvec_sm9_hdac6ko_flow_vecad__0043_zoverlay_seg.png
10
huvec_sm9_hdac6ko_flow_vecad__0039_zoverlay_seg.png
11
huvec_sm9_hdac6ko_flow_vecad__0045_zoverlay_seg.png
12
huvec_sm9_hdac6ko_flow_vecad__0023_zoverlay_seg.png
13
huvec_sm9_hdac6ko_flow_vecad__0007_zoverlay_seg.png
14
huvec_sm9_hdac6ko_flow_vecad__0035_zoverlay_seg.png
15
huvec_sm9_hdac6ko_flow_vecad__0000_zoverlay_seg.png
16
huvec_sm9_hdac6ko_flow_vecad__0030_zoverlay_seg.png
17
huvec_sm9_hdac6ko_flow_vecad__0010_zoverlay_seg.png
18
huvec_sm9_hdac6ko

  fig, ax = plt.subplots(figsize=(10, 6))


huvec_sm9_hdac6ko_flow_vecad__0026_zoverlay_seg.png
21
huvec_sm9_hdac6ko_flow_vecad__0021_zoverlay_seg.png
22
huvec_sm9_hdac6ko_flow_vecad__0020_zoverlay_seg.png
23
huvec_sm9_hdac6ko_flow_vecad__0012_zoverlay_seg.png
24
huvec_sm9_hdac6ko_flow_vecad__0040_zoverlay_seg.png
25
huvec_sm9_hdac6ko_flow_vecad__0022_zoverlay_seg.png
26
huvec_sm9_hdac6ko_flow_vecad__0033_zoverlay_seg.png
27
huvec_sm9_hdac6ko_flow_vecad__0006_zoverlay_seg.png
28
huvec_sm9_hdac6ko_flow_vecad__0011_zoverlay_seg.png
29
huvec_sm9_hdac6ko_flow_vecad__0016_zoverlay_seg.png
30
huvec_sm9_hdac6ko_flow_vecad__0014_zoverlay_seg.png
31
huvec_sm9_hdac6ko_flow_vecad__0018_zoverlay_seg.png
32
huvec_sm9_hdac6ko_flow_vecad__0025_zoverlay_seg.png
33
huvec_sm9_hdac6ko_flow_vecad__0034_zoverlay_seg.png
34
huvec_sm9_hdac6ko_flow_vecad__0042_zoverlay_seg.png
35
huvec_sm9_hdac6ko_flow_vecad__0003_zoverlay_seg.png
36
huvec_sm9_hdac6ko_flow_vecad__0024_zoverlay_seg.png
37
huvec_sm9_hdac6ko_flow_vecad__0027_zoverlay_seg.png
38
huvec_sm9_

In [None]:
import os
import matplotlib.pyplot as plt
from matplotlib.image import imread

def list_images(directory):
    """ Recursively list all image files under the given directory grouped by subdirectory """
    image_files = {}
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                if root not in image_files:
                    image_files[root] = []
                image_files[root].append(os.path.join(root, file))
    return image_files

def display_images_by_folder(image_dict, figsize=(20, 12), dpi=300):
    """ Display images grouped by their folder, with each image displayed using specified figsize and dpi """
    for folder, images in image_dict.items():
        for image in images:
            fig, ax = plt.subplots(figsize=figsize)
            fig.suptitle(f'Image from {folder}: {os.path.basename(image)}')
            img = imread(image)
            ax.imshow(img)
            ax.axis('off')
            plt.show()

# Usage example:
root_directory = '/home/mzo5929/Keerthana/MoiseEtAl/extractedData/parameterResults/figure3/'  # Update this to your images directory path
images = list_images(root_directory)
display_images_by_folder(images)

For K40 mutants - only fitc channel

In [None]:
properties = ('label', 'mean_intensity', 'major_axis_length', 'minor_axis_length','orientation')
for inputPath in inputPath1:
    data = np.load(inputPath, allow_pickle=True).item()
    outlines = data['outlines']
    masks = data['masks']
    flows = data['flows']
    
    ##Use lines below for K40A_new3 (specific file names were used during this analyis)
    copy = "K40A tiffs and jpgs"
    image_new = inputPath.split('Output')[0] + copy+ inputPath.split('Output')[1]
    image_new = image_new.replace("dapi_cy5","fitc")

    #Rename input file to open fitc file
    image_new = image_new.replace(".npy",".jpg")
    image_new = image_new.replace("_seg","")
    image_new = image_new.replace("overlay","fitc")
    print(image_new)
    
    #Confirm if fitc file exists, read the file and get mean intensity in green channel
    if os.path.isfile(image_new):
        image = skimage.io.imread(image_new)
        uniqueCells = np.unique(masks)
        excludeX = np.arange(image.shape[1] - 4 ,image.shape[1] +1)
        downValues = np.arange(0,5)
        excludeX = np.concatenate((downValues, excludeX))
        excludeY = np.arange(image.shape[0] - 4 ,image.shape[0] +1)
        excludeY = np.concatenate((downValues, excludeY))
        for c in uniqueCells:
            if c != 0:
                cellCoord = np.where(masks==c)
                checkY = np.isin(cellCoord[0],excludeY)
                checkX = np.isin(cellCoord[1],excludeX)
                if np.any(checkY):
                    masks[cellCoord[0], cellCoord[1]] = 0
                    outlines[cellCoord[0], cellCoord[1]] = 0
                elif np.any(checkX):
                    masks[cellCoord[0], cellCoord[1]] = 0
                    outlines[cellCoord[0], cellCoord[1]] = 0
        masks_array = np.asarray(masks)
        prop_dict = regionprops_table(masks_array, intensity_image=image ,properties = properties)
        df = pd.DataFrame(prop_dict)
        #Save mean intensity and other values as csv file
        df.to_csv(inputPath.replace(".npy","_fitc.csv", 1))
        #Save outlines
        outlines = data['outlines']
        outlines_0 = pd.DataFrame(outlines[0])
        outlines_1 = pd.DataFrame(outlines[1])
        outlines =[outlines_0, outlines_1]
        outlines= pd.concat(outlines)
        outlines.to_csv(inputPath.replace(".npy","_outlines.csv", 1))

In [10]:
!python -m pip install cellpose[gui]

Collecting pyqtgraph>=0.11.0rc0 (from cellpose[gui])
  Downloading pyqtgraph-0.13.4-py3-none-any.whl.metadata (1.3 kB)
Collecting pyqt5 (from cellpose[gui])
  Downloading PyQt5-5.15.10-cp37-abi3-manylinux_2_17_x86_64.whl.metadata (2.1 kB)
Collecting pyqt5.sip (from cellpose[gui])
  Downloading PyQt5_sip-12.13.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (504 bytes)
Collecting google-cloud-storage (from cellpose[gui])
  Downloading google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata (6.1 kB)
Collecting google-auth<3.0dev,>=2.26.1 (from google-cloud-storage->cellpose[gui])
  Downloading google_auth-2.29.0-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting google-api-core<3.0.0dev,>=2.15.0 (from google-cloud-storage->cellpose[gui])
  Downloading google_api_core-2.18.0-py3-none-any.whl.metadata (2.7 kB)
Collecting google-cloud-core<3.0dev,>=2.3.0 (from google-cloud-storage->cellpose[gui])
  Downloading google_cloud_core-2.4.1-py2.py3-none-any.whl.metadata (2.7 kB)

creating new log file
2024-04-15 20:08:49,549 [INFO] WRITING LOG OUTPUT TO /home/mzo5929/.cellpose/run.log
qt.qpa.plugin: Could not load the Qt platform plugin "xcb" in "" even though it was found.
This application failed to start because no Qt platform plugin could be initialized. Reinstalling the application may fix this problem.

Available platform plugins are: eglfs, linuxfb, minimal, minimalegl, offscreen, vnc, wayland-egl, wayland, wayland-xcomposite-egl, wayland-xcomposite-glx, webgl, xcb.

