Script to convert the output of the Cellpose GUI into the same format that is produced when segmenting with code. This makes the data more uniform and easier to use in downstream processing.

In [70]:
import numpy as np
import pandas as pd
from skimage.measure import regionprops_table
import skimage.measure
import math
import matplotlib.pyplot as plt
import os
from os import listdir
from cellpose import plot, utils
import cellpose
import skimage.io
from scipy import stats
import numpy as np
import time, os, sys
from urllib.parse import urlparse
import skimage.io
import matplotlib.pyplot as plt
import matplotlib as mpl
import cellpose
%matplotlib inline
# mpl.rcParams['figure.dpi'] = 300
import csv
from urllib.parse import urlparse
from cellpose import models, core, utils, io
import torch
from os import listdir
from os.path import isfile, join
import multipagetiff as mtif
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from cellpose import plot
import matplotlib.pyplot as plt
import pandas as pd
import colorsys

In [71]:
def hsv_to_rgb(arr):
    """Convert an array of HSV triples to RGB, element by element.

    Args:
        arr (ndarray): Array whose last axis holds the (h, s, v) components.

    Returns:
        ndarray: Array of the same shape with (r, g, b) along the last axis.
    """
    # Bring the component axis to the front so it unpacks into three planes.
    hue, sat, val = np.moveaxis(arr, -1, 0)
    # colorsys works on scalars; np.vectorize applies it to every element.
    convert = np.vectorize(colorsys.hsv_to_rgb)
    red, green, blue = convert(hue, sat, val)
    return np.stack((red, green, blue), axis=-1)

def mask_overlay(img, masks, colors=None):
    """Paint labelled masks in colour on top of a grayscale copy of the image.

    Args:
        img (int or float, 2D or 3D array): Image of size [Ly x Lx (x nchan)];
            input with more than two dimensions is averaged over its last axis.
        masks (int, 2D array): Masks where 0=NO masks; 1,2,...=mask labels.
        colors (int or float, 2D array, optional): Size [nmasks x 3], one RGB
            colour per mask. If the largest value exceeds 1 the colours are
            divided by 255. When omitted, each mask gets a randomly drawn hue.

    Returns:
        RGB (uint8, 3D array): Array of masks overlaid on grayscale image.
    """
    if colors is not None:
        if colors.max() > 1:
            colors = np.float32(colors)
            colors /= 255
        colors = utils.rgb_to_hsv(colors)

    gray = img.astype(np.float32)
    if img.ndim > 2:
        gray = gray.mean(axis=-1)

    # The value channel carries the (brightened) grayscale image.
    scaled = gray / 255. if gray.max() > 1 else gray
    hsv = np.zeros((gray.shape[0], gray.shape[1], 3), np.float32)
    hsv[:, :, 2] = np.clip(scaled * 1.5, 0, 1)

    # One random hue per label; only consulted when no colours were supplied.
    hues = np.linspace(0, 1, masks.max() + 1)[np.random.permutation(masks.max())]
    label_count = int(masks.max())
    for idx in range(label_count):
        where = np.nonzero(masks == idx + 1)
        rows, cols = where[0], where[1]
        hsv[rows, cols, 0] = hues[idx] if colors is None else colors[idx, 0]
        hsv[rows, cols, 1] = 1.0

    return (hsv_to_rgb(hsv) * 255).astype(np.uint8)

In [72]:
def image_to_rgb(img0, channels=[0, 0]):
    """Convert an image to an 8-bit RGB array of shape Ly x Lx x 3.

    Args:
        img0 (ndarray): Input image, 2D or 3D. After a 2D input is given a
            trailing channel axis, an array whose first axis is shorter than 5
            is treated as channel-first and transposed to channel-last.
        channels (list of int, optional): If channels[0] is 0 all channels are
            averaged into one gray plane that is repeated three times.
            Otherwise channels[0] and channels[1] are 1-based RGB slots that
            receive the first and second image channel. Defaults to [0, 0].

    Returns:
        ndarray: uint8 RGB image of shape Ly x Lx x 3.
    """
    work = img0.copy().astype(np.float32)
    if work.ndim < 3:
        work = work[:, :, np.newaxis]
    if work.shape[0] < 5:
        work = np.transpose(work, (1, 2, 0))
    if channels[0] == 0:
        work = work.mean(axis=-1)[:, :, np.newaxis]

    # Stretch every non-constant channel to [0, 1]; constant ones are left as is.
    for c in range(work.shape[-1]):
        plane = work[:, :, c]
        if np.ptp(plane) > 0:
            work[:, :, c] = np.clip(normalize99(plane), 0, 1)

    work *= 255
    work = np.uint8(work)

    if work.shape[-1] == 1:
        return np.tile(work, (1, 1, 3))

    rgb = np.zeros((work.shape[0], work.shape[1], 3), np.uint8)
    rgb[:, :, channels[0] - 1] = work[:, :, 0]
    if channels[1] > 0:
        rgb[:, :, channels[1] - 1] = work[:, :, 1]
    return rgb

def show_segmentation(fig, img, maski, channels=[0, 0], file_name=None):
    """Draw the original image, predicted outlines and predicted masks.

    The three panels occupy the first three slots of a 1 x 4 subplot grid on
    ``fig``. Use the channels option if img input is not an RGB image with
    3 channels.

    Args:
        fig (matplotlib.pyplot.figure): Figure in which to make plot.
        img (ndarray): 2D or 3D array. Image that was segmented.
        maski (int, ndarray): Label image where 0=NO masks; 1,2,...=mask labels.
        channels (list of int, optional): Channels forwarded to image_to_rgb
            when the image has fewer than 3 channels. Defaults to [0, 0].
        file_name (str, optional): If not None, the overlay and outline panels
            are saved as "<file_name without extension>_overlay.jpg" and
            "..._outlines.jpg". Defaults to None.
    """

    def _panel(position, picture, title):
        # One borderless, titled panel in the 1 x 4 grid.
        axis = fig.add_subplot(1, 4, position)
        axis.imshow(picture)
        axis.set_title(title)
        axis.axis("off")

    rgb = img.copy()
    if rgb.shape[0] < 4:
        rgb = np.transpose(rgb, (1, 2, 0))
    if rgb.shape[-1] < 3 or rgb.ndim < 3:
        rgb = image_to_rgb(rgb, channels=channels)
    elif rgb.max() <= 50.0:
        rgb = np.uint8(np.clip(rgb, 0, 1) * 255)
    _panel(1, rgb, "original image")

    outline_mask = utils.masks_to_outlines(maski)
    overlay = mask_overlay(rgb, maski)

    rows, cols = np.nonzero(outline_mask)
    outlined = rgb.copy()
    outlined[rows, cols] = np.array([255, 0, 0])  # pure red
    _panel(2, outlined, "predicted outlines")
    _panel(3, overlay, "predicted masks")

    if file_name is not None:
        stem = os.path.splitext(file_name)[0]
        io.imsave(stem + "_overlay.jpg", overlay)
        io.imsave(stem + "_outlines.jpg", outlined)

In [73]:
import os, datetime, gc, warnings, glob, shutil
from natsort import natsorted
import numpy as np
import cv2
import tifffile
import logging, pathlib, sys
from tqdm import tqdm
from pathlib import Path
import re
from roifile import ImagejRoi, roiwrite

# Record whether matplotlib can be imported; save_masks checks this flag
# before drawing its summary figure.
try:
    import matplotlib.pyplot as plt
    MATPLOTLIB = True
except ImportError:
    # Only a missing package should switch plotting off. A bare ``except``
    # would also swallow KeyboardInterrupt and unrelated errors.
    MATPLOTLIB = False

def normalize99(Y, lower=1, upper=99, copy=True):
    """
    Rescale an image so the lower percentile maps to 0.0 and the upper percentile to 1.0.

    Args:
        Y (ndarray): The input image.
        lower (int, optional): The lower percentile. Defaults to 1.
        upper (int, optional): The upper percentile. Defaults to 99.
        copy (bool, optional): Work on a copy of Y. When False and the
            percentile spread is too small, Y itself is zeroed in place.
            Defaults to True.

    Returns:
        ndarray: The rescaled image, or all zeros when the spread between the
        two percentiles is not greater than 1e-3.
    """
    data = Y.copy() if copy else Y
    low_value = np.percentile(data, lower)
    high_value = np.percentile(data, upper)
    spread = high_value - low_value
    if spread > 1e-3:
        return (data - low_value) / spread
    # Essentially flat image: there is no range to stretch, so blank it.
    data[:] = 0
    return data


def imsave(filename, arr):
    """
    Write an image array to disk, choosing the writer from the file extension.

    ".tif" and ".tiff" (case-insensitive) are written with tifffile. Every
    other extension is written with OpenCV; arrays with more than two
    dimensions are first passed through cv2.COLOR_BGR2RGB to swap the
    channel order.

    Args:
        filename (str): The name of the file to save the image to.
        arr (numpy.ndarray): The image array to be saved.

    Returns:
        None
    """
    extension = os.path.splitext(filename)[-1].lower()
    if extension in (".tif", ".tiff"):
        tifffile.imwrite(filename, arr)
        return
    if len(arr.shape) > 2:
        arr = cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
    cv2.imwrite(filename, arr)

def save_to_png(images, masks, file_names):
    """Deprecated convenience wrapper: forwards to save_masks with png=True.

    Every other save_masks option keeps its default value, including the
    default output directory. The original author notes that this does not
    work for 3D images.
    """
    save_masks(images, masks, file_names, png=True)
    
def outlines_to_text(base, outlines):
    """Write one comma-separated line of flattened coordinates per outline.

    Args:
        base (str): Output path without suffix; "_cp_outlines.txt" is appended.
        outlines (iterable of ndarray): Outline arrays; each one is flattened
            and its values are written on a single line.
    """
    with open(base + "_cp_outlines.txt", "w") as handle:
        for outline in outlines:
            row = ",".join(str(value) for value in outline.flatten())
            handle.write(row + "\n")


def save_masks(images, masks, file_names, png=True, tif=True, channels=[0, 0],
               suffix="", save_flows=False, save_outlines=True, dir_above=False,
               in_folders=False, savedir="/Volumes/fsmresfiles/Basic_Sciences/CDB/GoyalLab/People/KeerthanaArun/ArispeLab/MoisesEtAl/extractedData/segmentationResults/resubmissionImages/TempAnalysis/Static/", save_txt=True, save_mpl=True):
    """Save masks plus a plotted segmentation image to png and/or tiff.

    With ``basename`` being the image file name without directory or extension,
    the following files are written:

    * masks:    ``basename + "_cp_masks" + suffix + ".png"`` and/or ``".tif"``
    * figure:   ``basename + "_cp_output" + suffix + ".png"`` (png, save_mpl and
      matplotlib required; skipped when every image axis is longer than 3)
    * outlines: ``basename + "_outlines" + suffix + ".png"`` (save_outlines)
    * text:     ``basename + "_cp_outlines.txt"`` (save_txt)

    Masks with more than two dimensions are saved as tif only. Empty masks
    (max label 0) are saved as tif only, without outlines or text, and nothing
    is written for them when tif is False. Output directories are created if
    they do not exist.

    Args:
        images (list or ndarray): Images that were segmented.
        masks (list or ndarray): Label images, where 0=NO masks; 1,2,...=mask
            labels. If a list, images/masks/file_names are processed pairwise.
        file_names (list or str): Names of files of images.
        png (bool, optional): Save masks to PNG. Falls back to TIF when a
            label does not fit in 16 bits. Defaults to True.
        tif (bool, optional): Save masks to TIF. Defaults to True.
        channels (list of int, optional): Channels forwarded to
            plot.image_to_rgb for the outline image. Defaults to [0, 0].
        suffix (str, optional): Add name to saved masks. Defaults to "".
        save_flows (bool, optional): Accepted for signature compatibility;
            this function receives no flows, so nothing is saved for it.
            Defaults to False.
        save_outlines (bool, optional): Save an RGB image with the mask
            outlines drawn in red. Defaults to True.
        dir_above (bool, optional): When savedir is None, save in the directory
            above the image directory. Defaults to False.
        in_folders (bool, optional): Save masks, outlines and text outlines in
            the "masks", "outlines" and "txt_outlines" subfolders of savedir.
            Defaults to False.
        savedir (str, optional): Directory where outputs are saved. If None,
            saves to the image directory. NOTE: the default is a hard-coded,
            machine-specific path; pass an explicit directory elsewhere.
        save_txt (bool, optional): Save masks as list of outlines for ImageJ.
            Defaults to True.
        save_mpl (bool, optional): If True, saves a matplotlib figure of the
            original image and segmentation. Does not work for 3D. This takes
            a long time for large images. Defaults to True.

    Returns:
        None

    Raises:
        ValueError: If masks has more than two dimensions and tif is False.
    """

    if isinstance(masks, list):
        for image, mask, file_name in zip(images, masks, file_names):
            # channels is forwarded too; it used to be silently reset to the
            # default for list input.
            save_masks(image, mask, file_name, png=png, tif=tif, channels=channels,
                       suffix=suffix, dir_above=dir_above, save_flows=save_flows,
                       save_outlines=save_outlines, savedir=savedir, save_txt=save_txt,
                       in_folders=in_folders, save_mpl=save_mpl)
        return

    if masks.ndim > 2 and not tif:
        raise ValueError("cannot save 3D outputs as PNG, use tif option instead")

    if masks.max() == 0:
        if not tif:
            return
        # Empty masks: only the tif is worth writing.
        png = False
        save_outlines = False
        save_txt = False

    if savedir is None:
        if dir_above:
            # go up a level to save in its own folder
            savedir = Path(file_names).parent.parent.absolute()
        else:
            savedir = Path(file_names).parent.absolute()

    basename = os.path.splitext(os.path.basename(file_names))[0]
    if in_folders:
        maskdir = os.path.join(savedir, "masks")
        outlinedir = os.path.join(savedir, "outlines")
        txtdir = os.path.join(savedir, "txt_outlines")
    else:
        maskdir = savedir
        outlinedir = savedir
        txtdir = savedir

    exts = []
    if masks.ndim > 2:
        png = False
        tif = True
    if png:
        if masks.max() < 2**16:
            masks = masks.astype(np.uint16)
            exts.append(".png")
        else:
            # Labels do not fit in a 16-bit PNG; fall back to tif.
            png = False
            tif = True
    if tif:
        exts.append(".tif")

    # save masks
    os.makedirs(maskdir, exist_ok=True)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for ext in exts:
            imsave(os.path.join(maskdir, basename + "_cp_masks" + suffix + ext), masks)

    if save_mpl and png and MATPLOTLIB and not min(images.shape) > 3:
        # Make and save original/segmentation image
        img = images.copy()
        if img.ndim < 3:
            img = img[:, :, np.newaxis]
        elif img.shape[0] < 8:
            # np.transpose returns a new view; the result must be kept.
            img = np.transpose(img, (1, 2, 0))

        os.makedirs(savedir, exist_ok=True)
        fig = plt.figure(figsize=(12, 3))
        show_segmentation(fig, img, masks)
        fig.savefig(os.path.join(savedir, basename + "_cp_output" + suffix + ".png"),
                    dpi=300)
        plt.close(fig)

    # ImageJ txt outline files
    if masks.ndim < 3 and save_txt:
        os.makedirs(txtdir, exist_ok=True)
        outlines = utils.outlines_list(masks)
        outlines_to_text(os.path.join(txtdir, basename), outlines)

    # RGB outline images
    if masks.ndim < 3 and save_outlines:
        outlines = utils.masks_to_outlines(masks)
        outX, outY = np.nonzero(outlines)
        img0 = normalize99(images)
        if img0.shape[0] < 4:
            img0 = np.transpose(img0, (1, 2, 0))
        if img0.shape[-1] < 3 or img0.ndim < 3:
            img0 = plot.image_to_rgb(img0, channels=channels)
        elif img0.max() <= 50.0:
            # Clip to [0, 1] first, then scale to 8 bit. Clipping after the
            # multiplication collapsed the image to the values 0 and 1.
            img0 = np.uint8(np.clip(img0, 0, 1) * 255)
        imgout = img0.copy()
        imgout[outX, outY] = np.array([255, 0, 0])  # pure red
        os.makedirs(outlinedir, exist_ok=True)
        imsave(os.path.join(outlinedir, basename + "_outlines" + suffix + ".png"),
               imgout)

    # Flow images are not written: this function is not given any flows.

In [74]:
def masks_flows_to_seg(images, masks, diams, file_names, channels=None):
    """ save images and masks as a '_seg.npy' file to be loaded in GUI

    can be list input (multiple images) or single input (single image)

    saved to file_names[k] (without extension) + '_seg.npy'; the path is
    relative to the current working directory unless file_names is absolute

    Parameters
    -------------

    images: (list of) 2D or 3D arrays
        images that were segmented; for 2D masks, a 3D image whose first axis
        is shorter than 8 is treated as channel-first and stored channel-last

    masks: (list of) 2D or 3D arrays, int
        label images, where 0=NO masks; 1,2,...=mask labels

    diams: float or float array
        diameters used to run Cellpose; a scalar is repeated for list input

    file_names: (list of) str
        names of files of images

    channels: list of int (optional, default None)
        channels used to run Cellpose; None means [0, 0]. For list input, a
        list longer than 2 is indexed per image.

    """

    if channels is None:
        channels = [0, 0]

    if isinstance(masks, list):
        if not isinstance(diams, (list, np.ndarray)):
            diams = diams * np.ones(len(masks), np.float32)
        for k, (image, mask, diam, file_name) in enumerate(zip(images, masks, diams, file_names)):
            channels_img = channels
            if len(channels) > 2:
                channels_img = channels[k]
            masks_flows_to_seg(image, mask, diam, file_name, channels_img)
        return

    if len(channels) == 1:
        channels = channels[0]

    def _label_dtype(labels):
        # Smallest unsigned type that holds every label in this array.
        return np.uint16 if labels.max() < 2**16 - 1 else np.uint32

    # Keep the label value on outline pixels, zero everywhere else.
    outlines = masks * utils.masks_to_outlines(masks)
    base = os.path.splitext(file_names)[0]
    print(base)

    if masks.ndim != 3 and images.ndim == 3 and images.shape[0] < 8:
        # np.transpose returns a new view; the result used to be discarded, so
        # the image was stored untransposed (and 2D input raised).
        images = np.transpose(images, (1, 2, 0))

    np.save(base + '_seg.npy',
            {'img': images,
             'outlines': outlines.astype(_label_dtype(outlines)),
             'masks': masks.astype(_label_dtype(masks)),
             'chan_choose': channels,
             'ismanual': np.zeros(int(masks.max()), bool),
             'filename': file_names,
             'est_diam': diams})

In [75]:
def findMatchingPairs(imageDirectory, segDirectory):
    """Pair every image file with the '_seg.npy' file sharing its base name.

    Image files are recognised by extension (.jpg, .jpeg, .png, .tif, .tiff,
    case-insensitive). Hidden files (names starting with '.', such as the
    '._*' companions macOS writes on network volumes) are ignored in both
    directories. If several images share a base name, the alphabetically last
    one wins.

    Args:
        imageDirectory (str): Directory containing the image files.
        segDirectory (str): Directory containing the '<base>_seg.npy' files.

    Returns:
        dict: base name -> (image filename, seg filename), in sorted filename
        order so that repeated runs process files in the same order.
    """
    imageExtensions = ('.jpg', '.jpeg', '.png', '.tif', '.tiff')
    segSuffix = '_seg.npy'

    imageFiles = {}
    # os.listdir order is arbitrary; sort for reproducible processing order.
    for filename in sorted(os.listdir(imageDirectory)):
        if filename.startswith('.'):
            continue
        baseName, extension = os.path.splitext(filename)
        if extension.lower() in imageExtensions:
            imageFiles[baseName] = filename

    segFiles = {}
    for filename in sorted(os.listdir(segDirectory)):
        if filename.startswith('.') or not filename.endswith(segSuffix):
            continue
        segFiles[filename[:-len(segSuffix)]] = filename

    return {base: (imageFiles[base], segFiles[base])
            for base in imageFiles
            if base in segFiles}

def loadCombinedData(imageDirectory, segDirectory, baseName, imageFilename, segFilename, outputPath):
    """Re-save one GUI segmentation in the format produced by the code path.

    Reads the image and its GUI '_seg.npy' file, then writes a new '_seg.npy'
    into '<outputPath>/Segmentation' via masks_flows_to_seg and calls
    save_to_png for the mask/outline images.

    Args:
        imageDirectory (str): Directory containing the image.
        segDirectory (str): Directory containing the GUI '_seg.npy' file.
        baseName (str): Shared base name of the pair (not used here).
        imageFilename (str): File name of the image inside imageDirectory.
        segFilename (str): File name of the '_seg.npy' inside segDirectory.
        outputPath (str): Root output directory; the 'Images' and
            'Segmentation' subdirectories are created if missing.

    Returns:
        None

    Raises:
        KeyError: If the seg file lacks 'masks', 'diameter' or 'chan_choose'.
    """
    imgPath = os.path.join(imageDirectory, imageFilename)
    # skimage.io is the module this notebook imports; the ``skio`` alias used
    # before was never defined and raised NameError on a fresh kernel.
    imgs = skimage.io.imread(imgPath)

    segPath = os.path.join(segDirectory, segFilename)
    # NOTE(review): allow_pickle=True unpickles the file and can execute
    # arbitrary code; only load '_seg.npy' files from a trusted source.
    segData = np.load(segPath, allow_pickle=True).item()
    print("Input files done")
    masks = segData['masks']
    diameterParam = segData['diameter']
    channelsParam = segData['chan_choose']

    # Absolute paths, so the second chdir below does not depend on the first.
    outputImagesPath = os.path.abspath(os.path.join(outputPath, "Images"))
    outputSegPath = os.path.abspath(os.path.join(outputPath, "Segmentation"))
    os.makedirs(outputImagesPath, exist_ok=True)
    os.makedirs(outputSegPath, exist_ok=True)

    filename = os.path.basename(imgPath)

    # masks_flows_to_seg writes relative to the working directory, so switch
    # into the target folders temporarily and always switch back; otherwise
    # relative input directories break for the next pair.
    previousDirectory = os.getcwd()
    try:
        os.chdir(outputSegPath)
        print("Starting to save masks")
        # int() truncates any fractional part of the stored diameter.
        masks_flows_to_seg(imgs, masks, diams=int(diameterParam), file_names=filename, channels=channelsParam)
        # NOTE(review): save_to_png uses save_masks' default savedir, so with
        # the current absolute default the PNGs do not land in
        # outputImagesPath - confirm this is intended.
        os.chdir(outputImagesPath)
        print("Finished saving masks. Starting to save png")
        save_to_png(imgs, masks, filename)
    finally:
        os.chdir(previousDirectory)
    print(f"Finished for {filename}")
    return

In [76]:
def processAllPairs(imageDirectory, segDirectory, outputPath):
    """Convert every matched image/segmentation pair found in the two directories.

    Args:
        imageDirectory (str): Directory containing the image files.
        segDirectory (str): Directory containing the '_seg.npy' files.
        outputPath (str): Root directory passed on to loadCombinedData.

    Returns:
        dict: base name -> value returned by loadCombinedData for that pair.
    """
    pairs = findMatchingPairs(imageDirectory, segDirectory)
    return {
        base: loadCombinedData(imageDirectory, segDirectory, base,
                               imageFile, segFile, outputPath)
        for base, (imageFile, segFile) in pairs.items()
    }



In [77]:
def main():
    """Run the conversion for the 'Subconfluent Static' image set."""
    locations = {
        "imageDirectory": '/Volumes/fsmresfiles/Basic_Sciences/CDB/GoyalLab/People/KeerthanaArun/ArispeLab/MoisesEtAl/extractedData/segmentationResults/resubmissionImages/2024_07_26_subconfluent_20x/Subconfluent Static/Images',
        "segDirectory": '/Volumes/fsmresfiles/Basic_Sciences/CDB/GoyalLab/People/KeerthanaArun/ArispeLab/MoisesEtAl/extractedData/segmentationResults/resubmissionImages/2024_07_26_subconfluent_20x/Subconfluent Static/Segmentation',
        "outputPath": '/Volumes/fsmresfiles/Basic_Sciences/CDB/GoyalLab/People/KeerthanaArun/ArispeLab/MoisesEtAl/extractedData/segmentationResults/resubmissionImages/TempAnalysis',
    }
    processAllPairs(**locations)


if __name__ == "__main__":
    main()

Input files done
Starting to save masks
subconfluentstatic_20x_0016_zoverlay
Finished saving masks. Starting to save png
Finished for subconfluentstatic_20x_0016_zoverlay.jpg
Input files done
Starting to save masks
subconfluentstatic_20x_0060_zoverlay
Finished saving masks. Starting to save png
Finished for subconfluentstatic_20x_0060_zoverlay.jpg
Input files done
Starting to save masks
subconfluentstatic_20x_0064_zoverlay
Finished saving masks. Starting to save png
Finished for subconfluentstatic_20x_0064_zoverlay.jpg
Input files done
Starting to save masks
subconfluentstatic_20x_0024_zoverlay
Finished saving masks. Starting to save png
Finished for subconfluentstatic_20x_0024_zoverlay.jpg
Input files done
Starting to save masks
subconfluentstatic_20x_0028_zoverlay
Finished saving masks. Starting to save png
Finished for subconfluentstatic_20x_0028_zoverlay.jpg
Input files done
Starting to save masks
subconfluentstatic_20x_0008_zoverlay
Finished saving masks. Starting to save png
Fini