In [None]:
'''
Mask Generation Notebook
- modified to use GPU acceleration
- by Arun Chakravorty (< Lex)
- 12/8/21 
'''

In [None]:
#Install Cellpose Models
import tifffile as tf
from pathlib import Path
import numpy as np
import time, os, sys, random
from urllib.parse import urlparse
import skimage.io
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl
import glob
%matplotlib inline
import re
from util import pil_imread
mpl.rcParams['figure.dpi'] = 300

from urllib.parse import urlparse
import shutil

# Importing cellpose.models is announced first because it may take a while
# (the message below suggests model weights can be fetched at this point — TODO confirm).
print ("Downloading Models")
from cellpose import models

# ANSI escape sequences used to emphasise console messages in the GPU-check cell.
#https://stackoverflow.com/questions/8924173/how-do-i-print-bold-text-in-python
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
END = '\033[0m'

# Project-local module; its nuclear_cyto_matching() is used in the
# "Match masks" section of this notebook.
import nuclear_cyto_match as ncm

In [None]:
#Check if colab notebook instance has GPU access
if models.use_gpu() == False: 
  #Warnings from the ZeroCost StarDist notebook
    print(BOLD+UNDERLINE+'You do not have GPU access.'+END)
    print('Make sure that you asked for GPU access in the OnDemand Instance') 
    print('Expect slow performance. To access GPU try reconnecting later')
    use_GPU=False
else:
    print(BOLD+UNDERLINE+"You have access to the GPU."+END+"\nDetails are:")
    print("*************************************************")
    # Nvidia GPU specs: 
    !nvidia-smi
    use_GPU=True

print("*************************************************")
print("Libraries imported and configured")

In [None]:
import plotly.express as px

def plot_2d(img, zmax):
    """Show a single image as an interactive Plotly figure.
    Parameters:
    -----------
    img = image array to display (this notebook passes 2D max projections)
    zmax = upper intensity limit forwarded to px.imshow"""

    # Fixed display settings: 600x600 figure, image sent to the browser as a
    # PIL-encoded binary string.
    imshow_options = dict(
        width=600,
        height=600,
        binary_string=True,
        binary_compression_level=4,
        binary_backend='pil',
        zmax=zmax,
    )

    px.imshow(img, **imshow_options).show()

In [None]:
def plot_slideshow(img, zmax):
    """Show an image stack as an interactive Plotly figure with a slider.
    Parameters:
    -----------
    img = image stack; axis 0 is used as the animation (slider) axis
    zmax = upper intensity limit forwarded to px.imshow"""

    # Same display settings as the 2D viewer, plus animation_frame=0 so that
    # px.imshow slices the array along its first axis to build the frames.
    imshow_options = dict(
        width=600,
        height=600,
        binary_string=True,
        binary_compression_level=4,
        animation_frame=0,
        binary_backend='pil',
        zmax=zmax,
    )

    px.imshow(img, **imshow_options).show()

In [None]:
# Load the images:
input_directory = "/groups/CaiLab/personal/Lex/raw/150genes_040122/segmentation/*.tif"


def position_index(path):
    """Return the integer N from the 'MMStack_Pos<N>' tag in a file path.

    Parameters
    ----------
    path : str
        File path or name containing 'MMStack_Pos<N>'.

    Returns
    -------
    int
        The position number N.

    Raises
    ------
    ValueError
        If the path contains no 'MMStack_Pos<N>' tag. Raised explicitly so a
        stray file in the folder is reported by name instead of failing with
        an AttributeError on a None match.
    """
    match = re.search(r'MMStack_Pos(\d+)', path)
    if match is None:
        raise ValueError(f"No 'MMStack_Pos<N>' tag found in file name: {path}")
    return int(match.group(1))


# Organize files numerically (Pos2 before Pos10); sorted() keeps plain str paths.
files = sorted(glob.glob(input_directory), key=position_index)

In [None]:
# Display the file list to confirm which positions were found and in what order.
files

In [None]:
# Read every file into memory; `imgs` holds one image stack per entry of
# `files`, in the same order.
imgs = [pil_imread(path, swapaxes=True) for path in tqdm(files)]

In [None]:
# Inspect the shape of the first loaded stack.
# Axis order per the original note: Z, C, X, Y — TODO confirm against pil_imread(swapaxes=True)
imgs[0].shape

## View Single File

In [None]:
#import scipy.ndimage as ndimage
##use ndimage blur if you are using transcripts for segmentation

In [None]:
# Swap the first two axes of the first stack so that index 0 / 1 on the
# leading axis select the cytoplasmic / nuclear data respectively.
img = np.swapaxes(imgs[0], 0, 1)

# Max projection (along the new leading axis of each selection) of the
# cytoplasmic (index 0) and nuclear (index 1) channels.
maxc, maxn = (np.max(img[channel], axis=0) for channel in (0, 1))

In [None]:
# Stack the two projections: max_cell[0] is cytoplasmic, max_cell[1] is nuclear.
max_cell = np.array([maxc,maxn])

In [None]:
# Cytoplasmic Channel
# zmax caps the displayed intensity; adjust it if the image looks saturated or too dark.
plot_2d(max_cell[0], zmax=10000)

In [None]:
# Nuclear Channel
# zmax caps the displayed intensity; adjust it if the image looks saturated or too dark.
plot_2d(max_cell[1], zmax=500)

## Settings for Cellpose

In [None]:
# ---------------------------------------------------------------------------
# User settings
# ---------------------------------------------------------------------------
Model_Choice = "Nucleus" #["Cytoplasm", "Nucleus"]
model_choice=Model_Choice
Channel_for_segmentation="1" #[0,1,2,3]
segment_channel=int(Channel_for_segmentation)

###If you choose cytoplasm, set True if you have a nuclear channel
Use_nuclear_channel= True #{type:"boolean"}
Nuclear_channel="1" #[1,2,3]
nuclear_channel=int(Nuclear_channel)

# GPU usage is not configured here: `use_GPU` is set by the GPU-check cell earlier.

### Diameter of cell (pixels):
#### Enter 0 if you don't know and cellpose will estimate it automatically. You can define this later as well.
# NOTE(review): the model.eval(...) cells further down pass diameter=200 and
# channels=[0,0] explicitly, so `diameter` and `channels` from this cell are
# currently not what Cellpose receives — confirm which values are intended.
Diameter =  350 #{type:"number"}
diameter=Diameter

# ---------------------------------------------------------------------------
# Channels to run segmentation on
# ---------------------------------------------------------------------------
# Cellpose channel codes: grayscale=0, R=1, G=2, B=3
# channels = [cytoplasm, nucleus]; set the second entry to 0 if there is no nuclear channel.
#   channels = [0,0]  # grayscale
#   channels = [2,3]  # G=cytoplasm, B=nucleus
#   channels = [2,1]  # G=cytoplasm, R=nucleus
# A list of pairs (e.g. [[2,3], [0,0], [0,0]]) can be given when images differ.
#
# model_type and channels are chosen in the same branch so they can never
# disagree: anything other than "Cytoplasm" uses the nuclei model with a
# single segmentation channel (previously `channels` was left undefined for
# an unrecognised choice while model_type still fell back to "nuclei").
if model_choice == "Cytoplasm":
    model_type = "cyto"
    channels = [segment_channel, nuclear_channel if Use_nuclear_channel else 0]
else:
    model_type = "nuclei"
    channels = [segment_channel, 0]


# DEFINE CELLPOSE MODEL
# model_type='cyto' or model_type='nuclei'
model = models.Cellpose(gpu=use_GPU, model_type=model_type)

# if diameter is set to None, the size of the cells is estimated on a per image basis
# you can set the average cell `diameter` in pixels yourself (recommended)
# diameter can be a list or a single number for all images
if diameter == 0:
    diameter = None
    print("Diameter is set to None. The size of the cells will be estimated on a per image basis")

### Test Cellpose on a Single Image
The ***Flow_threshold parameter*** is the maximum allowed error of the flows for each mask. The default is 0.4.

*   **Increase** this threshold if cellpose is not returning as many masks as you’d expect

*   **Decrease** this threshold if cellpose is returning too many ill-shaped masks. 

The ***Cell Probability Threshold*** (`cellprob_threshold`) sets the minimum cell-probability value a pixel must exceed to be included in a mask. The default is 0.0.

*   **Decrease** this threshold if cellpose is not returning as many masks as you’d expect or if masks are too small

*   **Increase** this threshold if cellpose is returning too many masks, especially from dull/dim areas.

In [None]:
# Channels to run segmentation on, as [cytoplasm, nucleus].
# Cellpose channel codes: grayscale=0, R=1, G=2, B=3; use 0 for the second
# entry when there is no nuclear channel.
#   [0,0] grayscale | [2,3] G=cytoplasm, B=nucleus | [2,1] G=cytoplasm, R=nucleus
# A list of pairs (e.g. [[2,3], [0,0], [0,0]]) can be given when images differ.
#
# NOTE(review): this replaces the `channels` value chosen in the settings
# cell, and diameter=200 is passed instead of the `diameter` setting —
# confirm this is intended.
channels = [0,0]

# Trial run on the nuclear max projection of the first image only.
masks, flows, styles, diams = model.eval(
    max_cell[1],
    diameter=200,
    channels=channels,
    flow_threshold=2,
    cellprob_threshold=-1,
)


In [None]:
# Cellpose plotting helpers (also used by the "Show Results" cell further down).
from cellpose import plot

# Overlay the trial segmentation on the nuclear max projection it was computed from.
# flows[0] is the first element of the flow outputs returned by model.eval.
fig = plt.figure(figsize=(12,5))
plot.show_segmentation(fig, max_cell[1], masks, flows[0], channels=channels)
plt.tight_layout()
plt.show()

### Run Cellpose on Entire Directory 

In [None]:
# Nuclear max projection for every loaded stack, in the same order as `imgs`.
# For each stack: swap the first two axes, take index 1 (nuclear channel),
# then take the maximum along axis 0.
#
# A comprehension is used on purpose: the previous loop rebound the globals
# `img` and `maxn` from the single-image preview, so re-running the
# `max_cell` cell afterwards would have combined the first image's
# cytoplasmic projection with the last image's nuclear projection.
imgs_final = [
    np.max(np.swapaxes(stack, 0, 1)[1], axis=0)
    for stack in tqdm(imgs)
]

In [None]:
# Segment every nuclear projection in one call, using the same parameters as
# the single-image trial; the results replace the trial's
# masks / flows / styles / diams.
masks, flows, styles, diams = model.eval(
    imgs_final,
    diameter=200,
    channels=channels,
    flow_threshold=2,
    cellprob_threshold=-1,
)

### Show Results 

In [None]:
# Display the label masks returned by the batch run (indexed per image below).
masks

In [None]:
# Index into imgs_final / masks / flows of the image to inspect.
image_number = 0

img1=imgs_final[image_number]
# Optional 8-bit conversion, currently disabled (img_as_ubyte is not imported in this notebook):
#if img1.dtype!='uint8':
#    img1=img_as_ubyte(img1)

# Overlay the selected image with its mask and the first element of its flow outputs.
fig = plt.figure(figsize=(12,5))
plot.show_segmentation(fig, img1, masks[image_number], flows[image_number][0], channels=channels)
plt.tight_layout()
plt.show()

### Save Files 

In [None]:
from pathlib import Path

#name of output directory
save_dir = '/groups/CaiLab/personal/Lex/raw/150genes_040122/notebook_pyfiles/labeled_images/nuc_masks/'

# Only the masks are written by this cell (flows are not saved).
print("Segmentation Done. Saving Masks now")
print("Save Directory is: ",save_dir)

# mkdir(exist_ok=True) already tolerates an existing directory, so no separate
# existence check is needed.
save_path = Path(save_dir)
save_path.mkdir(parents=True, exist_ok=True)

for idx,mask in enumerate(masks):
    # Reuse the input file's base name so each mask maps back to its source image.
    file_name=os.path.splitext(os.path.basename(files[idx]))[0]
    # Joining via pathlib works whether or not save_dir ends with a slash.
    mask_output_name=str(save_path / (file_name+".tif"))
    # Save mask as 16-bit so images with more than 255 labelled objects are stored correctly.
    mask=mask.astype(np.uint16)
    skimage.io.imsave(mask_output_name,mask, check_contrast=False)

print("Files saved")

# Match masks

In [None]:
#read in masks
nuc_paths = glob.glob("/groups/CaiLab/personal/Lex/raw/150genes_040122/notebook_pyfiles/labeled_images/nuc_masks/*.tif")
cyto_paths = glob.glob("/groups/CaiLab/personal/Lex/raw/150genes_040122/notebook_pyfiles/labeled_images/cyto_masks/*.tif")


def sort_by_position(paths):
    """Return `paths` ordered by the integer N in their 'MMStack_Pos<N>' tag.

    Parameters
    ----------
    paths : list of str
        File paths, each containing 'MMStack_Pos<N>'.

    Returns
    -------
    list of str
        A new list sorted numerically by position (Pos2 before Pos10).

    Raises
    ------
    ValueError
        If a path has no 'MMStack_Pos<N>' tag; the offending path is named in
        the message.
    """
    def position(path):
        match = re.search(r'MMStack_Pos(\d+)', path)
        if match is None:
            raise ValueError(f"No 'MMStack_Pos<N>' tag found in file name: {path}")
        return int(match.group(1))

    return sorted(paths, key=position)


#organize files numerically
nuc_paths = sort_by_position(nuc_paths)
cyto_paths = sort_by_position(cyto_paths)

In [None]:
# Take a look at the first cytoplasm mask file as a quick sanity check.
plt.imshow(tf.imread(cyto_paths[0]))

In [None]:
#read in files
# The nuclear and cytoplasm lists are paired by index, so they must have the
# same length. Checking up front replaces a late IndexError (fewer cyto
# masks) or silently ignored files (more cyto masks) with one clear message.
if len(nuc_paths) != len(cyto_paths):
    raise ValueError(
        f"Found {len(nuc_paths)} nuclear masks but {len(cyto_paths)} cytoplasm masks; "
        "each position needs one of each."
    )

nuclear = []
cyto = []
for nuc_path, cyto_path in tqdm(zip(nuc_paths, cyto_paths), total=len(nuc_paths)):
    nuclear.append(pil_imread(nuc_path))
    cyto.append(pil_imread(cyto_path))

In [None]:
# Match nuclear and cyto masks using the project-local nuclear_cyto_match module.
# NOTE(review): the meaning of threshold=0.05 is defined inside
# ncm.nuclear_cyto_matching and is not visible here — document it once confirmed.
cyto_new = ncm.nuclear_cyto_matching(cyto,nuclear, threshold=0.05)

In [None]:
# Take a look at the first matched result.
plt.imshow(cyto_new[0])

In [None]:
#write images
# Create the output folder once, before the loop (path is relative to the
# notebook's working directory).
matched_dir = Path("../labeled_images/matched")
matched_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): output names use the running index i, not the position number
# parsed from the input file names; the two agree only if the inputs are
# Pos0..PosN-1 with no gaps — confirm.
for i, lab_img in enumerate(cyto_new):
    tf.imwrite(str(matched_dir / f"MMStack_Pos{i}.tif"), lab_img)