## A pipeline for processing and analyzing multiplexed images

#### Developed for the related project: *A spatial single-cell type map of adult human spermatogenesis* (Cecilia Bergström's group)

### Import required libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import os
import time

from skimage import util, segmentation, measure, io

from stardist.models import StarDist2D, Config2D
from csbdeep.utils import normalize

import warnings
warnings.filterwarnings('ignore')

%run ../src/functions.py

### Define input path, image of interest and other parameters

In [5]:
# ---------------------------------------------------------------------------
# Input / output
# ---------------------------------------------------------------------------
# Folder whose sub-folders are batch-processed -- update this path!
inputpath = '../data/'

# Write the intermediate image files (label masks, binary masks, ...) to disk?
save_img = False

# ---------------------------------------------------------------------------
# Channels
# ---------------------------------------------------------------------------
# Order of the channels in the input images
cols = [
    'DAPI',
    'OPAL480',
    'OPAL520',
    'OPAL570',
    'OPAL620',
    'OPAL690',
    'OPAL780',
    'Autofluorescence',
]

# ---------------------------------------------------------------------------
# Segmentation
# ---------------------------------------------------------------------------
# Segmentation method: choose between 'cellpose' or 'stardist'
seg_method = 'stardist'

# StarDist parameters
nms_thresh = 0.8
prob_thresh = 0.7

# Cellpose parameters
cellpose_diam = 30
flow_thresh = 0.9
cell_prob = 0.4

# Split the image before segmentation?
split = False

# Distance used to grow each cell region after segmentation
dist = 3

# ---------------------------------------------------------------------------
# OPAL quantification
# ---------------------------------------------------------------------------
# Pre-process the OPAL channels? True or False
preprocessOPAL = False

# Apply a size filter to the segmented objects; the limits are given per
# channel (same order as `cols`). A minimum of 0 means no filtering.
filterByArea = True
min_size = np.array([0] + [100] * 6 + [0])
max_size = np.array([100000] * 8)

# Number of multi-Otsu threshold levels per channel (same order as `cols`)
multi_otsu_levels = np.array([0] + [2] * 5 + [4, 0])

### Batch process input directory

In [6]:
# Batch-process every sub-folder of `inputpath`.
#
# For each folder, every '.tif' image whose name does not contain
# "Simple Segmentation" is segmented, its labels are expanded by `dist`, and
# per-cell intensities are quantified. The results are written as
# ';'-separated CSV files:
#   <folder>/<output dir>/mean_intensity.csv            - output of get_avg_intensity
#   <folder>/<output dir>/mean_intensity_threshold.csv  - output of opal_quantification
#   <inputpath>/histogram.csv                           - `n` from get_hist_pos_signal, one row per image
#   <inputpath>/distribution_per_positive.csv           - `signal_stats` from get_hist_pos_signal
t_start = time.time()

# create dataframes to store the statistics of all files and folders
distrib_posit = pd.DataFrame()
distrib_hist = pd.DataFrame(columns=['ID','1','2','3','4','5'])
distrib_hist.set_index(['ID'], inplace=True)

# column lists that do not change between images
properties = ['label', 'intensity_mean']
cols_filtered = ['OPAL480','OPAL520','OPAL570','OPAL620','OPAL690','OPAL780']
cols_sorted = ['OPAL480', 'OPAL620', 'OPAL690', 'OPAL780', 'OPAL570']
cols_stats = ['OPAL480', 'OPAL620', 'OPAL690', 'OPAL780', 'OPAL570', 'size']

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the CSV files reproducible between runs
for file in sorted(os.listdir(inputpath)): # for each folder

    # os.path.join works whether or not `inputpath` ends with a separator
    folder = os.path.join(inputpath, file)
    if not os.path.isdir(folder) or ".ipynb_checkpoints" in file:
        continue

    print("Folder to be processed: " + file)

    # Create output folder (named after the segmentation method and its parameters)
    if seg_method == "stardist":
        outdir = 'output_stardist_nms-' + str(nms_thresh) + '_prob-' + str(prob_thresh)
    else:
        outdir = 'output_cellpose_diam-' + str(cellpose_diam) + '_flow-' + str(flow_thresh) + '_prob-' + str(cell_prob)
    outpath = os.path.join(folder, outdir)
    os.makedirs(outpath, exist_ok=True)

    # create dataframes to store mean_intens and mean_intens_thres of this folder
    average_intens = pd.DataFrame()
    average_intens_thres = pd.DataFrame()

    for im in sorted(os.listdir(folder)): # for each tif file in each folder
        if "Simple Segmentation" in im or not im.endswith('.tif'):
            continue

        print("file: " + im)

        # Load image of interest and define channel(s) to be segmented
        ref_img = io.imread(os.path.join(folder, im))

        # Select channel to be segmented: DAPI + AF (channels 0 and 7 in `cols`)
        original = ref_img[0,:,:] + ref_img[7,:,:]

        # Apply pre-processing
        print('pre-processing cell image...')
        filtered = preprocess(original)

        # Segment nuclei
        labels = segment_nuclei(filtered, split, seg_method, [nms_thresh, prob_thresh], [cellpose_diam, flow_thresh, cell_prob])

        # Get binary mask from labels
        binary_mask = labels.copy()
        binary_mask[binary_mask > 0] = 1

        # Save segmented masks
        if save_img:
            io.imsave(os.path.join(outpath, im + '_cells_labels.tif'), labels)
            # Cast explicitly instead of util.img_as_ubyte: img_as_ubyte rescales
            # integer input by the range of its dtype, which turns the 0/255 mask
            # of a wider integer label image into an all-zero image.
            io.imsave(os.path.join(outpath, im + '_cells_binary.tif'), (binary_mask * 255).astype(np.uint8))

        # Expand labels to incorporate cells' neighborhoods
        expanded_labels = segmentation.expand_labels(labels, distance=dist)
        if save_img:
            io.imsave(os.path.join(outpath, im + '_cells_labels_expanded.tif'), expanded_labels)

        # Get average intensity
        print('quantifying...')
        mean_intens = get_avg_intensity(ref_img, expanded_labels, cols, properties)

        # create ID based on image file name
        file_name = im.replace('_component_data.tif','')

        # index the per-cell table by (ID, label) and append it to the folder table;
        # concatenating right away keeps a snapshot that later helper calls cannot alter
        mean_intens = mean_intens.assign(ID=file_name).reset_index(['label']).set_index(['ID','label'])
        average_intens = pd.concat([average_intens, mean_intens], axis=0)

        # get expanded-labels image as a binary mask
        expanded_binary_mask = expanded_labels.copy()
        expanded_binary_mask[expanded_binary_mask > 0] = 1

        # OPAL quantification
        # path of the Ilastik mask: image name without extension and square brackets
        ilastik_mask = im.replace('.tif','').replace('[','').replace(']','')
        ilastik_mask = os.path.join(folder, ilastik_mask + '_520_Simple Segmentation.tiff')
        mean_intens_thres, thresholded, intens_masks = opal_quantification(ref_img, expanded_labels, expanded_binary_mask, ilastik_mask, cols, filterByArea, min_size, max_size, preprocessOPAL, multi_otsu_levels)

        # filter ['DAPI', 'Autofluorescence'] out (OPAL520 is only removed further below)
        mean_intens_thres = filter_columns(['DAPI', 'Autofluorescence'], mean_intens_thres)

        # index by (ID, label) and append to the folder table
        mean_intens_thres = mean_intens_thres.assign(ID=file_name).reset_index(['label']).set_index(['ID','label'])
        average_intens_thres = pd.concat([average_intens_thres, mean_intens_thres], axis=0)

        if save_img:
            save_results_opal_quantification(cols_filtered, os.path.join(outpath, im), thresholded, intens_masks)

        # filter ['OPAL520'] out
        mean_intens_thres_OPAL = filter_columns(['OPAL520'], mean_intens_thres)

        # rearrange cols order
        mean_intens_thres_OPAL = mean_intens_thres_OPAL[cols_sorted]

        # remove rows with all cols zero value
        mean_intens_thres_OPAL_nonzero = mean_intens_thres_OPAL.loc[~(mean_intens_thres_OPAL==0).all(axis=1)]

        # get stats of positive signals
        n, bins, signal_stats = get_hist_pos_signal(mean_intens_thres_OPAL, mean_intens_thres_OPAL_nonzero)

        # store the first five values of n as one row per image, indexed by the image ID.
        # The index must be kept (no ignore_index=True), otherwise histogram.csv
        # loses the information which row belongs to which image.
        idm = file_name
        new_row = pd.DataFrame([[n[0], n[1], n[2], n[3], n[4]]], columns=['1','2','3','4','5'], index=pd.Index([idm], name='ID'))
        distrib_hist = pd.concat([distrib_hist, new_row])

        # signal_stats should be saved for all files and folders
        stats_df = pd.DataFrame(signal_stats, columns=cols_stats).assign(ID=file_name).set_index(['ID'])
        distrib_posit = pd.concat([distrib_posit, stats_df], axis=0)

        print(' ')

    # per-folder results
    average_intens.to_csv(os.path.join(outpath, 'mean_intensity.csv'), sep=';')
    average_intens_thres.to_csv(os.path.join(outpath, 'mean_intensity_threshold.csv'), sep=';')

# results over all folders
distrib_hist.to_csv(os.path.join(inputpath, 'histogram.csv'), sep=';')
distrib_posit.to_csv(os.path.join(inputpath, 'distribution_per_positive.csv'), sep=';')

t_taken = time.time() - t_start
print("Time taken: ", t_taken, "s")

Folder to be processed: 20456931
file: 20456931_Core[1,2,A]_[13058,43408]_component_data.tif
pre-processing cell image...
segmenting...
Found model '2D_versatile_fluo' for 'StarDist2D'.
Loading network weights from 'weights_best.h5'.
Loading thresholds from 'thresholds.json'.
Using default values: prob_thresh=0.479071, nms_thresh=0.3.
quantifying...
channel: DAPI
channel: OPAL480
channel: OPAL520
channel: OPAL570
channel: OPAL620
channel: OPAL690
channel: OPAL780
channel: Autofluorescence
 
Time taken:  32.3654727935791 s
