# MM pipeline: Run the StarDist2D segmentation model over all folders

This notebook loads a pretrained StarDist2D segmentation model and applies it to all chamber folders of every position listed in the meta file (the master directory and the meta file name are read from the main config file loaded in the first code cell). Only folders containing microscopy chamber data should be inside each position folder. The segmentation is applied to all images matching `*Ch1*tif`, and each segmentation image is saved into a newly created folder named *seg_sd2* within the respective image folder. For the moment, it assumes single-page tif files and saves single-page tif files with exactly the same name as the input image used for the segmentation prediction.

### Load main config file. Adapt directory

In [1]:
import json

# Name of the main config file and the directory it lives in -- adapt to your setup
mainconfigname = 'jbanalysisconfig_mm'
configdir = 'C://Users/zinke/Documents/GitHub/azimages/julian/MM_pipeline'

# Make sure the file name carries the .json extension and the directory ends with a
# slash, so that plain string concatenation below yields the full file path
mainconfigname = mainconfigname if mainconfigname.endswith('.json') else mainconfigname + '.json'
configdir = configdir if configdir.endswith('/') else configdir + '/'

# Parse the JSON config file
with open(configdir + mainconfigname, 'r') as file:
    data = json.load(file)

# Publish every top-level config entry as a notebook-level variable of the same name
# (later cells read e.g. masterdir, metacsv, flims, n_channel directly)
for key, value in data.items():
    globals()[key] = value

### Load various packages

In [2]:
import numpy as np
import sys
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
from tifffile import imread, imwrite
from datetime import datetime
from csbdeep.utils import Path, normalize
from skimage.measure import regionprops, regionprops_table
from skimage import io
from skimage import segmentation
from skimage import color
from stardist.matching import matching_dataset
from stardist import fill_label_holes, random_label_cmap, relabel_image_stardist, calculate_extents, gputools_available, _draw_polygons
from stardist.models import Config2D, StarDist2D, StarDistData2D
import os
from tensorflow import keras
import cv2
import pandas as pd
from datetime import date
# Fix NumPy's global random seed so that anything drawing from it is repeatable between runs
np.random.seed(42)
# Label colormap from stardist's random_label_cmap(); not referenced by the other cells visible here
lbl_cmap = random_label_cmap()

def add_prefix(props, prefix):
    """Return a new dict in which intensity-related keys carry a channel prefix.

    Every key of ``props`` that contains the substring 'intensity' is renamed to
    ``"<prefix>_<key>"``; all other keys are kept unchanged. Values are passed
    through as they are and ``props`` itself is not modified.
    """
    renamed = {}
    for name, column in props.items():
        target = f"{prefix}_{name}" if 'intensity' in name else name
        renamed[target] = column
    return renamed

### Check if GPU can be accessed

In [3]:
# Displays stardist's gputools_available() result as the cell output (True in the saved run).
# NOTE(review): presumably this reports on the optional gputools package rather than on
# TensorFlow's own GPU access -- confirm before relying on it as a TensorFlow GPU check.
gputools_available()

If you want to compute separable approximations, please install it with
pip install scikit-tensor-py3


True

### Load in meta file and display head. Check if correct

In [4]:
# Read the experiment meta table; the 'stardist' status column is forced to string dtype
meta = pd.read_csv(
    os.path.join(masterdir, metacsv),
    dtype={'stardist': str},
)
# Distinct replicate identifiers present in the table
replicates = meta['replicate'].unique()
# Show the first rows for a visual sanity check
meta.head()

Unnamed: 0,replicate,pos,strain,dt,channel,Note,Exclude,MaxFr,type,aip_type,...,rotation,StageX,StageY,PxinUmX,PxinUmY,register,stardist,stardist_fails,delta,delta_fails
0,s02,1,agr-I,5.0,2,,,,sAIP,AIP-I,...,0.0,-62860.49788,-39324.74159,0.065,0.065,Done,Done,,,
1,s03,1,agr-I,5.0,4,,,240.0,sAIP,AIP-I,...,0.0,-59819.89317,-38749.07585,0.065,0.065,Done,Done,,,
2,s06,1,agr-I,5.0,4,,,225.0,sAIP,AIP-I,...,-0.25,-60615.52819,-37553.06656,0.065,0.065,Done,Done,,,
3,s06,2,agr-I,5.0,5,,,192.0,sAIP,AIP-I,...,-0.25,-61960.15363,-35559.9919,0.065,0.065,Done,Done,,,
4,s07,1,agr-II,5.0,2,,,,sAIP,AIP-II,...,0.25,-57633.2106,-43430.1259,0.065,0.065,Done,Done,,,


### Load stardist model
Here, the model is loaded. In the config file, you need to specify the directory that contains a folder named *stardist*. This *stardist* folder needs to contain the files *weights_best.h5* and *config.json* and, optionally, *thresholds.json*.

In [5]:
# Show which directory the model is taken from
print(stardistmodeldir)
# No new config is passed: the model stored in the folder 'stardist' below
# stardistmodeldir is loaded
model = StarDist2D(config=None, name='stardist', basedir=stardistmodeldir)
# Axes handed to normalize() in the main loop; (0, 1) = normalize channels independently
axis_norm = (0, 1)

C://Users/zinke/Documents/GitHub/azimages/julian/stardist/models
Loading network weights from 'weights_best.h5'.
Loading thresholds from 'thresholds.json'.
Using default values: prob_thresh=0.586968, nms_thresh=0.3.


### Define regionprops parameters. You could add more if you want to

In [6]:
# Region properties measured for every segmented object
prop_list = ['label',
             'area', 'centroid',
             'axis_major_length', 'axis_minor_length',
             'eccentricity']
# Intensity properties are only requested when flims is set
# (the main loop then passes a fluorescence image to regionprops_table)
if flims:
    prop_list += ['intensity_mean', 'intensity_max']

### Limit GPU RAM usage by StarDist

In [7]:
# NOTE(review): this import sits here rather than in the package-import cell near the top.
from csbdeep.utils.tf import limit_gpu_memory
# Limit the GPU memory used by TensorFlow to leave some for OpenCL-based computations.
# ramlimit (fraction) and ramsize (total memory) are not defined in the visible cells --
# presumably they come from the config file; adjust them there as necessary.
# NOTE(review): the model has already been loaded in an earlier cell; confirm that
# setting the limit at this point still takes effect.
limit_gpu_memory(fraction=ramlimit, total_memory=ramsize)
# alternatively, try this:
# limit_gpu_memory(None, allow_growth=True)

## Main segmentation loop
This loop goes over each row in the meta file whose preprocessing is marked as completed (register == 'Done') and which is neither excluded nor already segmented, and applies the StarDist segmentation model to each position/chamber iteratively. For the moment, it is not parallelized but could probably benefit from that.

In [None]:
# Main segmentation loop: one iteration per row (= one position) of the meta table.
#
# For every eligible position, each chamber folder is segmented frame by frame with the
# loaded StarDist model and the label images are written to <chamber>/seg_sd2. The region
# properties of ALL chambers and frames of the position are collected and written once to
# <Chambers>/stardistdata/PosXX.csv. Afterwards the meta file is re-read, the row is
# marked 'Done' in the 'stardist' column and per-frame errors go to 'stardist_fails'.
#
# Changes compared with the previous version of this cell:
#   * the per-position table is no longer reset at frame 0 of every chamber, so the CSV
#     now holds all chambers instead of only the last one processed;
#   * no table can leak in from an earlier chamber/position/kernel run when frame 0 fails;
#   * the CSV is written once per position instead of after every single frame;
#   * chamber folders are matched on the folder name (the full path always contains
#     'Chambers', which made the old 'Chamb' filter accept every subfolder).
meta_path = os.path.join(masterdir, metacsv)

for i in range(meta.shape[0]):
    # Skip rows that are already segmented, flagged for exclusion, or not yet registered
    if (meta.loc[i, 'stardist'] == 'Done'
            or meta.loc[i, 'Exclude'] == 'excl'
            or meta.loc[i, 'register'] != 'Done'):
        continue

    replicate = meta.loc[i, 'replicate']
    pos_label = str(meta.loc[i, 'pos']).zfill(2)
    max_frame = meta.loc[i, 'MaxFr']

    # Directory setup for the current experiment
    main_folder = os.path.join(masterdir, savedirname, replicate, 'Chambers')
    save_directory = os.path.join(main_folder, 'stardistdata')
    os.makedirs(save_directory, exist_ok=True)

    current_directory = os.path.join(main_folder, f'Pos{pos_label}')
    if not os.path.exists(current_directory):
        continue

    # Chamber folders of the current position, in a reproducible (sorted) order
    with os.scandir(current_directory) as entries:
        chamber_dirs = sorted(
            entry.path for entry in entries
            if entry.is_dir() and 'Chamb' in entry.name
        )

    fails = []            # error messages of frames that could not be processed
    position_frames = []  # one region-property table per successfully processed frame

    for inputs_folder in tqdm(chamber_dirs, desc=f'{replicate}, Pos {pos_label}'):
        outputs_folder = os.path.join(inputs_folder, "seg_sd2")
        os.makedirs(outputs_folder, exist_ok=True)
        # Remove segmentations left over from an earlier run
        for stale_file in Path(outputs_folder).glob('*tif'):
            os.remove(stale_file)

        # Timelapse images: Ch1 is segmented, Ch2/Ch3 are optional intensity channels
        images = sorted(Path(inputs_folder).glob('*Ch1*tif'))
        if flims:
            images_fl = sorted(Path(inputs_folder).glob('*Ch2*tif'))
            if n_channel > 2:
                images_fl2 = sorted(Path(inputs_folder).glob('*Ch3*tif'))

        # Frame range: all images unless the meta table gives MaxFr.
        # NOTE(review): with MaxFr set, MaxFr - 1 frames are processed (frame number
        # MaxFr itself is left out) -- kept unchanged; confirm this is intended.
        frame_list = range(len(images)) if pd.isna(max_frame) else range(int(max_frame) - 1)

        for frame_index in frame_list:
            try:
                # Read the fluorescence images first, if applicable
                if flims:
                    fluorescence_image = imread(images_fl[frame_index])
                    if n_channel > 2:
                        fluorescence_image2 = imread(images_fl2[frame_index])

                # Main segmentation process
                main_image = imread(images[frame_index])
                normalized_image = normalize(main_image, 1, 99.8, axis=axis_norm)
                labels, details = model.predict_instances(normalized_image)

                # Save the label image under the same file name as the input image
                filename_segmentation = os.path.join(outputs_folder, os.path.basename(images[frame_index]))
                imwrite(filename_segmentation, labels, append=False, metadata=None)

                # Region properties (with intensities of the first fluorescence channel, if any)
                region_props = regionprops_table(
                    labels,
                    intensity_image=fluorescence_image if flims else None,
                    properties=prop_list,
                )
                if flims and n_channel > 2:
                    # Two fluorescence channels: prefix the intensity columns per channel
                    # and merge the second channel's intensity columns into the table
                    region_props = add_prefix(region_props, 'fluor1')
                    region_props2 = regionprops_table(labels, intensity_image=fluorescence_image2, properties=prop_list)
                    region_props2 = add_prefix(region_props2, 'fluor2')
                    for key, value in region_props2.items():
                        if 'intensity' in key:
                            region_props[key] = value

                # Identification columns; resulting order: replicate, pos, chamber, frame, ...
                region_props_df = pd.DataFrame(region_props)
                region_props_df.insert(0, 'frame', frame_index + 1)
                region_props_df.insert(0, 'pos', int(os.path.basename(current_directory)[-2:]))
                region_props_df.insert(0, 'replicate', replicate)
                region_props_df.insert(2, 'chamber', int(os.path.basename(inputs_folder)[-2:]))
                region_props_df['folder'] = current_directory

                position_frames.append(region_props_df)

            except Exception as e:
                # Best effort: record the failure and carry on with the next frame
                fails.append(f"Error processing folder {current_directory}, Frame {frame_index}: {e}")

    # Save the compiled data of all chambers of the current position (single concat, single write)
    if position_frames:
        all_frames_df = pd.concat(position_frames, ignore_index=True)
        all_frames_df.to_csv(os.path.join(save_directory, os.path.basename(current_directory) + '.csv'), index=False)

    # Re-read the meta file before updating it; the status columns written below are
    # read as strings so that assigning text to them does not clash with a float column
    meta = pd.read_csv(meta_path, dtype={'stardist': str, 'stardist_fails': str})
    meta.at[i, 'stardist'] = 'Done'
    if fails:
        meta.at[i, 'stardist_fails'] = '; '.join(fails)

    # Save metadata with updates
    meta.to_csv(meta_path, index=False)

s27, Pos 01: 100%|████████████████████████████████████████████████████████████████████| 12/12 [30:54<00:00, 154.57s/it]
s28, Pos 01:  42%|██████████████████████████▋                                     | 15/36 [1:10:51<2:06:21, 361.03s/it]