# MM pipeline: Run the StarDist2D segmentation model over all folders

This notebook loads a pretrained StarDist2D segmentation model and applies the segmentation prediction to every chamber folder of each position listed in the meta file (all directories are taken from the config file loaded in the first code cell). Only folders containing microscopy chamber data should be inside the position folders. The segmentation is applied to all images matching `*Ch1*tif` and the segmentation image is saved into a newly created folder within each image folder named *seg_sd2*. For the moment, it assumes single-page tif files and saves single-page tif files with the exact same name as the input image used for segmentation prediction. In addition, region properties of the segmented objects are written to one CSV file per chamber in a folder named *stardist2*.

### Load main config file. Adapt directory

In [1]:
import json

configdir = 'G://GitHub/microfluidics-image-processing/MM_pipeline'

# uncomment the one running:
#mainconfigname = 'config_example_mixed'
mainconfigname = 'config_example_matched'

# Normalise both parts so that plain string concatenation below yields a
# usable path: the file name gets its '.json' suffix and the directory a
# trailing '/', unless they already carry them.
mainconfigname = mainconfigname if mainconfigname.endswith('.json') else mainconfigname + '.json'
configdir = configdir if configdir.endswith('/') else configdir + '/'

# Read JSON data
with open(configdir + mainconfigname, 'r') as file:
    data = json.load(file)

# Publish every top-level config entry as a notebook-level variable; the
# cells further down use these names directly.
for key, value in data.items():
    globals()[key] = value

### Load various packages

In [2]:
import numpy as np
import sys
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
from tifffile import imread, imwrite
from datetime import datetime
from csbdeep.utils import Path, normalize
from skimage.measure import regionprops_table
from skimage import io
from skimage import segmentation
from skimage import color
from stardist.matching import matching_dataset
from stardist import fill_label_holes, random_label_cmap, relabel_image_stardist, calculate_extents, gputools_available, _draw_polygons
from stardist.models import Config2D, StarDist2D, StarDistData2D
import os
from tensorflow import keras
import cv2
import pandas as pd
from datetime import date
import re
from pathlib import Path
# Seed NumPy's global random number generator so random draws repeat between runs
np.random.seed(42)
# Label colormap returned by StarDist's random_label_cmap(); not referenced again in the cells visible here
lbl_cmap = random_label_cmap()

def add_prefix(props, prefix):
    """Return a copy of ``props`` whose intensity keys carry ``prefix``.

    Every key containing the substring ``'intensity'`` is renamed to
    ``f"{prefix}_{key}"``; all other keys are kept unchanged. Values and
    key order are preserved.

    Parameters
    ----------
    props : dict
        Mapping of property name to values.
    prefix : str
        Text placed (followed by an underscore) in front of intensity keys.

    Returns
    -------
    dict
        New dictionary with the renamed keys.
    """
    renamed = {}
    for name, column in props.items():
        if 'intensity' in name:
            renamed[f"{prefix}_{name}"] = column
        else:
            renamed[name] = column
    return renamed

### Check if GPU can be accessed

In [3]:
# Result is shown as the cell output. NOTE(review): presumably this only reports whether the
# optional `gputools` (OpenCL) package is usable, not whether TensorFlow can see a GPU — confirm.
gputools_available()

False

### Load in meta file and display its last rows. Check if correct

In [4]:
# Build the meta file location once from the config values and show it
meta_csv_path = os.path.join(masterdir, metacsv)
print(meta_csv_path)

# Force the 'stardist' column to text while loading the table
meta = pd.read_csv(meta_csv_path, dtype={'stardist': str})
replicates = meta['replicate'].unique()

# Display the last rows for a visual sanity check
meta.tail()

I://Julian/agr_rev_matched/sharing/matched\shared_matched_meta_processing.csv


Unnamed: 0,date,replicate,chip,pos,channel,Process,replicate2,Process2,rep2startdifferencemin,rep2firstframe,...,StageY,PxinUmX,PxinUmY,register,stardist,stardist_data,stardist_data_cor,stardist_fails,delta,delta_fails
15,NaT,r10,c1,1,2,3818,r10b,3931.0,228.0,37.0,...,-49316.40331,0.065,0.065,Done,,,,,,
16,NaT,r11,c1,1,1,4014,r11b,4046.0,50.0,8.0,...,-43571.36198,0.065,0.065,Done,,,,,,
17,NaT,r11,c1,2,3,4026,r11b,4058.0,50.0,8.0,...,-39732.80679,0.065,0.065,Done,,,,,,
18,NaT,r11,c1,3,4,4036,r11b,4068.0,50.0,8.0,...,-37792.56431,0.065,0.065,Done,,,,,,
19,NaT,r13,c1,4,6,2567,,,,,...,-37932.46,0.065,0.065,Done,,,,,,


### Load stardist model
Here, the model is loaded. In the config file, you need to specify the directory which contains a folder named *stardist*. This *stardist* folder needs to contain the files *weights_best.h5* and *config.json*, and optionally *thresholds.json*.

In [5]:
# Show which model directory the config points to
print(stardistmodeldir)
# No new config is passed (None): the model named 'stardist' is taken from `basedir`,
# as the "Loading network weights ..." messages in the cell output show
model = StarDist2D(None, name='stardist', basedir=stardistmodeldir)
# Passed as `axis` to normalize() in the main segmentation loop
axis_norm = (0,1)   # normalize channels independently

G://GitHub/microfluidics-image-processing/stardist_models/mm
Loading network weights from 'weights_best.h5'.
Loading thresholds from 'thresholds.json'.
Using default values: prob_thresh=0.586968, nms_thresh=0.3.


### Define regionprops parameters. You could add more if you want to

In [6]:
# More than one channel means fluorescence images are measured as well
flims = n_channel > 1

# Shape properties that are always measured ...
prop_list = ['label',
             'area', 'centroid',
             'axis_major_length', 'axis_minor_length',
             'eccentricity']

# ... plus intensity properties when fluorescence images are available
if flims:
    prop_list += ['intensity_mean', 'intensity_max']

### Limit GPU RAM usage by StarDist

In [7]:
from csbdeep.utils.tf import limit_gpu_memory
# adjust as necessary: limit GPU memory to be used by TensorFlow to leave some to OpenCL-based computations
# `ramlimit` and `ramsize` are not defined in this notebook; presumably they come from the JSON config — verify
# NOTE(review): this cell runs after the model was already created in an earlier cell; TensorFlow may
# refuse to change memory limits once the GPU has been initialised — confirm the cell order
limit_gpu_memory(fraction=ramlimit, total_memory=ramsize)
# alternatively, try this:
# limit_gpu_memory(None, allow_growth=True)

## Main segmentation loop
This loop goes over each row in the meta file which is marked with completed preprocessing (register == 'Done') and which is neither already segmented (stardist == 'Done') nor excluded (Exclude == 'excl'), and applies the StarDist segmentation model to each position/chamber iteratively. For the moment, it is not parallelized but could probably benefit from that.

In [8]:
# Patch Keras model's predict to always use verbose=0
#
# The guard makes this cell safe to run more than once. `orig_predict` is a
# notebook-level name that the wrapper looks up at call time; without the
# guard a second execution would rebind it to the wrapper itself, so every
# later prediction would recurse until RecursionError.
if not getattr(model.keras_model.predict, '_forces_verbose_0', False):
    orig_predict = model.keras_model.predict

    def predict_no_verbose(*args, **kwargs):
        """Call the original Keras ``predict`` with ``verbose`` forced to 0."""
        kwargs['verbose'] = 0
        return orig_predict(*args, **kwargs)

    # Marker read by the guard above on later executions of this cell
    predict_no_verbose._forces_verbose_0 = True
    model.keras_model.predict = predict_no_verbose

In [9]:
# helper to robustly extract trailing integer from a path basename
def extract_trailing_int_from_basename(path):
    """Return the integer formed by the digits that end the last path component.

    Parameters
    ----------
    path : str or os.PathLike
        File or folder path, e.g. ``'.../Pos01/Chamb07'``. A trailing path
        separator is ignored.

    Returns
    -------
    int or None
        The trailing number (``7`` for the example above), or ``None`` when
        the last path component does not end in a digit.
    """
    # normpath drops trailing separators; without it 'Pos01/' would have an
    # empty basename and the number would be missed
    name = os.path.basename(os.path.normpath(path))
    m = re.search(r'(\d+)$', name)
    return int(m.group(1)) if m else None

In [None]:
# Segment every chamber of every eligible meta row and record the progress.
#
# For each row of the meta file that is registered, not excluded and not yet
# segmented, all chamber folders of that position are processed: each '*Ch1*tif'
# image is normalised, segmented with the StarDist model and the label image is
# written to '<chamber>/seg_sd2/'. Region properties of all frames of a chamber
# are collected and saved as one CSV in '<replicate>/Chambers/stardist2/'.
# Finally the meta row is marked 'Done' and any per-frame errors are stored in
# its 'stardist_fails' column.

# Meta file location and the dtypes enforced on every (re)load. The status
# columns are read as text: a still-empty column would otherwise be parsed as
# float64, and writing 'Done' or an error summary into it is rejected or warned
# about by recent pandas versions. Using the same mapping for both reloads also
# keeps 'stardist_data' from being re-typed before the file is written back.
meta_path = os.path.join(masterdir, metacsv)
meta_dtypes = {'stardist': str, 'stardist_data': str, 'stardist_fails': str}

for i in range(0, meta.shape[0]):
    # reload metadata each iteration if you need the file updated by others,
    # otherwise you can read once before the loop for speed.
    meta = pd.read_csv(meta_path, dtype=meta_dtypes)

    # Skip rows that are already segmented, explicitly excluded, or not yet registered
    if meta.loc[i, 'stardist'] == 'Done' or meta.loc[i, 'Exclude'] == 'excl' or not meta.loc[i, 'register'] == 'Done':
        continue

    main_folder = os.path.join(masterdir, savedirname, meta.replicate[i], 'Chambers')
    save_directory = os.path.join(main_folder, 'stardist2')
    os.makedirs(save_directory, exist_ok=True)

    current_directory = os.path.join(main_folder, f'Pos{str(meta.pos[i]).zfill(2)}')
    if not os.path.exists(current_directory):
        print(current_directory + ' not found')
        continue

    # Select chamber folders by their own name. Testing the full path would
    # accept every sub-folder, because the parent folder 'Chambers' already
    # contains the text 'Chamb'. Sorting gives a reproducible processing order.
    chambf = sorted(f.path for f in os.scandir(current_directory) if f.is_dir() and 'Chamb' in f.name)

    # Error messages of all frames of this position; written to the meta file below
    fails = []

    for inputs_folder in tqdm(chambf, desc=meta.replicate[i] + ', Pos ' + str(meta.pos[i]).zfill(2)):
        outputs_folder = os.path.join(inputs_folder, "seg_sd2")
        os.makedirs(outputs_folder, exist_ok=True)
        # Remove segmentations left over from an earlier run of this chamber
        for file in Path(outputs_folder).glob('*tif'):
            os.remove(file)

        # Image lists are sorted so that the same index addresses the same
        # frame in every channel
        images = sorted(Path(inputs_folder).glob('*Ch1*tif'))
        if flims:
            images_fl = sorted(Path(inputs_folder).glob('*Ch2*tif'))
            if n_channel > 2:
                images_fl2 = sorted(Path(inputs_folder).glob('*Ch3*tif'))

        # NOTE(review): 'MaxFr' is read but not used to limit frame_list below —
        # confirm whether frames beyond it should be skipped.
        max_frame = meta.loc[i, 'MaxFr']
        frame_list = range(len(images))

        # ----> Collect one region-property table per frame for this chamber
        frame_tables = []

        # derive pos and chamber numbers robustly from folder names (no +1)
        # If you want 1-based numbering, adjust here, but do so consciously.
        pos_number = extract_trailing_int_from_basename(current_directory)
        chamb_number = extract_trailing_int_from_basename(inputs_folder)

        for frame_index in frame_list:
            # Any error in a single frame is recorded and the loop moves on to
            # the next frame instead of aborting the whole run
            try:
                if flims:
                    fluorescence_image = imread(images_fl[frame_index])
                    if n_channel > 2:
                        fluorescence_image2 = imread(images_fl2[frame_index])

                main_image = imread(images[frame_index])
                normalized_image = normalize(main_image, 1, 99.8, axis=axis_norm)
                labels, details = model.predict_instances(normalized_image, verbose=0)
                # The label image keeps the file name of the input image
                filename_segmentation = os.path.join(outputs_folder, os.path.basename(images[frame_index]))
                imwrite(filename_segmentation, labels, append=False, metadata=None)

                region_props = regionprops_table(labels, intensity_image=fluorescence_image if flims else None, properties=prop_list)
                if flims and n_channel > 2:
                    # With two fluorescence channels the intensity columns are
                    # prefixed per channel and merged into one table
                    region_props = add_prefix(region_props, 'fluor1')
                    region_props2 = regionprops_table(labels, intensity_image=fluorescence_image2, properties=prop_list)
                    region_props2 = add_prefix(region_props2, 'fluor2')
                    for key, value in region_props2.items():
                        if 'intensity' in key:
                            region_props[key] = value

                region_props_df = pd.DataFrame(region_props)

                # Insert columns with correct values (no erroneous +1)
                region_props_df.insert(0, 'frame', frame_index + 1)  # keep frames 1-based if desired
                # use pos_number and chamb_number extracted from folder names
                region_props_df.insert(0, 'pos', pos_number if pos_number is not None else meta.pos[i])
                region_props_df.insert(0, 'replicate', meta.replicate[i])
                # insert chamber after replicate and pos to keep a similar layout as before
                region_props_df.insert(2, 'chamber', chamb_number if chamb_number is not None else os.path.basename(inputs_folder))
                # use the actual chamber folder as the folder column (more precise)
                region_props_df['folder'] = inputs_folder

                frame_tables.append(region_props_df)

            except Exception as e:
                fails.append(f"Error processing folder {current_directory}, Chamber {inputs_folder}, Frame {frame_index}: {e}")

        # ----> Save the DataFrame for this chamber after all frames are processed
        if frame_tables:
            # A single concat at the end avoids re-copying the growing table
            # once per frame
            chamber_frames_df = pd.concat(frame_tables, ignore_index=True)

            # format csv filename using the extracted integers (no +1)
            if pos_number is None:
                pos_str = str(int(meta.pos[i])).zfill(2)
            else:
                pos_str = str(int(pos_number)).zfill(2)
            if chamb_number is None:
                # fallback: extract last two chars then zfill
                chamb_str = os.path.basename(inputs_folder)[-2:].zfill(2)
            else:
                chamb_str = str(int(chamb_number)).zfill(2)

            csv_filename = f"Pos{pos_str}Chamb{chamb_str}.csv"
            chamber_frames_df.to_csv(os.path.join(save_directory, csv_filename), index=False)

    # ----> Update metadata as before
    # The file is read again right before writing, so the version on disk at
    # this moment is the one that gets updated
    meta = pd.read_csv(meta_path, dtype=meta_dtypes)
    meta.at[i, 'stardist'] = 'Done'
    if fails:
        meta.at[i, 'stardist_fails'] = '; '.join(fails)
    meta.to_csv(meta_path, index=False)
# --- end processing ---

r07, Pos 01:   6%|████▍                                                                 | 3/48 [01:10<17:25, 23.23s/it]

### DONE