In [1]:
import numpy as np
import tifffile
from scipy import ndimage
from skimage.filters import gaussian
from skimage.segmentation import watershed
from skimage.morphology import disk, dilation, erosion
import os
import glob
import re
from google.colab import drive
from tqdm.notebook import tqdm

# Mount Google Drive so the data folders below are reachable from the Colab runtime
drive.mount('/content/drive')

# Define input and output paths.
# The shared root and the dataset name are factored out so that another
# acquisition can be processed by changing `dataset_name` only.
thesis_root = '/content/drive/MyDrive/knowledge/University/Master/Thesis'
dataset_name = 'Static-A-1'
cadherin_dir = os.path.join(thesis_root, 'Projected', dataset_name, 'Cadherins')
fused_dir = os.path.join(thesis_root, 'Segmented', dataset_name, 'Fused')
output_dir = os.path.join(thesis_root, 'Segmented', dataset_name, 'Cell')

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Find all Cadherin .tif files - adjust pattern to match actual filenames.
# glob makes no ordering promise, so sort to get the same processing order
# (and the same log) on every run.
cadherin_files = sorted(glob.glob(os.path.join(cadherin_dir, '*Cadherins_contrast_bg_tophat.tif')))
print(f"Found {len(cadherin_files)} Cadherin files to process")

# Function to extract the sequence prefix from filenames
def extract_sequence_prefix(filename):
    """Return the sequence identifier found at the start of a file name.

    Only the base name of ``filename`` is inspected; any directory part is
    ignored. The identifier is the shortest leading run that begins with
    ``denoised_`` and ends with ``seq`` followed by digits, for example
    ``denoised_0Pa_A1_19dec21_40x_L2RA_FlatA_seq005``.

    Args:
        filename: File name or path to inspect.

    Returns:
        The identifier string, or ``None`` when the base name does not start
        with such a pattern.
    """
    stem = os.path.basename(filename)
    found = re.match(r"(denoised_.*?seq\d+)", stem)
    return found.group(1) if found is not None else None

# Recursively search for all fused cell mask files (including in subfolders).
# The list is sorted so that the mapping built below does not depend on the
# order in which os.walk happens to visit directories and files.
fused_files = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(fused_dir)
    for file in files
    if file.endswith('_segmented_cells.tif')
)

print(f"Found {len(fused_files)} fused cell mask files")

# Create a dictionary mapping sequence prefixes to fused file paths
fused_prefix_to_file = {}
for fused_file in fused_files:
    prefix = extract_sequence_prefix(fused_file)
    if not prefix:
        # Report files that cannot be matched instead of dropping them silently.
        print(f"WARNING: Could not extract sequence prefix from {os.path.basename(fused_file)}, skipping")
        continue
    if prefix in fused_prefix_to_file:
        # Two masks (e.g. in different subfolders) share one prefix. The later
        # one still wins, as before, but the collision is now visible.
        print(f"WARNING: Prefix '{prefix}' already mapped to {fused_prefix_to_file[prefix]}; "
              f"replacing it with {fused_file}")
    fused_prefix_to_file[prefix] = fused_file
    print(f"Mapped prefix '{prefix}' to file: {os.path.basename(fused_file)}")

# Process each Cadherin file: pair it with its fused cell mask (the seeds),
# build a membrane gradient image and grow the seeds with a watershed.
for cadherin_file in tqdm(cadherin_files):
    # Get base filename
    filename = os.path.basename(cadherin_file)

    # Extract sequence prefix from the cadherin filename
    prefix = extract_sequence_prefix(cadherin_file)
    if not prefix:
        print(f"WARNING: Could not extract sequence prefix from {filename}, skipping")
        continue

    # Find corresponding fused cell mask file
    if prefix not in fused_prefix_to_file:
        print(f"WARNING: No matching fused cell mask found for {prefix}, skipping")
        continue

    fused_mask_file = fused_prefix_to_file[prefix]
    base_name = prefix

    print(f"\nProcessing: {filename}")
    print(f"Using fused cell mask: {os.path.basename(fused_mask_file)}")

    # Any failure below is reported and the loop moves on to the next file,
    # so one bad image does not abort the whole batch.
    try:
        # Load the images
        cadherin_img = tifffile.imread(cadherin_file)
        fused_cell_masks = tifffile.imread(fused_mask_file)

        print(f"  Cadherin image shape: {cadherin_img.shape}")
        print(f"  Fused cell mask shape: {fused_cell_masks.shape}")

        # Extract Cadherin channel if needed
        if cadherin_img.ndim == 2:
            # Single channel image (already Cadherin)
            print("  Detected single-channel Cadherin image")
            membrane_channel = cadherin_img
        elif cadherin_img.ndim == 3 and cadherin_img.shape[0] == 3:
            # Format is (C, H, W)
            print("  Detected format: (C, H, W)")
            membrane_channel = cadherin_img[0]  # First channel
        elif cadherin_img.ndim == 3 and cadherin_img.shape[2] == 3:
            # Format is (H, W, C)
            print("  Detected format: (H, W, C)")
            membrane_channel = cadherin_img[:, :, 0]  # First channel
        else:
            print(f"  Unexpected image shape: {cadherin_img.shape}. Using first channel/plane.")
            if cadherin_img.ndim == 3:
                # Guess the channel axis: a leading axis shorter than the next
                # one is treated as channels/planes, otherwise the last axis is.
                if cadherin_img.shape[0] < cadherin_img.shape[1]:
                    membrane_channel = cadherin_img[0]
                else:
                    membrane_channel = cadherin_img[:, :, 0]
            else:
                # Other dimensionalities are passed through unchanged.
                membrane_channel = cadherin_img

        # Apply Gaussian blur to reduce noise
        print("  Applying Gaussian blur...")
        membrane_smoothed = gaussian(membrane_channel, sigma=1)

        # Calculate morphological gradient (dilation minus erosion)
        print("  Calculating morphological gradient...")
        selem = disk(1)  # Adjust radius if needed
        dilated = dilation(membrane_smoothed, selem)
        eroded = erosion(membrane_smoothed, selem)
        membrane_gradient = dilated - eroded

        # Normalize gradient to 0-1 range.
        # A constant gradient has max == min; dividing by that zero range
        # would fill the image with NaN, so that case is handled explicitly.
        gradient_min = membrane_gradient.min()
        gradient_range = membrane_gradient.max() - gradient_min
        if gradient_range > 0:
            membrane_gradient_norm = (membrane_gradient - gradient_min) / gradient_range
        else:
            print("  WARNING: membrane gradient is constant; the watershed mask is empty "
                  "and the saved cell mask will contain no cells")
            membrane_gradient_norm = np.zeros_like(membrane_gradient, dtype=float)

        # Apply watershed segmentation using fused cell masks as seeds.
        # Pixels whose normalized gradient is exactly 0 are left out of the
        # region that the watershed is allowed to label.
        print("  Performing watershed segmentation...")
        watershed_output = watershed(membrane_gradient_norm, fused_cell_masks, mask=membrane_gradient_norm > 0)

        # Save the cell mask result
        cell_mask_path = os.path.join(output_dir, f"{base_name}_cell_mask.tif")
        tifffile.imwrite(cell_mask_path, watershed_output.astype(np.uint32))
        print(f"  Saved cell mask to {cell_mask_path}")

        # Print statistics.
        # Count the non-zero labels directly: subtracting 1 from the number of
        # unique values is off by one when label 0 (background) is absent.
        seed_count = np.count_nonzero(np.unique(fused_cell_masks))
        cell_count = np.count_nonzero(np.unique(watershed_output))
        print(f"  Number of fused cell seeds: {seed_count}")
        print(f"  Number of segmented cells: {cell_count}")
        print(f"  Processing complete for {filename}")

    except Exception as e:
        print(f"ERROR processing {filename}: {e}")

print("All processing complete!")

Mounted at /content/drive
Found 16 Cadherin files to process
Found 16 fused cell mask files
Mapped prefix 'denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011' to file: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011_segmented_cells.tif
Mapped prefix 'denoised_0Pa_A1_19dec21_40x_L2RA_FlatA_seq005' to file: denoised_0Pa_A1_19dec21_40x_L2RA_FlatA_seq005_segmented_cells.tif
Mapped prefix 'denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007' to file: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007_segmented_cells.tif
Mapped prefix 'denoised_0Pa_A1_19dec21_40x_L2RA_FlatA_seq002' to file: denoised_0Pa_A1_19dec21_40x_L2RA_FlatA_seq002_segmented_cells.tif
Mapped prefix 'denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006' to file: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006_segmented_cells.tif
Mapped prefix 'denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009' to file: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009_segmented_cells.tif
Mapped prefix 'denoised_0Pa_A1_19dec21_40x_L2RA_FlatA_seq004' to file: denoise

  0%|          | 0/16 [00:00<?, ?it/s]


Processing: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001_Cadherins_contrast_bg_tophat.tif
Using fused cell mask: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001_segmented_cells.tif
  Cadherin image shape: (1024, 1024)
  Fused cell mask shape: (1024, 1024)
  Detected single-channel Cadherin image
  Applying Gaussian blur...
  Calculating morphological gradient...
  Performing watershed segmentation...
  Saved cell mask to /content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-A-1/Cell/denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001_cell_mask.tif
  Number of fused cell seeds: 320
  Number of segmented cells: 320
  Processing complete for denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001_Cadherins_contrast_bg_tophat.tif

Processing: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002_Cadherins_contrast_bg_tophat.tif
Using fused cell mask: denoised_0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002_segmented_cells.tif
  Cadherin image shape: (1024, 1024)
  Fused cell mask shape: (1024, 102