gg-napari-env

In [18]:
from napari_czifile2 import napari_get_reader
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import os 
import sys 
from skimage.measure import regionprops
from concurrent.futures import ThreadPoolExecutor, as_completed

In [19]:
raw_data_dirs = os.listdir('../raw-data/')
raw_data_dirs

['20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI',
 '20250324 A1 stacks',
 '20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI',
 '20250325 6 9e198-b3-488 9e197-b1-546 9e196-b2-647 dapi',
 '20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI',
 '20250325 5 p14 g1-b1-546 lnc7-b2-647 dapi',
 '20250328 2 P14 R2-b3-488 Q1-b1-546 Lnc6-b2-647 DAPI',
 '20250307 C2 stacks',
 '20250328 4 P14 9E129-b3-488 LOC104-b1-546 9E116-b2-647 dapi',
 '20250306 C1 stacks',
 '20250325 4 p14 u34-b3-488 lnc4-b1-546 u21-b5-647 dapi',
 '20250328 3 P14 Lnc3-b3-488 L16-b2-594 Lnc2-b5-647 DAPI',
 '20250324 A2 stacks']

In [7]:
input = 'T79'
input = [d for d in raw_data_dirs if input in d][0]
print(f'Using {input} as input directory')
input_dir = f'../raw-data/{input}/'
assert os.path.exists(input_dir), 'Input directory does not exist'
czi_files = [f for f in os.listdir(input_dir) if f.endswith('.czi')]
print(f"Found {len(czi_files)} czi files in {input_dir}")
print(czi_files)

Using 20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI as input directory
Found 5 czi files in ../raw-data/20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI/
['20250328 1 T79 sample 1.czi', '20250328 1 T79 sample 5.czi', '20250328 1 T79 sample 2.czi', '20250328 1 T79 sample 3.czi', '20250328 1 T79 sample 4.czi']


In [8]:
results_dir = f'../results/{input}'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

In [9]:
def normalize_stack_by_dapi(image, dapi_channel=0, method='mean', eps=1e-8):
    """
    Normalize all non-DAPI channels in a z-stack using DAPI intensity as reference.

    Parameters:
    - image: np.ndarray, shape (Z, C, X, Y)
    - dapi_channel: int, index of the DAPI channel (in the C dimension)
    - method: str, 'mean', 'median', or 'max' for calculating DAPI intensity per z-slice
    - eps: small value to avoid division by zero

    Returns:
    - corrected: np.ndarray, shape (Z, C, X, Y)
    """
    
    z, c, x, y = image.shape
    corrected = np.zeros_like(image, dtype=np.uint8)

    # Extract DAPI channel across z
    dapi_stack = image[:, dapi_channel, :, :]

    # Compute per-slice intensity
    if method == 'mean':
        dapi_intensity = dapi_stack.mean(axis=(1, 2))
    elif method == 'median':
        dapi_intensity = np.median(dapi_stack, axis=(1, 2))
    elif method == 'max':
        dapi_intensity = dapi_stack.max(axis=(1, 2))
    else:
        raise ValueError("method must be 'mean', 'median', or 'max'")

    # Normalize: scale each slice by the ratio of max intensity to current slice intensity
    dapi_ref = np.max(dapi_intensity)
    correction_factors = dapi_ref / (dapi_intensity + eps)

    for z_idx in range(z):
        for ch in range(c):
            if ch == dapi_channel:
                # Leave DAPI channel unchanged
                corrected[z_idx, ch] = image[z_idx, ch]
            else:
                corrected[z_idx, ch] = (image[z_idx, ch] * correction_factors[z_idx])

    # Clip values to ensure they are within valid range for uint8
    corrected = np.clip(corrected, 0, 255).astype(np.uint8)
    
    return corrected

In [10]:
def subtract_background_gaussian_uint8(slice_img, sigma=100):
    
    """
    Subtract background fluorescence from each channel in a single z-slice using Gaussian filtering,
    and return the result as an 8-bit image.

    Parameters
    ----------
    slice_img : numpy.ndarray
        A 3D numpy array of shape (channels, height, width) with dtype=np.uint8.
    sigma : float, optional
        Standard deviation for Gaussian filter to estimate the background.
        Adjust based on the spatial scale of the background variation; default is 100.

    Returns
    -------
    bg_subtracted : numpy.ndarray
        Background-subtracted image with dtype=np.uint8, same shape as slice_img.
    """
    from skimage.filters import gaussian
    # Prepare an output array of the same shape with dtype uint8
    bg_subtracted = np.empty_like(slice_img, dtype=np.uint8)
    
    # Process each channel independently
    for ch in range(slice_img.shape[0]):
        # Estimate the background using a Gaussian filter
        background = gaussian(slice_img[ch], sigma=sigma, preserve_range=True)
        # Subtract the background in float32 to capture potential negative values
        subtracted = slice_img[ch].astype(np.float32) - background.astype(np.float32)
        # Clip the values to ensure they fall within the valid 8-bit range and convert back to uint8
        subtracted_uint8 = np.clip(subtracted, 0, 255).astype(np.uint8)
        bg_subtracted[ch] = subtracted_uint8
    
    return bg_subtracted

In [11]:
def threshold_triangle_uint8(slice_img):
    """
    Apply Triangle threshold (automatic) on each channel in a single z-slice,
    and return the image with only the thresholded signal remaining.

    Parameters
    ----------
    slice_img : numpy.ndarray
        A 3D numpy array of shape (channels, height, width) with dtype=np.uint8.

    Returns
    -------
    thresholded : numpy.ndarray
        Thresholded image with dtype=np.uint8, same shape as slice_img.
        For each channel, pixels with values below the Triangle threshold are set to 0.
    """
    import numpy as np
    from skimage.filters import threshold_triangle 

    # Prepare an output array of the same shape with dtype uint8
    thresholded = np.empty_like(slice_img, dtype=np.uint8)

    # Process each channel independently
    for ch in range(slice_img.shape[0]):
        # Compute the threshold using the Triangle method for the current channel
        thresh = threshold_triangle(slice_img[ch])
        # Create a new channel image keeping only values above (or equal to) the threshold
        thresholded[ch] = np.where(slice_img[ch] >= thresh, slice_img[ch], 0)

    return thresholded


In [12]:
def threshold_triangle_min_area(slice_img, min_area=12):
    """
    Apply Triangle threshold (automatic) on each channel in a single z-slice,
    and return the image with only the thresholded signal remaining,
    filtered by minimum area.

    Parameters
    ----------
    slice_img : numpy.ndarray
        A 3D numpy array of shape (channels, height, width) with dtype=np.uint8.
    min_area : int
        Minimum area (in pixels) for connected components to be kept.

    Returns
    -------
    thresholded : numpy.ndarray
        Thresholded image with dtype=np.uint8, same shape as slice_img.
        For each channel, pixels with values below the Triangle threshold are set to 0.
        Only components with area > min_area are retained.
    """
    import numpy as np
    from skimage.filters import threshold_triangle
    from skimage.measure import label, regionprops
    from skimage.morphology import remove_small_objects

    # Prepare an output array of the same shape with dtype uint8
    thresholded = np.zeros_like(slice_img, dtype=np.uint8)

    # Process each channel independently
    for ch in range(slice_img.shape[0]):
        # Compute the threshold using the Triangle method for the current channel
        thresh = threshold_triangle(slice_img[ch])

        # Threshold image
        binary_mask = slice_img[ch] >= thresh

        # Remove small objects based on min_area
        cleaned_mask = remove_small_objects(binary_mask, min_size=min_area)

        # Apply cleaned mask to original image
        thresholded[ch] = np.where(cleaned_mask, slice_img[ch], 0)

    return thresholded

In [13]:
def subtract_background_rolling_ball(slice_img, radius=50):
    """
    Subtract background from a single z-slice using scikit-image's rolling ball algorithm.
    
    Parameters
    ----------
    slice_img : numpy.ndarray
        A 3D numpy array of shape (channels, height, width). 
        (Or 2D if you only have one channel.)
    radius : float, optional
        The ball radius that defines the scale of the background variation.
    
    Returns
    -------
    bg_subtracted : numpy.ndarray
        Background-subtracted image, same shape as input, with dtype float 
        (or possibly the same dtype if you prefer).
    """
    from skimage.restoration import rolling_ball
    # We’ll store the result in the same dtype as the input unless you prefer float
    bg_subtracted = np.empty_like(slice_img, dtype=np.uint8)

    # If your data is multi-channel, subtract background channel by channel
    for ch in range(slice_img.shape[0]):
        # rolling_ball expects a 2D image, so pass slice_img[ch] directly
        background = rolling_ball(slice_img[ch], radius=radius)
        subtracted = slice_img[ch] - background
        
        # Clip negative values to 0 if you want to avoid negative intensities
        # but keep it float if you'd rather preserve intensities for analysis
        subtracted = np.clip(subtracted, 0, 255).astype(np.uint8)

        bg_subtracted[ch] = subtracted
    
    return bg_subtracted

In [14]:
# Function to process each slice
def quantify_nucleus(z, image_name, image_data, nuclei_rois, channels):
    image_slice = image_data[z, :, :, :]
    nuclei_rois_slice = nuclei_rois[z, :, :]
    nuclei_rois_regions = regionprops(nuclei_rois_slice)

    # Background subtraction 
    image_slice = subtract_background_gaussian_uint8(image_slice, sigma=100)

    # Thresholding
    #image_slice = threshold_triangle_uint8(image_slice)
    image_slice = threshold_triangle_min_area(image_slice, min_area=12)
    
    # Create list to store results for each cell
    cell_nuclei_results = [] 
    for cell_region in nuclei_rois_regions:
        cell_nucleus_result = {} 
        # Save z slice and z_id (cell label)
        cell_nucleus_result['image'] = image_name
        cell_nucleus_result['z'] = z
        cell_nucleus_result['z_id'] = cell_region.label
        # Save cell area and centroid
        cell_nucleus_result['nucleus_area'] = cell_region.area 
        # x and y are flipped in the regionprops output
        cell_nucleus_result['centroid_x'] = cell_region.centroid[1] 
        cell_nucleus_result['centroid_y'] = cell_region.centroid[0]
        for channel, channel_name in channels:
            # Save Mean, Median and sum 
            cell_nucleus_result[channel_name + "-nucleus-mean"] = np.mean(image_slice[channel, nuclei_rois_slice == cell_region.label])
            cell_nucleus_result[channel_name + "-nucleus-median"] = np.median(image_slice[channel, nuclei_rois_slice == cell_region.label])
            cell_nucleus_result[channel_name + "-nucleus-sum"] = np.sum(image_slice[channel, nuclei_rois_slice == cell_region.label])
        cell_nuclei_results.append(cell_nucleus_result)
    
    # Convert the list of results to DataFrame
    cell_nuclei_results = pd.DataFrame(cell_nuclei_results)
        
    return cell_nuclei_results

# Parallelize processing of Z slices
def parallelize_quantify_nuclei(image_data, image_name, all_rois, channels, n_threads):
    # Create a list to collect results for each Z slice
    results_all_z = []
    
    with ThreadPoolExecutor(max_workers=n_threads) as executor:
        # Create a list of futures to process each slice in parallel
        futures = [executor.submit(quantify_nucleus, z, image_name, image_data, all_rois, channels) for z in range(image_data.shape[0])]
        
        # Wait for all futures to complete and collect the results
        for future in as_completed(futures):
            cell_borders_results = future.result()
            results_all_z.append(cell_borders_results)
    
    # Concatenate all results into a single DataFrame
    results_all_z = pd.concat(results_all_z, axis=0, ignore_index=True)
    
    return results_all_z

In [15]:
n_threads = 50 

In [16]:
for f in czi_files[:]:
    print(f"Processing {f}")
    file_path = os.path.join(input_dir, f)
    reader = napari_get_reader(file_path)
    if reader is not None:
        layer_data = reader(file_path)
        image_data, metadata, layer_type = layer_data[0]

        # Remove singleton dimension 
        image_data = np.squeeze(image_data)  
        print("Metadata:", metadata)
        print("Image shape:", image_data.shape)  

        # Load ROIs 
        all_rois_path = f'../results/{input}/{f.replace(".czi", "_rois.npy")}'
        all_rois = np.load(all_rois_path)
        print(f"Loaded {all_rois_path}")

        dapi_index = metadata['name'].index([name for name in metadata['name'] if 'DAPI' in name][0])
        print(f"DAPI channel index: {dapi_index}")

        non_dapi_channels = [(i, metadata['name'][i].split('-T')[0]) for i in range(len(metadata['name'])) if i != dapi_index]
        print(f"Non-DAPI channels: {non_dapi_channels}")

        image_data = normalize_stack_by_dapi(image_data, dapi_channel=dapi_index, method='mean')
        print("Normalized image data using DAPI intensity.")

        # Quantify 
        results_image = parallelize_quantify_nuclei(image_data, f, all_rois, non_dapi_channels, n_threads=n_threads)

        # Sort by image, z, and z_id 
        results_image = results_image.sort_values(by=['image', 'z', 'z_id']).reset_index(drop=True) 

        # Save the results 
        results_image_path = os.path.join(results_dir, f.replace(".czi", "_nuclei.csv"))
        print(f"Saving results to {results_image_path}")
        results_image.to_csv(results_image_path, index=False)

Processing 20250328 1 T79 sample 1.czi
Metadata: {'rgb': False, 'channel_axis': 2, 'translate': (0.0, 0.0, 29259.974395751953, 31219.198837280273), 'scale': (1.0, 1.0, 0.0974884033203125, 0.0974884033203125), 'contrast_limits': None, 'name': ['AF546-T1', 'DAPI-T2', 'AF647-T2']}
Image shape: (79, 3, 2048, 2048)
Loaded ../results/20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI/20250328 1 T79 sample 1_rois.npy
DAPI channel index: 1
Non-DAPI channels: [(0, 'AF546'), (2, 'AF647')]
Normalized image data using DAPI intensity.
Saving results to ../results/20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI/20250328 1 T79 sample 1_nuclei.csv
Processing 20250328 1 T79 sample 5.czi
Metadata: {'rgb': False, 'channel_axis': 2, 'translate': (0.0, 0.0, 29836.52081298828, 34079.996032714844), 'scale': (1.0, 1.0, 0.0974884033203125, 0.0974884033203125), 'contrast_limits': None, 'name': ['AF546-T1', 'DAPI-T2', 'AF647-T2']}
Image shape: (78, 3, 2048, 2048)
Loaded ../results/20250328 1

## Run on all images

In [20]:
raw_data_dirs = os.listdir('../raw-data/')
raw_data_p14 = [d for d in raw_data_dirs if '14' in d]
raw_data_p14

['20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI',
 '20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI',
 '20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI',
 '20250325 5 p14 g1-b1-546 lnc7-b2-647 dapi',
 '20250328 2 P14 R2-b3-488 Q1-b1-546 Lnc6-b2-647 DAPI',
 '20250328 4 P14 9E129-b3-488 LOC104-b1-546 9E116-b2-647 dapi',
 '20250325 4 p14 u34-b3-488 lnc4-b1-546 u21-b5-647 dapi',
 '20250328 3 P14 Lnc3-b3-488 L16-b2-594 Lnc2-b5-647 DAPI']

In [21]:
for input in raw_data_p14: 

    input_dir = f'../raw-data/{input}/'
    assert os.path.exists(input_dir), 'Input directory does not exist'
    czi_files = [f for f in os.listdir(input_dir) if f.endswith('.czi')]
    print(f"Found {len(czi_files)} czi files in {input_dir}")
    print(czi_files)

    results_dir = f'../results/{input}'
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
        
    for f in czi_files[:]:
        print(f"Processing {f}")
        file_path = os.path.join(input_dir, f)
        reader = napari_get_reader(file_path)
        if reader is not None:
            layer_data = reader(file_path)
            image_data, metadata, layer_type = layer_data[0]

            # Remove singleton dimension 
            image_data = np.squeeze(image_data)  
            print("Metadata:", metadata)
            print("Image shape:", image_data.shape)  

            # Load ROIs 
            all_rois_path = f'../results/{input}/{f.replace(".czi", "_rois.npy")}'
            all_rois = np.load(all_rois_path)
            print(f"Loaded {all_rois_path}")

            dapi_index = metadata['name'].index([name for name in metadata['name'] if 'DAPI' in name][0])
            print(f"DAPI channel index: {dapi_index}")

            non_dapi_channels = [(i, metadata['name'][i].split('-T')[0]) for i in range(len(metadata['name'])) if i != dapi_index]
            print(f"Non-DAPI channels: {non_dapi_channels}")

            image_data = normalize_stack_by_dapi(image_data, dapi_channel=dapi_index, method='mean')
            print("Normalized image data using DAPI intensity.")

            # Quantify 
            results_image = parallelize_quantify_nuclei(image_data, f, all_rois, non_dapi_channels, n_threads=n_threads)

            # Sort by image, z, and z_id 
            results_image = results_image.sort_values(by=['image', 'z', 'z_id']).reset_index(drop=True) 

            # Save the results 
            results_image_path = os.path.join(results_dir, f.replace(".czi", "_nuclei.csv"))
            print(f"Saving results to {results_image_path}")
            results_image.to_csv(results_image_path, index=False)

Found 5 czi files in ../raw-data/20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI/
['20250307 B1 Sample 5 Stack.czi', '20250307 B1 Sample 4 Stack.czi', '20250307 B1 Sample 1 Stack.czi', '20250307 B1 Sample 3 Stack.czi', '20250307 B1 Sample 2 Stack.czi']
Processing 20250307 B1 Sample 5 Stack.czi
Metadata: {'rgb': False, 'channel_axis': 2, 'translate': (0.0, 0.0, 0.0, 0.0), 'scale': (1.0, 0.5, 0.0974884033203125, 0.0974884033203125), 'contrast_limits': None, 'name': ['AF546-T1', 'DAPI-T2', 'AF647-T2']}
Image shape: (202, 3, 2048, 2048)
Loaded ../results/20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI/20250307 B1 Sample 5 Stack_rois.npy
DAPI channel index: 1
Non-DAPI channels: [(0, 'AF546'), (2, 'AF647')]
Normalized image data using DAPI intensity.
Saving results to ../results/20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI/20250307 B1 Sample 5 Stack_nuclei.csv
Processing 20250307 B1 Sample 4 Stack.czi
Metadata: {'rgb': False, 'channel_axis': 2, 'translate': (0.0, 0.0, 0.0, 0.0