## Plot example images of "chosen OR" cells from every image 

In [1]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from skimage.measure import regionprops
from scipy import stats
from napari_czifile2 import napari_get_reader
from skimage import io
import os 
import pandas as pd
pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Keep only the raw-data folders whose name contains 'P14'
raw_data_root = '../../../RNA-FISH-raw-data/'
raw_data_dirs = [folder for folder in os.listdir(raw_data_root) if 'P14' in folder]
raw_data_dirs

['20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI',
 '20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI',
 '20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI',
 '20250325 6 P14 9E198-B3-488 9E197-B1-546 9E196-B2-647 DAPI',
 '20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI',
 '20250508 P14 T 9E99-b2-594 LOC105286072-B5-647 DAPI',
 '20250328 2 P14 R2-b3-488 Q1-b1-546 Lnc6-b2-647 DAPI',
 '20250325 5 P14 G1-b1-546 lnc7-B2-647 DAPI',
 '20250523 C P14 T U54-647 Lnc4-546 DAPI',
 '20250523 A P14 T 9E213-546 9E214-488 Intergenic-647 DAPI',
 '20250325 4 P14 U34-B3-488 lnc4-B1-546 U21-B5-647 DAPI',
 '20250328 3 P14 Lnc3-b3-488 L16-b2-594 Lnc2-b5-647 DAPI',
 '20250701 A P14 9E118-EXONS-B1-546 9E118-INTRONS-B2-647 9E129-B3-488 DAPI',
 '20250328 4 P14 9E129-b3-488 LOC104-b1-546 9E116-b2-647 DAPI',
 '20250701 F P14 U34-b3-546 Orco-b4-647 DAPI',
 '20250523 B P14 T 9E88-546 Lnc-647 DAPI']

In [3]:
def plot_2_channels(cell, c1, c2, c1_name, c2_name, c1_color, c2_color,
                    image, metadata, all_rois, file_name, output_dir,
                    pixel_bounds=100, bbox_expansion=5,
                    dapi_max=255, c1_max=255, c2_max=255):
    """Save an example figure of one cell from a two-probe image (DAPI + c1 + c2).

    The figure has a composite crop centred on the cell (left) and one zoom
    panel per channel (right: DAPI, c1, c2) showing the cell's bounding box.
    It is written to
    ``<output_dir>/<file_name without '.czi'>-z-<z>-z_id-<z_id>.png``.

    Parameters
    ----------
    cell : mapping (e.g. a DataFrame row)
        Must provide 'centroid_x', 'centroid_y', 'z' and 'z_id'.
    c1, c2 : str
        Channel identifiers; only the part before the first '-' is used and it
        is matched as a substring of the entries in ``metadata['name']``.
    c1_name, c2_name : str
        Labels written on the composite image.
    c1_color, c2_color : str
        Pseudo-colour per probe: 'magenta', 'cyan' or 'yellow'.
    image : array
        Indexed as ``image[z, channel, y, x]``.
    metadata : dict
        Reads ``metadata['scale'][2]`` (assumed to be um per pixel, used for the
        5 um scale bar - TODO confirm units) and ``metadata['name']``.
    all_rois : array
        Label image stack indexed as ``all_rois[z, y, x]``.
    file_name, output_dir : str
        Used to build the output path.
    pixel_bounds : int
        Half-width of the square crop around the centroid, in pixels.
    bbox_expansion : int
        Padding added on every side of the cell's bounding box, in pixels.
    dapi_max, c1_max, c2_max : number
        Intensity mapped to full brightness for each channel.

    Returns
    -------
    None. Cells whose crop would leave the image are skipped with a message.

    Raises
    ------
    ValueError
        If a colour is not supported, or no ROI is labelled ``cell['z_id']``.
    IndexError
        If a required channel is not found in ``metadata['name']``.
    """
    # RGB weights of every supported pseudo-colour.
    color_weights = {
        "magenta": (1.0, 0.0, 1.0),
        "cyan": (0.0, 1.0, 1.0),
        "yellow": (1.0, 1.0, 0.0),
    }
    # Fail loudly: an unknown colour would otherwise silently drop the channel.
    for color in (c1_color, c2_color):
        if color not in color_weights:
            raise ValueError(
                f"Unsupported color {color!r}; expected one of {sorted(color_weights)}."
            )

    def set_min_max(channel, min_val, max_val):
        """Rescale ``channel`` from [min_val, max_val] to [0, 255], clipping outliers."""
        if min_val == max_val:
            return np.zeros_like(channel)  # Degenerate range: nothing to show
        clipped = np.clip(channel, min_val, max_val)
        rescaled = (clipped - min_val) / (max_val - min_val) * 255
        return np.clip(rescaled, 0, 255)

    def channel_index(token):
        """Index of the first channel whose name contains ``token``."""
        for idx, name in enumerate(metadata['name']):
            if token in name:
                return idx
        raise IndexError(f"No channel containing {token!r} in {metadata['name']}")

    def to_rgb(intensity, weights):
        """Spread a [0, 255] intensity plane over RGB with ``weights``."""
        return intensity[..., np.newaxis] * np.asarray(weights, dtype=float)

    # Length of the 5 um scale bar in pixels
    x_scale_5 = 5 / metadata['scale'][2]

    # Probe descriptors; identifiers keep only the part before the first '-'
    probes = [
        (c1.split("-")[0], c1_name, c1_color, c1_max),
        (c2.split("-")[0], c2_name, c2_color, c2_max),
    ]

    # Square crop around the centroid
    centre_x, centre_y = int(cell['centroid_x']), int(cell['centroid_y'])
    x_bounds = (centre_x - pixel_bounds, centre_x + pixel_bounds)
    y_bounds = (centre_y - pixel_bounds, centre_y + pixel_bounds)
    if (y_bounds[0] < 0 or y_bounds[1] >= image.shape[2] or
            x_bounds[0] < 0 or x_bounds[1] >= image.shape[3]):
        print(f"Skipping cell {cell['z_id']} in image {file_name} due to out-of-bounds coordinates.")
        return

    cell_image = image[cell['z'], :, y_bounds[0]:y_bounds[1], x_bounds[0]:x_bounds[1]]
    crop_h, crop_w = cell_image.shape[1], cell_image.shape[2]

    # One layer per displayed channel:
    # (label, legend colour, zoom border colour, rescaled intensity, RGB weights)
    dapi_adjusted = set_min_max(cell_image[channel_index('DAPI')], 0, dapi_max)
    layers = [("DAPI", 'white', 'grey', dapi_adjusted, (1.0, 1.0, 1.0))]
    for token, label, color, max_val in probes:
        adjusted = set_min_max(cell_image[channel_index(token)], 0, max_val)
        layers.append((label, color, color, adjusted, color_weights[color]))

    # Additive composite, normalised to [0, 1]
    rgb_image = np.zeros((crop_h, crop_w, 3), dtype=float)
    for _, _, _, adjusted, weights in layers:
        rgb_image += to_rgb(adjusted, weights)
    rgb_image = np.clip(rgb_image / 255, 0, 1)

    # Bounding box of this cell's ROI in the current z plane
    regions = regionprops(all_rois[cell['z'], :, :])
    region = next((r for r in regions if r.label == cell['z_id']), None)
    if region is None:
        raise ValueError(f"ROI label {cell['z_id']} not found in z plane {cell['z']} of {file_name}.")
    min_row, min_col, max_row, max_col = region.bbox
    # Pad the box, move it into crop coordinates, and clamp it to the crop so
    # negative indices cannot wrap around when slicing the zoom panels.
    y1 = max(min_row - bbox_expansion - y_bounds[0], 0)
    x1 = max(min_col - bbox_expansion - x_bounds[0], 0)
    y2 = min(max_row + bbox_expansion - y_bounds[0], crop_h)
    x2 = min(max_col + bbox_expansion - x_bounds[0], crop_w)

    # Layout: composite in the left column, one zoom panel per layer on the right
    fig_width = 1.0
    fig_height = 0.75
    plt.ioff()  # Figures are only saved, never shown in the notebook
    f = plt.figure(figsize=(fig_width, fig_height), dpi=400)
    try:
        gs = gridspec.GridSpec(nrows=len(layers), ncols=2, width_ratios=[4, 1])
        ax_main = f.add_subplot(gs[:, 0])
        zoom_axes = [f.add_subplot(gs[row, 1]) for row in range(len(layers))]

        # Composite with legend, scale bar and the cell's box
        ax_main.imshow(rgb_image)
        ax_main.axis("off")
        for row, (label, text_color, _, _, _) in enumerate(layers):
            ax_main.text(5, 25 + 20 * row, label, color=text_color, fontsize=6, weight='regular')
        ax_main.plot([2*pixel_bounds-20-x_scale_5, 2*pixel_bounds-20],
                     [2*pixel_bounds-20, 2*pixel_bounds-20], color='white', lw=1)
        ax_main.add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, edgecolor='white', lw=1))

        # Zoom panels, each framed in its channel colour
        for ax, (_, _, border_color, adjusted, weights) in zip(zoom_axes, layers):
            zoom = adjusted[y1:y2, x1:x2]
            ax.imshow(np.clip(to_rgb(zoom, weights) / 255, 0, 1))
            ax.axis("off")
            ax.add_patch(plt.Rectangle((0, 0), zoom.shape[1]-1, zoom.shape[0]-1,
                                       fill=False, edgecolor=border_color, lw=1))

        f.tight_layout(pad=0)
        f.savefig(f"{output_dir}/{file_name.replace('.czi', '')}-z-{cell['z']}-z_id-{cell['z_id']}.png",
                  format="png", pad_inches=0, bbox_inches='tight', transparent=False)
    finally:
        plt.close(f)  # Always release the figure, even if drawing or saving fails


In [4]:
def plot_3_channels(cell, c1, c2, c3, c1_name, c2_name, c3_name, c1_color, c2_color, c3_color,
                    image, metadata, all_rois, file_name, output_dir,
                    pixel_bounds=100, bbox_expansion=5,
                    dapi_max=255, c1_max=255, c2_max=255, c3_max=255):
    """Save an example figure of one cell from a three-probe image (DAPI + c1 + c2 + c3).

    The figure has a composite crop centred on the cell (left) and one zoom
    panel per channel (right: DAPI, c1, c2, c3) showing the cell's bounding box.
    It is written to
    ``<output_dir>/<file_name without '.czi'>-z-<z>-z_id-<z_id>.png``.

    Parameters
    ----------
    cell : mapping (e.g. a DataFrame row)
        Must provide 'centroid_x', 'centroid_y', 'z' and 'z_id'.
    c1, c2, c3 : str
        Channel identifiers; only the part before the first '-' is used and it
        is matched as a substring of the entries in ``metadata['name']``.
    c1_name, c2_name, c3_name : str
        Labels written on the composite image.
    c1_color, c2_color, c3_color : str
        Pseudo-colour per probe: 'magenta', 'cyan' or 'yellow'.
    image : array
        Indexed as ``image[z, channel, y, x]``.
    metadata : dict
        Reads ``metadata['scale'][2]`` (assumed to be um per pixel, used for the
        5 um scale bar - TODO confirm units) and ``metadata['name']``.
    all_rois : array
        Label image stack indexed as ``all_rois[z, y, x]``.
    file_name, output_dir : str
        Used to build the output path.
    pixel_bounds : int
        Half-width of the square crop around the centroid, in pixels.
    bbox_expansion : int
        Padding added on every side of the cell's bounding box, in pixels.
    dapi_max, c1_max, c2_max, c3_max : number
        Intensity mapped to full brightness for each channel.

    Returns
    -------
    None. Cells whose crop would leave the image are skipped with a message.

    Raises
    ------
    ValueError
        If a colour is not supported, or no ROI is labelled ``cell['z_id']``.
    IndexError
        If a required channel is not found in ``metadata['name']``.
    """
    # RGB weights of every supported pseudo-colour.
    color_weights = {
        "magenta": (1.0, 0.0, 1.0),
        "cyan": (0.0, 1.0, 1.0),
        "yellow": (1.0, 1.0, 0.0),
    }
    # Fail loudly: an unknown colour would otherwise silently drop the channel.
    for color in (c1_color, c2_color, c3_color):
        if color not in color_weights:
            raise ValueError(
                f"Unsupported color {color!r}; expected one of {sorted(color_weights)}."
            )

    def set_min_max(channel, min_val, max_val):
        """Rescale ``channel`` from [min_val, max_val] to [0, 255], clipping outliers."""
        if min_val == max_val:
            return np.zeros_like(channel)  # Degenerate range: nothing to show
        clipped = np.clip(channel, min_val, max_val)
        rescaled = (clipped - min_val) / (max_val - min_val) * 255
        return np.clip(rescaled, 0, 255)

    def channel_index(token):
        """Index of the first channel whose name contains ``token``."""
        for idx, name in enumerate(metadata['name']):
            if token in name:
                return idx
        raise IndexError(f"No channel containing {token!r} in {metadata['name']}")

    def to_rgb(intensity, weights):
        """Spread a [0, 255] intensity plane over RGB with ``weights``."""
        return intensity[..., np.newaxis] * np.asarray(weights, dtype=float)

    # Length of the 5 um scale bar in pixels
    x_scale_5 = 5 / metadata['scale'][2]

    # Probe descriptors; identifiers keep only the part before the first '-'
    probes = [
        (c1.split("-")[0], c1_name, c1_color, c1_max),
        (c2.split("-")[0], c2_name, c2_color, c2_max),
        (c3.split("-")[0], c3_name, c3_color, c3_max),
    ]

    # Square crop around the centroid
    centre_x, centre_y = int(cell['centroid_x']), int(cell['centroid_y'])
    x_bounds = (centre_x - pixel_bounds, centre_x + pixel_bounds)
    y_bounds = (centre_y - pixel_bounds, centre_y + pixel_bounds)
    if (y_bounds[0] < 0 or y_bounds[1] >= image.shape[2] or
            x_bounds[0] < 0 or x_bounds[1] >= image.shape[3]):
        print(f"Skipping cell {cell['z_id']} in image {file_name} due to out-of-bounds coordinates.")
        return

    cell_image = image[cell['z'], :, y_bounds[0]:y_bounds[1], x_bounds[0]:x_bounds[1]]
    crop_h, crop_w = cell_image.shape[1], cell_image.shape[2]

    # One layer per displayed channel:
    # (label, legend colour, zoom border colour, rescaled intensity, RGB weights)
    dapi_adjusted = set_min_max(cell_image[channel_index('DAPI')], 0, dapi_max)
    layers = [("DAPI", 'white', 'grey', dapi_adjusted, (1.0, 1.0, 1.0))]
    for token, label, color, max_val in probes:
        adjusted = set_min_max(cell_image[channel_index(token)], 0, max_val)
        layers.append((label, color, color, adjusted, color_weights[color]))

    # Additive composite, normalised to [0, 1]
    rgb_image = np.zeros((crop_h, crop_w, 3), dtype=float)
    for _, _, _, adjusted, weights in layers:
        rgb_image += to_rgb(adjusted, weights)
    rgb_image = np.clip(rgb_image / 255, 0, 1)

    # Bounding box of this cell's ROI in the current z plane
    regions = regionprops(all_rois[cell['z'], :, :])
    region = next((r for r in regions if r.label == cell['z_id']), None)
    if region is None:
        raise ValueError(f"ROI label {cell['z_id']} not found in z plane {cell['z']} of {file_name}.")
    min_row, min_col, max_row, max_col = region.bbox
    # Pad the box, move it into crop coordinates, and clamp it to the crop so
    # negative indices cannot wrap around when slicing the zoom panels.
    y1 = max(min_row - bbox_expansion - y_bounds[0], 0)
    x1 = max(min_col - bbox_expansion - x_bounds[0], 0)
    y2 = min(max_row + bbox_expansion - y_bounds[0], crop_h)
    x2 = min(max_col + bbox_expansion - x_bounds[0], crop_w)

    # Layout: composite in the left column, one zoom panel per layer on the right
    fig_width = 1.0
    fig_height = 0.75
    plt.ioff()  # Figures are only saved, never shown in the notebook
    f = plt.figure(figsize=(fig_width, fig_height), dpi=400)
    try:
        gs = gridspec.GridSpec(nrows=len(layers), ncols=2, width_ratios=[4, 1])
        ax_main = f.add_subplot(gs[:, 0])
        zoom_axes = [f.add_subplot(gs[row, 1]) for row in range(len(layers))]

        # Composite with legend, scale bar and the cell's box
        ax_main.imshow(rgb_image)
        ax_main.axis("off")
        for row, (label, text_color, _, _, _) in enumerate(layers):
            ax_main.text(5, 25 + 20 * row, label, color=text_color, fontsize=6, weight='regular')
        ax_main.plot([2*pixel_bounds-20-x_scale_5, 2*pixel_bounds-20],
                     [2*pixel_bounds-20, 2*pixel_bounds-20], color='white', lw=1)
        ax_main.add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, edgecolor='white', lw=1))

        # Zoom panels, each framed in its channel colour
        for ax, (_, _, border_color, adjusted, weights) in zip(zoom_axes, layers):
            zoom = adjusted[y1:y2, x1:x2]
            ax.imshow(np.clip(to_rgb(zoom, weights) / 255, 0, 1))
            ax.axis("off")
            ax.add_patch(plt.Rectangle((0, 0), zoom.shape[1]-1, zoom.shape[0]-1,
                                       fill=False, edgecolor=border_color, lw=1))

        f.tight_layout(pad=0)
        f.savefig(f"{output_dir}/{file_name.replace('.czi', '')}-z-{cell['z']}-z_id-{cell['z_id']}.png",
                  format="png", pad_inches=0, bbox_inches='tight', transparent=False)
    finally:
        plt.close(f)  # Always release the figure, even if drawing or saving fails

In [5]:
# Thresholds used by the export loop when classifying cells before sampling:
# a cell counts as "nuclear" when its 'nuc-norm' value exceeds this threshold.
coexpression_nuc_norm_threshold = 0.1
# A cell counts as "cytoplasmic" when its 'cyto-norm' value exceeds this
# threshold AND its 'nuc-norm' value exceeds the nuclear threshold above.
cyto_norm_threshold = 0.2 

## Run for one folder

In [46]:
# Fragment of the folder name identifying the experiment to process
folder_query = 'LOC603-b3-488'
# Resolve it to the first matching P14 folder (IndexError if nothing matches).
# NOTE: the name `input` shadows the builtin but is read by the following cells.
matching_dirs = [name for name in raw_data_dirs if folder_query in name]
input = matching_dirs[0]
print(f'Using {input} as input directory')

input_dir = f'../../../RNA-FISH-raw-data/{input}/'
assert os.path.exists(input_dir), 'Input directory does not exist'

# Images available for this experiment
czi_files = [entry for entry in os.listdir(input_dir) if entry.endswith('.czi')]
print(f"Found {len(czi_files)} czi files in {input_dir}")
print(czi_files)

Using 20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI as input directory
Found 6 czi files in ../../../RNA-FISH-raw-data/20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI/
['20250328 5 ZP sample 4.czi', '20250328 5 ZP sample 3.czi', '20250328 5 ZP sample 2.czi', '20250328 5 ZP sample 5.czi', '20250328 5 ZP sample 6.czi', '20250328 5 ZP sample 1.czi']


In [47]:
# Output folder for this experiment's example-cell figures
figure_dir = f'../example-cells/{input}'
os.makedirs(figure_dir, exist_ok=True)
# Empty the folder left over from a previous run. A single bottom-up walk is
# enough: files are removed before the (then empty) directories that hold them.
for root, dirs, files in os.walk(figure_dir, topdown=False):
    for name in files:
        os.remove(os.path.join(root, name))
    for name in dirs:
        os.rmdir(os.path.join(root, name))

In [48]:
# Channel layout per experiment folder:
# (fragment of the folder name, fluorophore channels, display names).
# The first fragment found in `input` wins, so the order of the rows matters.
channel_layouts = [
    ("9E99-b2-594 LOC105286072-B5-647", ["AF594", "AF647"], ["9E99", "LOC105286072"]),
    ("U34-b3-546 Orco-b4-647", ["AF546", "AF647"], ["U34", "Orco"]),
    ("9E197", ["AF546", "AF488", "AF647"], ["9E197", "9E198", "9E196"]),
    ("9E89-b1-546 9E99-B2-647", ["AF546", "AF647"], ["9E89", "9E99"]),
    ("Chymotrypsin", ["AF546", "AF647"], ["U34", "Chymotrypsin"]),
    ("LOC603-b3-488 9E108-b1-546 9E116-b2-647", ["AF546", "AF488", "AF647"], ["9E108", "LOC105282603", "9E116"]),
    ("R2-b3-488", ["AF488", "AF546", "AF647"], ["R2/3", "Q1", "PR"]),
    ("G1-b1-546 ", ["AF546", "AF647"], ["G1", "LOC113562161"]),
    ("U54-647 Lnc4-546", ["AF546", "AF647"], ["PR", "U54"]),
    ("9E213-546 9E214-488 Intergenic-647", ["AF546", "AF488", "AF647"], ["9E213", "9E214", "IPR"]),
    ("U34-B3-488 lnc4-B1-546 U21-B5-647", ["AF488", "AF546", "AF647"], ["U34", "PR", "U21"]),
    ("Lnc3-b3-488 L16-b2-594 Lnc2-b5-647", ["AF488", "AF594", "AF647"], ["PR2", "L16", "PR"]),
    ("9E118-EXONS-B1-546 9E118-INTRONS-B2-647 9E129-B3-488", ["AF546", "AF647", "AF488"], ["9E118 Exons", "9E118 Introns", "9E129"]),
    ("9E129-b3-488 LOC104-b1-546 9E116-b2-647", ["AF488", "AF546", "AF647"], ["9E129", "Kr-h1", "9E116"]),
    ("9E88-546 Lnc-647", ["AF546", "AF647"], ["9E88", "PR"]),
    ("T79-intergenic-b2-647 T79-exonic-b1-546 DAPI", ["AF546", "AF647"], ["T79 Exons", "T79 Intergenics"]),
]

for folder_fragment, layout_channels, layout_names in channel_layouts:
    if folder_fragment in input:
        channels = list(layout_channels)
        channel_names = list(layout_names)
        break
else:
    # No row matched this folder name
    raise ValueError(f"Input {input} not recognized. Please check the input directory name.")

In [None]:
# Export example figures for every image of the selected experiment folder.
# Only the .czi files are images; any other entry in the folder is ignored.
results_files = [name for name in os.listdir(f'../../../RNA-FISH-raw-data/{input}')
                 if name.endswith('.czi')]

# The result tables cover the whole folder, so they are read once up front
results_dir = f'../results/{input}'
results_path = os.path.join(results_dir, 'all_results_nuclei_cytoplasm_markers.csv')
results = pd.read_csv(results_path)
results_melted_path = os.path.join(results_dir, 'all_results_melted.csv')
results_melted_all = pd.read_csv(results_melted_path, dtype={'channel_name': str})

# Number of cells to sample from each category
n_cells = 5

for file_number, file_name in enumerate(results_files):
    print(f"Processing file {file_number+1}/{len(results_files)}: {file_name}")

    image = file_name
    image_path = f'../../../RNA-FISH-raw-data/{input}/{image}'
    reader = napari_get_reader(image_path)
    if reader is None:
        # Without a reader there is no image for this file; skip it rather
        # than reuse the image loaded in a previous iteration.
        print(f"No reader available for {image_path}. Skipping.")
        continue
    layer_data = reader(image_path)
    image_data, metadata, layer_type = layer_data[0]
    print(f"Loaded {image_path}")
    print("Metadata:", metadata)
    print("Image shape:", image_data.shape)

    # Squeeze the image data to remove singleton dimensions
    image_data = image_data.squeeze()

    # Load ROIs
    all_rois_path = f'../results/{input}/{image.replace(".czi", "_rois.npy")}'
    all_rois = np.load(all_rois_path)
    print(f"Loaded {all_rois_path}")

    # Subset melted results for the current image
    results_melted = results_melted_all[results_melted_all['image'] == image].reset_index(drop=True)

    for cytoplasmic_channel in results_melted['cytoplasmic_channel'].unique():
        for channel in results_melted['channel'].unique():
            # Skip if the cytoplasmic channel is the same as the channel
            if cytoplasmic_channel == channel:
                continue

            # Results for this combination of channels; copy so the columns
            # added below do not write into a slice of results_melted
            results_subset = results_melted[(results_melted['cytoplasmic_channel'] == cytoplasmic_channel) &
                                            (results_melted['channel'] == channel)].copy()
            if results_subset.empty:
                print(f"No results found for cytoplasmic channel {cytoplasmic_channel} and channel {channel}. Skipping.")
                continue

            # Get name of the cytoplasmic channel and directory
            cytoplasmic_channel_name = results_subset['chosen_OR_name'].unique()[0]
            cytoplasmic_dir = os.path.join(figure_dir, cytoplasmic_channel_name.replace(" ", "-"))
            os.makedirs(cytoplasmic_dir, exist_ok=True)

            # Get name of the channel and directories ('/' is not valid in a folder name)
            channel_name = results_subset['channel_name'].unique()[0]
            channel_nuc_dir = os.path.join(cytoplasmic_dir, f"{channel_name.replace('/','-')}-nuclear")
            channel_cyto_dir = os.path.join(cytoplasmic_dir, f"{channel_name.replace('/','-')}-cytoplasmic")
            os.makedirs(channel_nuc_dir, exist_ok=True)
            os.makedirs(channel_cyto_dir, exist_ok=True)

            # Assess whether the cells are nuclear or cytoplasmic
            results_subset['nuclear'] = results_subset['nuc-norm'] > coexpression_nuc_norm_threshold
            results_subset['cytoplasmic'] = ((results_subset['cyto-norm'] > cyto_norm_threshold) &
                                             (results_subset['nuc-norm'] > coexpression_nuc_norm_threshold))

            # Sample up to n_cells from each category (fixed seed for reproducibility)
            nuclear_pool = results_subset[results_subset['nuclear']]
            cytoplasmic_pool = results_subset[results_subset['cytoplasmic']]
            nuclear_cells = nuclear_pool.sample(n=min(n_cells, len(nuclear_pool)), random_state=0).reset_index(drop=True)
            cytoplasmic_cells = cytoplasmic_pool.sample(n=min(n_cells, len(cytoplasmic_pool)), random_state=0).reset_index(drop=True)

            # Export nuclear cells first, then cytoplasmic cells
            for category, sampled_cells, category_dir in (("nuclear", nuclear_cells, channel_nuc_dir),
                                                          ("cytoplasmic", cytoplasmic_cells, channel_cyto_dir)):
                for i, cell in sampled_cells.iterrows():
                    print(f"Processing {category} cell {i+1}/{len(sampled_cells)}: {cell['z_id']} in image {image}")
                    # The centroid is only stored in the per-cell results table
                    results_cell = results[(results['image'] == cell['image']) &
                                           (results['z'] == cell['z']) &
                                           (results['z_id'] == cell['z_id'])].iloc[0]
                    cell = cell.copy()
                    cell['centroid_x'] = results_cell['centroid_x']
                    cell['centroid_y'] = results_cell['centroid_y']
                    if len(channels) == 2:
                        plot_2_channels(cell, channels[0], channels[1], channel_names[0], channel_names[1],
                                        c1_color='magenta', c2_color='cyan',
                                        image=image_data, metadata=metadata, all_rois=all_rois,
                                        file_name=image, output_dir=category_dir,
                                        pixel_bounds=100, bbox_expansion=5, dapi_max=150, c1_max=150, c2_max=150)
                    elif len(channels) == 3:
                        plot_3_channels(cell, channels[0], channels[1], channels[2], channel_names[0], channel_names[1], channel_names[2],
                                        c1_color='magenta', c2_color='cyan', c3_color='yellow',
                                        image=image_data, metadata=metadata, all_rois=all_rois,
                                        file_name=image, output_dir=category_dir,
                                        pixel_bounds=100, bbox_expansion=5, dapi_max=150, c1_max=150, c2_max=150, c3_max=150)

Processing file 1/6: 20250328 5 ZP sample 4.czi
Loaded ../../../RNA-FISH-raw-data/20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI/20250328 5 ZP sample 4.czi
Metadata: {'rgb': False, 'channel_axis': 2, 'translate': (0.0, 0.0, 0.0, 0.0), 'scale': (1.0, 1.0, 0.0974884033203125, 0.0974884033203125), 'contrast_limits': None, 'name': ['AF546-T1', 'AF488-T2', 'DAPI-T3', 'AF647-T3']}
Image shape: (1, 85, 4, 2048, 2048)
Loaded ../results/20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI/20250328 5 ZP sample 4_rois.npy
Processing nuclear cell 1/5: 327 in image 20250328 5 ZP sample 4.czi
Processing nuclear cell 2/5: 358 in image 20250328 5 ZP sample 4.czi
Processing nuclear cell 3/5: 459 in image 20250328 5 ZP sample 4.czi
Processing nuclear cell 4/5: 254 in image 20250328 5 ZP sample 4.czi
Processing nuclear cell 5/5: 96 in image 20250328 5 ZP sample 4.czi
Processing cytoplasmic cell 1/5: 457 in image 20250328 5 ZP sample 4.czi
Processing cytoplasmic cell 2/5: 108 in image 

## Run for all images 

In [6]:
# Re-list every raw-data folder, then keep the ones whose name contains 'P14'
raw_data_dirs = os.listdir('../../../RNA-FISH-raw-data/')
raw_data_p14 = list(filter(lambda folder: 'P14' in folder, raw_data_dirs))
raw_data_p14

['20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI',
 '20250307 B1 P14 U34-B3-546 Chymotrypsin-B2-647 DAPI',
 '20250328 1 P14 T79-intergenic-b2-647 T79-exonic-b1-546 DAPI',
 '20250325 6 P14 9E198-B3-488 9E197-B1-546 9E196-B2-647 DAPI',
 '20250328 5 P14 LOC603-b3-488 9E108-b1-546 9E116-b2-647 DAPI',
 '20250508 P14 T 9E99-b2-594 LOC105286072-B5-647 DAPI',
 '20250328 2 P14 R2-b3-488 Q1-b1-546 Lnc6-b2-647 DAPI',
 '20250325 5 P14 G1-b1-546 lnc7-B2-647 DAPI',
 '20250523 C P14 T U54-647 Lnc4-546 DAPI',
 '20250523 A P14 T 9E213-546 9E214-488 Intergenic-647 DAPI',
 '20250325 4 P14 U34-B3-488 lnc4-B1-546 U21-B5-647 DAPI',
 '20250328 3 P14 Lnc3-b3-488 L16-b2-594 Lnc2-b5-647 DAPI',
 '20250701 A P14 9E118-EXONS-B1-546 9E118-INTRONS-B2-647 9E129-B3-488 DAPI',
 '20250328 4 P14 9E129-b3-488 LOC104-b1-546 9E116-b2-647 DAPI',
 '20250701 F P14 U34-b3-546 Orco-b4-647 DAPI',
 '20250523 B P14 T 9E88-546 Lnc-647 DAPI']

In [8]:
# Export example-cell figures for every dataset listed in `raw_data_p14`.
#
# For each dataset directory this cell:
#   1. resolves the raw-data directory and (re)creates an empty figure directory,
#   2. picks the fluorophore channels / display names from the directory name,
#   3. for each .czi image (skipping names containing "BAD") loads the image stack,
#      its ROI array and the per-dataset results tables,
#   4. for each (cytoplasmic_channel, channel) pair samples up to `n_cells` cells
#      flagged "nuclear" and "cytoplasmic" and plots them with
#      plot_2_channels / plot_3_channels.
#
# Names expected from other cells: raw_data_p14, raw_data_dirs, plot_2_channels,
# plot_3_channels, coexpression_nuc_norm_threshold, cyto_norm_threshold.

# Number of cells to sample from each category
n_cells = 10

# Ordered (directory-name substring, channels, channel names) lookup.
# The FIRST matching substring wins, so the order of the entries matters.
channel_layouts = [
    ("9E99-b2-594 LOC105286072-B5-647", ["AF594", "AF647"], ["9E99", "LOC105286072"]),
    ("U34-b3-546 Orco-b4-647", ["AF546", "AF647"], ["U34", "Orco"]),
    ("9E197", ['AF546', 'AF488', 'AF647'], ["9E197", "9E198", "9E196"]),
    ("9E89-b1-546 9E99-B2-647", ['AF546', 'AF647'], ["9E89", "9E99"]),
    ("Chymotrypsin", ["AF546", "AF647"], ["U34", "Chymotrypsin"]),
    ("LOC603-b3-488 9E108-b1-546 9E116-b2-647", ['AF546', 'AF488', 'AF647'], ["9E108", "LOC105282603", "9E116"]),
    ("R2-b3-488", ['AF488', 'AF546', 'AF647'], ["R2/3", "Q1", "PR"]),
    ("G1-b1-546 ", ["AF546", "AF647"], ["G1", "LOC113562161"]),
    ("U54-647 Lnc4-546", ["AF546", "AF647"], ["PR", "U54"]),
    ("9E213-546 9E214-488 Intergenic-647", ["AF546", "AF488", "AF647"], ["9E213", "9E214", "IPR"]),
    ("U34-B3-488 lnc4-B1-546 U21-B5-647", ["AF488", "AF546", "AF647"], ["U34", "PR", "U21"]),
    ("Lnc3-b3-488 L16-b2-594 Lnc2-b5-647", ["AF488", "AF594", "AF647"], ["PR2", "L16", "PR"]),
    ("9E118-EXONS-B1-546 9E118-INTRONS-B2-647 9E129-B3-488", ["AF546", "AF647", "AF488"], ["9E118 Exons", "9E118 Introns", "9E129"]),
    ("9E129-b3-488 LOC104-b1-546 9E116-b2-647", ["AF488", "AF546", "AF647"], ["9E129", "Kr-h1", "9E116"]),
    ("9E88-546 Lnc-647", ["AF546", "AF647"], ["9E88", "PR"]),
    ("T79-intergenic-b2-647 T79-exonic-b1-546 DAPI", ["AF546", "AF647"], ["T79 Exons", "T79 Intergenics"]),
]

# NOTE(review): `input` shadows the Python builtin; the name is kept here only
# because other cells may read it after this cell has run.
for input in raw_data_p14:

    print(f'Using {input} as input directory')
    # Resolve the (possibly partial) name to an actual raw-data directory
    matching_dirs = [d for d in raw_data_dirs if input in d]
    if not matching_dirs:
        raise FileNotFoundError(f"No raw data directory matches '{input}'")
    input = matching_dirs[0]
    print(f'Using {input} as input directory')
    input_dir = f'../../../RNA-FISH-raw-data/{input}/'
    assert os.path.exists(input_dir), 'Input directory does not exist'
    czi_files = [f for f in os.listdir(input_dir) if f.endswith('.czi')]
    print(f"Found {len(czi_files)} czi files in {input_dir}")
    print(czi_files)

    figure_dir = f'../example-cells/{input}'
    os.makedirs(figure_dir, exist_ok=True)
    # Clear the directory if it already exists. Walking bottom-up lets every
    # sub-directory be emptied before os.rmdir is called on it; a single walk
    # removes all files, subdirectories and subsubdirectories.
    for root, dirs, files in os.walk(figure_dir, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            os.rmdir(os.path.join(root, name))

    # Pick the channel layout from the directory name (first match wins)
    for name_key, layout_channels, layout_names in channel_layouts:
        if name_key in input:
            channels = layout_channels
            channel_names = layout_names
            break
    else:
        raise ValueError(f"Input {input} not recognized. Please check the input directory name.")

    # The results tables depend only on the dataset, so load them once per
    # dataset instead of once per image.
    results_dir = f'../results/{input}'
    results_path = os.path.join(results_dir, 'all_results_nuclei_cytoplasm_markers.csv')
    results = pd.read_csv(results_path)

    results_melted_path = os.path.join(results_dir, 'all_results_melted.csv')
    results_melted_all = pd.read_csv(results_melted_path, dtype={'channel_name': str})

    # Iterate over the .czi images only; other files in the raw-data directory
    # cannot be opened by the czi reader.
    for file_number, file_name in enumerate(czi_files):
        print(f"Processing file {file_number+1}/{len(czi_files)}: {file_name}")

        # If "BAD" is in the file name, skip it
        if "BAD" in file_name:
            print(f"Skipping file {file_name} because it contains 'BAD'")
            continue

        image = file_name
        image_path = f'../../../RNA-FISH-raw-data/{input}/{image}'
        reader = napari_get_reader(image_path)
        if reader is None:
            # Without a reader there is no image data for this file; skip it
            # rather than reusing the arrays left over from the previous image.
            print(f"Skipping file {file_name} because no reader is available for it")
            continue
        layer_data = reader(image_path)
        image_data, metadata, layer_type = layer_data[0]
        print(f"Loaded {image_path}")
        print("Metadata:", metadata)
        print("Image shape:", image_data.shape)

        # Squeeze the image data to remove singleton dimensions
        image_data = image_data.squeeze()

        # Load ROIs
        all_rois_path = f'../results/{input}/{image.replace(".czi", "_rois.npy")}'
        all_rois = np.load(all_rois_path)
        print(f"Loaded {all_rois_path}")

        # Subset melted results for the current image
        results_melted = results_melted_all[results_melted_all['image'] == image].reset_index(drop=True)

        for cytoplasmic_channel in results_melted['cytoplasmic_channel'].unique():
            for channel in results_melted['channel'].unique():
                # Skip if the cytoplasmic channel is the same as the channel
                if cytoplasmic_channel == channel:
                    continue

                # Get the results for this combination of channels. Take an
                # explicit copy so the flag columns added below are written to
                # an independent frame, not to a slice of results_melted.
                results_subset = results_melted[(results_melted['cytoplasmic_channel'] == cytoplasmic_channel) &
                                                (results_melted['channel'] == channel)].copy()
                if results_subset.empty:
                    print(f"No results found for cytoplasmic channel {cytoplasmic_channel} and channel {channel}. Skipping.")
                    continue

                # Get name of the cytoplasmic channel and directory
                # NOTE(review): unlike channel_name below, '/' is not replaced
                # here, so a name containing '/' yields a nested directory.
                cytoplasmic_channel_name = results_subset['chosen_OR_name'].unique()[0]
                cytoplasmic_dir = os.path.join(figure_dir, cytoplasmic_channel_name.replace(" ", "-"))
                os.makedirs(cytoplasmic_dir, exist_ok=True)

                # Get name of the channel and directory
                channel_name = results_subset['channel_name'].unique()[0]
                channel_nuc_dir = os.path.join(cytoplasmic_dir, f"{channel_name.replace('/','-')}-nuclear")
                channel_cyto_dir = os.path.join(cytoplasmic_dir, f"{channel_name.replace('/','-')}-cytoplasmic")
                os.makedirs(channel_nuc_dir, exist_ok=True)
                os.makedirs(channel_cyto_dir, exist_ok=True)

                # Assess whether the cells are nuclear or cytoplasmic.
                # "cytoplasmic" requires both thresholds to be exceeded.
                results_subset['nuclear'] = results_subset['nuc-norm'] > coexpression_nuc_norm_threshold
                results_subset['cytoplasmic'] = (results_subset['cyto-norm'] > cyto_norm_threshold) & (results_subset['nuc-norm'] > coexpression_nuc_norm_threshold)

                # Sample up to n_cells from each category (fixed seed)
                nuclear_pool = results_subset[results_subset['nuclear']]
                cytoplasmic_pool = results_subset[results_subset['cytoplasmic']]
                nuclear_cells = nuclear_pool.sample(n=min(n_cells, len(nuclear_pool)), random_state=0).reset_index(drop=True)
                cytoplasmic_cells = cytoplasmic_pool.sample(n=min(n_cells, len(cytoplasmic_pool)), random_state=0).reset_index(drop=True)

                # Export nuclear cells first, then cytoplasmic cells; both
                # categories share the same plotting logic and differ only in
                # the sampled cells and the output directory.
                for category, sampled_cells, category_dir in (
                    ("nuclear", nuclear_cells, channel_nuc_dir),
                    ("cytoplasmic", cytoplasmic_cells, channel_cyto_dir),
                ):
                    for i, cell in sampled_cells.iterrows():
                        print(f"Processing {category} cell {i+1}/{len(sampled_cells)}: {cell['z_id']} in image {image}")
                        # Look up the cell's centroid in the per-cell results table
                        results_cell = results[(results['image'] == cell['image']) &
                                               (results['z'] == cell['z']) &
                                               (results['z_id'] == cell['z_id'])].iloc[0]
                        cell['centroid_x'] = results_cell['centroid_x']
                        cell['centroid_y'] = results_cell['centroid_y']
                        if len(channels) == 2:
                            plot_2_channels(cell, channels[0], channels[1], channel_names[0], channel_names[1],
                                            c1_color='magenta', c2_color='cyan',
                                            image=image_data, metadata=metadata, all_rois=all_rois,
                                            file_name=image, output_dir=category_dir,
                                            pixel_bounds=100, bbox_expansion=5, dapi_max=150, c1_max=150, c2_max=150)
                        elif len(channels) == 3:
                            plot_3_channels(cell, channels[0], channels[1], channels[2], channel_names[0], channel_names[1], channel_names[2],
                                            c1_color='magenta', c2_color='cyan', c3_color='yellow',
                                            image=image_data, metadata=metadata, all_rois=all_rois,
                                            file_name=image, output_dir=category_dir,
                                            pixel_bounds=100, bbox_expansion=5, dapi_max=150, c1_max=150, c2_max=150, c3_max=150)

Using 20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI as input directory
Using 20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI as input directory
Found 6 czi files in ../../../RNA-FISH-raw-data/20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI/
['20250508 9e89 9e99 sample 6.czi', '20250508 9e89 9e99 sample 1.czi', '20250508 9e89 9e99 sample 3 BAD.czi', '20250508 9e89 9e99 sample 4.czi', '20250508 9e89 9e99 sample 2.czi', '20250508 9e89 9e99 sample 5.czi']
Processing file 1/6: 20250508 9e89 9e99 sample 6.czi
Loaded ../../../RNA-FISH-raw-data/20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI/20250508 9e89 9e99 sample 6.czi
Metadata: {'rgb': False, 'channel_axis': 2, 'translate': (0.0, 0.0, 0.0, 0.0), 'scale': (1.0, 1.0, 0.0974884033203125, 0.0974884033203125), 'contrast_limits': None, 'name': ['AF546-T1', 'DAPI-T2', 'AF647-T2']}
Image shape: (1, 124, 3, 2048, 2048)
Loaded ../results/20250508 P14 T 9E89-b1-546 9E99-B2-647 DAPI/20250508 9e89 9e99 sample 6_rois.npy
Processing nuclear cell 1/10: 139 in image 20