In [1]:
%load_ext autoreload
%autoreload 2

In [32]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import sys
import os

# Point NOVA_HOME at the shared NOVA checkout and put that checkout on the
# import path (position 1, right after the first sys.path entry) so that the
# `src.*` imports below resolve against it.
os.environ.update(NOVA_HOME='/home/projects/hornsteinlab/Collaboration/NOVA')

sys.path.insert(1, os.environ['NOVA_HOME'])
print("NOVA_HOME: " + os.environ['NOVA_HOME'])

from src.preprocessing.preprocessing_utils import rescale_intensity, fit_image_shape

%matplotlib inline

NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA


# Raw

In [None]:
# You may change this block: user-tunable inputs for the raw-image preview.
# Sub-folder (batch) of the raw dataset to browse.
batch = "batch1"
# Folder that `plot_images` walks recursively looking for .tif/.tiff files.
root_path = f"/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/NOVA_d18_neurons_sorted/{batch}/"
# Passed to `plot_images` as `n_sites` (images shown per folder).
n_images = 3

In [2]:
# UTILS

def plot_images(root_folder, n_sites=3):
    """Show the first `n_sites` readable TIFF images of every folder under `root_folder`.

    Walks `root_folder` recursively. In each folder, files ending in '.tif' or
    '.tiff' are read with `cv2.imread(..., cv2.IMREAD_ANYDEPTH)`, passed through
    `fit_image_shape(image, (1024, 1024))` and `rescale_intensity`, and
    collected until `n_sites` images are available. A folder that yields
    exactly `n_sites` images gets one figure: a single row of grayscale panels
    titled with the folder path. Folders with fewer readable images produce no
    figure.

    NOTE: a later cell of this notebook defines another `plot_images` with a
    different signature (for processed .npy tiles). Re-run this cell before
    using the raw-image version if that later cell has been executed.

    Args:
        root_folder: Directory to search recursively.
        n_sites: Number of images to display per folder.

    Returns:
        None. Figures are rendered with `plt.show()`.
    """
    for foldername, _, filenames in os.walk(root_folder):
        images = []
        for filename in filenames:
            if not filename.endswith(('.tiff', '.tif')):
                continue
            # if 'DMSO' not in filename:
            #     continue
            image_path = os.path.join(foldername, filename)
            image = cv2.imread(image_path, cv2.IMREAD_ANYDEPTH)
            if image is None:
                # cv2.imread reports an unreadable/corrupt file by returning
                # None instead of raising; skip it rather than crash below.
                print(f"Could not read {image_path}, skipping...")
                continue
            image = fit_image_shape(image, (1024, 1024))
            rescale_image = rescale_intensity(image)
            print(rescale_image.shape)
            images.append(rescale_image)
            if len(images) == n_sites:
                break

        # Only folders that delivered the full number of images are plotted.
        if len(images) == n_sites:
            plt.figure(figsize=(20, 5))
            plt.suptitle(f'{foldername}')
            for i in range(n_sites):
                plt.subplot(1, n_sites, i + 1)
                plt.imshow(images[i], cmap='gray')
                plt.axis('off')
            plt.show()


In [None]:
# Preview raw images for the batch configured above.
# NOTE: this call needs the raw-section `plot_images(root_folder, n_sites)`.
# A later cell redefines `plot_images` with a different signature, so re-run
# the raw UTILS cell first if that later cell has already been executed.
plot_images(root_path, n_images)

In [None]:
# You may change this block: same inputs as the previous config cell, now for
# a second batch. Running it overwrites `batch`, `root_path` and `n_images`.
batch = "batch2"
# Folder that `plot_images` walks recursively looking for .tif/.tiff files.
root_path = f"/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/NOVA_d18_neurons_sorted/{batch}/"
# Passed to `plot_images` as `n_sites` (images shown per folder).
n_images = 3

In [None]:
# Preview raw images for the second batch.
# NOTE: needs the raw-section `plot_images(root_folder, n_sites)`; re-run the
# raw UTILS cell first if the later redefinition of `plot_images` has been run.
plot_images(root_path, n_images)

# Processed

In [33]:
# Root folder of the processed (.npy tile) dataset browsed in this section.
# Alternative root kept from an earlier run:
#   "/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/240323_day29_neurons/batch1/"
processed_root_path = "/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/AlyssaCoyne/batch1"

In [34]:
# UTILS

def plot_images(root_folder, cell_line=None, marker=None, n_tiles_per_marker=10):
    """Plot one random tile from each of up to `n_tiles_per_marker` .npy files per folder.

    Walks `root_folder` recursively. In each folder, every '.npy' file that
    passes the filters is loaded, one tile is drawn at random along its first
    axis, and the tile is shown as a two-panel figure: channel 0 (labelled
    with the folder name) and channel 1 (labelled 'nucleus'), both displayed
    in grayscale on a fixed 0..1 intensity range.

    NOTE: this definition replaces the earlier raw-image `plot_images` defined
    higher up in the notebook (same name, different signature).

    Args:
        root_folder: Directory to search recursively.
        cell_line: If given, only files whose full path contains this
            substring are used.
        marker: If given, only folders whose own name equals this string are
            used.
        n_tiles_per_marker: Maximum number of files (one tile each) plotted
            per folder. The limit applies to each folder independently; a
            folder with fewer matching files is plotted with what it has.

    Returns:
        None. Figures are rendered with `plt.show()` once per plotted folder.
    """
    print(f"rootfolder = {root_folder}, cell_line = {cell_line}, marker={marker}, n_tiles_per_marker={n_tiles_per_marker}")
    if n_tiles_per_marker <= 0:
        # Nothing would be plotted; avoid loading every file for no output.
        return

    for foldername, _, filenames in os.walk(root_folder):
        folder_marker = os.path.basename(foldername)
        # The marker filter depends only on the folder, so test it once here.
        if marker is not None and marker != folder_marker:
            continue

        images = []
        images_filenames = []
        for filename in filenames:
            if not filename.endswith('.npy'):
                continue
            image_path = os.path.join(foldername, filename)
            if cell_line is not None and cell_line not in image_path:
                continue
            tiles = np.load(image_path)
            if len(tiles) == 0:
                # An empty stack has no tile to sample from.
                continue
            tile_indx = np.random.choice(np.arange(len(tiles)))
            images.append(tiles[tile_indx])
            images_filenames.append(f"{filename}_{tile_indx}")
            if len(images) == n_tiles_per_marker:
                break

        if not images:
            continue
        if len(images) < n_tiles_per_marker:
            # Report the shortfall but do NOT shrink n_tiles_per_marker:
            # later folders must still get the full requested number.
            print(f"Found only {len(images)} images for {foldername}, plotting those...")

        channels = [folder_marker, 'nucleus']
        for image, image_label in zip(images, images_filenames):
            fig, ax = plt.subplots(1, len(channels), figsize=(7,7))
            fig.suptitle(f'{foldername}')
            for j in range(len(channels)):
                ax[j].imshow(image[...,j], cmap='gray', vmin=0, vmax=1)
                if j == 1:
                    # Second panel: title is placed below the image.
                    ax[j].set_title(f"{channels[j]}\n{image_label}", y=0, pad=-25)
                else:
                    ax[j].set_title(f"{channels[j]}\n{image_label}")
                ax[j].set_axis_off()
        plt.show()


In [None]:
# Browse random processed tiles: only files whose path contains the given
# cell line, only folders named 'Map2', at most 20 files per folder.
# Uses the processed-section `plot_images` defined in the cell above.
plot_images(processed_root_path, cell_line='sALSNegativeCytoTDP43', marker='Map2', n_tiles_per_marker=20)

In [None]:
# Same browsing for the Opera dataset (path filter 'DMSO', folders named 'LAMP1').
# A leftover path fragment from an earlier edit followed the string: KOLF/Untreated/
processed_root_path_opera = "/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/Opera/batch1/"
plot_images(processed_root_path_opera, cell_line='DMSO', marker='LAMP1', n_tiles_per_marker=40)

### Save chosen tiles as tiffs 

In [5]:
# --- SAVE CHOSEN TILES AS TIFFS (use full .npy paths like ".../file.npy_2") ---
import os
import re
import numpy as np
from tifffile import imwrite

def _scale_to_uint16(img):
    """Convert float [0,1] -> uint16 (ImageJ-friendly). Leave int types as-is."""
    if np.issubdtype(img.dtype, np.floating):
        img = np.nan_to_num(img, nan=0.0, posinf=1.0, neginf=0.0)
        img = np.clip(img, 0.0, 1.0)
        return (img * 65535).round().astype(np.uint16)
    return img

def _save_tile_array_as_tiff(tile_array_hw_c, out_path, imagej=False, force_uint16=True):
    """
    tile_array_hw_c: (H, W, C) with C=2 here.
    imagej=True -> save as planes (C,H,W) ImageJ stack; else interleaved (H,W,C).
    """
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    arr = tile_array_hw_c
    if force_uint16:
        arr = _scale_to_uint16(arr)

    if imagej:
        imwrite(out_path, np.moveaxis(arr, -1, 0), imagej=True)  # (C,H,W)
    else:
        imwrite(out_path, arr)  # (H,W,C)
    print(f"[saved] {out_path}")

# Tile label format: "<anything ending in .npy>_<digits>".
# `path` captures the .npy path, `idx` the tile index after the final underscore.
_label_re = re.compile(r"^(?P<path>.*\.npy)_(?P<idx>\d+)$")

def save_tiles_from_fullpaths(
    labels,
    out_dir,
    imagej=False,
    force_uint16=True,
    prefix_parents=2,
):
    """Save selected tiles as TIFFs, given labels of the form "<full npy path>_<tile index>".

    Example label:
        "/.../NIH/batch2/WT/Untreated/TDP43/sample_001.npy_7"

    - Loads the npy at the exact path; no folder search is done.
    - Expects arrays shaped (N, H, W, 2); saves tile `idx` to TIFF in `out_dir`.
    - Output filename is "<parent>__...__<file.npy>_<idx>.tif", including up to
      `prefix_parents` parent folders to avoid collisions.
    - Problems (bad label, missing file, load failure, wrong shape, index out
      of range) are reported with a printed "[skip]" / "[miss]" / "[error]"
      line and that label is skipped; processing continues.

    Args:
        labels: Iterable of label strings. A single label string is also
            accepted and treated as a one-element list.
        out_dir: Directory receiving the TIFF files (created if missing).
        imagej: Forwarded to `_save_tile_array_as_tiff`.
        force_uint16: Forwarded to `_save_tile_array_as_tiff`.
        prefix_parents: Number of parent folder names to prepend to the
            output filename; values <= 0 prepend none.

    Returns:
        None.
    """
    if isinstance(labels, str):
        # Iterating a bare string would treat every character as a label.
        labels = [labels]

    os.makedirs(out_dir, exist_ok=True)

    for label in labels:
        m = _label_re.match(label)
        if not m:
            print(f"[skip] Bad label format (expect '/full/path/file.npy_X'): {label}")
            continue

        npy_path = m.group("path")
        tile_idx = int(m.group("idx"))

        if not os.path.isfile(npy_path):
            print(f"[miss] Not found: {npy_path}")
            continue

        try:
            # Memory-map the file: only the requested tile is read from disk
            # instead of the whole (N, H, W, 2) stack.
            arr = np.load(npy_path, mmap_mode="r")  # expect (N, H, W, 2)
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the batch.
            print(f"[error] Failed to load {npy_path}: {e}")
            continue

        if arr.ndim != 4 or arr.shape[-1] != 2:
            print(f"[skip] Unexpected shape for {npy_path}: {arr.shape} (need (N,H,W,2))")
            continue

        if tile_idx < 0 or tile_idx >= arr.shape[0]:
            print(f"[skip] Index {tile_idx} out of range 0..{arr.shape[0]-1} for {npy_path}")
            continue

        # Copy the tile into ordinary memory so nothing downstream depends on
        # the memory map.
        tile = np.array(arr[tile_idx])  # (H, W, 2)

        base = os.path.basename(npy_path)  # "file.npy"
        # Build a disambiguated name by prefixing last `prefix_parents` directories
        parts = []
        d = os.path.dirname(npy_path)
        for _ in range(max(prefix_parents, 0)):
            head, tail = os.path.split(d)
            if not tail:
                break
            parts.append(tail)
            d = head
        parts.reverse()  # from higher to lower level
        out_name = f"{'__'.join(parts + [base])}_{tile_idx}.tif"
        out_path = os.path.join(out_dir, out_name)

        _save_tile_array_as_tiff(tile, out_path, imagej=imagej, force_uint16=force_uint16)

# --- Example usage ---
# tiles_to_save = [
#     "/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/NIH/batch2/WT/Untreated/TDP43/sample_001.npy_7",
#     "/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/NIH/batch2/WT/Treated/TDP43/sample_001.npy_3",
# ]
# save_tiles_from_fullpaths(
#     labels=tiles_to_save,
#     out_dir="/home/projects/hornsteinlab/tiles_out/selected",
#     imagej=False,        # True -> (C,H,W) ImageJ stack
#     force_uint16=True,   # scale [0,1] floats to uint16
#     prefix_parents=3     # include last 3 parent dirs in filename to avoid collisions
# )

In [42]:
# Re-declared here for reference; the value is not used further down in this cell.
processed_root_path = "/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/AlyssaCoyne/batch1"

# Labels of the tiles to export: "<full .npy path>_<tile index>".
tiles_to_save = [
    "/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/AlyssaCoyne/batch1/sALSNegativeCytoTDP43/Untreated/Map2/rep1_R11_w2confmCherry_s66_panelA_sALSNegativeCytoTDP43_processed.npy_2"
]

# Export the selected tiles as TIFF files into the manuscript figure folder.
save_tiles_from_fullpaths(
    labels=tiles_to_save,
    out_dir="/home/projects/hornsteinlab/Collaboration/NOVA/manuscript/rep_images/Coyne_Fig5",
    imagej=True,        # True -> (C,H,W) ImageJ stack
    force_uint16=True,   # scale [0,1] floats to uint16
    prefix_parents=3     # include last 3 parent dirs in filename to avoid collisions
)

[saved] /home/projects/hornsteinlab/Collaboration/NOVA/manuscript/rep_images/Coyne_Fig5/sALSNegativeCytoTDP43__Untreated__Map2__rep1_R11_w2confmCherry_s66_panelA_sALSNegativeCytoTDP43_processed.npy_2.tif


#### Read the pixel scale from the TIFF metadata (is it reliable?)

In [24]:
import tifffile as tiff

tif_path = "/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/AlyssaCoyne/MOmaps_iPSC_patients_TDP43_PB_CoyneLab/batch1/c9orf72ALSPatients/panelA/Untreated/rep1/DCP1A/R11_w2confCy5_s01.tif"

# Read the resolution tags of the first page of `tif_path` and convert them to
# a pixel size in micrometres. Missing tags, a zero denominator or a zero
# resolution yield None / "n/a" instead of raising.
# NOTE(review): the recorded run printed 300 px/inch (about 84.67 µm/px), far
# from the 0.1083 µm/px used in the note at the end of this notebook. This
# looks like a generic DPI default rather than a microscope calibration.
# Confirm before relying on it.
with tiff.TiffFile(tif_path) as tf:
    page = tf.pages[0]

    # Pixels per unit (RATIONAL = numerator / denominator)
    resolutions = []
    for tag_name in ("XResolution", "YResolution"):
        if tag_name in page.tags:
            num, den = page.tags[tag_name].value
            resolutions.append(num / den if den else None)
        else:
            resolutions.append(None)
    ppu_x, ppu_y = resolutions

    # ResolutionUnit: 1=No absolute unit, 2=Inch, 3=Centimeter
    unit = page.tags["ResolutionUnit"].value if "ResolutionUnit" in page.tags else 1

    if unit == 2:
        um_per_unit = 25400.0  # µm per inch
        unit_name = "inch"
    elif unit == 3:
        um_per_unit = 10000.0  # µm per cm
        unit_name = "centimeter"
    else:
        um_per_unit = None
        unit_name = "unknown"

    # µm per unit / pixels per unit; None when the unit or resolution is unusable.
    um_per_px_x = um_per_unit / ppu_x if um_per_unit and ppu_x else None
    um_per_px_y = um_per_unit / ppu_y if um_per_unit and ppu_y else None

    ppu_x_text = f"{ppu_x:.3f}" if ppu_x is not None else "n/a"
    ppu_y_text = f"{ppu_y:.3f}" if ppu_y is not None else "n/a"

    print(f"ResolutionUnit: {unit} ({unit_name})")
    print(f"XResolution: {ppu_x_text} px/{unit_name}, YResolution: {ppu_y_text} px/{unit_name}")
    print(f"Pixel size: {um_per_px_x} µm/px (X), {um_per_px_y} µm/px (Y)")


ResolutionUnit: 2 (inch)
XResolution: 300.000 px/inch, YResolution: 300.000 px/inch
Pixel size: 84.66666666666667 µm/px (X), 84.66666666666667 µm/px (Y)


In [None]:
"""
For tiles that are downsized from 128x128 to 100x100 pixels, each pixel covers 128/100 of its original size:
1 pixel = 0.1083 µm x (128/100) ≈ 0.1386 µm
"""