In [2]:
#!/usr/bin/env python
import os
import re
import time
import logging
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import cv2
from pathlib import Path
import bin2cell as b2c
import tifffile
import itertools

In [3]:
Sample = ['IMMUNEX002']

base_sample_path = Path("/scratch/IMMUNEX/OUTPUT")
base_he_image_path = Path("/scratch/IMMUNEX/PJ2410310_250214/IMAGE/HE_nanozoomer_tif")
base_output_dir = Path("../../results/intermediate/bin2cell")
mpp = 0.7
gene_filter_min_cells = 1
cell_filter_min_counts = 1
grid_image_sigma = 5

# TIFF loader patch
def patched_load_image(image_path, **kwargs):
    print(f"Loading image via tifffile: {image_path}")
    img = tifffile.imread(image_path)
    if img.ndim == 2:
        img = np.stack([img] * 3, axis=-1)
    elif img.shape[0] == 3 and img.ndim == 3:
        img = np.moveaxis(img, 0, -1)
    return img

# Logging helper
def print_log(message, type="info"):
    print(message)
    if type == "error":
        logging.error(message)
    elif type == "warning":
        logging.warning(message)
    elif type == "success":
        logging.info(message)
    else:
        logging.info(message)


In [9]:
# Match HE images
nanozoomer_tif = {}

for file in base_he_image_path.glob("*.tif"):
    match = re.match(r"(IMMUNEX\d+)(.*)\.tif", file.name)
    if match:
        sample_id, suffix = match.groups()
        nanozoomer_tif[sample_id] = suffix
    
sample_folders = sorted(base_sample_path.glob("Visium_NSCLC_*"))

sample_folders_dict = {
    folder.name.split("Visium_NSCLC_")[1]: folder
    for folder in sample_folders
}


mask_regions = [
    {"x": 250, "y": 250, "w": 150, "h": 150}
]

stardist_he = {
    "stardist_model": "2D_versatile_he",
    "prob_thresh": 0.0001,
    "block_size": 4096,
    "min_overlap": 64,
    "context": 128
}

stardist_gex = {
    "stardist_model": "2D_versatile_fluo",
    "prob_thresh": 0.1,
    "nms_thresh": 0.3,
    "block_size": 4096,
    "min_overlap": 64,
    "context": 128,
    "show_progress": True
}

parameter_sweep = []


In [15]:
sample_folders


[PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX001'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX002'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX003'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX004'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX005'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX006'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX007'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX008'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX009'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX010'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX011'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX012'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX013'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX014'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX015'),
 PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNE

In [12]:
config_name = (
    f"he{str(stardist_he['prob_thresh']).replace('.', '')}_"
    f"gex{str(stardist_gex['prob_thresh']).replace('.', '')}_"
    f"nms{str(stardist_gex['nms_thresh']).replace('.', '')}_"
    f"mask{mask_regions[0]['x']}_{mask_regions[0]['y']}_{mask_regions[0]['w']}x{mask_regions[0]['h']}"
)

IMMUNEXID = Sample[0]
# IMMUNEXID = sample_folder.name.split("_")[-1]

print('Processing: ',IMMUNEXID, sample_folder)

start_time_sample = time.time()
step_time = time.time()
path = sample_folder / "outs/binned_outputs/square_002um/"
source_image_path = base_he_image_path / f"{IMMUNEXID}{nanozoomer_tif[IMMUNEXID]}.tif"
output_dir = base_output_dir / f"{IMMUNEXID}__{config_name}"
output_dir.mkdir(parents=True, exist_ok=True)
print(output_dir)
os.getcwd()

NameError: name 'sample_folder' is not defined

In [None]:

log_path = f"{output_dir}/{IMMUNEXID}_{config_name}.log"
logging.basicConfig(filename=log_path, level=logging.INFO,
                    format="%(asctime)s [%(levelname)s] %(message)s")

print_log(f"Starting processing for {IMMUNEXID}")
print_log(f"==== Starting parameter set: {config_name} ====")


images_dir = output_dir / "images"
images_dir.mkdir(exist_ok=True)
he_img_out = images_dir / "he.tiff"
he_seg_out = images_dir / "he.npz"
gex_img_out = images_dir / "gex.tiff"
gex_seg_out = images_dir / "gex.npz"


adata = b2c.read_visium(path, source_image_path=source_image_path)
adata.var_names_make_unique()
print_log(adata)

sc.pp.filter_genes(adata, min_cells=gene_filter_min_cells)
print_log(adata)

sc.pp.filter_cells(adata, min_counts=cell_filter_min_counts)
print_log(adata)

b2c.bin2cell.load_image = patched_load_image
print_log(adata)


In [7]:
b2c.destripe(adata)
print(adata)

b2c.scaled_he_image(adata, mpp=mpp, save_path=he_img_out)
print(adata)


NameError: name 'adata' is not defined

In [None]:
mask_region = mask_regions[0]

mask = (
            (adata.obs['array_row'] >= mask_region["y"]) &
            (adata.obs['array_row'] < mask_region["y"] + mask_region["h"]) &
            (adata.obs['array_col'] >= mask_region["x"]) &
            (adata.obs['array_col'] < mask_region["x"] + mask_region["w"])
        )
bdata = adata[mask]
print(bdata)

sc.set_figure_params(figsize=[10,10], dpi=100)
sc.pl.spatial(bdata, color=[None, "n_counts", "n_counts_adjusted"], img_key=f"{mpp}_mpp_150_buffer", basis="spatial_cropped_150_buffer", cmap='Reds', show=False)
plt.savefig(output_dir / "spatial_destriping.png")
plt.close()
plt.show()


In [None]:
b2c.stardist(str(he_img_out), str(he_seg_out), **stardist_he)
b2c.insert_labels(adata, str(he_seg_out), basis="spatial", spatial_key="spatial_cropped_150_buffer", mpp=mpp, labels_key="labels_he")
print(adata)

In [None]:
b2c.expand_labels(adata, labels_key="labels_he", expanded_labels_key="labels_he_expanded")
print(adata)

In [None]:
crop = b2c.get_crop(adata[mask], basis="spatial", spatial_key="spatial_cropped_150_buffer", mpp=mpp)
rendered = b2c.view_stardist_labels(image_path=he_img_out, labels_npz_path=he_seg_out, crop=crop, alpha_boundary=1, normalize_img=True, alpha=0.1)
plt.imshow(rendered)
plt.axis("off")
plt.tight_layout()
plt.savefig(output_dir / "he_segmentation_overlay.png", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()
plt.close()

In [None]:
import importlib
importlib.reload(b2c.bin2cell)
img = b2c.grid_image(adata, "n_counts_adjusted", mpp=mpp, sigma=grid_image_sigma)
cv2.imwrite(str(gex_img_out), img)


In [None]:
b2c.stardist(str(gex_img_out), str(gex_seg_out), **stardist_gex)

In [None]:

b2c.insert_labels(adata, str(gex_seg_out), basis="array", mpp=mpp, labels_key="labels_gex")
print(adata)

In [None]:

bdata = adata[mask]
bdata = bdata[bdata.obs['labels_gex'] > 0]
print(bdata)
bdata.obs['labels_gex'] = bdata.obs['labels_gex'].astype(str)
print(bdata)

sc.pl.spatial(bdata, color=[None, "labels_gex"], img_key=f"{mpp}_mpp_150_buffer", basis="spatial_cropped_150_buffer", show=False)
plt.savefig(output_dir / "gex_segmentation_labels_gex_overlay.png", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()
plt.close()

In [None]:

crop = b2c.get_crop(bdata, basis="array", mpp=mpp)
rendered = b2c.view_labels(image_path=gex_img_out, labels_npz_path=gex_seg_out, crop=crop, stardist_normalize=True)
plt.imshow(rendered)
plt.axis("off")
plt.tight_layout()
plt.savefig(output_dir / "gex_segmentation_overlay_normalized.png", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()
plt.close()


In [None]:

b2c.salvage_secondary_labels(adata, primary_label="labels_he_expanded", secondary_label="labels_gex", labels_key="labels_joint")

bdata = adata[mask]
bdata = bdata[bdata.obs['labels_joint'] > 0]
bdata.obs['labels_joint'] = bdata.obs['labels_joint'].astype(str)
sc.pl.spatial(bdata, color=[None, "labels_joint_source", "labels_joint"], img_key=f"{mpp}_mpp_150_buffer", basis="spatial_cropped_150_buffer", show=False)
plt.savefig(output_dir / "labels_joint_overlay.png", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()
plt.close()


In [None]:
ddata

In [None]:
ddata = b2c.bin_to_cell(bdata, labels_key="labels_joint", spatial_keys=["spatial", "spatial_cropped_150_buffer"])
sc.pl.spatial(ddata, color=["bin_count","labels_joint_source"], basis="spatial_cropped_150_buffer", img_key=f"{mpp}_mpp_150_buffer", show=False)
plt.savefig(output_dir / "spatial_cell_density.pdf")
plt.show()
plt.close()

In [None]:
cdata = b2c.bin_to_cell(adata, labels_key="labels_joint", spatial_keys=["spatial", "spatial_cropped_150_buffer"])
sc.pl.spatial(cdata, color=["bin_count",'labels_joint_source'], basis="spatial_cropped_150_buffer", img_key=f"{mpp}_mpp_150_buffer", show=False)
plt.savefig(output_dir / "spatial_cell_density.pdf")
plt.show()
plt.close()

In [None]:

num_bins = adata.n_obs
num_genes = adata.n_vars
total_umis = adata.X.sum()
avg_umis_per_bin = total_umis / num_bins if num_bins > 0 else 0

num_cells = cdata.n_obs
avg_bin_per_cell = np.mean(cdata.obs['bin_count']) if 'bin_count' in cdata.obs else 0

logging.info(f"STATISTICS for {IMMUNEXID}:")
logging.info(f"- Binned data: {num_bins} bins, {num_genes} genes")
logging.info(f"- Total UMIs: {total_umis:.0f}, Avg UMIs per bin: {avg_umis_per_bin:.2f}")
logging.info(f"- Segmented cells: {num_cells}, Avg bins per cell: {avg_bin_per_cell:.2f}")

elapsed = time.time() - start_time_sample
print_log(f"SUCCESS: {IMMUNEXID} processed in {elapsed:.2f}s")