In [6]:
import os
import re
import time
import logging
from pathlib import Path

import numpy as np
import tifffile
import scanpy as sc
import bin2cell as b2c  # Ensure your environment has `bin2cell` available

# ---------------- CONFIG ---------------- #


base_sample_path = Path("/scratch/IMMUNEX/OUTPUT")
base_he_image_path = Path("/scratch/IMMUNEX/PJ2410310_250214/IMAGE/HE_nanozoomer_tif")
base_output_dir = Path("../../results/intermediate/bin2cell")
mpp = 0.7
gene_filter_min_cells = 1
cell_filter_min_counts = 1
grid_image_sigma = 5

# Extract sample names like IMMUNEX001 from folder names
Samples = sorted([
    folder.name.split("Visium_NSCLC_")[1]
    for folder in base_sample_path.glob("Visium_NSCLC_*")
    if folder.is_dir() and folder.name.startswith("Visium_NSCLC_IMMUNEX")
])

mask_regions = [{"x": 250, "y": 250, "w": 150, "h": 150}]

stardist_he = {
    "stardist_model": "2D_versatile_he",
    "prob_thresh": 0.3,
    "nms_thresh": 0.5,
    "block_size": 4096,
    "min_overlap": 64,
    "context": 128,
    "show_progress": True
}

stardist_gex = {
    "stardist_model": "2D_versatile_fluo",
    "prob_thresh": 0.3,
    "nms_thresh": 0.5,
    "block_size": 4096,
    "min_overlap": 64,
    "context": 128,
    "show_progress": True
}

Samples

['IMMUNEX001',
 'IMMUNEX002',
 'IMMUNEX003',
 'IMMUNEX004',
 'IMMUNEX005',
 'IMMUNEX006',
 'IMMUNEX007',
 'IMMUNEX008',
 'IMMUNEX009',
 'IMMUNEX010',
 'IMMUNEX011',
 'IMMUNEX012',
 'IMMUNEX013',
 'IMMUNEX014',
 'IMMUNEX015',
 'IMMUNEX016',
 'IMMUNEX017',
 'IMMUNEX018']

In [7]:

# -------------- UTILITY ----------------- #
def patched_load_image(image_path, **kwargs):
    print(f"Loading image via tifffile: {image_path}")
    img = tifffile.imread(image_path)
    if img.ndim == 2:
        img = np.stack([img] * 3, axis=-1)
    elif img.shape[0] == 3 and img.ndim == 3:
        img = np.moveaxis(img, 0, -1)
    return img

def print_log(message, type="info"):
    print(message)
    if type == "error":
        logging.error(message)
    elif type == "warning":
        logging.warning(message)
    elif type == "success":
        logging.info(message)
    else:
        logging.info(message)

# -------------- MATCH HE FILES ---------------- #
nanozoomer_tif = {}
for file in base_he_image_path.glob("*.tif"):
    match = re.match(r"(IMMUNEX\d+)(.*)\.tif", file.name)
    if match:
        sample_id, suffix = match.groups()
        nanozoomer_tif[sample_id] = suffix

sample_folders = sorted(base_sample_path.glob("Visium_NSCLC_*"))
sample_folders_dict = {
    folder.name.split("Visium_NSCLC_")[1]: folder
    for folder in sample_folders
}
sample_folders_dict

{'IMMUNEX001': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX001'),
 'IMMUNEX002': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX002'),
 'IMMUNEX003': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX003'),
 'IMMUNEX004': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX004'),
 'IMMUNEX005': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX005'),
 'IMMUNEX006': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX006'),
 'IMMUNEX007': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX007'),
 'IMMUNEX008': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX008'),
 'IMMUNEX009': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX009'),
 'IMMUNEX010': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX010'),
 'IMMUNEX011': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX011'),
 'IMMUNEX012': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX012'),
 'IMMUNEX013': PosixPath('/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX013'),

In [8]:
from tqdm import tqdm
for IMMUNEXID in tqdm(Samples):
    if IMMUNEXID not in sample_folders_dict:
        continue

    sample_folder = sample_folders_dict[IMMUNEXID]
    path = sample_folder / "outs/binned_outputs/square_008um/"
    source_image_path = base_he_image_path / f"{IMMUNEXID}{nanozoomer_tif[IMMUNEXID]}.tif"

    output_dir = base_output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        b2c.bin2cell.load_image = patched_load_image

        adata = b2c.read_visium(path, source_image_path=source_image_path)
        adata.var_names_make_unique()

        sc.pp.filter_genes(adata, min_cells=gene_filter_min_cells)
        sc.pp.filter_cells(adata, min_counts=cell_filter_min_counts)

        b2c.destripe(adata)

        adata_out_dir = output_dir / "adata_destriped"
        adata_out_dir.mkdir(exist_ok=True)
        adata.write(adata_out_dir / f"{IMMUNEXID}_destriped.h5ad")
        # sc.write_10x_mtx(adata, path=adata_out_dir, gene_symbols='gene_names')

    except Exception as e:
        print(f"Error processing {IMMUNEXID}: {e}")


  0%|                                                                                                                           | 0/18 [00:00<?, ?it/s]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX001/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  6%|██████▍                                                                                                            | 1/18 [00:06<01:48,  6.37s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX002/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 11%|████████████▊                                                                                                      | 2/18 [00:12<01:36,  6.00s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX003/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 17%|███████████████████▏                                                                                               | 3/18 [00:19<01:39,  6.66s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX004/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 22%|█████████████████████████▌                                                                                         | 4/18 [00:23<01:17,  5.53s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX005/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 28%|███████████████████████████████▉                                                                                   | 5/18 [00:30<01:17,  5.95s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX006/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 33%|██████████████████████████████████████▎                                                                            | 6/18 [00:41<01:33,  7.76s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX007/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 39%|████████████████████████████████████████████▋                                                                      | 7/18 [00:45<01:10,  6.44s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX008/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 44%|███████████████████████████████████████████████████                                                                | 8/18 [00:51<01:04,  6.40s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX009/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 50%|█████████████████████████████████████████████████████████▌                                                         | 9/18 [00:55<00:50,  5.66s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX010/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 56%|███████████████████████████████████████████████████████████████▎                                                  | 10/18 [00:59<00:42,  5.31s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX011/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 61%|█████████████████████████████████████████████████████████████████████▋                                            | 11/18 [01:03<00:34,  4.92s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX012/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 67%|████████████████████████████████████████████████████████████████████████████                                      | 12/18 [01:09<00:30,  5.02s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX013/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 72%|██████████████████████████████████████████████████████████████████████████████████▎                               | 13/18 [01:12<00:22,  4.45s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX014/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 78%|████████████████████████████████████████████████████████████████████████████████████████▋                         | 14/18 [01:18<00:19,  4.87s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX015/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 83%|███████████████████████████████████████████████████████████████████████████████████████████████                   | 15/18 [01:22<00:14,  4.85s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX016/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎            | 16/18 [01:26<00:09,  4.58s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX017/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 17/18 [01:31<00:04,  4.60s/it]

/scratch/IMMUNEX/OUTPUT/Visium_NSCLC_IMMUNEX018/outs/binned_outputs/square_008um/filtered_feature_bc_matrix.h5


anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
anndata.py (1840): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [01:36<00:00,  5.34s/it]


In [9]:
adata_out_dir

PosixPath('../../results/intermediate/bin2cell/adata_destriped')

In [11]:
import shutil
import os
from pathlib import Path

base_output_dir = Path("../../results/intermediate/bin2cell")

adata_out_dir = base_output_dir / "adata_destriped"

zip_path = adata_out_dir / "destriped_samples"

shutil.make_archive(base_name=zip_path, format='zip', root_dir=adata_out_dir)

# Confirm the zip file path
zip_path


PosixPath('../../results/intermediate/bin2cell/adata_destriped/destriped_samples')