The data is from the hiPSC single cell image dataset from the Allen Institute for Cell Science: https://open.quiltdata.com/b/allencell/packages/aics/hipsc_single_cell_image_dataset. Original publication of the data: 

Viana, M.P., Chen, J., Knijnenburg, T.A. et al. Integrated intracellular organization and its variations in human iPS cells. Nature 613, 345–354 (2023). https://doi.org/10.1038/s41586-022-05563-7

This demo covers two experiments: instance segmentation from bright-field images and instance segmentation from fluorescent images.

In [None]:
# you need quilt3 package to download the data:
! pip install quilt3

In [None]:
import pandas as pd
import quilt3
from pathlib import Path
from bioio import BioImage
from bioio.writers import OmeTiffWriter
from random import random
import numpy as np

In [None]:
# cell line used throughout this demo (lamin B1)
cline = "LMNB1"

# root folder holding everything produced by this notebook
parent_path = Path("../../data/instance3D")

# sub-folders: raw downloads, plus train / holdout sets for the
# bright-field ("bf") and fluorescent ("fluo") experiments
raw_path = parent_path / "download"
train_bf_path = parent_path / "train_bf"
holdout_bf_path = parent_path / "holdout_bf"
train_fluo_path = parent_path / "train_fluo"
holdout_fluo_path = parent_path / "holdout_fluo"

# create the root first (with any missing parents), then each sub-folder;
# folders that already exist are left untouched
parent_path.mkdir(parents=True, exist_ok=True)
for _subdir in (
    raw_path,
    train_bf_path,
    holdout_bf_path,
    train_fluo_path,
    holdout_fluo_path,
):
    _subdir.mkdir(exist_ok=True)

In [None]:
# connect to quilt and load meta table
pkg = quilt3.Package.browse(
    "aics/hipsc_single_cell_image_dataset", registry="s3://allencell"
)
meta_df_obj = pkg["metadata.csv"]
meta_df_obj.fetch(parent_path / "meta.csv")
meta_df = pd.read_csv(parent_path / "meta.csv")

# Build the per-FOV table for the selected cell line in one chain:
#   1. keep only the rows of the specific cell line
#   2. collapse the table to one row per FOVId
#   3. reset the index so that it runs 0..N-1
# Each step returns a new DataFrame; no inplace operation is applied to the
# result of query(), which could otherwise raise a SettingWithCopyWarning.
meta_df_line = (
    meta_df.query("structure_name==@cline")
    .drop_duplicates(subset="FOVId")
    .reset_index(drop=True)
)

In [None]:
# download the images and segmentation

# we only need a small amount of data for the purpose of demonstration
num_of_sample = 60  # choose the amount you need

# head() limits the loop to exactly num_of_sample FOVs, independent of the
# index values of the table
for row in meta_df_line.head(num_of_sample).itertuples():
    # fetch the raw image (multi-channel); fov_path is "<subdir>/<file>"
    subdir_name, file_name = row.fov_path.split("/")[:2]

    local_fn = raw_path / f"{row.FOVId}_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # extract the bf and DNA dye channel
    reader = BioImage(local_fn)
    bf_img = reader.get_image_data(
        "ZYX", C=row.ChannelNumberBrightfield, S=0, T=0
    )
    dna_img = reader.get_image_data(
        "ZYX", C=row.ChannelNumber405, S=0, T=0
    )

    # randomly send roughly 20% of the FOVs to the holdout set; the same
    # draw is used for both experiments so that their splits match
    if random() < 0.2:
        bf_path = holdout_bf_path
        fluo_path = holdout_fluo_path
    else:
        bf_path = train_bf_path
        fluo_path = train_fluo_path

    # fetch dna and cell segmentation
    subdir_name, file_name = row.fov_seg_path.split("/")[:2]

    local_fn = raw_path / f"{row.FOVId}_seg.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # extract all valid cells of this FOV (looked up in the full meta table,
    # because meta_df_line keeps only one row per FOV)
    fov_id = row.FOVId
    cell_df = meta_df.query("FOVId==@fov_id")
    all_cells = cell_df["this_cell_index"].tolist()

    # extract the DNA segmentation
    # NOTE(review): the uint8 cast assumes label values stay below 256
    reader = BioImage(local_fn)
    dna_seg = reader.get_image_data(
        "ZYX", C=0, S=0, T=0
    ).astype(np.uint8)

    # extract the Cell segmentation, keeping the stored dtype so that the
    # label values can be compared against the cell ids without wrap-around
    cell_labels = reader.get_image_data("ZYX", C=1, S=0, T=0)

    # create a mask from the cell segmentation: pixels of labelled cells that
    # are NOT among the valid cells become zero, everything else (background
    # and valid cells) becomes one
    invalid_cells = (cell_labels > 0) & ~np.isin(cell_labels, all_cells)
    cell_seg = (~invalid_cells).astype(np.int8)

    # save the data: for each experiment write the input image (_IM), the
    # DNA segmentation (_GT) and the mask (_CM); both experiments share the
    # same segmentation and mask and differ only in the input image
    for out_path, input_img in ((bf_path, bf_img), (fluo_path, dna_img)):
        OmeTiffWriter.save(
            input_img, out_path / f"{row.FOVId}_IM.tiff", dim_order="ZYX"
        )
        OmeTiffWriter.save(
            dna_seg, out_path / f"{row.FOVId}_GT.tiff", dim_order="ZYX"
        )
        OmeTiffWriter.save(
            cell_seg, out_path / f"{row.FOVId}_CM.tiff", dim_order="ZYX"
        )

In [None]:
# Optional cleanup: the raw downloads and the meta table are no longer
# needed once the train / holdout folders have been written.
import os
from shutil import rmtree

# delete the download folder together with everything inside it
rmtree(raw_path)

# delete the local copy of the meta table
meta_csv_path = parent_path / "meta.csv"
os.remove(meta_csv_path)
