In [None]:
import pandas as pd
import quilt3
from pathlib import Path
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter
from random import random
import numpy as np

# we use the FBL cell line for this demo
cline = "FBL"

# Two output trees are prepared. The download loop below writes every image
# into both of them: semantic3D receives the binary structure mask as ground
# truth, instance3D receives the mask multiplied by the cell labels.

# set up paths for the semantic segmentation data
parent_path = Path("../../data/semantic3D")
parent_path.mkdir(exist_ok=True, parents=True)

raw_path = parent_path / Path("download")
raw_path.mkdir(exist_ok=True, parents=True)
train_path = parent_path / Path("train_fbl")
train_path.mkdir(exist_ok=True)
holdout_path = parent_path / Path("holdout_fbl")
holdout_path.mkdir(exist_ok=True)

# set up paths for the instance segmentation data
parent_path_2 = Path("../../data/instance3D")
# The root must exist before its children are created; without this the
# mkdir calls below raise FileNotFoundError on a fresh checkout.
parent_path_2.mkdir(exist_ok=True, parents=True)

raw_path_2 = parent_path_2 / Path("download")
raw_path_2.mkdir(exist_ok=True)
train_path_2 = parent_path_2 / Path("train_fbl")
train_path_2.mkdir(exist_ok=True)
holdout_path_2 = parent_path_2 / Path("holdout_fbl")
holdout_path_2.mkdir(exist_ok=True)

In [None]:
from skimage.morphology import remove_small_objects
from aicssegmentation.core.pre_processing_utils import (
    intensity_normalization,
    image_smoothing_gaussian_3d,
)
from aicssegmentation.core.seg_dot import dot_slice_by_slice

In [None]:
# connect to quilt and load meta table
pkg = quilt3.Package.browse(
    "aics/hipsc_single_cell_image_dataset", registry="s3://allencell"
)

# download the metadata table next to the output folders and read it back
meta_df_obj = pkg["metadata.csv"]
meta_df_obj.fetch(parent_path / "meta.csv")
meta_df = pd.read_csv(parent_path / "meta.csv")

# Build the per-FOV table for the selected cell line in one non-mutating
# chain. The previous version called drop_duplicates/reset_index with
# inplace=True on the result of .query(), which mutates a derived frame and
# can emit SettingWithCopyWarning.
meta_df_line = (
    # fetch the data of the specific cell line
    meta_df.query("structure_name==@cline")
    # collapse the data table based on FOVId (keeps the first row per FOV)
    .drop_duplicates(subset="FOVId")
    # reset index
    .reset_index(drop=True)
)

In [None]:
# download the images and segmentation
from random import Random

num_sample = 10  # number of training images to collect (150 was the other value noted here)

# Tunable constants for this cell.
split_seed = 0  # fixed seed so the train/holdout assignment is reproducible
holdout_fraction = 0.2  # probability that an accepted FOV goes to holdout
min_mean_intensity = 450  # FOVs whose mean intensity is outside
max_mean_intensity = 500  # [450, 500] are skipped

# A dedicated, seeded generator: re-running this cell on the same table gives
# the same split, and the global `random` state is left untouched.
split_rng = Random(split_seed)

train_counter = 0
for row in meta_df_line.itertuples():
    # stop once enough *training* images were saved (holdout ones don't count)
    if train_counter >= num_sample:
        break

    # fetch the raw image (multi-channel)
    subdir_name = row.fov_path.split("/")[0]
    file_name = row.fov_path.split("/")[1]

    local_fn = raw_path / f"{row.FOVId}_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # extract the fbl channel
    reader = AICSImage(local_fn)
    img = reader.get_image_data(
        "ZYX", C=row.ChannelNumberStruct, S=0, T=0
    )

    # skip FOVs whose overall brightness is outside the accepted window
    mean_intensity = img.mean()
    if mean_intensity < min_mean_intensity or mean_intensity > max_mean_intensity:
        continue

    # randomly assign this FOV to holdout or train
    if split_rng.random() < holdout_fraction:
        out_path = holdout_path
        out_path_2 = holdout_path_2
    else:
        out_path = train_path
        out_path_2 = train_path_2
        train_counter += 1

    # fetch cell segmentation
    subdir_name = row.fov_seg_path.split("/")[0]
    file_name = row.fov_seg_path.split("/")[1]

    local_fn = raw_path / f"{row.FOVId}_seg.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # extract the Cell segmentation
    # NOTE(review): the cast to uint8 would wrap label values above 255;
    # presumably an FOV never holds that many cells -- confirm.
    reader = AICSImage(local_fn)
    cell_seg = reader.get_image_data(
        "ZYX", C=1, S=0, T=0
    ).astype(np.uint8)

    # generate structure segmentation:
    # normalize -> 3D gaussian smoothing -> slice-by-slice dot filter ->
    # threshold -> drop tiny objects
    struct_img = intensity_normalization(img.copy(), scaling_param=[0.5, 18])
    structure_img_smooth = image_smoothing_gaussian_3d(
        struct_img,
        sigma=1,
        truncate_range=3.0,
    )
    response2d = dot_slice_by_slice(structure_img_smooth, log_sigma=1)
    struct_seg = remove_small_objects(response2d > 0.01, min_size=5, connectivity=1)
    struct_seg = struct_seg.astype(np.uint8)
    # defensive clamp so the mask is strictly 0/1 before it is used as a multiplier
    struct_seg[struct_seg > 0] = 1

    # structure mask multiplied by the cell labels (ground truth for instance3D)
    struct_seg_label = struct_seg * cell_seg

    # save the data: image + binary mask for the semantic task
    im_fn = out_path / f"{row.FOVId}_IM.tiff"
    gt_fn = out_path / f"{row.FOVId}_GT.tiff"
    OmeTiffWriter.save(img, im_fn, dim_order="ZYX")
    OmeTiffWriter.save(struct_seg, gt_fn, dim_order="ZYX")

    # same image + labelled mask for the instance task
    im_fn = out_path_2 / f"{row.FOVId}_IM.tiff"
    gt_fn = out_path_2 / f"{row.FOVId}_GT.tiff"
    OmeTiffWriter.save(img, im_fn, dim_order="ZYX")
    OmeTiffWriter.save(struct_seg_label, gt_fn, dim_order="ZYX")

In [None]:
# you may remove the download folder now.
from shutil import rmtree
import os

# Both removals are guarded so that re-running this cell after a successful
# cleanup is a no-op instead of raising FileNotFoundError.
if raw_path.exists():
    rmtree(raw_path)

meta_csv_path = parent_path / "meta.csv"
if meta_csv_path.exists():
    os.remove(meta_csv_path)