The dataset was downloaded from https://bbbc.broadinstitute.org/BBBC010

In [None]:
import pooch
from skimage.io import imread
from bioio import BioImage
from bioio.writers import OmeTiffWriter
import zipfile
from pathlib import Path
from random import random
import numpy as np


# Folder layout: one root for the 2D instance-segmentation data with three
# sub-folders for the raw download, the training split and the test split.
data_path = Path("../../data/instance2D")
download_path = data_path / "download"
train_path = data_path / "train"
test_path = data_path / "test"

# The root is created first (including any missing parents), so the
# sub-folders only need a plain, idempotent mkdir.
data_path.mkdir(parents=True, exist_ok=True)
for folder in (download_path, train_path, test_path):
    folder.mkdir(exist_ok=True)

In [None]:
def _fetch(url, fname, known_hash):
    """Retrieve *url* with pooch into the download folder.

    The file is stored under the name *fname* and *known_hash* is handed to
    pooch as the expected hash. Returns whatever ``pooch.retrieve`` returns;
    the unzip step passes that value to ``zipfile.ZipFile``.
    """
    return pooch.retrieve(
        url=url,
        fname=fname,
        path=download_path,
        known_hash=known_hash,
    )


# Archive with the microscopy images.
source_im = _fetch(
    "https://data.broadinstitute.org/bbbc/BBBC010/BBBC010_v2_images.zip",
    "source_im.zip",
    "77a82c74d12c0707e861d9b324b47e6a74e316aefe25a3501f596c7a80a0b4f4",
)

# Archive with one foreground mask per worm (the ground truth).
source_gt = _fetch(
    "https://data.broadinstitute.org/bbbc/BBBC010/BBBC010_v1_foreground_eachworm.zip",
    "source_gt.zip",
    "19b7ceef05d4a21bb3eec9988ee0b61dd0eeb940fb690125f14bb5919ae8ae44",
)

In [None]:
# Unpack both archives (images and ground truth) into the download folder.
for archive in (source_im, source_gt):
    with zipfile.ZipFile(archive, mode="r") as zf:
        zf.extractall(download_path)

In [None]:
# Convert every downloaded image and its per-instance masks into a pair of
# OME-TIFF files (raw image + label image), split into train and test folders.

# Share of images routed to the test folder.
TEST_FRACTION = 0.05

# A fixed seed makes the split reproducible. With an unseeded draw, re-running
# this cell could write the same image into both the train and the test
# folder, leaking test data into training.
split_rng = np.random.default_rng(0)

filenames = sorted(download_path.glob("*_w2_*.tif"))
gt_path = download_path / "BBBC010_v1_foreground_eachworm"
for fn in filenames:
    # Characters 33-35 of the image file name form the key that prefixes the
    # matching mask files ("<key>_*.png").
    fn_key = fn.name[33:36]

    reader = BioImage(fn)
    raw = reader.get_image_data("YX", Z=0, C=0, T=0)

    # One mask file per instance; the i-th mask (sorted by name) becomes
    # label i + 1 and 0 stays background.
    gt_filenames = sorted(gt_path.glob(f"{fn_key}_*.png"))

    # Smallest unsigned dtype that can hold the highest label: uint8 for up
    # to 255 instances, wider only when needed, so labels never overflow.
    label_dtype = np.min_scalar_type(len(gt_filenames))
    gt = np.zeros(raw.shape, dtype=label_dtype)
    for label, gt_fn in enumerate(gt_filenames, start=1):
        gt_item = imread(gt_fn)
        # Where masks overlap, the later mask overwrites the earlier one.
        gt[gt_item > 0] = label

    # Files are visited in sorted order, so the seeded draw sends each image
    # to the same folder on every run.
    out_path = test_path if split_rng.random() < TEST_FRACTION else train_path

    out_raw = out_path / f"img_{fn_key}_IM.tiff"
    out_gt = out_path / f"img_{fn_key}_GT.tiff"

    OmeTiffWriter.save(raw, out_raw, dim_order="YX")
    OmeTiffWriter.save(gt, out_gt, dim_order="YX")


In [None]:
# Optional cleanup: the converted files live in the train/test folders, so the
# download folder (archives and extracted originals) can be deleted now.
from shutil import rmtree

rmtree(path=download_path)