# bioimageio.core usage examples

In [None]:
import os
import hashlib

import bioimageio.core
import imageio
# we use napari for visualising images; you can install it via `pip install napari` or `conda install napari`
import napari
import numpy as np
import xarray as xr

from bioimageio.core.prediction_pipeline import create_prediction_pipeline

In [None]:
# helper function for showing multiple images in napari
def show_images(*images, names=None):
    """Open a new napari viewer and add every given image to it as a layer.

    Each positional argument is either image data, which is passed to napari as is,
    or a string, which is treated as a file path and read with imageio first.
    If `names` is given, it is indexed with the position of each image to obtain
    the name of the corresponding layer; otherwise the layers are added with name=None.
    """
    viewer = napari.Viewer()
    for index, image in enumerate(images):
        layer_name = names[index] if names is not None else None
        # strings are interpreted as paths to image files
        data = imageio.imread(image) if isinstance(image, str) else image
        viewer.add_image(data, name=layer_name)

## Loading a model

We will use a model that predicts foreground and boundaries in images of nuclei from the [Kaggle nucleus segmentation challenge](https://www.kaggle.com/c/data-science-bowl-2018).
Find the model on bioimage.io here: https://bioimage.io/#/?id=10.5072%2Fzenodo.881940

First, we will use `bioimageio.core.load_resource_description` to load the model and inspect the obtained model resource.

In [None]:
# the model can be loaded using different representations;
# the three variables below are alternative identifiers that are each passed to
# `bioimageio.core.load_resource_description` in the following cells

# the DOI of the zenodo entry corresponding to the model
rdf_doi = "10.5281/zenodo.6287342"

# the url of the yaml file containing the model resource description
rdf_url = "https://zenodo.org/record/6287342/files/rdf.yaml"

# filepath to the downloaded model (either zipped package or yaml)
# to download it from the website:
# - go to https://bioimage.io/#/?id=10.5281%2Fzenodo.5764892%2F5764893
# - click the download icon
# - select "ilastik" weight format
# NOTE(review): the id in the link above differs from `rdf_doi` - confirm that both refer to the same model
# NOTE(review): the file name below mentions pytorch_state_dict while the instructions say "ilastik" - confirm
# NOTE: this is a machine-specific absolute path; adapt it to the location of your own download
rdf_path = "/home/pape/Downloads/nuclei-segmentation-boundarymodel_pytorch_state_dict.zip"

In [None]:
# load the model from the url of its rdf.yaml
# the loaded resource is bound to `model_resource`, which the rest of the notebook uses
model_resource = bioimageio.core.load_resource_description(rdf_url)

In [None]:
# load the model from its DOI
# this rebinds `model_resource`, replacing the result of any previously executed loading cell
model_resource = bioimageio.core.load_resource_description(rdf_doi)

In [None]:
# load the model from the local path to the zipped model files
# this rebinds `model_resource`, replacing the result of any previously executed loading cell
model_resource = bioimageio.core.load_resource_description(rdf_path)

In [None]:
# the "model_resource" instance returned by load_resource_description
# contains the information stored in the resource description
# (see https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/model_spec_latest.md)

# the weights are keyed by weight format (pytorch_state_dict for the model used here);
# each entry also records where the corresponding weight file is stored
weight_entries = model_resource.weights
print("Available weight formats for this model:", weight_entries.keys())
print("Pytorch state dict weights are stored at:", weight_entries["pytorch_state_dict"].source)
print()

# the inputs expected by the model
print("The model requires as inputs:")
for input_spec in model_resource.inputs:
    print("Input with axes:", input_spec.axes, "and shape", input_spec.shape)
print()

# the outputs returned by the model
print("The model returns the following outputs:")
for output_spec in model_resource.outputs:
    print("Output with axes:", output_spec.axes, "and shape", output_spec.shape)

In [None]:
# 'test_model' from 'bioimageio.core.resource_tests' fully tests the model: it runs prediction
# for the test input(s) and checks that the results agree with the test output(s).
# it is recommended to run it before using a model, to check that the model works properly.
# 'test_model' returns a dict; if there are any errors they are stored under the key "error",
# which is None if the model passes
from bioimageio.core.resource_tests import test_model

test_result = test_model(model_resource)
test_error = test_result["error"]
if not test_error:
    print("The model passed all tests")
else:
    print("The model test failed with:", test_error)
    print("with the traceback:", test_result["traceback"])

## Prediction with the model

`bioimageio.core` implements functionality to run predictions with a model in bioimage.io format.
This includes functions to run prediction with numpy arrays (more precisely xarray DataArrays) and convenience functions to run predictions for inputs stored on disc.

In [None]:
# load the example image for this model, which is stored in numpy file format
# `test_inputs[0]` is the first test input listed in the model resource
input_image = np.load(model_resource.test_inputs[0])

In [None]:
# define a function to run prediction on a numpy input
def predict_numpy(model, input_, devices=None, weight_format=None):
    """Run prediction for a single numpy input and return the first output tensor.

    model: the loaded model resource to predict with.
    input_: the input data; its dimensions are named after the axes of the model's first input.
    devices: can be used to run prediction on a gpu instead of the cpu.
    weight_format: which weight format to use in case the model contains different weight formats.

    Only the first input and the first output of the model are handled here.
    """
    # the prediction pipeline combines preprocessing, prediction and postprocessing;
    # it should always be used for prediction with a bioimageio model
    pipeline = create_prediction_pipeline(bioimageio_model=model, devices=devices, weight_format=weight_format)

    # the pipeline expects xarray.DataArrays, which are similar to numpy arrays but have named
    # dimensions (the dims keyword argument); the dims have to agree with the input axes of the model
    input_dims = tuple(model.inputs[0].axes)
    tensor = xr.DataArray(input_, dims=input_dims)

    # the pipeline is called with as many inputs as the model requires, i.e. for several inputs
    #   pipeline(input1, input2, ...)   or   pipeline(*inputs)
    # and returns a list with one tensor per model output; here there is a single input and
    # only the first output is returned
    output_tensors = pipeline(tensor)
    return output_tensors[0]

In [None]:
# run prediction for the test input and show the result
# `prediction` is the raw (not thresholded) output of the model for the test input
prediction = predict_numpy(model_resource, input_image)
show_images(input_image, prediction, names=["image", "prediction"])

In [None]:
# the utility function `predict_image` can be used to run prediction with an image stored on disk
from bioimageio.core.prediction import predict_image

# the filepath where the output should be stored; most common image formats
# as well as the npy file format are supported
outputs = ["prediction.tif"]
predict_image(model_resource, model_resource.test_inputs, outputs)

# the output tensor contains 2 channels, which is not supported by normal tif,
# so the 2 channels are stored as 2 separate images
fg_pred, bd_pred = [imageio.imread(channel_file) for channel_file in ("prediction-c0.tif", "prediction-c1.tif")]
layer_names = ["image", "foreground-prediction", "boundary-prediction"]
show_images(input_image, fg_pred, bd_pred, names=layer_names)

In [None]:
# the utility function `predict_images` can be used to run prediction for a batch of images stored on disk
# note: this only works for models which have a single input and output!
from bioimageio.core.prediction import predict_images

# here, we use a subset of the dsb challenge data provided by stardist (https://github.com/stardist/stardist)
# you can obtain it from: https://github.com/stardist/stardist/releases/download/0.1.0/dsb2018.zip
from glob import glob

# select all tif images in the "images" directory of the "test" subfolder
# NOTE: this is a machine-specific absolute path; adapt it to the location of your own download
folder = "/home/pape/Downloads/dsb2018(1)/dsb2018/test"
image_pattern = os.path.join(folder, "images", "*.tif")
inputs = glob(image_pattern)

# create an output folder; every prediction is stored there under the file name of its input image
output_folder = os.path.join(folder, "predictions")
os.makedirs(output_folder, exist_ok=True)
outputs = [os.path.join(output_folder, os.path.basename(image_path)) for image_path in inputs]

print(len(inputs), "images for prediction were found")

In [None]:
# the model at hand can only predict images whose xy-size is a multiple of 16.
# to run it on images of arbitrary size, we pass the `padding` argument to `predict_images`:
# with mode "dynamic" the input is padded to the next bigger size that is divisible by 16.
# alternatively, mode "fixed" pads to a fixed shape, e.g.
# `{"x": 512, "y": 512, "mode": "fixed"}` will always pad to a size of 512x512.
# the padding is cropped again after the prediction
padding = {"x": 16, "y": 16, "mode": "dynamic"}
predict_images(model_resource, inputs, outputs, padding=padding, verbose=True)

# check the first input/output; the two output channels are loaded from separate files,
# whose names carry a "-c0" / "-c1" suffix in front of the file extension
first_output = outputs[0]
channel_files = [first_output.replace(".tif", suffix) for suffix in ("-c0.tif", "-c1.tif")]
show_images(inputs[0], *channel_files)

In [None]:
# instead of padding, we can also use tiling.
# here, we specify a tile size of 224 and a halo (= extension of the tile on both sides)
# of size 16, which results in an effective tile shape of 256 = 224 + 2*16
tile_shape = {"x": 224, "y": 224}
halo = {"x": 16, "y": 16}
tiling = {"tile": tile_shape, "halo": halo}
predict_images(model_resource, inputs, outputs, tiling=tiling, verbose=True)

# check the first input/output; the two output channels are loaded from separate files,
# whose names carry a "-c0" / "-c1" suffix in front of the file extension
first_output = outputs[0]
channel_files = [first_output.replace(".tif", suffix) for suffix in ("-c0.tif", "-c1.tif")]
show_images(inputs[0], *channel_files)

## Create a bioimage.io model package

`bioimageio.core` also implements functionality to create a model package compatible with the [bioimageio model spec](https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/model_spec_latest.md) ready to be shared via
the [bioimage.io model zoo](https://bioimage.io/#/).
Here, we will use this functionality to create two models, one that adds thresholding as post-processing to the outputs and another one that also adds weights in torchscript format.

In [None]:
# get the python file defining the architecture.
# this is only required for models with pytorch_state_dict weights
def get_architecture_source(rdf):
    """Return the architecture of a pytorch_state_dict model as "<source file>:<callable name>".

    rdf: identifier of the model resource description; it is passed on to
        `bioimageio.core.load_raw_resource_description`.

    Fails with an AssertionError (carrying the file path) if the source file cannot be found.
    """
    # the raw resource contains the information from the resource description before evaluation,
    # e.g. the file and name of the python file with the model architecture
    raw_resource = bioimageio.core.load_raw_resource_description(rdf)
    architecture = raw_resource.weights["pytorch_state_dict"].architecture

    # download the source file if necessary
    source_file = bioimageio.core.resource_io.utils.resolve_source(architecture.source_file)

    # if the resolved path does not exist, look for a file of the same name in the root path of the model
    if not os.path.exists(source_file):
        file_name = os.path.basename(source_file)
        source_file = os.path.join(raw_resource.root_path, file_name)
    assert os.path.exists(source_file), source_file

    return f"{source_file}:{architecture.callable_name}"

In [None]:
# first new model: add thresholding of outputs as post-processing
# the convenience function `build_model` creates a bioimageio model spec compatible package (=zipped folder)
from bioimageio.core.build_spec import build_model

# create a subfolder to store the files for the new model
model_root = "./new_model"
os.makedirs(model_root, exist_ok=True)

# create the expected output tensor (= outputs thresholded at 0.5)
# NOTE: `prediction` must hold the raw output of the original model for the test input here;
# re-run the prediction cell for `model_resource` first if it has been overwritten in the meantime
threshold = 0.5
new_output = prediction > threshold
new_output_path = f"{model_root}/new_test_output.npy"
np.save(new_output_path, new_output)

# keep the preprocessing of the original model and add thresholding as post-processing procedure
preprocessing = [
    [{"name": prep.name, "kwargs": prep.kwargs} for prep in inp.preprocessing]
    for inp in model_resource.inputs
]
postprocessing = [[{"name": "binarize", "kwargs": {"threshold": threshold}}]]

# get the model architecture
# note that this is only necessary for pytorch state dict models
model_source = get_architecture_source(rdf_doi)

# we use the `parent` field to indicate that the new model is created based on
# the nucleus segmentation model we have obtained from bioimage.io
# this field is optional and only needs to be given for models that are created based on other models from bioimage.io
# the parent is specified via its DOI and the sha256 hash of its rdf file,
# which is read from the folder that contains the pytorch_state_dict weight file
model_root_folder = os.path.dirname(model_resource.weights["pytorch_state_dict"].source)
rdf_file = os.path.join(model_root_folder, "rdf.yaml")
with open(rdf_file, "rb") as f:
    rdf_hash = hashlib.sha256(f.read()).hexdigest()
parent = {"uri": rdf_doi, "sha256": rdf_hash}

# the name of the new model and where to save the zipped model package
name = "new-model1"
zip_path = os.path.join(model_root, f"{name}.zip")

# `build_model` needs some additional information about the model, like citation information
# all this additional information is passed as plain python types and will be converted into the
# bioimageio representation internally
# for more information, check out the function signature
# https://github.com/bioimage-io/core-bioimage-io-python/blob/main/bioimageio/core/build_spec/build_model.py#L252
cite = [{"text": cite_entry.text, "url": cite_entry.url} for cite_entry in model_resource.cite]

# the training data used for the model can also be specified by linking to a dataset available on bioimage.io
training_data = {"id": "ilastik/stradist_dsb_training_data"}

# the axes descriptions for the inputs / outputs
input_axes = ["bcyx"]
output_axes = ["bcyx"]

# the pytorch_state_dict weight file
weight_file = model_resource.weights["pytorch_state_dict"].source

# build the model! it will be saved to 'zip_path'
new_model_raw = build_model(
    weight_uri=weight_file,
    test_inputs=model_resource.test_inputs,
    test_outputs=[new_output_path],
    input_axes=input_axes,
    output_axes=output_axes,
    output_path=zip_path,
    name=name,
    description="nucleus segmentation model with thresholding",
    authors=[{"name": "Jane Doe"}],
    license="CC-BY-4.0",
    documentation=model_resource.documentation,
    covers=[str(cover) for cover in model_resource.covers],
    tags=["nucleus-segmentation"],
    cite=cite,
    parent=parent,
    architecture=model_source,
    model_kwargs=model_resource.weights["pytorch_state_dict"].kwargs,
    preprocessing=preprocessing,
    postprocessing=postprocessing,
    training_data=training_data,
)

In [None]:
# load the new model from the zipped package, run prediction and check the result
# note: this rebinds `prediction` to the output of the new model
new_model = bioimageio.core.load_resource_description(zip_path)
prediction = predict_numpy(new_model, input_image)
show_images(input_image, prediction, names=["input", "binarized-prediction"])

## Add different weight format and package model with new weights

In [None]:
# `convert_weights_to_torchscript` creates torchscript weights based on the weights loaded from pytorch_state_dict
from bioimageio.core.weight_converter.torch import convert_weights_to_torchscript
# `add_weights` adds new weights to the model specification
from bioimageio.core.build_spec import add_weights

# the path to save the newly created torchscript weights
weight_path = os.path.join(model_root, "weights.torchscript")
convert_weights_to_torchscript(new_model, weight_path)

# the path to save the new model with torchscript weights
# `add_weights` is given the raw description returned by `build_model` and this output path
zip_path = f"{model_root}/new_model2.zip"
new_model2_raw = add_weights(new_model_raw, weight_path, weight_type="torchscript", output_path=zip_path)

In [None]:
# load the new model from the zipped package, run prediction and check the result
# `weight_format="torchscript"` selects the torchscript weights for the prediction
new_model = bioimageio.core.load_resource_description(zip_path)
prediction = predict_numpy(new_model, input_image, weight_format="torchscript")
show_images(input_image, prediction, names=["input", "binarized-prediction"])

In [None]:
# models in the bioimageio.core format can also directly be exported as zipped packages
# using `bioimageio.core.export_resource_package`
# here the raw description of the second new model is exported to "another_model.zip"
bioimageio.core.export_resource_package(new_model2_raw, output_path="another_model.zip")