# bioimageio.core usage examples

In [None]:
import os
import hashlib

import bioimageio.core
import imageio

# we use napari for visualising images, you can install it via `pip install napari` or `conda install napari`
import napari
import numpy as np
import xarray as xr

In [None]:
# helper function for showing multiple images in napari
def show_images(*images, names=None):
    """Show every given image as its own layer in a new napari viewer.

    Args:
        *images: the images to display. An entry that is a `str` is first
            read with `imageio.imread`; any other entry is handed to
            napari unchanged.
        names: optional sequence of layer names, looked up by the position
            of the image. If None, every layer is added with `name=None`.
    """
    viewer = napari.Viewer()
    for index, image in enumerate(images):
        # resolve the layer name first, then the pixel data
        layer_name = names[index] if names is not None else None
        data = imageio.imread(image) if isinstance(image, str) else image
        viewer.add_image(data, name=layer_name)

## Loading a model

We will use a model that predicts foreground and boundaries in images of nuclei from the [kaggle nuclei segmentation challenge](https://www.kaggle.com/c/data-science-bowl-2018).
Find the model on bioimage.io here: https://bioimage.io/#/?id=10.5281%2Fzenodo.5764892

First, we will use `bioimageio.core.load_resource_description` to load the model and inspect the obtained model resource.

In [None]:
# the model can be loaded using different representations:
# (each variable defined here is passed to `load_resource_description` in one of the cells below)

# the doi of the zenodo entry corresponding to the model
rdf_doi = "10.5281/zenodo.6287342"

# the url of the yaml file containing the model resource description
rdf_url = "https://zenodo.org/record/6287342/files/rdf.yaml"

# filepath to the downloaded model (either zipped package or yaml)
# NOTE: this is a hard-coded absolute path; adapt it to the location where you saved the download.
# to download it from the website:
# - go to https://bioimage.io/#/?id=10.5281%2Fzenodo.5764892%2F5764893
# - click the download icon
# - select "ilastik" weight format
rdf_path = (
    "/home/pape/Downloads/nuclei-segmentation-boundarymodel_pytorch_state_dict.zip"
)

In [None]:
# load model from link to rdf.yaml (the `rdf_url` string) and bind the result to `model_resource`
model_resource = bioimageio.core.load_resource_description(rdf_url)

In [None]:
# load model from doi (the `rdf_doi` string); running this cell rebinds `model_resource`
model_resource = bioimageio.core.load_resource_description(rdf_doi)

In [None]:
# load model from path to the zipped model files (the local `rdf_path`); running this cell rebinds `model_resource`
model_resource = bioimageio.core.load_resource_description(rdf_path)

In [None]:
# the "model_resource" instance returned by load_resource_description
# contains the information stored in the resource description (see https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/model_spec_latest.md)
# This cell reads a few of its attributes (weights, inputs, outputs) and prints them.

# we can e.g. check what weight formats are available in the model (pytorch_state_dict for the model used here)
print("Available weight formats for this model:", model_resource.weights.keys())
# or where the (downloaded) weight files are stored
# (this lookup assumes the model provides "pytorch_state_dict" weights, as the model used here does)
print(
    "Pytorch state dict weights are stored at:",
    model_resource.weights["pytorch_state_dict"].source,
)
print()
# or what inputs the model expects
print("The model requires as inputs:")
# one line per input description: its axes and its shape
for inp in model_resource.inputs:
    print("Input with axes:", inp.axes, "and shape", inp.shape)
print()
# and what the model outputs are
print("The model returns the following outputs:")
# one line per output description: its axes and its shape
for out in model_resource.outputs:
    print("Output with axes:", out.axes, "and shape", out.shape)

In [None]:
# the function 'test_model' from 'bioimageio.core.resource_tests' can be used to fully test the model,
# including running prediction for the test input(s) and checking that they agree with the test output(s)
# before using a model, it is recommended to check that it properly works with this function
# 'test_model' returns a dict with 'status'='passed'/'failed' and more detailed information
from bioimageio.core.resource_tests import test_model

test_result = test_model(model_resource)
if test_result["status"] == "failed":
    # report which test failed, the error message and the full traceback
    print("model test:", test_result["name"])
    print("The model test failed with:", test_result["error"])
    print("with the traceback:")
    print("".join(test_result["traceback"]))
else:
    # Only report success if the status really is 'passed'. A bare comparison here would be
    # evaluated and discarded, so any unexpected status would silently be reported as a pass.
    assert test_result["status"] == "passed", test_result["status"]
    print("The model passed all tests")

## Prediction with the model

`bioimageio.core` implements functionality to run prediction with models in the `bioimage.io` format.
This includes functions to run prediction with `xarray.DataArrays` as input and convenience functions to run predictions for images stored on disc.

In [None]:
# Load the example image for this model, which is stored in numpy file format.
# `model_resource.test_inputs[0]` is the first entry of the model's test inputs; `np.load` reads it.
input_image = np.load(model_resource.test_inputs[0])

In [None]:
# Create an xarray.DataArray from the input image.
# DataArrays are like numpy arrays, but they have annotated axes.
# The axes are used to validate that the axes of the input image match the axes expected by a model.
# The dimension names are taken from the axes of the model's first input description.
input_array = xr.DataArray(input_image, dims=tuple(model_resource.inputs[0].axes))
# print the axis annotations ('dims') and the shape of the input array
print(input_array.dims)
print(input_array.shape)

In [None]:
# Next, create a 'prediction_pipeline'. The prediction_pipeline is used to run prediction with a given model.
# This means it applies the preprocessing, runs inference with the model and applies the postprocessing.

# The 'devices' argument can be used to specify which device(s) to use for inference with the model.
# Hence it can be used to specify whether to use the cpu, a single gpu or multiple gpus (not implemented yet).
# By default (devices=None) a gpu will be used if available and otherwise the cpu will be used.
devices = None

# The 'weight_format' argument can be used to specify which weight format available in the model to use.
# By default (weight_format=None) the weight format with highest priority (as defined by bioimageio.core) will be used.
weight_format = None

# Both settings are passed as keyword arguments; the resulting pipeline is reused by the prediction cells below.
prediction_pipeline = bioimageio.core.create_prediction_pipeline(
    model_resource, devices=devices, weight_format=weight_format
)

In [None]:
# Use the prediction pipeline to run prediction for the image we loaded before.
# The prediction pipeline always returns a tuple (even if the model only has a single output tensor).
# So we access the first element of the prediction to get the predicted tensor.
prediction = prediction_pipeline(input_array)[0]
# The raw numpy input and the predicted tensor are shown as two napari layers.
show_images(
    input_image, prediction, names=["image", "prediction"]
)  # show the prediction result

In [None]:
# The prediction pipeline expects inputs to have a shape that fits the model exactly.
# So if the input does not fit the expected input shape the prediction will fail.
# E.g. if we crop the input to shape [1, 1, 250, 250] it will not work for our example model,
# which expects a spatial shape that is a multiple of 16
# Only the last two axes are cropped (to at most 250 entries each); the first two axes are kept in full.
cropped_image = input_image[:, :, :250, :250]
# Wrap the cropped array with the same axis names that were used for the full input.
cropped_array = xr.DataArray(cropped_image, dims=tuple(model_resource.inputs[0].axes))

In [None]:
# Applying the prediction pipeline to an image with the wrong shape will fail!
# The error raised by this cell is intentional: it demonstrates the shape restriction described above.
prediction_pipeline(cropped_array)

In [None]:
# Instead, we can use the function `predict_with_padding`, which will pad the image to a shape that fits the model.
# NOTE(review): unlike the direct pipeline call, the result is not indexed with [0] here;
# confirm whether `predict_with_padding` returns a single tensor or a sequence of tensors.
prediction = bioimageio.core.predict_with_padding(prediction_pipeline, cropped_array)
show_images(
    cropped_image, prediction, names=["image", "prediction"]
)  # show the prediction result

In [None]:
# There is also the function `predict_with_tiling`, which will run prediction for patches in a sliding window fashion.
# This is especially helpful for large inputs that do not fit into the model as a single input.

# The `tiling` argument is used to specify the tile size and the `halo`, which is the part of the patch
# that is cropped in order to reduce boundary artifacts.
# Alternatively, `tiling` can also be set to `True`, then the tile size and halo will be deduced from the model config
# (this is also the default behavior when the `tiling` parameter is not passed).
# Both "tile" and "halo" are given per spatial axis name ("x" and "y").
tiling = {
    "tile": {"x": 128, "y": 128},
    "halo": {"x": 16, "y": 16},
}  # use a tile size of 128x128 and crop a halo of 16 pixels

# if `verbose` is set to True a progress bar will be printed
# NOTE(review): the result is not indexed with [0] here; confirm whether `predict_with_tiling`
# returns a single tensor or a sequence of tensors.
prediction = bioimageio.core.predict_with_tiling(
    prediction_pipeline, cropped_array, tiling=tiling, verbose=True
)
show_images(cropped_image, prediction, names=["image", "prediction"])

### Convenience prediction functions

`bioimageio.core` also contains a few convenience functions to directly predict images that are stored on disc:
- `predict_image` can be used to run prediction for a single image
- `predict_images` to run prediction for many images

In [None]:
# The convenience function `predict_image` can be used to run prediction for an image stored on disc.
from bioimageio.core.prediction import predict_image

# The filepath where the output should be stored; supports most common image formats as well as npy fileformat.
# The path is relative, so the files end up in the current working directory.
outputs = ["prediction.tif"]
# The model's own test inputs are used as the input files here.
predict_image(model_resource, model_resource.test_inputs, outputs)

# The output tensor contains 2 channels, which is not supported by normal tif.
# Thus, these 2 channels are stored as 2 separate images.
# The two file names read below are "prediction.tif" with a "-c0" / "-c1" suffix before the extension.
fg_pred = imageio.imread("prediction-c0.tif")
bd_pred = imageio.imread("prediction-c1.tif")
show_images(
    input_image,
    fg_pred,
    bd_pred,
    names=["image", "foreground-prediction", "boundary-prediction"],
)

In [None]:
# The convenience function `predict_images` can be used to run prediction for many images stored on disc.
# Note: this only works for models which have a single input and output!
from bioimageio.core.prediction import predict_images

# Here we use a small subset of the dsb challenge data for prediction.
# The original data is available at https://github.com/stardist/stardist/releases/download/0.1.0/dsb2018.zip.
# We have added a few images to the repository so that the notebook runs out of the box.

# Get all paths to the images in the "example-images" folder.
from glob import glob

# `glob` returns its matches in arbitrary (file system dependent) order; sort them so that
# the "first" image inspected in the following cells is the same on every run and machine.
inputs = sorted(glob("./example-images/*.png"))

# Create an output folder and specify the output path for each image:
# the same file name as the input, placed inside the output folder.
output_folder = "./predictions"
os.makedirs(output_folder, exist_ok=True)
outputs = [os.path.join(output_folder, os.path.basename(inp)) for inp in inputs]

print(len(inputs), "images for prediction were found")

In [None]:
# The model at hand can only predict images which have a spatial shape that is
# a multiple of 16. To run with images of other sizes we pass the `padding`
# argument to `predict_images` and specify that the input is padded to the next bigger
# size that is divisible by 16 (mode: dynamic).
# As an alternative `"mode": "fixed"` will pad to a fixed shape, e.g.
# `{"x": 512, "y": 512, "mode": "fixed"}` will always pad to a size of 512x512.
# The padding is cropped again after the prediction to restore the input shape.
padding = {"x": 16, "y": 16, "mode": "dynamic"}
predict_images(model_resource, inputs, outputs, padding=padding, verbose=True)

# check the first input/output
# The two prediction file names are built from the first output path by inserting "-c0" / "-c1"
# before the ".png" extension. This requires at least one input image to have been found.
show_images(
    inputs[0],
    outputs[0].replace(".png", "-c0.png"),
    outputs[0].replace(".png", "-c1.png"),
)

In [None]:
# Instead of padding, we can also use tiling.
# Here, we specify a tile size of 256 and a halo (= what's cropped from the tile on either side) of 16.
tiling = {
    "tile": {"x": 256, "y": 256},
    "halo": {"x": 16, "y": 16},
}
# The same `outputs` paths as in the padding example are passed,
# so the results are presumably written to the same files (verify).
predict_images(model_resource, inputs, outputs, tiling=tiling, verbose=True)

# Check the first input output pair.
# The two prediction file names are built from the first output path by inserting "-c0" / "-c1"
# before the ".png" extension. This requires at least one input image to have been found.
show_images(
    inputs[0],
    outputs[0].replace(".png", "-c0.png"),
    outputs[0].replace(".png", "-c1.png"),
)