<h2>Import packages and install histomics_detect</h2>

In [None]:
# install histomics_detect
!pip install -e /tf/notebooks/histomics_detect

# install histomics_stream
!pip install -e /tf/notebooks/histomics_stream

# add to system path
import sys

sys.path.append("/tf/notebooks/histomics_detect/")
sys.path.append("/tf/notebooks/histomics_stream/")

In [None]:
import os
import re
import numpy as np
import tensorflow as tf

# import dataset related packages
from histomics_detect.io import dataset
from histomics_detect.augmentation import crop, flip, jitter, shrink
from histomics_detect.visualization import plot_inference

# import whole-slide image handling pipeline
import histomics_stream as hs

# Number of training epochs.  The model.fit() calls in this notebook pass it both as
# `epochs` and as `validation_freq`.
number_epochs = 50  # Set to a number smaller than 50 for speed during debug

<h2>Define dataset parameters and create datasets - DCC example</h2>

In [None]:
# location of the DCC input data
path = "/tf/notebooks/DCC/data/"

# training parameters
train_tile = 224  # input image size (used for both width and height)
min_area_thresh = 0.5  # share of an object's area that must lie inside a random crop to keep it

# tensor versions of the parameters above
width = tf.constant(train_tile, dtype=tf.int32)
height = tf.constant(train_tile, dtype=tf.int32)
min_area = tf.constant(min_area_thresh, dtype=tf.float32)

# split dataset into training and validation
cases = [
    "131458",
    "91315_leica_at2_40x",
    "135062",
    "93094",
    "131453",
    "131450",
    "135060",
    "131463",
    "131459",
    "131440",
    "131460",
    "93096",
    "131449",
    "131457",
    "131461",
    "93098",
    "131447",
    "93092",
    "131443",
    "93095",
    "131448",
    "93099",
    "91316_leica_at2_40x",
    "131462",
    "93091",
    "135065",
    "131446",
    "131441",
    "101626",
    "93093",
    "131454",
    "93097",
    "131445",
    "131444",
    "131456",
    "93090",
]

# Randomly assign 90% of the cases (rounded up) to training.  The permutation spans
# *all* case indices; permuting only len(cases) - 1 of them would never allow the last
# case in the list to be selected for training.
n_training = int(np.ceil(0.9 * len(cases)))
training_indices = np.random.permutation(len(cases))[:n_training]
training = [cases[i] for i in training_indices]

# The remaining cases form the validation set.  They are kept in the order in which they
# appear in `cases`, so the result does not depend on set iteration order.
validation = [case for case in cases if case not in training]

# define parser for filenames
def parser(file):
    """Derive the case and roi identifiers from a dot-delimited filename.

    The extension is removed and the remaining name is split on ".".  The case is the
    third field; the roi joins the second field and the last three fields with ".".

    Args:
        file: Filename whose extension-free name has at least three "."-separated fields.

    Returns:
        Tuple (case, roi) of strings.

    Raises:
        IndexError: If the extension-free name has fewer than three fields.
    """
    stem, _extension = os.path.splitext(file)
    fields = stem.split(".")
    case = fields[2]
    roi = ".".join([fields[1], *fields[-3:]])
    return case, roi


# create the training and validation datasets from their case lists
ds_train_roi = dataset(path, parser, parser, train_tile, training)
ds_validation_roi = dataset(path, parser, parser, 0, validation)

# training pipeline: crop, flip, then jitter and shrink the second component of each
# element; the third component is passed through unchanged.  (The lambda argument names
# are descriptive guesses -- the element layout is defined by histomics_detect.)
ds_train_roi = (
    ds_train_roi
    .map(lambda image, boxes, extra: (*crop(image, boxes, width, height, min_area_thresh), extra))
    .map(lambda image, boxes, extra: (*flip(image, boxes), extra))
    .map(lambda image, boxes, extra: (image, jitter(boxes, 0.05), extra))
    .map(lambda image, boxes, extra: (image, shrink(boxes, 0.05), extra))
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# validation pipeline: no augmentation, prefetch only
ds_validation_roi = ds_validation_roi.prefetch(tf.data.experimental.AUTOTUNE)

<h2>Create and train detection model - DCC example</h2>

In [None]:
# import network generation and training packages
from histomics_detect.networks.rpns import rpn
from histomics_detect.models.faster_rcnn import FasterRCNN

# anchor sizes - square (1:1 aspect ratio) anchors, width/height in pixels at input mag.
anchor_px = tf.constant([32, 64, 96], tf.int32)

# feature network parameters
backbone_dimension = 256  # number of features generated by rpn convolution
backbone_blocks = 14  # number of residual blocks to use in backbone
backbone_stride = 1  # strides in feature generation network convolution

# rpn network parameters
rpn_act_conv = ["relu"]  # activation for rpn convolutional layers
rpn_kernel = [3]  # kernel size for rpn convolution

# anchor filtering parameters
# (of these, only rpn_lmbda is passed to a call in this cell)
pos_max = 128  # maximum number of positive anchors to keep in each roi
neg_max = 128  # maximum number of negative anchors to keep in each roi
rpn_lmbda = 10.0  # weighting for rpn regression loss
roialign_tiles = 3.0  # roialign - number of horizontal/vertical tiles in a proposal
roialing_pool = 2.0  # roialign - number of horizontal/vertical samples in each tile

# backbone source: ResNet50 with imagenet weights and no top
resnet50 = tf.keras.applications.ResNet50(
    input_shape=(train_tile, train_tile, 3),
    input_tensor=None,
    weights="imagenet",
    include_top=False,
    pooling=None,
)

# derive the rpn and backbone networks from the ResNet50 model
rpnetwork, backbone = rpn(
    resnet50,
    n_anchors=tf.size(anchor_px),
    blocks=backbone_blocks,
    stride=backbone_stride,
    dimensions=[backbone_dimension],
    kernels=rpn_kernel,
    activations=rpn_act_conv,
)

# FasterRCNN keras model
model = FasterRCNN(rpnetwork, backbone, [width, height], anchor_px, rpn_lmbda)

# compile with a binary cross-entropy (on logits) loss and a Huber loss, in that order
model.compile(
    loss=[
        tf.keras.losses.BinaryCrossentropy(from_logits=True),
        tf.keras.losses.Huber(),
    ],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# train; validation_freq equals the epoch count, so validation runs once, after the
# final epoch
model.fit(
    x=ds_train_roi,
    validation_data=ds_validation_roi,
    validation_freq=number_epochs,
    epochs=number_epochs,
    batch_size=1,
    verbose=1,
)

<h2>Define dataset parameters and create datasets - DLBCL example</h2>

In [None]:
# import dataset related packages
from histomics_detect.io import dataset, resize
from histomics_detect.augmentation import crop, flip, jitter, shrink
from histomics_detect.visualization import plot_inference
import numpy as np
import os

# location of the DLBCL input data
path = "/tf/notebooks/DLBCL/detection/"

# training parameters
train_tile = 224  # input image size (used for both width and height)
min_area_thresh = 0.5  # share of an object's area that must lie inside a crop to keep it

# tensor versions of the parameters above
width = tf.constant(train_tile, dtype=tf.int32)
height = tf.constant(train_tile, dtype=tf.int32)
min_area = tf.constant(min_area_thresh, dtype=tf.float32)

# define filename parsers
def png_parser(png):
    """Split a png filename into its case and roi identifiers.

    The extension is removed; the case is the text before the first "." of what
    remains and the roi is everything after that ".".

    Args:
        png: Filename to parse.

    Returns:
        Tuple (case, roi) of strings; roi is "" when the name contains no ".".
    """
    stem = os.path.splitext(png)[0]
    case, _separator, roi = stem.partition(".")
    return case, roi


def csv_parser(csv):
    """Split a csv filename into its case and roi identifiers.

    The extension is removed and the remaining name is split on ".".  The case is the
    first field; the roi joins the second field (when present) and the last three
    fields with ".".

    Args:
        csv: Filename to parse.

    Returns:
        Tuple (case, roi) of strings.
    """
    stem = os.path.splitext(csv)[0]
    fields = stem.split(".")
    roi_fields = fields[1:2] + fields[-3:]
    return fields[0], ".".join(roi_fields)


# Fixed training/validation split for the DLBCL example.  Every case identifier has the
# form "DCBT_<number>_CMYC", so the lists are generated from the case numbers.
training = [f"DCBT_{number}_CMYC" for number in (2, 3, 5, 9, 10, 12, 14, 18, 19, 20, 21, 22)]
validation = [f"DCBT_{number}_CMYC" for number in (1, 4, 6, 8, 11, 13, 15, 16, 17)]


# create the training and validation datasets from their case lists
ds_train_roi = dataset(path, png_parser, csv_parser, train_tile, training)
ds_validation_roi = dataset(path, png_parser, csv_parser, 0, validation)

# training pipeline: resize with factor 2.0, crop, flip, then jitter and shrink the
# second component of each element; the third component is passed through unchanged.
# (The lambda argument names are descriptive guesses -- the element layout is defined
# by histomics_detect.)
ds_train_roi = (
    ds_train_roi
    .map(lambda image, boxes, extra: (*resize(image, boxes, 2.0), extra))
    .map(lambda image, boxes, extra: (*crop(image, boxes, width, height, min_area_thresh), extra))
    .map(lambda image, boxes, extra: (*flip(image, boxes), extra))
    .map(lambda image, boxes, extra: (image, jitter(boxes, 0.05), extra))
    .map(lambda image, boxes, extra: (image, shrink(boxes, 0.05), extra))
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# validation pipeline: the same resize as training, no augmentation
ds_validation_roi = (
    ds_validation_roi
    .map(lambda image, boxes, extra: (*resize(image, boxes, 2.0), extra))
    .prefetch(tf.data.experimental.AUTOTUNE)
)

<h2>Create and train detection model - DLBCL example</h2>

In [None]:
# import network generation and training packages
from histomics_detect.networks.rpns import rpn
from histomics_detect.models.faster_rcnn import FasterRCNN

# anchor sizes - square (1:1 aspect ratio) anchors, width/height in pixels at input mag.
anchor_px = tf.constant([32, 48, 64], tf.int32)

# feature network parameters
backbone_dimension = 256  # number of features generated by rpn convolution
backbone_blocks = 14  # number of residual blocks to use in backbone
backbone_stride = 1  # strides in feature generation network convolution

# rpn network parameters
rpn_act_conv = ["relu"]  # activation for rpn convolutional layers
rpn_kernel = [3]  # kernel size for rpn convolution

# anchor filtering parameters
# (of these, only rpn_lmbda is passed to a call in this cell)
pos_max = 128  # maximum number of positive anchors to keep in each roi
neg_max = 128  # maximum number of negative anchors to keep in each roi
rpn_lmbda = 10.0  # weighting for rpn regression loss
roialign_tiles = 3.0  # roialign - number of horizontal/vertical tiles in a proposal
roialing_pool = 2.0  # roialign - number of horizontal/vertical samples in each tile

# backbone source: ResNet50 with imagenet weights and no top
resnet50 = tf.keras.applications.ResNet50(
    input_shape=(train_tile, train_tile, 3),
    input_tensor=None,
    weights="imagenet",
    include_top=False,
    pooling=None,
)

# derive the rpn and backbone networks from the ResNet50 model
rpnetwork, backbone = rpn(
    resnet50,
    n_anchors=tf.size(anchor_px),
    blocks=backbone_blocks,
    stride=backbone_stride,
    dimensions=[backbone_dimension],
    kernels=rpn_kernel,
    activations=rpn_act_conv,
)

# FasterRCNN keras model
model = FasterRCNN(rpnetwork, backbone, [width, height], anchor_px, rpn_lmbda)

# compile with a binary cross-entropy (on logits) loss and a Huber loss, in that order
model.compile(
    loss=[
        tf.keras.losses.BinaryCrossentropy(from_logits=True),
        tf.keras.losses.Huber(),
    ],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# train; validation_freq equals the epoch count, so validation runs once, after the
# final epoch
model.fit(
    x=ds_train_roi,
    validation_data=ds_validation_roi,
    validation_freq=number_epochs,
    epochs=number_epochs,
    batch_size=1,
    verbose=1,
)

<h2>Inference on a single image - model.call() </h2>

In [None]:
# generate and visualize thresholded, roialign outputs
# Draw a single element from the validation dataset (shuffled with a buffer of 100
# elements); get_single_element() requires the dataset to hold exactly one element,
# which take(1) provides when the validation dataset is not empty.
data = ds_validation_roi.shuffle(100).take(1).get_single_element()
# The first component of the element is the image; cast it to uint8 before inference.
rgb = tf.cast(data[0], tf.uint8)
# Run the model with explicit tau and nms_iou values and plot the result on the image.
regressions = model(rgb, tau=0.5, nms_iou=0.3)
plot_inference(rgb, regressions)

<h2>Raw inference on a single image - model.raw() </h2>

In [None]:
# This cell reproduces inference step by step and relies on `rgb` assigned by an
# earlier cell.

# generate raw rpn outputs
objectness, boxes, features = model.raw(rgb)

# threshold rpn proposals (using the threshold stored on the model as model.tau)
boxes_positive, objectness_positive, positive = model.threshold(boxes, objectness, model.tau)

# perform non-max suppression on rpn positive predictions (using model.nms_iou)
boxes_nms, objectness_nms, selected = model.nms(boxes_positive, objectness_positive, model.nms_iou)

# generate roialign predictions for rpn positive predictions
align_boxes = model.align(boxes_nms, features, model.field, model.pool, model.tiles)

# visualize the boxes returned by model.align on the input image
plot_inference(rgb, align_boxes)

<h2>Batch inference using tf.data.Dataset.map </h2>

In [None]:
# Mapping the model with data.Dataset.map keeps outputs from different images separate:
# each list entry holds the model output for one element together with that element's
# second and third components.
map_output = list(ds_validation_roi.take(5).map(lambda image, boxes, extra: (model(image), boxes, extra)))

# model.predict, in contrast, merges the outputs from all images
predict_output = model.predict(ds_validation_roi.take(5))

<h2>Batch evaluation - model.evaluate() </h2>

In [None]:
# performance evaluation on multiple images from a tf.data.Dataset
# (runs over the entire validation dataset; the returned value is kept in `metrics`)
metrics = model.evaluate(ds_validation_roi)

<h2>Read in one or more whole-slide images and create a tensorflow dataset of tiles.</h2>

In [None]:
# Build a tf.data pipeline that reads whole-slide images chunk by chunk, splits the chunks
# into tiles, and runs the detection model on every tile.  Requires `model` from the
# training cells above.

# Report the TensorFlow version and stop early when no GPU is available.
print("TensorFlow version: ", tf.__version__)
device_name = tf.test.gpu_device_name()
if not device_name:
    raise SystemError("GPU device not found")
print("Found GPU at: {}".format(device_name))

# Options for tensorflow dataset map operations.  Note that "deterministic": False allows
# the mapped elements to be produced in a different order from one iteration to the next.
dataset_map_options = {
    "num_parallel_calls": tf.data.experimental.AUTOTUNE,
    "deterministic": False,
}

# The list of whole-slide images to process
all_wsi_images = ["/tf/notebooks/histomics_detect/example/DCBT_10_CMYC.svs"]
print(f"Image source = {all_wsi_images}")

# We will use a mask to determine which tiles of a slide to process.  In this example we will
# build the mask name from the WSI file name, by inserting "-mask" and changing the file type to
# "png", but generally any file name and any file type that we can read as an image will do.
all_masks = [re.sub(r"^(.*)\.([^\.]*)$", r"\1-mask.png", wsi) for wsi in all_wsi_images]
# Or, instead use no masks.  This assignment overrides the one above; remove it to use the
# "-mask.png" files.
all_masks = ["" for wsi in all_wsi_images]
print(f"all_masks = {all_masks}")

# Create a tensorflow dataset that knows something about our whole-slide images
header = dict(
    hs.ds.init.Header(
        slides="DCBT_10_CMYC",
        filenames=all_wsi_images,
        cases="DCBT_10",
        magnifications=20.0,
        read_modes="tiled",
        mask_filenames=all_masks,
    )
)
slides = tf.data.Dataset.from_tensor_slices(header)

# For the desired magnification, find the best level stored in the image file, and its associated
# factor, width, and height.
compute_read_parameters = hs.dsm.wsi.ComputeReadParameters()
slides = slides.map(compute_read_parameters, **dataset_map_options)

# Specify size, overlap, etc. information about the tiles that we want to analyze.
# Here: 224 x 224 tiles without overlap, with chunk width/height factors of 8.
add_tile_description = hs.dsm.wsi.AddTileDescription(
    tile_width=tf.constant(224, dtype=tf.int32),
    tile_height=tf.constant(224, dtype=tf.int32),
    overlap_width=tf.constant(0, dtype=tf.int32),
    overlap_height=tf.constant(0, dtype=tf.int32),
    chunk_width_factor=tf.constant(8, dtype=tf.int32),
    chunk_height_factor=tf.constant(8, dtype=tf.int32),
)
slides = slides.map(add_tile_description, **dataset_map_options)

# If there are any masks, read them in, one per slide; they specify the tile selection.  If a
# mask is not already one pixel per tile, downsample (or upsample) it so that it is.
compute_resampled_mask = hs.dsm.wsi.ComputeResampledMask()
slides = slides.map(compute_resampled_mask, **dataset_map_options)

# Split each element (e.g. each slide) into a batch of multiple rows, one per chunk to be read.
# Note that the width `cw` or height `ch` of a row (chunk) may be decreased from the requested
# value if a chunk is near the edge of an image.  Note that it is important to call `.unbatch()`
# when it is desired that the chunks be not batched by slide.
compute_chunk_positions = hs.dsm.wsi.ComputeChunkPositions()
chunks = slides.map(compute_chunk_positions, **dataset_map_options).prefetch(tf.data.experimental.AUTOTUNE).unbatch()

# Read and split the chunks into the tile size we want.  Note that it is important to call
# `.unbatch()` when it is desired that the tiles be not batched by chunk.
read_and_split_chunk = hs.dsm.chunk.ReadAndSplitChunk()
tiles = chunks.map(read_and_split_chunk, **dataset_map_options).prefetch(tf.data.experimental.AUTOTUNE).unbatch()

# Export the tile's pixel data from the dictionary to top level: each element becomes a
# (tile pixels, remaining dictionary) pair.
tiles = tiles.map(lambda elem: (elem.pop("tile"), elem), **dataset_map_options)

# Convert pixel data to uint8
tiles = tiles.map(lambda x, y: (tf.cast(x, tf.uint8), y), **dataset_map_options)

# Run the model on the tiles: each element becomes (tile pixels, model output, dictionary)
tiles = tiles.map(lambda x, y: (x, model(x, tau=0.5, nms_iou=0.3), y), **dataset_map_options)

<h3>Find a tile with many detections</h3>

In [None]:
# Attach the number of detections (the first dimension of the model output) to every
# tile and restrict the search to 1000 tiles.
tiles2 = tiles
tiles2 = tiles2.map(lambda x, p, y: (x, p, tf.shape(p)[0], y), **dataset_map_options)
tiles2 = tiles2.take(1000)

# Search in a single pass and remember the tile(s) with the most detections.  Iterating
# the dataset a second time would re-read the slide and re-run the model on every tile,
# and because the map operations use "deterministic": False the second pass is not
# guaranteed to yield the same tiles as the first.
max_number_detections = 0
best_tiles = []  # (rgb, regressions) pairs that share the current maximum
for rgb, regressions, number_detections, _ in tiles2:
    # Tiles without any detection are never candidates (the count must be at least 1).
    if number_detections >= tf.maximum(1, max_number_detections):
        if number_detections > max_number_detections:
            # A new maximum invalidates the previously remembered tiles.
            best_tiles = []
        best_tiles.append((rgb, regressions))
        max_number_detections = number_detections
        tf.print(f"Found number_detections = {number_detections}")

tf.print(f"max_number_detections = {max_number_detections}")

# Plot every tile that reached the maximum.  Nothing is plotted when no tile had a
# detection.
for rgb, regressions in best_tiles:
    tf.print(f"max_number_detections = {max_number_detections}")
    plot_inference(rgb, regressions)

<h2>Save and Load Model Weights</h2>

In [None]:
# save checkpoint
model.save_weights("/tf/notebooks/histomics_detect/example/saved_model/")

# create dummy network for restore
# NOTE(review): `restored` is constructed from the same `rpnetwork` and `backbone` objects
# as `model`, so the two models presumably share those networks' weights and the equality
# check below may pass even if load_weights restored nothing -- confirm, and consider
# building fresh networks for a stricter test.
restored = FasterRCNN(rpnetwork, backbone, [width, height], anchor_px, rpn_lmbda)
restored.load_weights("/tf/notebooks/histomics_detect/example/saved_model/")

# check that outputs are same
# (`rgb` is whichever image tensor was last assigned by an earlier cell)
assert tf.math.reduce_all(tf.math.equal(restored(rgb), model(rgb)))