<h2>Import packages and install histomics_detect</h2>

In [None]:
import sys
import tensorflow as tf

#install histomics_detect
!pip install -e /tf/notebooks/histomics_detect

#add to system path
sys.path.append('/tf/notebooks/histomics_detect/')

<h2>Define dataset parameters and create datasets - DCC example</h2>

In [None]:
#import dataset related packages
from histomics_detect.io import dataset, resize
from histomics_detect.augmentation import crop, flip, jitter, shrink
from histomics_detect.visualization import plot_inference
import numpy as np
import os

# Location handed to dataset() as the input path for the DCC example.
path = '/tf/notebooks/DCC/data/'

# Training tiles are square: train_tile pixels on each side. The same value
# is also wrapped as int32 tensors for the crop step and the model.
train_tile = 224
width = tf.constant(train_tile, dtype=tf.int32)
height = tf.constant(train_tile, dtype=tf.int32)

# Share of an object's area that must fall inside a random crop for the
# object to be kept (0.5 = half of the object).
# NOTE(review): the tensor form `min_area` is not referenced by any visible
# cell; the crop step receives the plain float `min_area_thresh`.
min_area_thresh = 0.5
min_area = tf.constant(min_area_thresh, dtype=tf.float32)

#split dataset into training and validation
cases = ['131458', '91315_leica_at2_40x', '135062', '93094',
         '131453', '131450', '135060', '131463', '131459',
         '131440', '131460', '93096', '131449', '131457',
         '131461', '93098', '131447', '93092', '131443',
         '93095', '131448', '93099', '91316_leica_at2_40x', '131462',
         '93091', '135065', '131446', '131441', '101626',
         '93093', '131454', '93097', '131445', '131444',
         '131456', '93090']


def split_cases(case_ids, train_fraction=0.9):
    """Randomly partition case identifiers into training and validation lists.

    Parameters
    ----------
    case_ids : list
        Identifiers to split. Every element is eligible for either subset.
    train_fraction : float
        Fraction of the identifiers assigned to training; the training count
        is ceil(train_fraction * len(case_ids)).

    Returns
    -------
    training : list
        Randomly chosen identifiers, in the order they were drawn.
    validation : list
        The remaining identifiers, in their original order.

    Notes
    -----
    Draws from numpy's global random state; call np.random.seed(...) first
    if a reproducible split is required.
    """
    n_train = int(np.ceil(train_fraction * len(case_ids)))
    # Permute ALL indices. The previous code drew len(cases)-1 random values,
    # so the final case could never be selected for training.
    order = np.random.permutation(len(case_ids))
    chosen = [case_ids[i] for i in order[:n_train]]
    chosen_set = set(chosen)
    held_out = [c for c in case_ids if c not in chosen_set]
    return chosen, held_out


training, validation = split_cases(cases, 0.9)

#filename parser shared by both parser arguments of dataset() in this example
def parser(file):
    """Extract the (case, roi) pair from a dot-delimited filename.

    The final extension is removed and the remainder is split on '.'.
    The case is the third field. The roi is the second field followed by the
    last three fields, re-joined with '.'; when there are fewer than five
    fields the two selections overlap and fields are repeated.

    Raises IndexError when the stem has fewer than three fields.
    """
    stem, _ext = os.path.splitext(file)
    fields = stem.split('.')
    case = fields[2]
    roi_fields = [fields[1]] + fields[-3:]
    return case, '.'.join(roi_fields)

#create one dataset per split; the training call passes train_tile as the
#fourth argument while the validation call passes 0
ds_train_roi = dataset(path, parser, parser, train_tile, training)
ds_validation_roi = dataset(path, parser, parser, 0, validation)


#augmentation stages for the training dataset; each takes the three dataset
#elements and forwards the third one untouched
def _crop_stage(image, annot, extra):
    #random crop to width x height using the min_area_thresh inclusion rule
    return (*crop(image, annot, width, height, min_area_thresh), extra)


def _flip_stage(image, annot, extra):
    #flip is applied to the image and its annotations together
    return (*flip(image, annot), extra)


def _jitter_stage(image, annot, extra):
    #jitter touches only the annotations (parameter 0.05)
    return (image, jitter(annot, 0.05), extra)


def _shrink_stage(image, annot, extra):
    #shrink touches only the annotations (parameter 0.05)
    return (image, shrink(annot, 0.05), extra)


#training pipeline: crop -> flip -> jitter -> shrink -> prefetch
ds_train_roi = ds_train_roi.map(_crop_stage)
ds_train_roi = ds_train_roi.map(_flip_stage)
ds_train_roi = ds_train_roi.map(_jitter_stage)
ds_train_roi = ds_train_roi.map(_shrink_stage)
ds_train_roi = ds_train_roi.prefetch(tf.data.experimental.AUTOTUNE)

#validation data gets no augmentation, only prefetching
ds_validation_roi = ds_validation_roi.prefetch(tf.data.experimental.AUTOTUNE)

<h2>Create and train detection model - DCC example</h2>

In [None]:
#import network generation and training packages
from histomics_detect.networks.rpns import rpn
from histomics_detect.models.faster_rcnn import FasterRCNN

#choices for anchor sizes - all anchors 1:1 aspect ratio
anchor_px = tf.constant([32, 64, 96], dtype=tf.int32) #width/height of square anchors in pixels at input mag.

#feature network parameters (passed to rpn() as stride / blocks / dimensions)
backbone_stride = 1 #strides in feature generation network convolution
backbone_blocks = 14 #number of residual blocks to use in backbone
backbone_dimension = 256 #number of features generated by rpn convolution

#rpn network parameters (passed to rpn() as kernels / activations)
rpn_kernel = [3] #kernel size for rpn convolution
rpn_act_conv = ['relu'] #activation for rpn convolutional layers

#anchor filtering and roialign parameters
#NOTE(review): neg_max, pos_max, roialign_tiles and roialign_pool are not
#referenced by any visible cell - confirm whether the model should receive them
neg_max = 128 #maximum number of negative anchors to keep in each roi
pos_max = 128 #maximum number of positive anchors to keep in each roi
rpn_lmbda = 10.0 #weighting for rpn regression loss
roialign_tiles = 3.0 #roialign - number of horizontal/vertical tiles in a proposal
roialign_pool = 2.0 #roialign - number of horizontal/vertical samples in each tile
roialing_pool = roialign_pool #original misspelled name, kept as a backward-compatible alias

#backbone source: ResNet50 with ImageNet weights and no classification top,
#built for train_tile x train_tile 3-channel inputs
resnet50 = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(train_tile, train_tile, 3),
    pooling=None,
)

#rpn() returns the region-proposal network and the feature backbone
rpn_kwargs = dict(
    n_anchors=tf.size(anchor_px),
    stride=backbone_stride,
    blocks=backbone_blocks,
    kernels=rpn_kernel,
    dimensions=[backbone_dimension],
    activations=rpn_act_conv,
)
rpnetwork, backbone = rpn(resnet50, **rpn_kwargs)

#assemble the FasterRCNN keras model from the two networks
model = FasterRCNN(rpnetwork, backbone, [width, height], anchor_px, rpn_lmbda)

#compile with Adam and two losses: binary cross-entropy on logits, then Huber
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
rpn_losses = [
    tf.keras.losses.BinaryCrossentropy(from_logits=True),
    tf.keras.losses.Huber(),
]
model.compile(optimizer=adam, loss=rpn_losses)

#train for 50 epochs; under standard Keras semantics validation_freq=50
#means validation runs only after the final epoch
model.fit(
    x=ds_train_roi,
    batch_size=1,
    epochs=50,
    verbose=1,
    validation_data=ds_validation_roi,
    validation_freq=50,
)

<h2>Define dataset parameters and create datasets - DLBCL example</h2>

In [None]:
#import dataset related packages
from histomics_detect.io import dataset
from histomics_detect.augmentation import crop, flip, jitter, shrink
import numpy as np
import os

# Location handed to dataset() as the input path for the DLBCL example.
path = '/tf/notebooks/DLBCL/detection/'

# Training tiles are square: train_tile pixels on each side. The same value
# is also wrapped as int32 tensors for the crop step and the model.
train_tile = 224
width = tf.constant(train_tile, dtype=tf.int32)
height = tf.constant(train_tile, dtype=tf.int32)

# Share of an object's area that must fall inside a crop for the object to
# be kept (0.5 = half of the object).
# NOTE(review): the tensor form `min_area` is not referenced by any visible
# cell; the crop step receives the plain float `min_area_thresh`.
min_area_thresh = 0.5
min_area = tf.constant(min_area_thresh, dtype=tf.float32)

#filename parsers for the DLBCL example
def png_parser(png):
    """Split an image filename into (case, roi).

    The final extension is dropped; the case is everything before the first
    '.' of what remains and the roi is everything after it. A stem without
    any '.' yields an empty roi.
    """
    stem = os.path.splitext(png)[0]
    case, _sep, roi = stem.partition('.')
    return case, roi

def csv_parser(csv):
    """Split an annotation filename into (case, roi).

    The final extension is dropped and the remainder is split on '.'.
    The case is the first field. The roi joins the second field (if any)
    with the last three fields; when there are fewer than five fields the
    two selections overlap and fields are repeated.
    """
    fields = os.path.splitext(csv)[0].split('.')
    roi_fields = fields[1:2] + fields[-3:]
    return fields[0], '.'.join(roi_fields)

#fixed assignment of DLBCL cases to the training and validation splits
training = ['DCBT_2_CMYC', 'DCBT_3_CMYC', 'DCBT_5_CMYC',
            'DCBT_9_CMYC', 'DCBT_10_CMYC', 'DCBT_12_CMYC', 
            'DCBT_14_CMYC', 'DCBT_18_CMYC', 'DCBT_19_CMYC', 
            'DCBT_20_CMYC', 'DCBT_21_CMYC', 'DCBT_22_CMYC']
validation = ['DCBT_1_CMYC', 'DCBT_4_CMYC', 'DCBT_6_CMYC',
              'DCBT_7_CMYC', 'DCBT_8_CMYC', 'DCBT_11_CMYC',
              'DCBT_13_CMYC', 'DCBT_15_CMYC', 'DCBT_16_CMYC',
              'DCBT_17_CMYC']

#resize is called below but is missing from this cell's import list; import
#it here so the DLBCL example does not depend on the DCC cell having been run
from histomics_detect.io import resize

#generate training, validation datasets (training passes train_tile as the
#fourth argument, validation passes 0)
ds_train_roi = dataset(path, png_parser, csv_parser, train_tile, training)
ds_validation_roi = dataset(path, png_parser, csv_parser, 0, validation)

#build training dataset: resize -> crop -> flip -> jitter -> shrink -> prefetch
#resize(..., 2.0): presumably a 2x scale factor - confirm against histomics_detect.io.resize
ds_train_roi = ds_train_roi.map(lambda x, y, z: (*resize(x, y, 2.0), z))
ds_train_roi = ds_train_roi.map(lambda x, y, z: (*crop(x, y, width, height, 
                                                                 min_area_thresh), z))
ds_train_roi = ds_train_roi.map(lambda x, y, z: (*flip(x, y), z))
ds_train_roi = ds_train_roi.map(lambda x, y, z: (x, jitter(y, 0.05), z))
ds_train_roi = ds_train_roi.map(lambda x, y, z: (x, shrink(y, 0.05), z))
ds_train_roi = ds_train_roi.prefetch(tf.data.experimental.AUTOTUNE)

#build validation datasets: same resize as training, no augmentation
ds_validation_roi = ds_validation_roi.map(lambda x, y, z: (*resize(x, y, 2.0), z))
ds_validation_roi = ds_validation_roi.prefetch(tf.data.experimental.AUTOTUNE)

<h2>Create and train detection model - DLBCL example</h2>

In [None]:
#import network generation and training packages
from histomics_detect.networks.rpns import rpn
from histomics_detect.models.faster_rcnn import FasterRCNN

#choices for anchor sizes - all anchors 1:1 aspect ratio
anchor_px = tf.constant([32, 64, 96], dtype=tf.int32) #width/height of square anchors in pixels at input mag.

#feature network parameters (passed to rpn() as stride / blocks / dimensions)
backbone_stride = 1 #strides in feature generation network convolution
backbone_blocks = 14 #number of residual blocks to use in backbone
backbone_dimension = 256 #number of features generated by rpn convolution

#rpn network parameters (passed to rpn() as kernels / activations)
rpn_kernel = [3] #kernel size for rpn convolution
rpn_act_conv = ['relu'] #activation for rpn convolutional layers

#anchor filtering and roialign parameters
#NOTE(review): neg_max, pos_max, roialign_tiles and roialign_pool are not
#referenced by any visible cell - confirm whether the model should receive them
neg_max = 128 #maximum number of negative anchors to keep in each roi
pos_max = 128 #maximum number of positive anchors to keep in each roi
rpn_lmbda = 10.0 #weighting for rpn regression loss
roialign_tiles = 3.0 #roialign - number of horizontal/vertical tiles in a proposal
roialign_pool = 2.0 #roialign - number of horizontal/vertical samples in each tile
roialing_pool = roialign_pool #original misspelled name, kept as a backward-compatible alias

#backbone source: ResNet50 with ImageNet weights and no classification top,
#built for train_tile x train_tile 3-channel inputs
resnet50 = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(train_tile, train_tile, 3),
    pooling=None,
)

#rpn() returns the region-proposal network and the feature backbone
rpn_kwargs = dict(
    n_anchors=tf.size(anchor_px),
    stride=backbone_stride,
    blocks=backbone_blocks,
    kernels=rpn_kernel,
    dimensions=[backbone_dimension],
    activations=rpn_act_conv,
)
rpnetwork, backbone = rpn(resnet50, **rpn_kwargs)

#assemble the FasterRCNN keras model from the two networks
model = FasterRCNN(rpnetwork, backbone, [width, height], anchor_px, rpn_lmbda)

#compile with Adam and two losses: binary cross-entropy on logits, then Huber
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
rpn_losses = [
    tf.keras.losses.BinaryCrossentropy(from_logits=True),
    tf.keras.losses.Huber(),
]
model.compile(optimizer=adam, loss=rpn_losses)

#train for 50 epochs; under standard Keras semantics validation_freq=50
#means validation runs only after the final epoch
model.fit(
    x=ds_train_roi,
    batch_size=1,
    epochs=50,
    verbose=1,
    validation_data=ds_validation_roi,
    validation_freq=50,
)

<h2>Inference on a single image - model.call() </h2>

In [None]:
#plot_inference is otherwise imported only by the DCC dataset cell; import it
#here so this cell also works when only the DLBCL example has been run
from histomics_detect.visualization import plot_inference

#generate and visualize thresholded, roialign outputs
#pick one validation sample at random (shuffle buffer of 100) as numpy arrays
data = list(ds_validation_roi.shuffle(100).take(1).as_numpy_iterator())[0]
rgb = data[0] #first element of the dataset tuple
#call the model with a 0.5 threshold and 0.3 nms_iou, then plot the result over the image
regressions = model(rgb, threshold=0.5, nms_iou=0.3)
plot_inference(rgb, regressions)

<h2>Raw inference on a single image - model.raw() </h2>

In [None]:
#raw rpn outputs for the image held in `rgb`: model.raw() yields three values,
#of which only the middle one (the regressions) is visualized
raw_outputs = model.raw(rgb)
_, regressions, _ = raw_outputs
plot_inference(rgb, regressions)

<h2>Batch inference using tf.data.Dataset.map </h2>

In [None]:
#option 1 - Dataset.map: the model is applied to each of the first five
#validation samples individually, so outputs stay separate per image and the
#other two dataset elements are carried along unchanged
first_five = ds_validation_roi.take(5)
per_image = first_five.map(lambda image, annot, extra: (model(image), annot, extra))
map_output = list(per_image.as_numpy_iterator())

#option 2 - model.predict: for comparison, this merges the outputs from all
#five images into a single result
predict_output = model.predict(ds_validation_roi.take(5))

<h2>Batch evaluation - model.evaluate() </h2>

In [None]:
#performance evaluation on multiple images from a tf.data.Dataset
#runs model.evaluate over the whole validation dataset and keeps the returned
#value in `metrics` for later inspection
metrics = model.evaluate(ds_validation_roi)

<h2>Save and Load Model Weights</h2>

In [None]:
#write the trained model's weights to weight_path
weight_path = "cpk"
model.save_weights(weight_path)

#build a second FasterRCNN with identical constructor arguments and restore
#the saved weights into it
#NOTE(review): model_reload receives the very same rpnetwork and backbone
#objects as model - confirm this round trip exercises independently restored weights
model_reload = FasterRCNN(rpnetwork, backbone, [width, height], anchor_px, rpn_lmbda)
model_reload.load_weights(weight_path)

#NOTE(review): `interpolated` is not defined in any visible cell, so this
#raises NameError on a fresh kernel - define the fastrcnn input before this point
align_reg = model.fastrcnn(interpolated)
align_reg_reload = model_reload.fastrcnn(interpolated)

#round-trip check: the number of element-wise equal entries must match the
#total number of entries in the original output
n_equal = tf.reduce_sum(tf.cast(align_reg_reload == align_reg, tf.int32))
n_total = tf.math.reduce_prod(tf.shape(align_reg))
assert n_equal == n_total