### ROI-pooling layer tutorial (TensorFlow, CPU)

We compare two implementations of the ROI pooling layer to understand their differences in convenience/performance. This is meant for CPU only.

#### Loading the libraries

After compiling the ops by running `make` in the roi-pooling directory, load the required libraries:

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops

# Base directory used below to build the paths of the compiled op libraries.
# It defaults to the directory the notebook is run from.
wd = os.getcwd() # get the working directory
# To load the libraries from somewhere else, extend the path, for example:
# wd = wd + '/example/of/new/path/'

In [2]:
# Load the first implementation: a single shared library that exposes both
# the forward op and its gradient op.
module_1_path = wd + "/module_1/roi_pooling.so"
roi_pooling_module_1 = tf.load_op_library(module_1_path)

# Forward op and gradient op exposed by the loaded library.
roi_pooling_op_1 = roi_pooling_module_1.roi_pooling1
roi_pooling_grad_op_1 = roi_pooling_module_1.roi_pooling1_grad

In [None]:
# Load the second implementation: the forward op and the gradient op live in
# two separate shared libraries.
forward_lib_2_path = wd + "/module_2/roi_pooling.so"
roi_pooling_module_2 = tf.load_op_library(forward_lib_2_path)
roi_pooling_op_2 = roi_pooling_module_2.roi_pooling2

backward_lib_2_path = wd + "/module_2/roi_pooling_op_grad.so"
roi_pooling_module_2_grad = tf.load_op_library(backward_lib_2_path)
roi_pooling_grad_op_2 = roi_pooling_module_2_grad.roi_pooling2_grad

In [3]:
# Single session shared by every cell below.
# No initializer op is built here: no variables exist yet at this point, so it
# would cover nothing. The notebook builds and runs the initializer later,
# right after the variables are created.
sess = tf.Session()

#### Checking the inference on simple examples [Forward pass]

We first perform <a href="https://blog.deepsense.ai/region-of-interest-pooling-explained/">ROI pooling</a> and check only the forward pass.

In [4]:
# Small multiplication table used as a toy feature map: entry (i, j) holds
# (i + 1) * (j + 1), so the maximum of any window sits at its bottom-right corner.
w, h = 7, 7
test_array = np.outer(np.arange(1, w + 1), np.arange(1, h + 1))
test_array

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 2,  4,  6,  8, 10, 12, 14],
       [ 3,  6,  9, 12, 15, 18, 21],
       [ 4,  8, 12, 16, 20, 24, 28],
       [ 5, 10, 15, 20, 25, 30, 35],
       [ 6, 12, 18, 24, 30, 36, 42],
       [ 7, 14, 21, 28, 35, 42, 49]])

In [5]:
# Forward pass with implementation 1.
# The feature map is fed as a 4-D float32 array of shape (1, w, h, 1).
input_1 = test_array.reshape(1,w,h,1).astype(np.float32)
# Format the message explicitly: print("label", value) shows a tuple under
# Python 2, whereas this form reads the same on Python 2 and Python 3.
print("Input shape: {}".format(input_1.shape))
input_tf_1 = tf.constant(input_1)
# One ROI described by five floats.
# NOTE(review): presumably a batch index followed by the box corners - confirm
# against the op's C++ definition.
rois_tf_1 = tf.constant([(0,0,0, h, w)], dtype=tf.float32)
# The trailing arguments 2, 2, 1.0 are presumably pooled_height, pooled_width
# and spatial_scale (the attributes read back by the registered gradient).
# The op has several outputs; index 0 is the one inspected here.
output_tf_1 = roi_pooling_op_1(input_tf_1, rois_tf_1,2,2,1.0)[0]
output_1 = sess.run(output_tf_1)
print("Output shape: {}".format(output_1.shape))
output_1.squeeze()

('Input shape: ', (1, 7, 7, 1))
('Output shape: ', (1, 2, 2, 1))


array([[ 16.,  28.],
       [ 28.,  49.]], dtype=float32)

In [6]:
# Forward pass with implementation 2.
# The feature map is fed as a 4-D float32 array of shape (1, 1, h, w).
input_2 = test_array.reshape(1,1,h,w).astype(np.float32)
# Format the message explicitly: print("label", value) shows a tuple under
# Python 2, whereas this form reads the same on Python 2 and Python 3.
print("Input shape: {}".format(input_2.shape))
input_tf_2 = tf.constant(input_2)
# One ROI described by four integers, nested one level deeper than for
# implementation 1.
# NOTE(review): presumably the extra nesting is a per-image axis - confirm
# against the op's C++ definition.
rois_tf_2 = tf.constant([[(0, 0, h, w)]])
# Requested size of the pooled output, passed as a tensor rather than as attributes.
output_shape_tf_2 = tf.constant((2, 2))
# This op returns two tensors: the pooled values and the argmax.
output_tf_2, argmax_tf_2 = roi_pooling_op_2(input_tf_2, rois_tf_2, output_shape_tf_2)
output_2, argmax_2 = sess.run([output_tf_2, argmax_tf_2])
print("Output shape: {}".format(output_2.shape))
output_2.squeeze()

('Input shape: ', (1, 1, 7, 7))
('Output shape: ', (1, 1, 1, 2, 2))


array([[  9.,  18.],
       [ 18.,  36.]], dtype=float32)

#### Checking the gradient  [Backward pass]

In [7]:
@ops.RegisterShape("RoiPooling1")
def _roi_pool_shape(op):
    """Static shape function registered for the ``RoiPooling1`` op.

    Args:
        op: the ``RoiPooling1`` operation whose output shapes are requested.

    Returns:
        A list of two identical shapes, one per op output, laid out as
        ``[num_rois, pooled_height, pooled_width, channels]``. ``channels`` is
        read from index 3 of the first input's shape, ``num_rois`` from index 0
        of the second input's shape, and the pooled sizes from the op attributes.
    """
    channels = op.inputs[0].get_shape().as_list()[3]
    num_rois = op.inputs[1].get_shape().as_list()[0]

    pooled_shape = tf.TensorShape([
        num_rois,
        op.get_attr('pooled_height'),
        op.get_attr('pooled_width'),
        channels,
    ])
    # Both outputs of the op share the same shape.
    return [pooled_shape, pooled_shape]

@ops.RegisterGradient("RoiPooling1")
def _roi_pool_grad(op, grad, _):
    """Gradient function registered for the ``RoiPooling1`` op.

    Args:
        op: the forward ``RoiPooling1`` operation.
        grad: incoming gradient with respect to the op's first output.
        _: incoming gradient with respect to the op's second output; ignored.

    Returns:
        A two-element list, one entry per op input: the gradient with respect
        to the first input, and ``None`` for the second input (the ROIs).
    """
    features, boxes = op.inputs[0], op.inputs[1]
    # The second forward output is forwarded to the gradient op as the argmax.
    max_locations = op.outputs[1]

    # The gradient op takes the same pooling attributes as the forward op.
    height = op.get_attr('pooled_height')
    width = op.get_attr('pooled_width')
    scale = op.get_attr('spatial_scale')

    features_grad = roi_pooling_grad_op_1(
        features, boxes, max_locations, grad, height, width, scale)

    return [features_grad, None]

In [8]:
# Register the custom gradient op as the gradient function of the second
# ROI pooling implementation.
@ops.RegisterGradient("RoiPooling2")
def _roi_pooling_grad(op, grad0, grad1):
    """Gradient function registered for the ``RoiPooling2`` op.

    Args:
        op: the forward ``RoiPooling2`` operation.
        grad0: incoming gradient with respect to the op's first output.
        grad1: incoming gradient with respect to the op's second output; unused.

    Returns:
        A three-element list, one entry per op input: the back-propagated
        gradient for the first input (the feature map) and ``None`` for the
        two remaining inputs.
    """
    feature_map_grad = roi_pooling_grad_op_2(
        grad0,                          # gradient flowing into the pooled output
        op.outputs[1],                  # argmax recorded by the forward pass
        array_ops.shape(op.inputs[0]),  # dynamic shape of the feature map
    )
    return [feature_map_grad, None, None]

In [9]:
# Trainable copy of input_1, so that a gradient with respect to the feature map
# can be requested from implementation 1.
variable_1_initializer = tf.constant_initializer(input_1)
variable_1 = tf.get_variable(
    "variable_1",
    shape=[1, w, h, 1],
    dtype=tf.float32,
    initializer=variable_1_initializer,
)

In [10]:
# Trainable copy of input_2, so that a gradient with respect to the feature map
# can be requested from implementation 2.
variable_2_initializer = tf.constant_initializer(input_2)
variable_2 = tf.get_variable(
    "variable_2",
    shape=[1, 1, h, w],
    dtype=tf.float32,
    initializer=variable_2_initializer,
)

In [11]:
# Build the initializer now that variable_1 and variable_2 exist, then run it
# so both variables hold their constant initial values.
init = tf.global_variables_initializer()
sess.run(init)

In [12]:
# Backward pass through implementation 1: pool the variable, reduce the pooled
# output to a scalar by summation, and differentiate with respect to the variable.
output_tf_1 = roi_pooling_op_1(variable_1, rois_tf_1, 2, 2, 1.0)[0]
loss_tf_1 = tf.reduce_sum(output_tf_1)
grad_tf_1 = tf.gradients(loss_tf_1, variable_1)

# tf.gradients returns a list with one entry per differentiated tensor.
grad_1 = sess.run(grad_tf_1[0])
grad_1.squeeze()

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  1.]], dtype=float32)

In [13]:
# Backward pass through implementation 2: pool the variable, reduce the pooled
# output to a scalar by summation, and differentiate with respect to the variable.
output_tf_2, argmax_tf_2 = roi_pooling_op_2(variable_2, rois_tf_2, output_shape_tf_2)
# Evaluate the forward results as well, refreshing output_2 and argmax_2.
output_2, argmax_2 = sess.run([output_tf_2, argmax_tf_2])

loss_tf_2 = tf.reduce_sum(output_tf_2)
grad_tf_2 = tf.gradients(loss_tf_2, variable_2)

# tf.gradients returns a list with one entry per differentiated tensor.
grad_2 = sess.run(grad_tf_2[0])
grad_2.squeeze()

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.]], dtype=float32)