# Pooling


## Maximum Pooling and Average Pooling


In [None]:
import tensorflow as tf

def pool2d(X, pool_size, mode='max'):
    """Slide a pooling window over a 2D tensor with stride 1 and no padding.

    Args:
        X: Tensor indexed as ``X[row, col]``; only ``X.shape[0]`` and
            ``X.shape[1]`` are used, so a single-channel 2D input is assumed.
        pool_size: Pair ``(p_h, p_w)`` giving the window height and width.
        mode: ``'max'`` for maximum pooling or ``'avg'`` for average pooling.

    Returns:
        A ``tf.Variable`` of shape
        ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)`` whose entry
        ``[i, j]`` is the max (or mean) of ``X[i:i + p_h, j:j + p_w]``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Reject unknown modes up front; otherwise neither branch would run and
    # the caller would silently get back the all-zero initial tensor.
    if mode not in ('max', 'avg'):
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    p_h, p_w = pool_size
    # The reduction is loop-invariant, so pick it once.
    reduce_fn = tf.reduce_max if mode == 'max' else tf.reduce_mean
    # A Variable is used because plain tensors do not support item assignment.
    Y = tf.Variable(tf.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(reduce_fn(X[i:i + p_h, j:j + p_w]))
    return Y

In [None]:
# 3x3 input holding 0..8 in row-major order.
X = tf.constant([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
# 2x2 max pooling over the four windows gives [[4, 5], [7, 8]].
pool2d(X, (2, 2))

In [None]:
# 2x2 average pooling on the same 3x3 input gives [[2, 3], [5, 6]].
pool2d(X, (2, 2), 'avg')

## Padding and Stride



In [None]:
# Values 0..15 reshaped to (1, 4, 4, 1). The Keras pooling layers used below
# expect channels-last input: (batch, height, width, channels).
X = tf.reshape(tf.range(16, dtype=tf.float32), (1, 4, 4, 1))
X

In [None]:
# NOTE: this rebinds the name `pool2d`, shadowing the function defined earlier
# in the notebook; from here on `pool2d` is a Keras layer.
# No `strides` is given, so per the Keras docs the stride defaults to the pool
# size: a 3x3 window with stride 3 fits only once in a 4x4 input.
pool2d = tf.keras.layers.MaxPool2D(pool_size=[3, 3])
pool2d(X)

In [None]:
# One [before, after] pair per axis: add a single row on top and a single
# column on the left; batch and channel axes are left untouched.
paddings = tf.constant([[0, 0], [1, 0], [1, 0], [0, 0]])
# "CONSTANT" mode fills the new entries with tf.pad's default value, 0.
X_padded = tf.pad(X, paddings, "CONSTANT")
# padding='valid' makes the layer add no padding of its own, so only the
# manual padding above applies. 3x3 windows with stride 2 over the padded
# 5x5 input yield a 2x2 output per channel.
pool2d = tf.keras.layers.MaxPool2D(pool_size=[3, 3], padding='valid',
                                   strides=2)
pool2d(X_padded)

In [None]:
# Pad only the width axis, one column on each side; height is unchanged.
paddings = tf.constant([[0, 0], [0, 0], [1, 1], [0, 0]])
X_padded = tf.pad(X, paddings, "CONSTANT")

# Rectangular 2x3 window with a matching (2, 3) stride: the windows tile the
# padded 4x6 input without overlap, giving a 2x2 output per channel.
pool2d = tf.keras.layers.MaxPool2D(pool_size=[2, 3], padding='valid',
                                   strides=(2, 3))
pool2d(X_padded)

## Multiple Channels


In [None]:
# Build a two-channel input by stacking X and X + 1 along the last axis.
# NOTE: X is rebound to the result, so re-running this cell doubles the
# channel count again.
X = tf.concat([X, X + 1],
              3)  # axis 3 is the channel axis in the channels-last layout

In [None]:
# Same top/left zero padding as in the single-channel example; the channel
# axis gets no padding.
paddings = tf.constant([[0, 0], [1, 0], [1, 0], [0, 0]])
X_padded = tf.pad(X, paddings, "CONSTANT")
# Pooling is applied to each channel separately, so the output has as many
# channels as the input.
pool2d = tf.keras.layers.MaxPool2D(pool_size=[3, 3], padding='valid',
                                   strides=2)
pool2d(X_padded)


## Exercises (Optional)

1. Can you implement average pooling as a special case of a convolution layer? If so, do it.
1. Can you implement maximum pooling as a special case of a convolution layer? If so, do it.
1. What is the computational cost of the pooling layer? Assume that the input to the pooling layer is of size $c\times h\times w$, the pooling window has a shape of $p_h\times p_w$, the padding is $(q_h, q_w)$, and the stride is $(s_h, s_w)$.
1. Why do you expect maximum pooling and average pooling to work differently?
1. Do we need a separate minimum pooling layer? Can you replace it with another operation?
1. Is there another operation between average and maximum pooling that you could consider (hint: recall the softmax)? Why might it not be so popular?
