<a href="https://colab.research.google.com/github/Dmitri9149/TensorFlow-PyTorch-basics/blob/master/TensorFlow_Padding_Stride_MultiChannels_Pooling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [244]:
# -U: Upgrade all packages to the newest available version
!pip install -U d2l
from d2l import tensorflow as d2l
import tensorflow as tf

Requirement already up-to-date: d2l in /usr/local/lib/python3.6/dist-packages (0.15.1)


Exercises and some code modification/experimentation for the d2l.ai book : http://d2l.ai/ 

Padding

In [245]:
### initialize convolutional layer , make some changes in sizes of the layer 
### with the help of padding
def comp_conv2d(conv2d, X):
  ### (1,1) correspond to batch size and num of channels
  X = tf.reshape(X, (1,) +  X.shape + (1,))
  Y = conv2d(X)
### elim dimentions we do not need
  return tf.reshape(Y,Y.shape[1:3])

In [246]:
conv2d=tf.keras.layers.Conv2D(1, kernel_size = 3, padding = 'same')
X = tf.random.uniform(shape=(8,8))
comp_conv2d(conv2d,X).shape

TensorShape([8, 8])

Stride

In [247]:
conv2d = tf.keras.layers.Conv2D(1, kernel_size=3, padding='same', strides=2)
comp_conv2d(conv2d, X).shape

TensorShape([4, 4])

In [248]:
conv2d = tf.keras.layers.Conv2D(1, kernel_size=(3,5), padding='valid',
                                strides=(3, 4))
comp_conv2d(conv2d, X).shape

TensorShape([2, 1])

MultiChannels

In [249]:
### input X and kernel K , compute cross correlation
def corr2d(X, K):  #@save
    h, w = K.shape
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.reduce_sum(
                X[i: i + h, j: j + w] * K))
    return Y

In [250]:
def corr2d_multi_in(X,K):
## iterate via the first dimention (channels); sum results together
## zip 'list' of 2D inputs and kernels:
  return tf.reduce_sum([corr2d(x,k) for x,k in zip(X,K) ], axis = 0)

In [251]:
X = tf.constant([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
               [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
K = tf.constant([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

corr2d_multi_in(X, K)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 56.,  72.],
       [104., 120.]], dtype=float32)>

In [252]:
### multiple output and input
### size of input is (i, h, w) size of kernel is (o,i,h_k,w_k) 
def corr2d_multi_in_out(X,K):
## iterate via o -> first dim of kernel and at every step make cross correlation 
### with input X
############# iterate over first dim of K 
  return tf.stack([corr2d_multi_in(X,k) for k in K], 0)

In [253]:
K = tf.stack([K,K+1,K+2])
K.shape

TensorShape([3, 2, 2, 2])

In [254]:
corr2d_multi_in_out(X, K)

<tf.Tensor: shape=(3, 2, 2), dtype=float32, numpy=
array([[[ 56.,  72.],
        [104., 120.]],

       [[ 76., 100.],
        [148., 172.]],

       [[ 96., 128.],
        [192., 224.]]], dtype=float32)>

1x1 Convolutional Layer

In [255]:
def corr2d_multi_in_out_1x1(X,K):
  i,h,w=X.shape
  o=K.shape[0]
  X=tf.reshape(X,(i,h*w))
  K=tf.reshape(K,(o,i))
  Y=tf.matmul(K,X)
  return tf.reshape(Y,(o,h,w))



In [256]:
X = tf.random.normal((2, 3, 3), 0, 1)
K = tf.random.normal((4, 2, 1, 1), 0, 1)

In [257]:
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
assert float(tf.reduce_sum(tf.abs(Y1 - Y2))) < 1e-6

In [258]:
Y1

<tf.Tensor: shape=(4, 3, 3), dtype=float32, numpy=
array([[[-0.8358883 ,  0.03057848,  0.20615911],
        [-0.32611662,  0.08338056,  0.31907773],
        [ 0.16972256, -0.370436  , -0.16872945]],

       [[ 1.2980917 ,  0.03476962, -0.3857175 ],
        [-0.04552719,  0.12434784, -0.4914438 ],
        [-0.17279609, -0.2023155 ,  0.02436534]],

       [[-0.58255965, -0.05034504,  0.20079352],
        [ 0.25355658, -0.16301186,  0.21883303],
        [ 0.03920908,  0.4192086 ,  0.08944239]],

       [[-1.7173921 , -0.12125629,  0.57029223],
        [ 0.5652247 , -0.39674374,  0.64646524],
        [ 0.14556289,  0.9790698 ,  0.1851998 ]]], dtype=float32)>

Pooling

In [259]:
def pool2d(X, pool_size, mode='max'):
  h,w=pool_size
  Y=tf.Variable(tf.zeros((X.shape[0]-h+1,X.shape[1]-w+1)))
  for i in range (Y.shape[0]):
    for j in range (Y.shape[1]):
      if mode == 'max':
        Y[i,j].assign(tf.reduce_max(X[i:i+h, j:j+w]))
      if mode == 'avg':
        Y[i,j].assign(tf.reduce_mean(X[i:i+h, j:j+w]))

  return Y




In [260]:
X = tf.constant([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
pool2d(X, (2, 2))

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[4., 5.],
       [7., 8.]], dtype=float32)>

In [261]:
pool2d(X, (2, 2), 'avg')

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[2., 3.],
       [5., 6.]], dtype=float32)>

Padding and Stride


In [262]:
Z = tf.range(16, dtype = tf.float32)
#Z -> (4,4) size tensor, 
# X reshape Z to 4d tensor with 1 sample, num of channels = 1, (4,4) input
X = tf.reshape(Z,(1,4,4,1))
X

<tf.Tensor: shape=(1, 4, 4, 1), dtype=float32, numpy=
array([[[[ 0.],
         [ 1.],
         [ 2.],
         [ 3.]],

        [[ 4.],
         [ 5.],
         [ 6.],
         [ 7.]],

        [[ 8.],
         [ 9.],
         [10.],
         [11.]],

        [[12.],
         [13.],
         [14.],
         [15.]]]], dtype=float32)>

In [263]:
## defaulr padding and stride
pool2d = tf.keras.layers.MaxPool2D(pool_size = [3,3])
pool2d(X)

<tf.Tensor: shape=(1, 1, 1, 1), dtype=float32, numpy=array([[[[10.]]]], dtype=float32)>

In [264]:
## specify padding and stride manually
pool2d = tf.keras.layers.MaxPool2D(pool_size = [3,3], padding = 'same', strides = 2)
pool2d(X)


<tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
array([[[[10.],
         [11.]],

        [[14.],
         [15.]]]], dtype=float32)>

In [265]:
pool2d = tf.keras.layers.MaxPool2D(pool_size=[2, 3], padding='same',
                                   strides=(2, 3))
pool2d(X)

<tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
array([[[[ 5.],
         [ 7.]],

        [[13.],
         [15.]]]], dtype=float32)>

Multiple channels : pooling pools each input channel separatelly, number of channels is not changing.

In [266]:
## construct tensor with 3 channels , num of sample = 1, (4,4) 2d input
Z = tf.stack([X,X+1,X+2],0)
X = tf.reshape(Z,(1,4,4,3))
X

<tf.Tensor: shape=(1, 4, 4, 3), dtype=float32, numpy=
array([[[[ 0.,  1.,  2.],
         [ 3.,  4.,  5.],
         [ 6.,  7.,  8.],
         [ 9., 10., 11.]],

        [[12., 13., 14.],
         [15.,  1.,  2.],
         [ 3.,  4.,  5.],
         [ 6.,  7.,  8.]],

        [[ 9., 10., 11.],
         [12., 13., 14.],
         [15., 16.,  2.],
         [ 3.,  4.,  5.]],

        [[ 6.,  7.,  8.],
         [ 9., 10., 11.],
         [12., 13., 14.],
         [15., 16., 17.]]]], dtype=float32)>

In [267]:
pool2d = tf.keras.layers.MaxPool2D(pool_size = (2,2), padding='same', strides=2)
pool2d(X)

<tf.Tensor: shape=(1, 2, 2, 3), dtype=float32, numpy=
array([[[[15., 13., 14.],
         [ 9., 10., 11.]],

        [[12., 13., 14.],
         [15., 16., 17.]]]], dtype=float32)>

In [268]:
### it seems d2l.ai book hs mistakes at 6.5.3 chapter
## the channel dimention is the last dimention (dim =4) in tf
## due to the code they demonstrate the number of channels changes 
## the right code is like this

X = tf.reshape(tf.range(16, dtype=tf.float32), (1, 4, 4, 1))
X


<tf.Tensor: shape=(1, 4, 4, 1), dtype=float32, numpy=
array([[[[ 0.],
         [ 1.],
         [ 2.],
         [ 3.]],

        [[ 4.],
         [ 5.],
         [ 6.],
         [ 7.]],

        [[ 8.],
         [ 9.],
         [10.],
         [11.]],

        [[12.],
         [13.],
         [14.],
         [15.]]]], dtype=float32)>

In [269]:
## the dim 4 -> number of channels = 2
## (4,4) -> 2d features 
X = tf.reshape(tf.stack([X, X+1], 0), (1,4, 4,2))
X

<tf.Tensor: shape=(1, 4, 4, 2), dtype=float32, numpy=
array([[[[ 0.,  1.],
         [ 2.,  3.],
         [ 4.,  5.],
         [ 6.,  7.]],

        [[ 8.,  9.],
         [10., 11.],
         [12., 13.],
         [14., 15.]],

        [[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.],
         [13., 14.],
         [15., 16.]]]], dtype=float32)>

In [270]:
## the dim 4 -> number of channels = 2
## (2,2) -> 2d features the size of 2d input has changed becaus of striding
pool2d = tf.keras.layers.MaxPool2D(3, padding='same', strides=2)
pool2d(X)

<tf.Tensor: shape=(1, 2, 2, 2), dtype=float32, numpy=
array([[[[12., 13.],
         [14., 15.]],

        [[13., 14.],
         [15., 16.]]]], dtype=float32)>