# **Intro to common CNN APIs**
此份程式碼會介紹在 CNN model 當中常使用的 Layers。

## 本章節大綱
* [Conv2D(filters, kernel_size, strides, use_bias)](#Conv2D)
  * [use_bias](#use-bias)
  * [Multi-Channels](#Multi-Channels-with-1-Filter)
  * [filters](#filters)
  * [kernel_size](#Conv2D)
  * [strides](#strides)
* [Flatten](#Flatten)
* [Padding](#Padding)
* [Pooling](#Pooling)
* [GlobalPooling](#GlobalPooling)

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D

In [1]:
# 下載並解壓縮課程所需檔案
!wget -q "https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/cnn_part2_data.zip"
!unzip -q cnn_part2_data.zip

In [3]:
# 6x6 single-channel toy image used by the first Conv2D examples.
pixel_rows = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0],
    [0, 1, 1, 1, 1, 0],
    [0, 0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
input_img = np.asarray(pixel_rows, dtype=np.float32)

In [4]:
# Display the (height, width) shape of the raw image.
np.shape(input_img)

(6, 6)

In [5]:
# Add a leading and a trailing size-1 axis so the array matches the
# (batch_size, height, width, channel) layout printed below.
input_img = np.expand_dims(input_img, axis=(0, -1))
print(input_img.shape, "(batch_size, height, width, channel)", sep="\n")

(1, 6, 6, 1)
(batch_size, height, width, channel)


* ## Conv2D
![](https://i.imgur.com/ziscEhS.gif)

In [6]:
def kernel_init(shape, dtype=None):
    """Return the fixed 3x3 diagonal kernel used by the Conv2D demos.

    Keras calls a custom initializer as ``initializer(shape, dtype=dtype)``.
    The kernel values are fixed, so ``shape`` is only validated.

    Args:
        shape: Kernel shape requested by the layer; must be ``(3, 3, 1, 1)``
            (height, width, channel, filters).
        dtype: Requested dtype of the kernel; ``float32`` is used when None.

    Returns:
        A constant tensor of shape ``(3, 3, 1, 1)`` with ones on the main
        diagonal of the 3x3 window and zeros elsewhere.

    Raises:
        ValueError: If ``shape`` is not ``(3, 3, 1, 1)``.
    """
    expected_shape = (3, 3, 1, 1)
    if tuple(shape) != expected_shape:
        raise ValueError(
            f"kernel_init only provides a {expected_shape} kernel, "
            f"got {tuple(shape)}")
    filter_init = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='float32')
    # height, width, channel, filters
    filter_init = filter_init.reshape(expected_shape)
    # Return a plain tensor: the layer builds its own variable from the
    # initial value, so wrapping it in another tf.Variable is unnecessary.
    return tf.constant(filter_init,
                       dtype=tf.float32 if dtype is None else dtype)

In [7]:
# Configure the layer first, then apply it to the image as a separate step.
conv_layer = Conv2D(
    filters=1,
    kernel_size=(3, 3),
    strides=(1, 1),
    kernel_initializer=kernel_init,
)
conv_result = conv_layer(input_img)

2023-03-08 15:27:50.084893: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 15:27:50.765821: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10417 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:04:00.0, compute capability: 6.1
2023-03-08 15:27:51.237649: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8101


In [8]:
# Convert the layer output with .numpy() so it can be inspected below.
# NOTE: this rebinds conv_result, so the cell cannot be re-run on its own
# without first re-running the Conv2D cell above.
conv_result = conv_result.numpy()

In [10]:
# Output shape, then the values with the size-1 axes squeezed away.
print(conv_result.shape, conv_result.squeeze(), sep="\n")

(1, 4, 4, 1)
[[1. 1. 2. 1.]
 [2. 1. 2. 2.]
 [0. 3. 1. 2.]
 [0. 0. 2. 0.]]


[(back...)](#Conv2D)

* ## use bias
![](https://i.imgur.com/3x4wMGO.gif)

In [11]:
# Same convolution as before, but with a bias term initialised to ones.
bias_layer = Conv2D(
    filters=1,
    kernel_size=(3, 3),
    strides=(1, 1),
    kernel_initializer=kernel_init,
    use_bias=True,
    bias_initializer='ones',
)
bias_result = bias_layer(input_img).numpy()

In [12]:
# Output shape, then the biased convolution values.
print(bias_result.shape, bias_result.squeeze(), sep="\n")

(1, 4, 4, 1)
[[2. 2. 3. 2.]
 [3. 2. 3. 3.]
 [1. 4. 2. 3.]
 [1. 1. 3. 1.]]


[(back...)](#Conv2D)

* ## Multi Channels with 1 Filter
![](https://i.imgur.com/NCivRaq.gif)
![](https://i.imgur.com/QEjI0jq.png)

In [13]:
# Load the multi-channel sample image and report its shape and dtype.
input_img = np.load("./data/conv2d_multichannel_input.npy")
print(input_img.shape, input_img.dtype, sep="\n")

(6, 6, 3)
int64


In [14]:
# Prepend the axis that the layout printed below labels Batch_size.
input_img = np.expand_dims(input_img, axis=0)
print(input_img.shape, "(Batch_size, Height, Width, Channel)", sep="\n")

(1, 6, 6, 3)
(Batch_size, Height, Width, Channel)


In [15]:
# Cast the image to float32 and confirm the new dtype.
input_img = input_img.astype(np.float32)
print(input_img.dtype)

float32


In [16]:
# Load the prepared filter weights and show their shape next to the axis names.
filter_init = np.load("./data/conv2d_multichannelfilter.npy")
print(filter_init.shape, "(Height, Width, Channel, Num of Filters)", sep="\n")

(3, 3, 3, 1)
(Height, Width, Channel, Num of Filters)


In [17]:
# Initializer that fills the kernel with the loaded filter values.
# Note: this rebinds kernel_init, replacing the function defined earlier.
kernel_init = tf.constant_initializer(value=filter_init)

In [18]:
# One filter applied across all input channels; keep the result as NumPy.
multichannel_layer = Conv2D(
    filters=1,
    kernel_size=(3, 3),
    strides=(1, 1),
    kernel_initializer=kernel_init,
)
multichannel = multichannel_layer(input_img).numpy()

In [19]:
# Output shape, then the single-filter result.
print(multichannel.shape, multichannel.squeeze(), sep="\n")

(1, 4, 4, 1)
[[2. 3. 5. 3.]
 [5. 3. 5. 5.]
 [3. 4. 5. 6.]
 [1. 2. 5. 2.]]


[(back...)](#Conv2D)

* ## filters
![](https://i.imgur.com/NCivRaq.gif)

In [20]:
# Stack eight copies of the single filter along a new trailing (filters) axis.
single_filter = filter_init.squeeze()
multi_filter_init = np.stack([single_filter] * 8, axis=-1).astype('float32')

print(multi_filter_init.shape)

(3, 3, 3, 8)


In [22]:
# Initializer that fills the kernel with the eight stacked filters.
kernel_init = tf.constant_initializer(value=multi_filter_init)

In [23]:
# Eight filters: the output gains one channel per filter.
multifilter_layer = Conv2D(
    filters=8,
    kernel_size=(3, 3),
    strides=(1, 1),
    kernel_initializer=kernel_init,
)
multifilter = multifilter_layer(input_img).numpy()

In [24]:
# Output shape, then the per-filter results.
print(multifilter.shape, multifilter.squeeze(), sep="\n")

(1, 4, 4, 8)
[[[2. 2. 2. 2. 2. 2. 2. 2.]
  [3. 3. 3. 3. 3. 3. 3. 3.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [3. 3. 3. 3. 3. 3. 3. 3.]]

 [[5. 5. 5. 5. 5. 5. 5. 5.]
  [3. 3. 3. 3. 3. 3. 3. 3.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [5. 5. 5. 5. 5. 5. 5. 5.]]

 [[3. 3. 3. 3. 3. 3. 3. 3.]
  [4. 4. 4. 4. 4. 4. 4. 4.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [6. 6. 6. 6. 6. 6. 6. 6.]]

 [[1. 1. 1. 1. 1. 1. 1. 1.]
  [2. 2. 2. 2. 2. 2. 2. 2.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [2. 2. 2. 2. 2. 2. 2. 2.]]]


[(back...)](#Conv2D)

* ## strides
![](https://i.imgur.com/8XWHNqI.gif)

In [25]:
# Load the single-channel sample image for the strides example.
# NOTE(review): the array is passed to Conv2D unchanged below, so the file
# presumably already stores a 4-D (batch, height, width, channel) array — confirm.
input_img = np.load("./data/conv2d_1channel_input.npy")

In [26]:
# The 3x3 diagonal filter is the identity matrix, reshaped to
# (height, width, channel, filters) and wrapped in a constant initializer.
filter_init = np.eye(3, dtype='float32').reshape((3, 3, 1, 1))
kernel_init = tf.constant_initializer(value=filter_init)

In [27]:
# Same 3x3 filter, but the window moves two pixels per step in each direction.
stride_layer = Conv2D(
    filters=1,
    kernel_size=(3, 3),
    strides=(2, 2),
    kernel_initializer=kernel_init,
)
stride_result = stride_layer(input_img).numpy()

In [28]:
# Output shape, then the strided convolution values.
print(stride_result.shape, stride_result.squeeze(), sep="\n")

(1, 2, 2, 1)
[[1. 2.]
 [0. 1.]]


![](https://i.imgur.com/2XmNAct.jpg)

# Flatten

* [Way1-Reshape](#Way1---Reshape)
* [Way2-Flatten](#Way2---Flatten)

In [29]:
import numpy as np
from tensorflow.keras.layers import Flatten, Reshape

In [30]:
# Values 0..15 laid out row by row as a (batch, height, width, channel) array.
input_img1 = np.arange(16, dtype='float32').reshape((1, 4, 4, 1))

* ## Way1 - Reshape

In [31]:
# target_shape=(-1,) collapses everything after the batch axis into one axis.
flatten_by_reshape = Reshape(target_shape=(-1,))
reshape_result = flatten_by_reshape(input_img1).numpy()

In [32]:
# Input shape, flattened shape, then the flattened values.
print(input_img1.shape, reshape_result.shape, reshape_result, sep="\n")

(1, 4, 4, 1)
(1, 16)
[[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]]


In [33]:
# Doubling the channel axis three times is the same as tiling it 2 ** 3 = 8 times.
input_img2 = np.tile(input_img1, (1, 1, 1, 2 ** 3))
print(input_img2.shape)

(1, 4, 4, 8)


In [35]:
# Flatten the 8-channel input with Reshape and show the shape and values.
reshape_result = Reshape(target_shape=(-1,))(input_img2).numpy()
print(reshape_result.shape, reshape_result, sep="\n")

(1, 128)
[[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.  2.  2.
   2.  2.  2.  2.  2.  2.  3.  3.  3.  3.  3.  3.  3.  3.  4.  4.  4.  4.
   4.  4.  4.  4.  5.  5.  5.  5.  5.  5.  5.  5.  6.  6.  6.  6.  6.  6.
   6.  6.  7.  7.  7.  7.  7.  7.  7.  7.  8.  8.  8.  8.  8.  8.  8.  8.
   9.  9.  9.  9.  9.  9.  9.  9. 10. 10. 10. 10. 10. 10. 10. 10. 11. 11.
  11. 11. 11. 11. 11. 11. 12. 12. 12. 12. 12. 12. 12. 12. 13. 13. 13. 13.
  13. 13. 13. 13. 14. 14. 14. 14. 14. 14. 14. 14. 15. 15. 15. 15. 15. 15.
  15. 15.]]


[(back...)](#Flatten)

* ## Way2 - Flatten

![](https://i.imgur.com/MvwO4a0.gif)

In [36]:
# Flatten the single-channel input with the dedicated Flatten layer.
flatten_layer = Flatten()
flatten_result = flatten_layer(input_img1).numpy()

In [37]:
# Input shape, flattened shape, then the flattened values.
print(input_img1.shape, flatten_result.shape, flatten_result, sep="\n")

(1, 4, 4, 1)
(1, 16)
[[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]]


![](https://i.imgur.com/FDh4d0L.gif)

In [38]:
# Flatten the 8-channel input with the Flatten layer.
flatten_layer = Flatten()
flatten_result = flatten_layer(input_img2).numpy()

In [39]:
# Flattened shape, then the flattened values.
print(flatten_result.shape, flatten_result, sep="\n")

(1, 128)
[[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.  2.  2.
   2.  2.  2.  2.  2.  2.  3.  3.  3.  3.  3.  3.  3.  3.  4.  4.  4.  4.
   4.  4.  4.  4.  5.  5.  5.  5.  5.  5.  5.  5.  6.  6.  6.  6.  6.  6.
   6.  6.  7.  7.  7.  7.  7.  7.  7.  7.  8.  8.  8.  8.  8.  8.  8.  8.
   9.  9.  9.  9.  9.  9.  9.  9. 10. 10. 10. 10. 10. 10. 10. 10. 11. 11.
  11. 11. 11. 11. 11. 11. 12. 12. 12. 12. 12. 12. 12. 12. 13. 13. 13. 13.
  13. 13. 13. 13. 14. 14. 14. 14. 14. 14. 14. 14. 15. 15. 15. 15. 15. 15.
  15. 15.]]


[(back...)](#Flatten)

# Padding

* [padding='VALID'](#padding='VALID')
* [padding='SAME'](#padding='SAME')
* [ZeroPadding](#ZeroPadding)

In [40]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, ZeroPadding2D

In [41]:
# 6x6 toy image for the padding examples, expanded to
# (batch, height, width, channel) in the same step.
image_rows = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0],
    [0, 1, 1, 1, 1, 0],
    [0, 0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
input_img = np.expand_dims(np.asarray(image_rows, dtype='float32'),
                           axis=(0, -1))

In [42]:
def kernel_init(shape, dtype=None):
    """Return the fixed 3x3 diagonal kernel used by the padding demos.

    Keras calls a custom initializer as ``initializer(shape, dtype=dtype)``.
    The kernel values are fixed, so ``shape`` is only validated.

    Args:
        shape: Kernel shape requested by the layer; must be ``(3, 3, 1, 1)``
            (height, width, channel, filters).
        dtype: Requested dtype of the kernel; ``float32`` is used when None.

    Returns:
        A constant tensor of shape ``(3, 3, 1, 1)`` with ones on the main
        diagonal of the 3x3 window and zeros elsewhere.

    Raises:
        ValueError: If ``shape`` is not ``(3, 3, 1, 1)``.
    """
    expected_shape = (3, 3, 1, 1)
    if tuple(shape) != expected_shape:
        raise ValueError(
            f"kernel_init only provides a {expected_shape} kernel, "
            f"got {tuple(shape)}")
    # Build the values as float32 up front instead of creating an int64 array
    # and relying on the tensor constructor to cast it.
    filter_init = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='float32')
    filter_init = filter_init.reshape(expected_shape)
    # Return a plain tensor: the layer builds its own variable from the
    # initial value, so wrapping it in another tf.Variable is unnecessary.
    return tf.constant(filter_init,
                       dtype=tf.float32 if dtype is None else dtype)

* ## padding='VALID'

In [43]:
# padding='VALID': the layer is applied to the image exactly as given.
valid_layer = Conv2D(
    filters=1,
    kernel_size=(3, 3),
    padding='VALID',
    kernel_initializer=kernel_init,
)
nopad_result = valid_layer(input_img).numpy()

In [45]:
# Input shape, output shape, then the output values.
print(input_img.shape, nopad_result.shape, nopad_result.squeeze(), sep="\n")

(1, 6, 6, 1)
(1, 4, 4, 1)
[[1. 1. 2. 1.]
 [2. 1. 2. 2.]
 [0. 3. 1. 2.]
 [0. 0. 2. 0.]]


[(back...)](#Padding)

* ## padding='SAME'
![](https://i.imgur.com/vZWnAvN.gif)

In [46]:
# padding='SAME': same filter, with the padding mode switched from 'VALID'.
same_layer = Conv2D(
    filters=1,
    kernel_size=(3, 3),
    padding='SAME',
    kernel_initializer=kernel_init,
)
pad_result = same_layer(input_img).numpy()

In [47]:
# Input shape, output shape, then the output values.
print(input_img.shape, pad_result.shape, pad_result.squeeze(), sep="\n")

(1, 6, 6, 1)
(1, 6, 6, 1)
[[0. 0. 1. 1. 0. 0.]
 [1. 1. 1. 2. 1. 0.]
 [0. 2. 1. 2. 2. 1.]
 [0. 0. 3. 1. 2. 1.]
 [0. 0. 0. 2. 0. 1.]
 [0. 0. 0. 0. 1. 0.]]


[(back...)](#Padding)

* ## ZeroPadding

In [50]:
# Pad explicitly with ZeroPadding2D, then convolve the padded tensor.
padded_input = ZeroPadding2D(padding=(1, 1))(input_img)
zero_layer = Conv2D(filters=1, kernel_size=(3, 3),
                    kernel_initializer=kernel_init)

zero_result = zero_layer(padded_input).numpy()
zero_padding = padded_input.numpy()

In [53]:
# Shapes before padding, after ZeroPadding2D, and after the convolution.
print(input_img.shape)
print(zero_padding.shape)
print(zero_result.shape)
# Show this section's own output. The cell previously printed pad_result
# (computed in the padding='SAME' cell), which hid the ZeroPadding2D result.
print(zero_result.squeeze())

(1, 6, 6, 1)
(1, 8, 8, 1)
(1, 6, 6, 1)
[[0. 0. 1. 1. 0. 0.]
 [1. 1. 1. 2. 1. 0.]
 [0. 2. 1. 2. 2. 1.]
 [0. 0. 3. 1. 2. 1.]
 [0. 0. 0. 2. 0. 1.]
 [0. 0. 0. 0. 1. 0.]]


In [None]:
# Output shape, then the values of the ZeroPadding2D + Conv2D result.
print(zero_result.shape, zero_result.squeeze(), sep="\n")

[(back...)](#Padding)

# Pooling


* [Average Pooling](#Average-Pooling)
* [Max Pooling](#Max-Pooling)

![](https://i.imgur.com/XZQtZC3.jpg)

In [54]:
import numpy as np
from tensorflow.keras.layers import AveragePooling2D, MaxPool2D

In [55]:
# 4x4 toy image for the pooling examples, stored as
# (batch, height, width, channel).
pool_rows = [
    [1, 2, 2, 0],
    [1, 2, 3, 2],
    [3, 1, 3, 2],
    [0, 2, 0, 2],
]
input_img = np.asarray(pool_rows, dtype='float32')[np.newaxis, ..., np.newaxis]

* ## Average Pooling

![](https://i.imgur.com/sDKe1To.gif)

In [56]:
# pool_size=(2, 2) is the layer's documented default, spelled out for clarity.
avg_pool = AveragePooling2D(pool_size=(2, 2))
avg_result = avg_pool(input_img).numpy()

In [57]:
# Input shape, pooled shape, then the pooled values.
print(input_img.shape, avg_result.shape, avg_result.squeeze(), sep="\n")

(1, 4, 4, 1)
(1, 2, 2, 1)
[[1.5  1.75]
 [1.5  1.75]]


[(back...)](#Pooling)

* ## Max Pooling

![](https://i.imgur.com/HZhzUzN.gif)

In [58]:
# pool_size=(2, 2) is the layer's documented default, spelled out for clarity.
max_pool = MaxPool2D(pool_size=(2, 2))
max_result = max_pool(input_img).numpy()

In [59]:
# Input shape, pooled shape, then the pooled values.
print(input_img.shape, max_result.shape, max_result.squeeze(), sep="\n")

(1, 4, 4, 1)
(1, 2, 2, 1)
[[2. 3.]
 [3. 3.]]


[(back...)](#Pooling)

# GlobalPooling

* [Global Average Pooling](#Global-Average-Pooling)
* [Global Max Pooling](#Global-Max-Pooling)

In [60]:
import numpy as np
from tensorflow.keras.layers import (GlobalAveragePooling2D,
                                     GlobalMaxPooling2D)

In [61]:
# Load the sample, prepend an axis at position 0, and cast to float32.
input_img = np.expand_dims(np.load("./data/globalpooling_input.npy"), axis=0)
input_img = input_img.astype('float32')

* ## Global Average Pooling

![](https://i.imgur.com/c62Vie8.gif)

In [62]:
# Full input shape, then the slice at index 0 of the last axis.
print(input_img.shape, input_img[..., 0], sep="\n")

(1, 4, 4, 8)
[[[ 0.  1.  2.  3.]
  [ 4.  5.  6.  7.]
  [ 8.  9. 10. 11.]
  [12. 13. 14. 15.]]]


In [63]:
# Apply global average pooling and keep the result as a NumPy array.
global_avg_pool = GlobalAveragePooling2D()
avg_result = global_avg_pool(input_img).numpy()

In [64]:
# Pooled shape, then the pooled values.
print(avg_result.shape, avg_result.squeeze(), sep="\n")

(1, 8)
[ 7.5  7.5  3.5  1.5  0.   1.5 -0.5  0.5]


In [65]:
# Cross-check: average over axes 1 and 2 by hand to compare with the layer output.
np.mean(input_img, axis=(1, 2))

array([[ 7.5,  7.5,  3.5,  1.5,  0. ,  1.5, -0.5,  0.5]], dtype=float32)

[(back...)](#GlobalPooling)

* ## Global Max Pooling

![](https://i.imgur.com/XFNnWSe.gif)

In [66]:
# Apply global max pooling and keep the result as a NumPy array.
global_max_pool = GlobalMaxPooling2D()
max_result = global_max_pool(input_img).numpy()

In [67]:
# Full input shape, then the slice at index 0 of the last axis.
print(input_img.shape, input_img[..., 0], sep="\n")

(1, 4, 4, 8)
[[[ 0.  1.  2.  3.]
  [ 4.  5.  6.  7.]
  [ 8.  9. 10. 11.]
  [12. 13. 14. 15.]]]


In [68]:
# Pooled shape, then the pooled values.
print(max_result.shape, max_result.squeeze(), sep="\n")

(1, 8)
[15. 15.  7.  3.  0.  3.  0.  2.]


[(back...)](#GlobalPooling)