# **Intro to common CNN APIs**
此份程式碼會介紹在 CNN model 當中常使用的 Layers。

## 本章節大綱
* [Conv2D( filters, kernel_size, strides, use_bias)](#Conv2D)
  * [use_bias](#use-bias)
  * [Multi-Channels](#Multi-Channels-with-1-Filter)
  * [filters](#filters)
  * [kernel_size](#Conv2D)
  * [strides](#strides)
* [Flatten](#Flatten)
* [Padding](#Padding)
* [Pooling](#Pooling)
* [GlobalPooling](#GlobalPooling)

In [None]:
# Download the data archive required for this lesson and unpack it
# (-q keeps both tools quiet). Later cells load their .npy inputs from ./data.
!wget -q "https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/cnn_part2_data.zip"
!unzip -q cnn_part2_data.zip

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [None]:
# 6x6 single-channel toy image (1 = foreground pixel, 0 = background)
pixel_rows = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0],
    [0, 1, 1, 1, 1, 0],
    [0, 0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
input_img = torch.tensor(pixel_rows, dtype=torch.float32)
input_img.shape

In [None]:
# add batch and channel dim
input_img = input_img.unsqueeze(0).unsqueeze(0)
print(input_img.shape)
print("(batch_size, channels, height, width)")

* ## Conv2D
![conv2D](https://hackmd.io/_uploads/Hy6RbRUIp.gif)

In [None]:
# One 3x3 filter: a single channel in, a single channel out, no bias term
layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, bias=False)
# Conv2d weight layout: (out_channels, in_channels, H, W)
layer.weight.shape

In [None]:
# Hand-made 3x3 kernel with ones on the main diagonal
filter_init = torch.tensor(
    [[1, 0, 0],
     [0, 1, 0],
     [0, 0, 1]],
    dtype=torch.float32,
)

# Conv2d expects weights shaped (out_channels, in_channels, H, W)
weights = filter_init[None, None, :, :]
with torch.no_grad():
    # replace the randomly initialised kernel with the hand-made one
    layer.weight = nn.Parameter(weights)

conv_result = layer(input_img)
print(conv_result)
print(conv_result.shape)

[(back...)](#Conv2D)

* ## use bias
![use bias](https://hackmd.io/_uploads/BkFRfR8La.gif)

In [None]:
layer = nn.Conv2d(1, 1, kernel_size=3, bias=True)
# bias shape: (out_channels, )
layer.bias, layer.bias.shape

In [None]:
# One bias value per output channel; use 1 so its effect on the output is visible
bias_init = torch.ones(1, dtype=torch.float32)
weights = filter_init.reshape(1, 1, 3, 3)

# Swap the hand-made kernel and bias in for the random initial values
with torch.no_grad():
    layer.weight, layer.bias = nn.Parameter(weights), nn.Parameter(bias_init)

bias_result = layer(input_img)
print(bias_result)
print(bias_result.shape)

In [None]:
print(bias_result.shape)
print(bias_result.squeeze())

[(back...)](#Conv2D)

* ## Multi Channels with 1 Filter
![Multi Channels with 1 filter](https://hackmd.io/_uploads/S1q1m08I6.gif)
![QEjI0jq](https://hackmd.io/_uploads/By9e70I8a.png)


In [None]:
input_img = np.load("./data/conv2d_multichannel_input.npy")
print(input_img.shape)
print(input_img.dtype)

In [None]:
input_img = input_img[np.newaxis, ...]
print(input_img.shape)
print("(Batch_size, Height, Width, Channel)")

In [None]:
input_img = torch.tensor(input_img, dtype=torch.float32)
input_img = input_img.permute(0, 3, 1, 2) # to channel first
print(input_img.shape, input_img.dtype)
print("(Batch_size, Channel, Height, Width)")

In [None]:
filter_init = np.load("./data/conv2d_multichannelfilter.npy")
print(filter_init.shape)
print("(Height, Width, Channel, Num of Filters)")

In [None]:
# weight shape: (out_channels, in_channels ,H, W)
kernel_init = torch.tensor(filter_init, dtype=torch.float32).permute(3, 2, 0, 1)
print(kernel_init.shape)

In [None]:
multichannel_layer = nn.Conv2d(3, 1, kernel_size=3)
with torch.no_grad():
    # set kernel weight
    multichannel_layer.weight = nn.Parameter(kernel_init)
    # set kernel bias
    multichannel_layer.bias = nn.Parameter(torch.zeros(1))

multichannel_result = multichannel_layer(input_img)
print(multichannel_result.shape)

In [None]:
print(multichannel_result.shape)
print(multichannel_result.squeeze())

[(back...)](#Conv2D)

* ## filters
![filters](https://hackmd.io/_uploads/BJV77RUU6.gif)

In [None]:
# Build 8 identical filters laid out as (Height, Width, Channel, Num of Filters):
# a single broadcast assignment copies the loaded filter into every slot of the
# last axis.
multi_filter_init = np.zeros((3, 3, 3, 8))
multi_filter_init[...] = filter_init.squeeze()[..., np.newaxis]
multi_filter_init = multi_filter_init.astype('float32')

print(multi_filter_init.shape)

In [None]:
kernel_init = torch.tensor(multi_filter_init)
kernel_init = kernel_init.permute(3, 2, 0, 1)
print(kernel_init.shape)

In [None]:
multifilter = nn.Conv2d(3, 8, kernel_size=3)
with torch.no_grad():
    # set kernel weight
    multifilter.weight = nn.Parameter(kernel_init)
    # set kernel bias
    multifilter.bias = nn.Parameter(torch.zeros(8))

    multifilter_result = multifilter(input_img)
    print(multifilter_result.shape)

In [None]:
plt.figure(figsize=(10, 5))
for i in range(8):
    plt.subplot(2, 4, i+1)
    plt.imshow(multifilter_result.squeeze()[i, :, :])
plt.show()

print(multifilter_result.shape)
print(multifilter_result.squeeze())

[(back...)](#Conv2D)

* ## strides
![strides](https://hackmd.io/_uploads/r17N708Up.gif)

In [None]:
# Load the single-channel example input (a 4-D array; presumably stored as
# (batch, height, width, channel) like the other inputs in this notebook).
input_img = np.load("./data/conv2d_1channel_input.npy")
# Cast explicitly to float32 so the input always matches the dtype of the
# Conv2d parameters, whatever dtype the .npy file was saved with, then move
# the last axis to position 1 to get the channel-first layout Conv2d expects.
input_img = torch.tensor(input_img, dtype=torch.float32).permute(0, 3, 1, 2)
input_img.shape

In [None]:
# Diagonal 3x3 kernel, expanded to the Conv2d weight layout
# (out_channels, in_channels, H, W) = (1, 1, 3, 3)
diagonal_kernel = np.array(
    [[1, 0, 0],
     [0, 1, 0],
     [0, 0, 1]],
    dtype='float32',
)
filter_init = torch.tensor(diagonal_kernel)[None, None, :, :]
print(filter_init.shape)

In [None]:
conv_stride_layer = nn.Conv2d(1, 1, kernel_size=3, stride=(2, 2))
with torch.no_grad():
    # set kernel weight
    conv_stride_layer.weight = nn.Parameter(filter_init)
    # set kernel bias
    conv_stride_layer.bias = nn.Parameter(torch.zeros(1))

    stride_result = conv_stride_layer(input_img)

In [None]:
print(stride_result.shape)
print(stride_result.squeeze())

![image](https://hackmd.io/_uploads/Sk__X0LIa.png)

# Flatten

* [Way1-Reshape](#Way1---Reshape)
* [Way2-Flatten](#Way2---Flatten)

In [None]:
import numpy as np
import torch
import torch.nn as nn

In [None]:
# 4x4 single-channel image holding 0..15 in row-major order,
# shaped (batch, channel, height, width)
input_img1 = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
print(input_img1.shape)

# copy the single channel 8 times along the channel axis
input_img2 = torch.cat([input_img1] * 8, dim=1)
print(input_img2.shape)

* ## Way1 - Reshape

In [None]:
reshape_result = input_img1.reshape(input_img1.shape[0], -1)
print(input_img1.shape)
print(reshape_result.shape)
print(reshape_result)

In [None]:
reshape_result = input_img2.reshape(input_img2.shape[0], -1)
print(input_img2.shape)
print(reshape_result.shape)
print(reshape_result)

[(back...)](#Flatten)

* ## Way2 - Flatten

![Flatten](https://hackmd.io/_uploads/ByBFmR8Ia.gif)

In [None]:

flatten_result = nn.Flatten()(input_img1)
flatten_result

In [None]:
print(input_img1.shape)
print(flatten_result.shape)
print(flatten_result)

In [None]:
flatten_result = nn.Flatten()(input_img2)
flatten_result

In [None]:
print(input_img2.shape)
print(flatten_result.shape)
print(flatten_result)

![Flatten_M](https://hackmd.io/_uploads/HyZ5Q08Ia.gif)

[(back...)](#Flatten)

# Padding

* [padding='valid'](#padding='VALID')
* [padding='same'](#padding='SAME')
* [padding=INT](#padding=INT)
* [nn.ZeroPad2d](#nn.ZeroPad2d)

In [None]:
import numpy as np
import torch
import torch.nn as nn

In [None]:
# 6x6 toy image used by every padding example below
pixel_rows = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0],
    [0, 1, 1, 1, 1, 0],
    [0, 0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
# add batch and channel dims -> (batch_size, channels, height, width)
input_img = torch.tensor(pixel_rows, dtype=torch.float32)
input_img = input_img.unsqueeze(0).unsqueeze(0)

In [None]:
def kernel_init(layer, output_channel=1):
    """Overwrite a conv layer's parameters with a fixed diagonal 3x3 kernel.

    Every output channel gets the same 3x3 filter (ones on the main diagonal,
    zeros elsewhere) over a single input channel, and the bias is set to
    zeros, so the layer's output is deterministic and easy to check by hand.

    Args:
        layer: Convolution module (e.g. ``nn.Conv2d``) whose ``weight`` and
            ``bias`` are replaced. The weights built here assume one input
            channel and a 3x3 kernel.
        output_channel: Number of output channels to build the weight and
            bias for. Defaults to 1.

    Returns:
        None. ``layer`` is modified in place.
    """
    diagonal = torch.tensor(
        [[1, 0, 0],
         [0, 1, 0],
         [0, 0, 1]],
        dtype=torch.float32,
    )
    # Conv2d weights are laid out as (out_channels, in_channels, H, W).
    # Repeat the filter along out_channels so the weight always agrees with
    # the bias length; with a single fixed (1, 1, 3, 3) weight, any
    # output_channel other than 1 would make the forward pass fail.
    weight = diagonal.reshape(1, 1, 3, 3).repeat(output_channel, 1, 1, 1)
    # Assigning fresh Parameters replaces the randomly initialised ones; no
    # autograd operation is recorded here, so no_grad() is not required.
    layer.weight = nn.Parameter(weight)
    layer.bias = nn.Parameter(torch.zeros(output_channel))

* ## padding='VALID'

In [None]:
layer = nn.Conv2d(1, 1, kernel_size=3, padding='valid')
kernel_init(layer)
nopad_result = layer(input_img)

In [None]:
print(input_img.shape)
print(nopad_result.shape)
print(nopad_result.squeeze())

[(back...)](#Padding)

* ## padding='SAME'
![padding_s](https://hackmd.io/_uploads/HyQBZ1PUp.gif)

In [None]:
layer = nn.Conv2d(1, 1, kernel_size=3, padding='same')
kernel_init(layer)
pad_result = layer(input_img)

In [None]:
print(input_img.shape)
print(pad_result.shape)
print(pad_result.squeeze())

[(back...)](#Padding)

* ## nn.ZeroPad2d

In [None]:
zero_padding = nn.ZeroPad2d(padding=1)(input_img)
layer = nn.Conv2d(1, 1, kernel_size=3)
kernel_init(layer)
zero_result = layer(zero_padding)

In [None]:
print(input_img.shape)
print(zero_padding.shape)
print(zero_padding.squeeze())

In [None]:
print(zero_result.shape)
print(zero_result.squeeze())

[(back...)](#Padding)

# Pooling


* [Average Pooling](#Average-Pooling)
* [Max Pooling](#Max-Pooling)

![image](https://hackmd.io/_uploads/H1LFb1D8p.png)

In [None]:
import numpy as np
import torch  # torch.tensor is called directly when building the pooling input
import torch.nn as nn

In [None]:
# 4x4 example feature map for the pooling demos,
# shaped (batch_size, channels, height, width)
pooling_rows = [
    [1, 2, 2, 0],
    [1, 2, 3, 2],
    [3, 1, 3, 2],
    [0, 2, 0, 2],
]
input_img = torch.tensor(pooling_rows, dtype=torch.float32)[None, None, :, :]

* ## Average Pooling

![avg pool](https://hackmd.io/_uploads/HkgoW1v86.gif)


In [None]:
avg_result = nn.AvgPool2d(kernel_size=2)(input_img)

In [None]:
print(input_img.shape)
print(avg_result.shape)
print(avg_result.squeeze())

[(back...)](#Pooling)

* ## Max Pooling

![max pool](https://hackmd.io/_uploads/rkCob1P8p.gif)

In [None]:
max_result = nn.MaxPool2d(kernel_size=2)(input_img)

In [None]:
print(input_img.shape)
print(max_result.shape)
print(max_result.squeeze())

[(back...)](#Pooling)

# GlobalPooling

* [Global Average Pooling](#Global-Average-Pooling)
* [Global Max Pooling](#Global-Max-Pooling)

In [None]:
import numpy as np
import torch  # torch.tensor is called directly when loading the global-pooling input
import torch.nn as nn

In [None]:
input_img = np.load("./data/globalpooling_input.npy")[np.newaxis, ...]
input_img = input_img.astype('float32')
input_img = torch.tensor(input_img).permute(0, 3, 1, 2)
input_img.shape

* ## Global Average Pooling

![GAP](https://hackmd.io/_uploads/Bk9n-1PIp.gif)


In [None]:
print(input_img.shape)
print(input_img[0, 0, ...])

In [None]:
avg_result = nn.AdaptiveAvgPool2d(1)(input_img)
avg_result = nn.Flatten()(avg_result)

In [None]:
print(avg_result.shape)
print(avg_result.squeeze())

In [None]:
tensor_operation = input_img.mean((2, 3))
print(tensor_operation.shape)
print(tensor_operation)

[(back...)](#GlobalPooling)

* ## Global Max Pooling

![GMP](https://hackmd.io/_uploads/B1mJz1DL6.gif)


In [None]:
max_result = nn.AdaptiveMaxPool2d(1)(input_img)
max_result = nn.Flatten()(max_result)

In [None]:
print(input_img.shape)
print(input_img[0, 0, :, :])

In [None]:
print(max_result.shape)
print(max_result.squeeze())

[(back...)](#GlobalPooling)