In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Date    : Jan-29-21 14:38
# @Author  : Kan HUANG (kan.huang@connect.ust.hk)

import torch
import numpy as np
torch.__version__

'1.8.1'

#### Appendix: Dimensions' order

PyTorch is *channel_first*:

```Python
images = torch.randint(0, 256, size=(N, C, H, W), dtype=torch.float32)
kernels = torch.rand(size=(N_filters, C, ksize, ksize))
```

For TensorFlow:

```Python
images = tf.random.uniform((N, H, W, C), 0, 255, dtype=tf.dtypes.float32)
kernels = tf.random.uniform((ksize, ksize, C, N_filters), 0, 255, dtype=tf.dtypes.float32)
```

### Forward

In [2]:
# Batch of N images with C channels, each h_x by w_x pixels.
N, C = 16, 3
h_x = w_x = 32
# N_filters square kernels of side ksize, spanning all C input channels.
N_filters, ksize = 6, 5

# PyTorch layout is channels-first: (N, C, H, W) for the inputs and
# (N_filters, C, kH, kW) for the kernels.
image_shape = (N, C, h_x, w_x)
kernel_shape = (N_filters, C, ksize, ksize)
images = torch.randint(low=0, high=256, size=image_shape, dtype=torch.float32)
kernels = torch.rand(size=kernel_shape)

In [3]:
# Reference layer: a standard convolution with 3 input channels,
# 6 output channels and a 3x3 kernel.
conv2d = torch.nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
reference_out = conv2d(images)
reference_out.size()

torch.Size([16, 6, 30, 30])

In [4]:
# Short aliases used by the following cells: X is the input batch, W the filter bank.
X, W = images, kernels
print(f"X.shape: {X.shape}", f"W.shape: {W.shape}", sep="\n")

X.shape: torch.Size([16, 3, 32, 32])
W.shape: torch.Size([6, 3, 5, 5])


In [63]:
# Convolution hyper-parameters.
# NOTE: with 5x5 kernels, padding=1 shrinks a 32x32 input to 30x30; real
# "same" padding for an odd kernel side k would be padding = (k - 1) // 2.
padding = 1  # 0, 1
stride = 1

# d_filter == d_x when groups=1
n_filters, d_filter, h_filter, w_filter = W.size()
n_x, d_x, h_x, w_x = X.size()

# Output size follows floor((in + 2*padding - kernel) / stride) + 1.
# Integer floor division gives the exact int directly, instead of going
# through a float and truncating it with int().
h_out = (h_x + 2 * padding - h_filter) // stride + 1
w_out = (w_x + 2 * padding - w_filter) // stride + 1

print(h_out, w_out)

30 30


### Unfold

两种表达没有区别：

```Python
unfold(X.view(1, -1, h_x, w_x)).view(n_x, -1, h_out*w_out)
```

```Python
unfold(X)
```

In [64]:
# X.view(1, -1, h_x, w_x) merges all samples into a single "image" whose
# channel axis holds N*C channels; after unfolding, the result is split back
# into one block per sample.
# The kernel size is passed as (h_filter, w_filter) so that the number of
# sliding positions matches h_out * w_out even for non-square kernels.
X_merged = X.view(1, -1, h_x, w_x)
X_col = torch.nn.functional.unfold(
    X_merged, (h_filter, w_filter), padding=padding, stride=stride
).view(n_x, -1, h_out * w_out)
print(X_col.size())

torch.Size([16, 75, 900])


In [65]:
# Unfold output layout: [N, C * prod(kernel_size), L].
# The kernel size is passed as (h_filter, w_filter) so non-square kernels
# are unfolded with the same window that h_out / w_out were computed for.
X_col1 = torch.nn.functional.unfold(
    X, (h_filter, w_filter), padding=padding, stride=stride)
print(X_col1.shape)
# 16 images
# 75 = 3x5x5 pixel values fed to each Conv2D/Adder operation
# 900 sliding-window positions (areas) per image

torch.Size([16, 75, 900])


In [9]:
# Sanity check: the merged-view unfold (X_col) and the direct unfold (X_col1)
# must hold exactly the same values in the same layout.
assert torch.equal(X_col, X_col1), "merged-view unfold differs from direct unfold"

### Unfold results

X_col: Tensor with shape $(N, C\times\prod(kernel\_size),L)$

$(N, C\times\prod(kernel\_size),L)$

$(N, C\times\prod(kernel\_size), h_{out}, w_{out})$

Positions along $L$ are ordered row by row:

h0: w0 ~ w_last

h1: w0 ~ w_last, ...


C-style: row-major

#### Explanations

N: sample index, keeps the same.

$C\times\prod(kernel\_size)$: perception area's size for each Conv2D/Adder operation. For example:

|  channel_0   | channel_1  | channel_2|
|  ----  | ----  |----  |
| pixel values | pixel values | pixel values  |



L: Perception area index dim. L is total output size for one image sample, and one filter.

Actually $L=h_{out}\times w_{out}$

#### Appendix: Conv2D layer in PyTorch
Inputs:

X:  $(N, C_{\text{in}}, H, W)$

W: $(N_{filters}, C_{\text{in}}, k\_size, k\_size)$

Outputs:

out: $(N, N_{filters}, H_{out}, W_{out})$

$H_{out}, W_{out}$ depend on $H, W$, the kernel size, `padding` and `stride`.

#### Unfold

[https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html](https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html)

## Forward

In [10]:
# Flatten every filter into one row: [n_filters, op_dim].
W_col = W.view(n_filters, -1)
# First line:  X_col as [N, op_dim, out_dim]
# Second line: W_col as [N_filter, op_dim]
print(X_col.size(), W_col.size(), sep="\n")

torch.Size([16, 75, 900])
torch.Size([6, 75])


### Forward

In [11]:
# permute reorders the axes so that the sample axis N comes last:
#   [N, C*prod(k_size), L] -> [C*prod(k_size), L, N]
# and view then merges the last two axes:
#   -> [C*prod(k_size), (h_out*w_out)*N]
# The source is X_col1 (the 3-D unfold result, which holds the same values as
# the 3-D X_col and is never rebound) rather than X_col itself, so running
# this cell more than once always yields the same 2-D X_col.
X_col = X_col1.permute(1, 2, 0).contiguous().view(X_col1.size(1), -1)
print(X_col.size())  # [C*prod(k_size), h_out*w_out*N]

torch.Size([75, 14400])


In [12]:
print(f"X_col.size(): {X_col.size()}")
print(f"W_col.size(): {W_col.size()}")
# Broadcasting: [n_filters, op_dim, 1] against [1, op_dim, L*N] gives
# [n_filters, op_dim, L*N]; summing |W - X| over op_dim and negating yields
# one response per filter and per (position, sample) column.
abs_diff = (W_col[:, :, None] - X_col[None, :, :]).abs()
output = -abs_diff.sum(dim=1)
print(output.shape)  # reduced over every channel and kernel position

# 900 * 16

X_col.size(): torch.Size([75, 14400])
W_col.size(): torch.Size([6, 75])
torch.Size([6, 14400])


### Reshape output back

output

In [13]:
# Mind the axis order: split the flat last axis into (h_out, w_out, n_x),
# then bring the sample axis to the front.
# Note that this rebinds `output`, so the cell expects the 2-D result of the
# previous cell as its input.
split_axes = output.view(n_filters, h_out, w_out, n_x)
output = split_axes.permute(3, 0, 1, 2).contiguous()  # -> (n_x, n_filters, h_out, w_out)

In [14]:
# Display the final layout; after the permute above it is (n_x, n_filters, h_out, w_out).
output.shape

torch.Size([16, 6, 30, 30])

## Backward

In [15]:
# Operand shapes for the backward pass:
#   X_col: [C*prod(k_size), h_out*w_out*N]
#   W_col: [n_filters, C*prod(k_size)]
print(X_col.size(), W_col.size(), sep="\n")

torch.Size([75, 14400])
torch.Size([6, 75])


In [16]:
# Element-wise difference X - W, broadcast to [n_filters, op_dim, L*N].
tmp = X_col[None, :, :] - W_col[:, :, None]
tmp.shape

torch.Size([6, 75, 14400])

In [69]:
import sys
# Add the parent directory to the module search path; presumably this is where
# the project's `addernet` package lives relative to this notebook.
sys.path.append("..")
from addernet.adder_torch import Adder2D

In [71]:
# NOTE(review): the positional arguments presumably mirror torch.nn.Conv2d
# (in_channels, out_channels, kernel_size) - confirm against addernet.adder_torch.
layer = Adder2D(3, 16, 3)

In [73]:
# Apply the Adder2D layer to the input batch X and keep its output.
result = layer(X)