## 畳み込み層の実装

In [1]:
import numpy as np

### 4次元配列

In [2]:
x = np.random.rand(10, 1, 28, 28)

# 1つ目のデータの形状
x[0].shape

(1, 28, 28)

### 4次元配列を2次元の行列へ展開

In [3]:
# 1枚目の画像データを用意
input_data = np.random.rand(1, 1, 3, 3)

input_data.shape

(1, 1, 3, 3)

In [4]:
N, C, H, W = input_data.shape

print(N, C, H, W)

1 1 3 3


In [5]:
# 展開の設定
filter_h = 3
filter_w = 3
stride = 1
pad = 1

In [6]:
# 特徴マップのサイズ取得
out_h = (H + 2 * pad - filter_h) // stride + 1
out_w = (W + 2 * pad - filter_w) // stride + 1

out_h, out_w

(3, 3)

In [7]:
# パディングの実装
img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')

print(img)

[[[[0.         0.         0.         0.         0.        ]
   [0.         0.12008749 0.26034823 0.31243241 0.        ]
   [0.         0.89416154 0.13200925 0.68446477 0.        ]
   [0.         0.68226961 0.8210616  0.3151989  0.        ]
   [0.         0.         0.         0.         0.        ]]]]


In [8]:
img.shape

(1, 1, 5, 5)

In [10]:
# 展開用の0を用意
col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

col.shape

(1, 1, 3, 3, 3, 3)

In [11]:
print(col)

[[[[[[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]

    [[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]

    [[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]]


   [[[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]

    [[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]

    [[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]]


   [[[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]

    [[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]

    [[0. 0. 0.]
     [0. 0. 0.]
     [0. 0. 0.]]]]]]


In [12]:
# Dry run of the unfolding loop: for every filter offset (y, x), print which
# slice of the padded image would be copied into col[:, :, y, x].
for y in range(filter_h):
    # Exclusive end of the row slice that starts at offset y
    y_max = y + stride * out_h
    for x in range(filter_w):
        # Exclusive end of the column slice that starts at offset x
        x_max = x + stride * out_w
        print(f'画像データの縦{y}~{y_max}、横{x}~{x_max}領域を、colの{y, x}番目に代入する')

画像データの縦0~3、横0~3領域を、colの(0, 0)番目に代入する
画像データの縦0~3、横1~4領域を、colの(0, 1)番目に代入する
画像データの縦0~3、横2~5領域を、colの(0, 2)番目に代入する
画像データの縦1~4、横0~3領域を、colの(1, 0)番目に代入する
画像データの縦1~4、横1~4領域を、colの(1, 1)番目に代入する
画像データの縦1~4、横2~5領域を、colの(1, 2)番目に代入する
画像データの縦2~5、横0~3領域を、colの(2, 0)番目に代入する
画像データの縦2~5、横1~4領域を、colの(2, 1)番目に代入する
画像データの縦2~5、横2~5領域を、colの(2, 2)番目に代入する


In [16]:
# 画像データの最初の領域
print(img[:, :, 0:3:1, 0:3:1])

[[[[0.         0.         0.        ]
   [0.         0.12008749 0.26034823]
   [0.         0.89416154 0.13200925]]]]


In [17]:
# col に代入する
col[:, :, 0, 0, :, :] = img[:, :, 0:3:1, 0:3:1]
print(col)

[[[[[[0.         0.         0.        ]
     [0.         0.12008749 0.26034823]
     [0.         0.89416154 0.13200925]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]]


   [[[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]]


   [[[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0. 

In [18]:
# 画像データの 2 番目の領域
print(img[:, :, 0:3:1, 1:4:1])

[[[[0.         0.         0.        ]
   [0.12008749 0.26034823 0.31243241]
   [0.89416154 0.13200925 0.68446477]]]]


In [19]:
# col に代入
col[:, :, 0, 1, :, :] = img[:, :, 0:3:1, 1:4:1]
print(col)

[[[[[[0.         0.         0.        ]
     [0.         0.12008749 0.26034823]
     [0.         0.89416154 0.13200925]]

    [[0.         0.         0.        ]
     [0.12008749 0.26034823 0.31243241]
     [0.89416154 0.13200925 0.68446477]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]]


   [[[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]]


   [[[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0.         0.        ]
     [0.         0.         0.        ]
     [0.         0.         0.        ]]

    [[0.         0. 

In [20]:
# Fill col for every filter offset (y, x).
for y in range(filter_h):
    # Exclusive end of the strided row slice that starts at offset y
    y_max = y + stride*out_h
    for x in range(filter_w):
        # Exclusive end of the strided column slice that starts at offset x
        x_max = x + stride*out_w
        # Taking every `stride`-th pixel from (y, x) gives the out_h x out_w values
        # that sit at filter position (y, x) across all output locations.
        col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
print(col)

[[[[[[0.         0.         0.        ]
     [0.         0.12008749 0.26034823]
     [0.         0.89416154 0.13200925]]

    [[0.         0.         0.        ]
     [0.12008749 0.26034823 0.31243241]
     [0.89416154 0.13200925 0.68446477]]

    [[0.         0.         0.        ]
     [0.26034823 0.31243241 0.        ]
     [0.13200925 0.68446477 0.        ]]]


   [[[0.         0.12008749 0.26034823]
     [0.         0.89416154 0.13200925]
     [0.         0.68226961 0.8210616 ]]

    [[0.12008749 0.26034823 0.31243241]
     [0.89416154 0.13200925 0.68446477]
     [0.68226961 0.8210616  0.3151989 ]]

    [[0.26034823 0.31243241 0.        ]
     [0.13200925 0.68446477 0.        ]
     [0.8210616  0.3151989  0.        ]]]


   [[[0.         0.89416154 0.13200925]
     [0.         0.68226961 0.8210616 ]
     [0.         0.         0.        ]]

    [[0.89416154 0.13200925 0.68446477]
     [0.68226961 0.8210616  0.3151989 ]
     [0.         0.         0.        ]]

    [[0.13200925 0.6

In [21]:
col.shape

(1, 1, 3, 3, 3, 3)

In [22]:
# Reorder the axes to (N, out_h, out_w, C, filter_h, filter_w), then flatten to a
# 2-D matrix: one row per output position, one column per (channel, filter row, filter column).
# NOTE(review): this rebinds `col` to the 2-D result, so running the cell a second
# time applies the 6-axis transpose to a 2-D array and fails.
col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)

In [23]:
col.shape

(9, 9)

In [24]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold a batch of images into a 2-D matrix of filter-sized patches.

    Every row of the result contains the values covered by one placement of
    the filter window (across all channels), which lets a convolution be
    written as a single matrix product.

    Parameters
    ----------
    input_data : ndarray
        4-D array unpacked as (N, C, H, W).
    filter_h, filter_w : int
        Height and width of the filter window.
    stride : int, default 1
        Step between neighbouring window placements.
    pad : int, default 0
        Number of zero rows/columns added on each side of the H and W axes.

    Returns
    -------
    ndarray
        Array of shape (N * out_h * out_w, C * filter_h * filter_w), where
        out_h = (H + 2*pad - filter_h) // stride + 1 and likewise for out_w.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Zero-pad only the two spatial axes; batch and channel axes stay untouched.
    border = (pad, pad)
    padded = np.pad(input_data, [(0, 0), (0, 0), border, border], 'constant')
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    # Length of the strided slice that yields out_h (out_w) samples.
    row_span = stride * out_h
    col_span = stride * out_w
    for dy, dx in np.ndindex(filter_h, filter_w):
        # All pixels that land on filter cell (dy, dx), one per output position.
        patches[:, :, dy, dx, :, :] = padded[:, :, dy:dy + row_span:stride, dx:dx + col_span:stride]

    # Bring (N, out_h, out_w) to the front so each output position becomes one row.
    position_major = patches.transpose(0, 4, 5, 1, 2, 3)
    return position_major.reshape(batch * out_h * out_w, -1)

In [25]:
x1 = np.random.rand(10, 3, 7, 7)
col1 = im2col(x1, 3, 3, stride=1, pad=1)
print(col1.shape)

(490, 27)


In [26]:
# 画像、重み、バイアス、パディング、ストライドの設定
x = np.random.rand(1, 3, 224, 224)
conv_W = np.random.rand(64, 3, 3, 3)
b = 0
pad = 1
stride = 1

In [27]:
# Read the sizes from the weight and input shapes
FN, C, FH, FW = conv_W.shape
N, C, H, W = x.shape

# Output feature-map size (height, width)
out_h = 1 + int((H + 2*pad - FH) / stride)
out_w = 1 + int((W + 2*pad - FW) / stride)
print(out_h, out_w)

224 224


In [28]:
# 画像データの行列展開
col = im2col(x, FH, FW, stride, pad)
print(col.shape)

(50176, 27)


In [29]:
# 重みを行列に整形
col_W = conv_W.reshape(FN, -1).T
print(col_W.shape)

(27, 64)


In [30]:
# 畳み込み演算（画像と重みの行列積）
out = np.dot(col, col_W) + b
print(out.shape)

(50176, 64)


In [31]:
# Reshape back into image layout: (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
print(out.shape)

(1, 64, 224, 224)


In [32]:
class Convolution:
    """Convolution layer (forward pass only) evaluated as an im2col matrix product."""

    def __init__(self, W, b, stride=1, pad=0):
        """Store the layer parameters.

        W is the filter bank, unpacked in forward() as (FN, C, FH, FW).
        b is added to the (positions x filters) product, so it must broadcast
        against the filter axis (e.g. a scalar or a length-FN array).
        stride and pad are forwarded unchanged to im2col.
        """
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Convolve x, unpacked as (N, C, H, W), and return (N, FN, out_h, out_w)."""
        n_filters, _, kernel_h, kernel_w = self.W.shape
        batch, _, in_h, in_w = x.shape
        step, border = self.stride, self.pad

        # Spatial size of the output feature map.
        out_h = int((in_h + 2 * border - kernel_h) / step) + 1
        out_w = int((in_w + 2 * border - kernel_w) / step) + 1

        # One row per output position, one column per filter weight.
        patches = im2col(x, kernel_h, kernel_w, step, border)
        # One column per filter, flattened in the same (C, FH, FW) order.
        kernel_matrix = self.W.reshape(n_filters, -1).T
        responses = patches.dot(kernel_matrix) + self.b

        # Rows are ordered (n, oy, ox); restore that grid, then move filters to axis 1.
        grid = responses.reshape(batch, out_h, out_w, -1)
        return np.transpose(grid, (0, 3, 1, 2))

In [33]:
# 画像と重み
x = np.random.rand(1, 3, 224, 224)
W = np.random.rand(64, 3, 3, 3)

In [34]:
# Convolution クラスのインスタンス化
conv = Convolution(W, 0, 1, 1)

In [35]:
# 畳み込み処理
out = conv.forward(x)
print(out.shape)

(1, 64, 224, 224)


## プーリング層の実装

In [36]:
# 畳み込み演算後の out を使用
out.shape

(1, 64, 224, 224)

In [37]:
# プーリングの設定
pool_h = 2
pool_w = 2
stride = 2
pad = 0

In [38]:
# Output size after pooling. `pad` is part of the formula so the size matches
# the number of window positions im2col produces when it is given the same
# pad; without it the later reshape fails for any pad != 0.
N, C, H, W = out.shape
out_h = (H + 2*pad - pool_h) // stride + 1
out_w = (W + 2*pad - pool_w) // stride + 1
print(out_h, out_w)

112 112


In [39]:
# Unfold into a matrix
col = im2col(out, pool_h, pool_w, stride, pad)
# im2col keeps all C channels of a window position in one row; split the rows
# so that each one holds a single pool_h x pool_w window of a single channel.
col = col.reshape(-1, pool_h*pool_w)
print(col.shape)

(802816, 4)


In [40]:
# 最大値取得
out = np.max(col, axis=1)
print(out.shape)

(802816,)


In [41]:
out

array([6.32678157, 7.33021409, 8.91505879, ..., 5.53159073, 6.68773394,
       6.22980891])

In [42]:
# 整形
out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
print(out.shape)

(1, 64, 112, 112)


In [43]:
class Pooling:
    """Max pooling layer (forward pass only) built on im2col."""

    def __init__(self, pool_h, pool_w, stride=2, pad=0):
        """Store the window size (pool_h x pool_w), the stride and the padding.

        pad is forwarded to im2col, which pads with zeros; with pad > 0 those
        zeros take part in the max, so they win over negative inputs.
        """
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Max-pool x, unpacked as (N, C, H, W), and return (N, C, out_h, out_w)."""
        N, C, H, W = x.shape
        # The output size must include the padding, exactly as im2col computes it;
        # leaving it out makes the final reshape fail for any pad > 0.
        out_h = (H + 2*self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w) // self.stride + 1

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # im2col rows are (n, oy, ox) and columns are (c, ky, kx); splitting the
        # columns gives one row per (n, oy, ox, c) pooling window.
        col = col.reshape(-1, self.pool_h*self.pool_w)

        out = np.max(col, axis=1)
        # Restore the (n, oy, ox, c) grid, then move the channels to axis 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out

In [44]:
x = np.random.rand(10, 3, 224, 224)
W = np.random.rand(64, 3, 3, 3)
conv = Convolution(W, 0, 1, 1)
pool = Pooling(2, 2, 2, 0)

In [45]:
# 畳み込み処理
out = conv.forward(x)
print(out.shape)

(10, 64, 224, 224)


In [46]:
# プーリング処理
out_ = pool.forward(out)
print(out_.shape)

(10, 64, 112, 112)


## 実際の画像データと学習済みの重みで確認

In [47]:
from PIL import Image

In [None]:
img = Image.open('sample.jpg').convert('RGB')
img

In [None]:
# Scale the pixel values by 1/255, add a leading batch axis and move the last
# (channel) axis to position 1, giving the (N, C, H, W) layout Convolution expects.
# The array gets its own name so `img` stays the loaded image and re-running
# this cell does not divide by 255 a second time.
img_array = np.array(img) / 255.0
x = img_array[np.newaxis, :, :, :].transpose(0, 3, 1, 2)

In [None]:
import torchvision
from torchvision.models import resnet18
# Load ResNet-18 with pretrained weights; only its first convolution layer is used below.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour of
# the `weights=` argument — confirm against the installed version.
feature = resnet18(pretrained=True)
print(feature.conv1)
print(feature.conv1.weight.shape)
# Move the first-layer kernel to the CPU, detach it from autograd and convert it
# to a NumPy array so it can be passed to the NumPy Convolution class.
filter_W = feature.conv1.weight.cpu().detach().numpy()

### Convolution

In [None]:
# convolution クラスのインスタンス化
conv = Convolution(filter_W, b=False, stride=2, pad=3)

In [None]:
out_conv = conv.forward(x)
print(out_conv.shape)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Show every output channel of the convolution as a grayscale tile.
n_channels = out_conv.shape[1]
n_cols = 8
# Keep the 8x8 layout for up to 64 channels and add rows only beyond that;
# plt.subplot rejects an index larger than rows * cols.
n_rows = max(8, -(-n_channels // n_cols))
plt.figure(figsize=(20, 20))
for n in range(n_channels):
    plt.subplot(n_rows, n_cols, n+1)
    plt.imshow(out_conv[0][n], cmap='gray')
    plt.axis('off')

### Pooling

In [None]:
# Pooling クラスのインスタンス化
pool = Pooling(2, 2, 2, 0)

In [None]:
# pooling
out_pool = pool.forward(out_conv)
print(out_pool.shape)

In [None]:
# Show every pooled channel as a grayscale tile.
n_channels = out_pool.shape[1]
n_cols = 8
# Keep the 8x8 layout for up to 64 channels and add rows only beyond that;
# plt.subplot rejects an index larger than rows * cols.
n_rows = max(8, -(-n_channels // n_cols))
plt.figure(figsize=(20, 20))
for n in range(n_channels):
    plt.subplot(n_rows, n_cols, n+1)
    plt.imshow(out_pool[0][n], cmap='gray')
    plt.axis('off')