# import

In [233]:
import numpy as np
from tensorflow.keras import datasets

# データの読み込み

In [234]:
mnist = datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 時間かかるので最初の100枚だけ取得
x_train = x_train[0:1000, :, :]
y_train = y_train[0:1000]
x_test = x_test[0:100, :, :]
y_test = y_test[0:100]

# データの整形

In [235]:
# グレースケールを0～1の間に変換
## 訓練データ
train_images = x_train / 255
## テストデータ
test_images = x_test / 255

# one-hot-encoding
## 訓練データ
train_one_hot_labels = np.zeros((len(y_train), 10))
for i, l in enumerate(y_train):
    train_one_hot_labels[i][l] = 1

## テストデータ
test_one_hot_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_one_hot_labels[i][l] = 1

# 活性化関数

In [236]:
# 活性化関数を定義
## 双曲線正接関数を定義
def tanh(x):
    return np.tanh(x)

## 双曲線正接関数の微分を定義
def tanh_deriv(x):
    return 1 - tanh(x)*tanh(x)

## softmax関数を定義
def softmax(x):
    exp_x = np.exp(x)
    return exp_x / np.sum(exp_x)
# , axis=1, keepdims=True

# パラメータ

In [237]:
# 学習率とイテレーション回数
alpha, iterations = (0.02, 300)

# 1画像あたりのピクセル数とラベルのクラス数
num_labels = 10

# バッチサイズ
batch_size = 2

# 入力画像のサイズ
_, input_rows, input_cols = x_train.shape

# カーネルに関するハイパーパラメータ
kernel_rows = 3
kernel_cols = 3
num_kernels = 16

# 隠れ層のサイズ
hidden_size = ((input_rows-kernel_rows+1) * (input_cols-kernel_cols+1)) * num_kernels

# カーネルの初期値

In [238]:
kernels = 0.02 * np.random.random((num_kernels, kernel_rows*kernel_cols)) - 0.01
weights_1_2 = 0.2 * np.random.random((num_labels, hidden_size)) - 0.01


# 順伝播の実装

## im2colの実装

In [239]:
def im2col(image, F_h, F_w):
    O_h = image.shape[0]-F_h+1
    O_w = image.shape[1]-F_w+1
    
    col = np.empty((F_h, F_w, O_h, O_w))

    for h in range(F_h):
        for w in range(F_w):
            col[h, w, :, :] = image[h : h+O_h, w : w+O_w]
    return col.reshape(F_h*F_w, O_h*O_w)


In [240]:
# im2colの挙動確認
im2col_ex = im2col(train_images[0], F_h=kernel_rows, F_w=kernel_cols)
print(im2col_ex)
print(im2col_ex.shape)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(9, 676)


## 順伝播本体の実装

In [241]:
def cnn(image, kernels, weights_1_2):
    # im2colによる変換
    col_from_image = im2col(image, F_h=kernel_rows, F_w=kernel_cols)
    # 畳み込み演算
    cnn_output = np.dot(kernels, col_from_image)
    # 全結合層への入力に変換
    fcn_input = tanh(cnn_output.reshape(-1))
    # 全結合層の出力
    fcn_output = np.dot(weights_1_2, fcn_input)
    # 出力層の出力
    ol_output = softmax(fcn_output)
    # 最終出力
    return np.argmax(ol_output)

In [242]:
# Accuracyの計算
acc = 0
for image, label in zip(x_test, y_test):
    result = cnn(image=image, kernels=kernels, weights_1_2=weights_1_2)
    acc += int(result==label)

print("Accuracy:", acc / len(x_test))
    

Accuracy: 0.11


# 逆伝播の実装
推定対象は<code>kernels</code>と<code>weights_1_2</code>の二つ。順伝播では、im2colで得られた行列を$C$とすると、
- 畳み込み演算後の出力：$X^{(10)} = KC$。(ただし、$K$は<code>kernels</code>のこと)
- 本当はここでflattenを行うので、$X^{(10)}$は1次元のベクトルに変換される
- 活性化関数${\rm tanh}$をかました後の出力：$X^{(11)} = {\rm tanh}(X^{(10)})$
- 全結合層の出力：$X^{(20)} = WX^{(11)}$。(ただし、$W$は<code>weights_1_2</code>のこと)
- 出力層の出力：$X^{(21)} = {\rm softmax}(X^{(20)})$
としている。

In [243]:
def back_propagation_cnn(images, one_hot_labels, kernels, weights_1_2, epoch):
    for _ in range(epoch):    
        for image, one_hot_label in zip(images, one_hot_labels):
            # 順伝播
            ## im2colによる変換
            col_from_image = im2col(image, F_h=kernel_rows, F_w=kernel_cols)
            ## 畳み込み演算
            cnn_output = np.dot(kernels, col_from_image)
            ## flatten
            cnn_output_flatten = cnn_output.reshape(-1)
            ## 全結合層への入力に変換
            fcn_input = tanh(cnn_output_flatten)
            ## 全結合層の出力
            fcn_output = np.dot(weights_1_2, fcn_input)
            ## 出力層の出力
            ol_output = softmax(fcn_output)
            
            # 誤差逆伝播
            output_delta = ol_output.reshape(num_labels, 1)-one_hot_label.reshape(num_labels, 1)
            ## weights_1_2の更新
            weights_1_2 -= alpha * (output_delta @ fcn_input.reshape(1, -1))
            
            ## kernelsの更新
            kernels -= alpha * ((tanh_deriv(cnn_output_flatten).reshape(-1, 1) * (weights_1_2.T @ output_delta)).reshape(num_kernels, -1)) @ col_from_image.T
        
    return weights_1_2, kernels

In [244]:
weights_1_2, kernels = back_propagation_cnn(images=x_train, one_hot_labels=train_one_hot_labels, kernels=kernels, weights_1_2=weights_1_2, epoch=10)

In [245]:
print(weights_1_2)
print(kernels)

[[ 0.08583741  0.16724828  0.17759539 ...  0.17603894  0.062585
   0.14975771]
 [ 0.08561066  0.0342123   0.18663416 ...  0.11726033  0.06510679
   0.11530162]
 [-0.00802659  0.02092663  0.02626932 ...  0.14397447  0.22380602
   0.08967592]
 ...
 [ 0.18334943  0.02193146  0.04488193 ...  0.02303941  0.17015256
   0.13843577]
 [ 0.05298112  0.14503708  0.08422366 ...  0.11698517  0.00865164
   0.060528  ]
 [ 0.00776529  0.10268865  0.16790026 ... -0.02556005  0.00891432
   0.15737823]]
[[ -3.7411914   -3.59281368  -4.06373619  -2.83246732  -4.00637716
   -4.67249112  -2.94506348  -3.84016182  -3.550644  ]
 [  0.84532005   3.22897332   7.85466959   4.03610655   4.26769537
    6.39170617  -1.58854386   1.78082833   7.51795018]
 [ -0.69628231  -0.2684483   -0.59690376  -0.17160873  -0.12672878
   -0.36325533  -0.58716428  -0.79473547  -0.90045056]
 [  4.54850834   3.50868662  -5.1823603    3.21513507   4.81489121
   -0.14174708   5.14776272   7.30248662   3.27188131]
 [ -2.40689214  -4.202

In [246]:
# Accuracyの計算
acc = 0
for image, label in zip(x_test, y_test):
    result = cnn(image=image, kernels=kernels, weights_1_2=weights_1_2)
    acc += int(result==label)

print("Accuracy:", acc / len(x_test))

Accuracy: 0.83


# 精度向上の仕方を確認

In [247]:
# パラメータの初期値
kernels = 0.02 * np.random.random((num_kernels, kernel_rows*kernel_cols)) - 0.01
weights_1_2 = 0.2 * np.random.random((num_labels, hidden_size)) - 0.01

for epoch_num in range(100):
    weights_1_2, kernels = back_propagation_cnn(images=x_train, one_hot_labels=train_one_hot_labels, kernels=kernels, weights_1_2=weights_1_2, epoch=epoch_num)
    
    # Accuracyの計算
    acc = 0
    for image, label in zip(x_test, y_test):
        result = cnn(image=image, kernels=kernels, weights_1_2=weights_1_2)
        acc += int(result==label)

    print("Accuracy:", acc / len(x_test))

Accuracy: 0.08
Accuracy: 0.59
Accuracy: 0.84
Accuracy: 0.81
Accuracy: 0.86
Accuracy: 0.85
Accuracy: 0.84
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 0.89
Accuracy: 