In [1]:
# import
import numpy as np
from tensorflow.keras import datasets
import sys




In [2]:
# Load the MNIST dataset
mnist = datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training on the full dataset is slow, so keep only the first NUM_SAMPLES
# images (and their labels) of each split. Raise NUM_SAMPLES for a longer,
# more representative run.
NUM_SAMPLES = 10
x_train = x_train[:NUM_SAMPLES]
y_train = y_train[:NUM_SAMPLES]
x_test = x_test[:NUM_SAMPLES]
y_test = y_test[:NUM_SAMPLES]

In [3]:
# Data preparation
def one_hot_encode(class_ids, num_classes=10):
    """Return a (len(class_ids), num_classes) float array of one-hot rows.

    Row ``i`` is all zeros except for a 1 in column ``class_ids[i]``.
    """
    encoded = np.zeros((len(class_ids), num_classes))
    # Fancy indexing sets one cell per row without a Python-level loop.
    encoded[np.arange(len(class_ids)), class_ids] = 1
    return encoded


## Flatten each training image into a row of 28*28 features scaled to [0, 1]
images = x_train.reshape(x_train.shape[0], 28*28) / 255

## Convert the training labels to one-hot encoding
one_hot_labels = one_hot_encode(y_train)
labels = one_hot_labels

## Flatten each test image the same way as the training images
test_images = x_test.reshape(len(x_test), 28*28) / 255

## Convert the test labels to one-hot encoding
test_labels_one_hot = one_hot_encode(y_test)
test_labels = test_labels_one_hot

# Activation functions
## Hyperbolic tangent
def tanh(x):
    """Apply the hyperbolic tangent element-wise by delegating to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

## Derivative of tanh, written in terms of the tanh output
def tanh_deriv(output):
    """Return ``1 - output**2``.

    ``output`` is expected to be a value already produced by ``tanh``; the
    derivative of tanh at the corresponding input is then ``1 - output**2``.
    """
    squared = output ** 2
    return 1 - squared

## Softmax
def softmax(x):
    """Return the row-wise softmax of a 2-D array of logits.

    Each row of the result is non-negative and sums to 1. The row maximum is
    subtracted before exponentiating: this leaves the result mathematically
    unchanged but keeps ``np.exp`` from overflowing to ``inf`` (and the
    division from producing ``nan``) when logits are large.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Seed NumPy's global random generator so the initial weights are repeatable
np.random.seed(1)

# Learning rate and number of training iterations
alpha = 0.02
iterations = 300

# Pixels per image and number of label classes
pixels_per_image = 784
num_labels = 10

# Mini-batch size
batch_size = 2

# Input image size (the leading axis of x_train is discarded)
_, input_rows, input_cols = x_train.shape

# Convolution kernel hyperparameters
kernel_rows, kernel_cols = 3, 3
num_kernels = 16

# Hidden layer size: one value per kernel for each of the
# (input_rows - kernel_rows) x (input_cols - kernel_cols) window positions
hidden_size = (input_rows - kernel_rows) * (input_cols - kernel_cols) * num_kernels

# Initial values. The kernels are drawn before the output weights, which
# fixes the order in which the seeded generator is consumed.
kernel_shape = (kernel_rows * kernel_cols, num_kernels)
kernels = 0.02 * np.random.random(kernel_shape) - 0.01
output_shape = (hidden_size, num_labels)
weights_1_2 = 0.2 * np.random.random(output_shape) - 0.1

# Cut the same rectangular window out of every image in a batch
def get_image_section(layer, row_from, row_to, col_from, col_to):
    """Slice rows ``row_from:row_to`` and columns ``col_from:col_to`` from each image.

    ``layer`` is indexed as (image, row, col). The result is reshaped to
    ``(-1, 1, row_to - row_from, col_to - col_from)`` so that windows from
    different positions can later be concatenated along axis 1.
    """
    height = row_to - row_from
    width = col_to - col_from
    window = layer[:, row_from:row_to, col_from:col_to]
    return window.reshape(-1, 1, height, width)

def _extract_patches(layer_0, patch_rows, patch_cols):
    """Collect the patch_rows x patch_cols windows of every image in a batch.

    ``layer_0`` is indexed as (image, row, col). One window is taken for each
    (row_start, col_start) in
    ``range(rows - patch_rows) x range(cols - patch_cols)``, which is the same
    position count that ``hidden_size`` is derived from.

    Returns an array of shape (images, positions, patch_rows, patch_cols).
    """
    sects = []
    for row_start in range(layer_0.shape[1] - patch_rows):
        for col_start in range(layer_0.shape[2] - patch_cols):
            sects.append(get_image_section(layer_0,
                                           row_start, row_start + patch_rows,
                                           col_start, col_start + patch_cols))
    return np.concatenate(sects, axis=1)


for j in range(iterations):
    # ---- Training pass over all full mini-batches ----
    train_acc = 0
    for i in range(len(images) // batch_size):
        batch_start, batch_end = i*batch_size, (i+1)*batch_size
        batch_labels = labels[batch_start:batch_end]
        layer_0 = images[batch_start:batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], input_rows, input_cols)

        # Lay every window out as one row so the convolution becomes a single matrix product
        expanded_input = _extract_patches(layer_0, kernel_rows, kernel_cols)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1], kernel_rows*kernel_cols)

        # Forward pass
        kernel_output = flattened_input.dot(kernels)
        layer_1_activation = tanh(kernel_output.reshape(es[0], -1))
        # Dropout: zero roughly half of the units and double the survivors so
        # the expected activation matches the dropout-free evaluation pass.
        dropout_mask = np.random.randint(2, size=layer_1_activation.shape)
        dropout_scale = dropout_mask * 2
        layer_1 = layer_1_activation * dropout_scale
        layer_2 = softmax(np.dot(layer_1, weights_1_2))

        # Count correct predictions in this batch
        train_acc += int(np.sum(np.argmax(layer_2, axis=1) == np.argmax(batch_labels, axis=1)))

        # Backward pass. The deltas are (label - prediction), i.e. the negative
        # gradient, so both parameter sets are updated by ADDING alpha * update.
        layer_2_delta = (batch_labels - layer_2) / batch_size
        # tanh_deriv needs the real tanh output, not the dropout-scaled value,
        # and the gradient flows back through the same 0/2 dropout factor.
        layer_1_delta = (layer_2_delta.dot(weights_1_2.T)
                         * dropout_scale
                         * tanh_deriv(layer_1_activation))
        weights_1_2 += alpha*layer_1.T.dot(layer_2_delta)
        l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(l1d_reshape)
        # Was `-=`, which moved the kernels uphill while weights_1_2 moved downhill
        kernels += alpha*k_update

    # ---- Evaluation pass: one test image at a time, no dropout ----
    test_acc = 0
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_0 = layer_0.reshape(layer_0.shape[0], input_rows, input_cols)

        expanded_input = _extract_patches(layer_0, kernel_rows, kernel_cols)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1], -1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        # Softmax is skipped here: it does not change which class has the largest score
        layer_2 = np.dot(layer_1, weights_1_2)

        test_acc += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

    # Report accuracy after every iteration
    sys.stdout.write(
        "\n"
        + "I:" + str(j)
        + " Train-Acc:" + str(train_acc/float(len(images)))
        + " Test-Acc:" + str(test_acc/float(len(test_images))))
        
        
        
        
         
                                                 
        
        
        
        
                
                
                
                
        









[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

I:0 Train-Acc:0.1 Test-Acc:0.1
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

[[0. 0. 0. ... 0. 0. 0.]
 

In [8]:
# Sanity check: shape of the most recent convolution output. For a single
# 28x28 image this is 25*25 = 625 window positions by num_kernels = 16.
kernel_output.shape

(625, 16)

In [9]:
# Sanity check: the hidden layer holds the kernel outputs of each image
# flattened into one row (hidden_size columns).
layer_1.shape

(1, 10000)

In [10]:
# Sanity check: window positions (625) times kernels (16) should equal the
# width of layer_1.
625*16

10000