## 最終課題(irisデータのニューラルネットワーク構築)

In [1]:
# 必要なライブラリのインポート
import sys, os
sys.path.append(os.pardir)  # 親ディレクトリのファイルをインポートするための設定

from common import functions
from common import optimizer
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# データセットのロード
# iris.data = [(がく片の長さ , がく片の幅 , 花びらの長さ , 花びらの幅)]
iris = datasets.load_iris()

x_vals = np.array([x[0:3] for x in iris.data])
y_vals = np.array([x[3] for x in iris.data])

# トレーニングデータ（80％）とテストデータ（20％）に分割
x_train, x_test, y_train, y_test = train_test_split(x_vals, y_vals, test_size=0.2, shuffle=True)

In [None]:
# 学習率
learning_rate = 0.05

# 重み補正係数
weight_init = 0.01

# 入力層
input_layer_size = 3

# 中間層
hidden_layer_size1 = 40
hidden_layer_size2 = 20

# 出力層
output_layer_size = 1

# 学習回数(1万回)
learning_num = 10000

# 描写頻度
plot_interval=10

# ミニバッチサイズ
batch_size = 100

train_size = len(x_train)

# ネットワークの初期化を実施
def init_network():
    network = {}
    
    # 重みの設定
    # 通常設定
    network['W1'] = weight_init * np.random.randn(input_layer_size, hidden_layer_size1)
    network['W2'] = weight_init * np.random.randn(hidden_layer_size1, hidden_layer_size2)
    network['W3'] = weight_init * np.random.randn(hidden_layer_size2, output_layer_size)

#     # Xavierでの設定
#     network['W1'] = np.random.randn(input_layer_size, hidden_layer_size1) / (np.sqrt(input_layer_size))
#     network['W2'] = np.random.randn(hidden_layer_size1, hidden_layer_size2) / (np.sqrt(hidden_layer_size1))
#     network['W3'] = np.random.randn(hidden_layer_size2, output_layer_size) / (np.sqrt(hidden_layer_size2))
    
#     # Heでの設定
#     network['W1'] = np.random.randn(input_layer_size, hidden_layer_size1) / (np.sqrt(input_layer_size)) * np.sqrt(2)
#     network['W2'] = np.random.randn(hidden_layer_size1, hidden_layer_size2) / (np.sqrt(hidden_layer_size1)) * np.sqrt(2)
#     network['W3'] = np.random.randn(hidden_layer_size2, output_layer_size) / (np.sqrt(hidden_layer_size2)) * np.sqrt(2)
    
    # バイアスの設定
    network['b1'] = np.zeros(hidden_layer_size1)
    network['b2'] = np.zeros(hidden_layer_size2)
    network['b3'] = np.zeros(output_layer_size)
    
    return network
    
# 順伝播
def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    # 勾配
    u1 = np.dot(x, W1) + b1
    # 活性化関数 Relu関数を使用
    z1 = functions.relu(u1)
    # 勾配
    u2 = np.dot(z1, W2) + b2
    # 活性化関数 Relu関数を使用
    z2 = functions.relu(u2)
    # 勾配
    u3 = np.dot(z2, W3) + b3
    # 誤差関数(softmax関数)
    y = functions.softmax(u3)
    
    return z1, z2, y

# 逆伝播
def backward(x, d, z1, z2, y):
    grad = {}
    
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    # 出力層でのデルタ
    delta3 = functions.d_softmax_with_loss(d, y)
    # b3の勾配
    grad['b3'] = np.sum(delta3, axis=0)
    # W3の勾配
    grad['W3'] = np.dot(z2.T, delta3)
    # 活性化関数の導関数 Relu関数
    delta2 = np.dot(delta3, W3.T) * functions.d_relu(z2)
    # b2の勾配
    grad['b2'] = np.sum(delta2, axis=0)
    # W2の勾配
    grad['W2'] = np.dot(z1.T, delta2)
    # 活性化関数の導関数 Relu関数
    delta1 = np.dot(delta2, W2.T) * functions.d_relu(z1)
    # b1の勾配
    grad['b1'] = np.sum(delta1, axis=0)
    # W1の勾配
    grad['W1'] = np.dot(x.T, delta1)
    
    return grad

# 正答率
def accuracy(x, d):
    z1, z2, y = forward(network, x)
    y = np.argmax(y, axis=1)
    if d.ndim != 1 : d = np.argmax(d, axis=1)
    accuracy = np.sum(y == d) / float(x.shape[0])
    return accuracy

# パラメータの初期化
network = init_network()

for i in range(learning_num):
    # ランダムにバッチを取得    
    batch_mask = np.random.choice(train_size, batch_size)
    # ミニバッチに対応する教師訓練画像データを取得    
    x_batch = x_train[batch_mask]
    # ミニバッチに対応する訓練正解ラベルデータを取得する
    y_batch = y_train[batch_mask]
    y_batch = y_batch[:, np.newaxis]
    
    z1, z2, y = forward(network, x_batch)
    grad = backward(x_batch, y_batch, z1, z2, y)




[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[0.         0.         0.         ... 0.00035606 0.         0.00218072]
 [0.         0.         0.00098981 ... 0.         0.         0.0013112 ]
 [0.         0.         0.00090963 ... 0.         0.         0.00100823]
 ...
 [0.         0.         0.0004921  ... 0.         0.         0.00123979]
 [0.         0.         0.00093584 ... 0.         0.         0.00067375]
 [0.         0.         0.00123268 ... 0.         0.         0.00107148]]
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.