# 3章 ニューラルネットワーク

## 3.1 パーセプトロンからニューラルネットワークへ

### 3.1.1 ニューラルネットワークの例



### 3.1.2 パーセプトロンの復習

### 3.1.3 活性化関数の登場

## 3.2 活性化関数

### 3.2.1 シグモイド関数

### 3.2.2 ステップ関数の実装

>単純な実装

In [None]:
def step_function(x):
    """Return 1 when the scalar ``x`` is strictly positive, otherwise 0.

    This is the plain-Python version: ``x`` must be a single number,
    because the comparison result is used directly as a condition.
    """
    return 1 if x > 0 else 0

# Smoke test: 0.1 is greater than 0, so this prints 1.
x = 0.1
print(step_function(x))

>NumPy配列入力に対応した実装

In [None]:
import numpy as np
def step_function(x):
    """Element-wise step function for NumPy input.

    Args:
        x: A NumPy array (or anything ``np.asarray`` accepts).

    Returns:
        An integer array of the same shape holding 1 where ``x > 0`` and
        0 elsewhere.
    """
    y = np.asarray(x) > 0
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
    # is what that alias pointed to, so the resulting dtype is unchanged.
    return y.astype(int)

# Smoke test on an array with a negative value, a zero (written as -0,
# which is just the integer 0) and two positive values.
x = np.array([-1,-0,0.1,2])
print(step_function(x))

>NumPy配列入力に対応した実装の詳細説明コード

In [None]:
import numpy as np
# Step-by-step walkthrough of the array-aware step function.
x = np.array([-1.0,1.0,2.0])
print(x)
# Comparing an array with a scalar yields a boolean array of the same shape.
y = x > 0
print(y)
# Convert False/True to 0/1. The ``np.int`` alias was removed in NumPy 1.24,
# so the builtin ``int`` (what the alias pointed to) is used instead.
y = y.astype(int)
print(y)

### 3.2.3 ステップ関数のグラフ

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def step_function(x):
    """Return an integer array that is 1 where ``x > 0`` and 0 elsewhere.

    Args:
        x: A NumPy array of numbers.

    Returns:
        Integer array with the same shape as ``x``.
    """
    # ``dtype=np.int`` fails on NumPy >= 1.24 (alias removed); the builtin
    # ``int`` is the exact type that alias referred to.
    return np.array(x > 0, dtype=int)

# Sample x from -5.0 up to (but not including) 5.0 in steps of 0.1,
# evaluate the step function on every sample, and plot the result.
x = np.arange(-5.0, 5.0, 0.1)
y = step_function(x)
plt.plot(x,y)
plt.ylim(-0.1,1.1)  # y-axis limits slightly wider than the 0..1 output range
plt.show()

### 3.2.4 シグモイド関数の実装

>シグモイド関数の実装

In [None]:
import numpy as np
import matplotlib.pylab as plt
def sigmoid(x):
    """Compute the sigmoid 1 / (1 + exp(-x)) element-wise.

    Works for scalars and NumPy arrays alike because every operation
    broadcasts.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

# Smoke test: apply sigmoid to a three-element array and print the result.
x = np.array([-1.0, 1.0, 2.0])
print(sigmoid(x))

>NumPyのブロードキャスト機能の説明コード

In [None]:
import numpy as np
# Broadcasting demo: a scalar combined with an array is applied to
# every element of the array.
t = np.array([1.0, 2.0, 3.0])
print(1.0+t)  # scalar + array -> element-wise addition
print(1.0/t)  # scalar / array -> element-wise division

[2. 3. 4.]
[1.         0.5        0.33333333]


> シグモイド関数のグラフ

In [None]:
import numpy as np
import matplotlib.pylab as plt
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), evaluated element-wise on ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Plot the sigmoid on x from -5.0 up to (but not including) 5.0, step 0.1.
x = np.arange(-5.0, 5.0, 0.1)
y = sigmoid(x)
plt.plot(x,y)
plt.ylim(-0.1, 1.1)  # y-axis limits slightly wider than the 0..1 output range
plt.show()

### 3.2.5 シグモイド関数とステップ関数の比較

### 3.2.6 非線形関数

### 3.2.7 ReLU関数

In [None]:
import numpy as np
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

# Smoke test: the negative entry is clipped to 0, positive entries pass through.
x = np.array([-1.0, 1.0, 2.0])
print(relu(x))

## 3.3 多次元配列の計算

### 3.3.1 多次元配列

In [None]:
import numpy as np
# 1-D array: inspect its number of dimensions and its shape.
A = np.array([1, 2, 3, 4])
print(A)
print(np.ndim(A))    # number of dimensions
print(A.shape)       # shape is a tuple, even for a 1-D array
print(A.shape[0])    # length of the first (only) axis

# 2-D array built from three rows of two values each.
B = np.array([[1,2],[3,4],[5,6]])
print(B)
print(np.ndim(B))
print(B.shape)

### 3.3.2 行列の積

>行列の積
$C=AB$
の各要素$c_{ij}$は，$A$の各要素を$a_{ij}$，$B$の各要素を$b_{ij}$，
$m$を$A$の列数（＝$B$の行数 [でなければならない]）として，また行番号・列番号は1始まりとして，一つ目の添え字を行，二つ目の添え字を列とすると，
$$c_{ij}=\sum_{k=1}^{m} a_{ik}b_{kj}$$
である。    
$A$を$l\times m$，$B$を$m\times n$行列とすれば，$C$は$l\times n$行列である。

>2×2行列と2×2行列の積

In [None]:
import numpy as np

# Matrix product of a 2x2 matrix with a 2x2 matrix.
A = np.array([[1, 2], [3, 4]])
print(A.shape)
B = np.array([[5, 6], [7, 8]])
print(B.shape)
print(np.dot(A,B))  # np.dot on two 2-D arrays is the matrix product

>2×3行列と3×2行列の積

In [None]:
import numpy as np

# Matrix product of a 2x3 matrix with a 3x2 matrix; the inner sizes (3)
# match, so the result is 2x2.
A = np.array([[1,2,3],[4,5,6]])
print(A.shape)
B = np.array([[1,2], [3,4], [5,6]])
print(B.shape)
print(np.dot(A,B))

>[エラー]2×3行列と2×2行列の積

In [None]:
import numpy as np

# Intentional error: A is 2x3 and C is 2x2, so A's column count (3) does
# not equal C's row count (2) and np.dot raises a ValueError.
A = np.array([[1,2,3],[4,5,6]])
print(A.shape)
C = np.array([[1,2], [3,4]])
print(C.shape)
print(np.dot(A,C))  # raises: shapes are not aligned

>3×2行列と2×1行列（ベクトル）の積

In [None]:
import numpy as np

# Product of a 3x2 matrix with a 2-element vector.
# Note that B is a 1-D array of shape (2,), not a 2x1 matrix.
A = np.array([[1,2],[3,4],[5,6]])
print(A.shape)
B = np.array([7,8])
print(B.shape)
print(np.dot(A,B))  # result is a 1-D array with 3 elements

### 3.3.3 ニューラルネットワークの行列の積

In [None]:
import numpy as np

# A bias-free, activation-free layer written as a single matrix product:
# 2 inputs (X) times a 2x3 weight matrix (W) gives 3 outputs (Y).
X = np.array([1,2])
print(X.shape)
W = np.array([[1,3,5],[2,4,6]])
print(W)
print(W.shape)
Y = np.dot(X,W)
print(Y)

## 3.4 ３層ニューラルネットワークの実装

### 3.4.1 記号の確認

### 3.4.2 各層における信号伝達の実装

In [None]:
import numpy as np
def sigmoid(x):
    """Sigmoid activation 1 / (1 + exp(-x)), applied element-wise."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def identity_function(x):
    """Return ``x`` unchanged (used below as the output-layer activation)."""
    return x

# Input layer (2 values) -> first hidden layer (3 units).
X = np.array([1.0,0.5])
W1 = np.array([[0.1,0.3,0.5],[0.2,0.4,0.6]])
B1 = np.array([0.1, 0.2, 0.3])
print(W1.shape)
print(X.shape)
print(B1.shape)
A1 = np.dot(X,W1) + B1   # weighted sum plus bias
Z1 = sigmoid(A1)         # hidden-layer activation
print(A1)
print(Z1)

# First hidden layer (3 units) -> second hidden layer (2 units).
W2 = np.array([[0.1,0.4],[0.2,0.5],[0.3,0.6]])
B2 = np.array([0.1,0.2])
print(Z1.shape)
print(W2.shape)
print(B2.shape)
A2 = np.dot(Z1,W2) + B2
Z2 = sigmoid(A2)
print(A2)
print(Z2)

# Second hidden layer (2 units) -> output layer (2 units).
W3 = np.array([[0.1, 0.3], [0.2, 0.4]])
B3 = np.array([0.1, 0.2])
A3 = np.dot(Z2, W3) + B3
Y = identity_function(A3)  # output layer passes the weighted sum through unchanged

print(Y)

### 3.4.3 実装のまとめ

In [None]:
import numpy as np
def sigmoid(x):
    """Element-wise sigmoid: 1 / (1 + exp(-x))."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def identity_function(x):
    """Return ``x`` unchanged (the output-layer activation used by ``forward``)."""
    return x

def init_network():
    """Create the fixed example parameters for the three-layer network.

    Returns:
        dict mapping 'W1'/'b1', 'W2'/'b2', 'W3'/'b3' to the weight matrix
        and bias vector of each layer (shapes 2x3, 3x2 and 2x2 for the
        weights). A fresh dict with fresh arrays is built on every call.
    """
    return {
        'W1': np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]),
        'b1': np.array([0.1, 0.2, 0.3]),
        'W2': np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]),
        'b2': np.array([0.1, 0.2]),
        'W3': np.array([[0.1, 0.3], [0.2, 0.4]]),
        'b3': np.array([0.1, 0.2]),
    }

def forward(network, x):
    """Propagate ``x`` through the three layers stored in ``network``.

    Args:
        network: dict with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
        x: Input array whose last dimension matches the rows of 'W1'.

    Returns:
        The output-layer value: two sigmoid hidden layers followed by
        the identity function on the final weighted sum.
    """
    weights = [network[name] for name in ('W1', 'W2', 'W3')]
    biases = [network[name] for name in ('b1', 'b2', 'b3')]
    hidden = sigmoid(np.dot(x, weights[0]) + biases[0])
    hidden = sigmoid(np.dot(hidden, weights[1]) + biases[1])
    return identity_function(np.dot(hidden, weights[2]) + biases[2])

# Build the example network and run one forward pass on a 2-element input.
network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y)

## 3.5 出力層の設計

### 3.5.1 恒等関数とソフトマックス関数

>説明用コード

In [None]:
import numpy as np

# Softmax computed by hand, one step at a time.
a = np.array([0.3, 2.9, 4.0])
exp_a = np.exp(a)          # exponential of every element
print(exp_a)
sum_exp_a = np.sum(exp_a)  # sum of the exponentials
print(sum_exp_a)

y = exp_a / sum_exp_a      # each exponential divided by the sum
print(y)

>ソフトマックス関数の実装

In [None]:
import numpy as np

# softmax function (straightforward version)
def softmax(a):
    """Return exp(a) divided by the sum of exp(a) over all elements.

    This is the direct translation of the formula. It has no overflow
    protection: ``np.exp`` of large inputs becomes ``inf``.
    """
    exponentials = np.exp(a)
    return exponentials / np.sum(exponentials)

### 3.5.2 ソフトマックス関数の実装上の注意

>説明用コード

In [43]:
import numpy as np

# Overflow demo: exp() of values around 1000 overflows to inf, but
# subtracting the maximum first keeps every exponent <= 0.
a = np.array([1010, 1000, 990])
exp_a = np.exp(a)
print(exp_a)   # the recorded output below shows [inf inf inf]
c = np.max(a)
print(a-c)     # shifted inputs: the largest becomes 0
print(np.exp(a-c)/np.sum(np.exp(a-c)))  # softmax of the shifted inputs is finite

[inf inf inf]
[  0 -10 -20]
[9.99954600e-01 4.53978686e-05 2.06106005e-09]


  """


>ソフトマックス関数の実装（改善）

In [None]:
import numpy as np

# softmax function improved
def softmax(a):
    """Numerically stable softmax along the last axis of ``a``.

    Args:
        a: Scores. A 1-D array is treated as one sample; for a 2-D array
            of shape (batch, classes) every row is normalised on its own.

    Returns:
        Array with the same shape as ``a`` whose values along the last
        axis are positive and sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    # Subtracting the maximum does not change the result but keeps exp()
    # from overflowing for large inputs.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    return exp_a / sum_exp_a

### 3.5.3 ソフトマックス関数の特徴

In [None]:
import numpy as np

# softmax function improved
def softmax(a):
    """Numerically stable softmax along the last axis of ``a``.

    Args:
        a: Scores. A 1-D array is treated as one sample; for a 2-D array
            of shape (batch, classes) every row is normalised on its own.

    Returns:
        Array with the same shape as ``a`` whose values along the last
        axis are positive and sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    # Shift by the maximum so that exp() never sees a positive argument.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

# Apply softmax to a 3-element array and print the sum of its outputs.
a = np.array([0.3, 2.9, 4.0])
y = softmax(a)
print(y)
print(np.sum(y))  # total of the softmax outputs

### 3.5.4 出力層のニューロンの数

## 3.6 手書き数字認識

### 3.6.1 MNISTデータセット

>MNISTデータセットの読み込み

In [36]:
######################################
# Mount Google Drive (this import only exists inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')
# Add the chapter directory on Drive to the module search path.
import sys
target_dir_path = 'drive/My Drive/Colab Notebooks/deep_learning_from_scratch/chapter03/'
sys.path.append(target_dir_path)
######################################

import sys, os
sys.path.append(os.pardir)  # also search the parent of the working directory
# NOTE(review): presumably `dataset/mnist.py` lives under one of the paths
# added above - confirm against the Drive layout.
from dataset.mnist import load_mnist

# Load MNIST as flattened images without normalisation.
(x_train, t_train), (x_test, t_test) = \
    load_mnist(flatten = True, normalize = False)

# Print the shapes of the training/test images and labels.
print(x_train.shape)
print(t_train.shape)
print(x_test.shape)
print(t_test.shape)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
(60000, 784)
(60000,)
(10000, 784)
(10000,)


>MNISTデータセット画像の表示

In [35]:
######################################
# Mount Google Drive (this import only exists inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')
# Add the chapter directory on Drive to the module search path.
import sys
target_dir_path = 'drive/My Drive/Colab Notebooks/deep_learning_from_scratch/chapter03/'
sys.path.append(target_dir_path)
######################################

import sys, os
sys.path.append(os.pardir)  # also search the parent of the working directory
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image

def img_show(img):
    """Convert ``img`` to unsigned 8-bit pixels and open it with PIL's viewer.

    Args:
        img: Array of pixel values; it is cast with ``np.uint8`` before
            being handed to ``Image.fromarray``.
    """
    Image.fromarray(np.uint8(img)).show()

# Load MNIST as flattened images without normalisation.
(x_train, t_train), (x_test, t_test) = \
    load_mnist(flatten = True, normalize = False)

# Take the first training image and its label.
img = x_train[0]
label = t_train[0]
print(label)

print(img.shape)

# The image is stored flattened; restore the 28x28 layout before displaying it.
img = img.reshape(28,28)
print(img.shape)

img_show(img)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
5
(784,)
(28, 28)


### 3.6.2 ニューラルネットワークの推論処理

In [39]:
######################################
# Mount Google Drive (this import only exists inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')
# Add the chapter directory on Drive to the module search path.
import sys
target_dir_path = 'drive/My Drive/Colab Notebooks/deep_learning_from_scratch/chapter03/'
sys.path.append(target_dir_path)
######################################

import sys, os
sys.path.append(os.pardir)  # also search the parent of the working directory
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image
import pickle

# sigmoid activation
def sigmoid(x):
    """Element-wise sigmoid: 1 / (1 + exp(-x))."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

# softmax function improved
def softmax(a):
    """Numerically stable softmax along the last axis of ``a``.

    Args:
        a: Scores. A 1-D array is treated as one sample; for a 2-D array
            of shape (batch, classes) every row is normalised on its own.

    Returns:
        Array with the same shape as ``a`` whose values along the last
        axis are positive and sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    # Subtracting the maximum does not change the result but keeps exp()
    # from overflowing for large inputs.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    return exp_a / sum_exp_a

def get_data():
    """Load MNIST and return only the test split.

    Returns:
        ``(x_test, t_test)`` as returned by ``load_mnist`` with
        normalisation and flattening enabled and one-hot labels disabled.
    """
    datasets = load_mnist(normalize = True, flatten = True, one_hot_label=False)
    (_train_images, _train_labels), (x_test, t_test) = datasets
    return x_test, t_test

def init_network():
    """Load the pre-trained parameters from ``sample_weight.pkl``.

    The file is looked up under the module-level ``target_dir_path``.

    Returns:
        The object stored in the pickle file.
    """
    weight_path = target_dir_path + "sample_weight.pkl"
    # NOTE: pickle.load executes whatever the file contains; only use it on
    # a weight file you trust.
    with open(weight_path, 'rb') as f:
        return pickle.load(f)

def predict(network, x):
    """Run the three-layer network on ``x`` and return the softmax output.

    Args:
        network: dict with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
        x: Input array whose last dimension matches the rows of 'W1'.

    Returns:
        Softmax of the output layer's weighted sum.
    """
    weights = [network[name] for name in ('W1', 'W2', 'W3')]
    biases = [network[name] for name in ('b1', 'b2', 'b3')]
    hidden = x
    # The two hidden layers use the sigmoid activation.
    for W, b in zip(weights[:-1], biases[:-1]):
        hidden = sigmoid(np.dot(hidden, W) + b)
    # The output layer uses softmax.
    return softmax(np.dot(hidden, weights[-1]) + biases[-1])

# Load the test data and the pre-trained parameters.
x, t = get_data()
network = init_network()

# Classify the test images one at a time and count the correct predictions.
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])
    p = np.argmax(y)  # index of the largest output = predicted label
    if p == t[i] :
        accuracy_cnt += 1

# Fraction of test images whose predicted label equals the true label.
print("Accuracy:" + str(float(accuracy_cnt)/len(x)))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Accuracy:0.9352


### 3.6.3 バッチ処理

In [41]:
######################################
# Mount Google Drive (this import only exists inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')
# Add the chapter directory on Drive to the module search path.
import sys
target_dir_path = 'drive/My Drive/Colab Notebooks/deep_learning_from_scratch/chapter03/'
sys.path.append(target_dir_path)
######################################

import sys, os
sys.path.append(os.pardir)  # also search the parent of the working directory
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image
import pickle

# sigmoid activation
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), evaluated element-wise on ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# softmax function improved (batch-aware)
def softmax(a):
    """Numerically stable softmax along the last axis of ``a``.

    ``predict`` calls this with a whole mini-batch of shape
    (batch, classes), so the maximum and the sum are taken per row;
    reducing over the entire batch would leave rows that do not sum to 1.

    Args:
        a: Scores, either 1-D (one sample) or 2-D (batch, classes).

    Returns:
        Array with the same shape as ``a`` whose values along the last
        axis are positive and sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    # Subtracting the per-row maximum does not change the result but keeps
    # exp() from overflowing for large inputs.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    return exp_a / sum_exp_a

def get_data():
    """Return the MNIST test images and labels.

    ``load_mnist`` is called with normalisation and flattening enabled
    and one-hot labels disabled; the training split is discarded.
    """
    (_unused_x, _unused_t), test_split = \
        load_mnist(normalize = True, flatten = True, one_hot_label=False)
    x_test, t_test = test_split
    return x_test, t_test

def init_network():
    """Read ``sample_weight.pkl`` from ``target_dir_path`` and return its contents.

    Returns:
        The unpickled object (used by the caller as the network dict).
    """
    # NOTE: pickle.load executes whatever the file contains; only use it on
    # a weight file you trust.
    pkl_file = open(target_dir_path + "sample_weight.pkl", 'rb')
    with pkl_file as f:
        network = pickle.load(f)
    return network

def predict(network, x):
    """Forward pass of the three-layer network ending in softmax.

    Args:
        network: dict with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
        x: One input vector or a batch of input rows; the last dimension
            must match the rows of 'W1'.

    Returns:
        ``softmax`` applied to the output layer's weighted sum.
    """
    W1, W2, W3 = (network[key] for key in ('W1', 'W2', 'W3'))
    b1, b2, b3 = (network[key] for key in ('b1', 'b2', 'b3'))
    hidden1 = sigmoid(np.dot(x, W1) + b1)
    hidden2 = sigmoid(np.dot(hidden1, W2) + b2)
    scores = np.dot(hidden2, W3) + b3
    return softmax(scores)

# Load the test data and the pre-trained parameters, then print the shape
# of one input and of each weight matrix.
x, t = get_data()
network = init_network()
W1, W2, W3 = network['W1'],network['W2'],network['W3']
print(x[0].shape)
print(W1.shape)
print(W2.shape)
print(W3.shape)

# Batched inference: classify batch_size images per predict() call.
batch_size = 100
accuracy_cnt = 0

for i in range(0,len(x),batch_size):
    x_batch = x[i:i+batch_size]          # slicing past the end just yields a shorter last batch
    y_batch = predict(network, x_batch)
    p=np.argmax(y_batch, axis=1)         # predicted label for every row of the batch
    accuracy_cnt+= np.sum(p==t[i:i+batch_size])  # number of correct predictions in this batch
print("Accuracy:" + str(float(accuracy_cnt)/len(x)))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
(784,)
(784, 50)
(50, 100)
(100, 10)
Accuracy:0.9352


>説明用コード1

In [None]:
import numpy as np
# range(start, stop) counts up by 1; range(start, stop, step) counts up by step.
print(list(range(0,10)))
print(list(range(0,10,3)))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 3, 6, 9]


>説明用コード2

In [None]:
import numpy as np
# argmax with axis=1 returns, for each row, the column index of its largest value.
x = np.array([[0.1, 0.8, 0.1], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3], [0.8,0.1,0.1]])
y = np.argmax(x,axis=1)
print(y)
# Look up the maximum of each row through the index returned by argmax.
for i in range(0,4):
    print (x[i][y[i]])

[1 2 1 0]
0.8
0.6
0.5
0.8


>説明用コード3

In [42]:
import numpy as np
# Comparing two arrays with == gives a boolean array; np.sum counts the True entries.
y = np.array([1,2,1,0])
t = np.array([1,2,0,0])
print(y==t)
print(np.sum(y==t))

[ True  True False  True]
3


## 3.7 まとめ