In [2]:


import numpy as np

In [3]:
# Implementing a 3-layer neural network
def sigmoid(x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid of ``x``; for array input the result has the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def identity(x):
    """Identity activation: hand the input back unchanged.

    Used below as the output-layer activation, so the last affine result is
    passed through as-is.
    """
    return x


# Input vector plus the first layer's weights and biases
X = np.array([1.0, 0.5])
B1 = np.array([0.1, 0.2, 0.3])
W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])

# Show the array shapes before multiplying
print(W1.shape, X.shape, B1.shape, sep="\n")

# Layer 1: affine transform (matrix product plus bias)
A1 = np.dot(X, W1) + B1
print(A1.shape, A1, sep="\n")

# Layer 1: sigmoid activation, printed next to its pre-activation input
Z1 = sigmoid(A1)
print(A1, Z1, sep="\n")

# Layer 2 parameters
B2 = np.array([0.1, 0.2])
W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])

print(Z1.shape, W2.shape, B2.shape, sep="\n")

# Layer 2: affine transform followed by sigmoid
A2 = np.dot(Z1, W2) + B2
Z2 = sigmoid(A2)

# Output layer: affine transform followed by the identity activation
B3 = np.array([0.1, 0.2])
W3 = np.array([[0.1, 0.3], [0.2, 0.4]])
A3 = np.dot(Z2, W3) + B3
Y = identity(A3)

print(A3)

(2, 3)
(2,)
(3,)
(3,)
[0.3 0.7 1.1]
[0.3 0.7 1.1]
[0.57444252 0.66818777 0.75026011]
(3,)
(3, 2)
(2,)
[0.31682708 0.69627909]


In [4]:
def sigmoid(x):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The activated value(s); array input keeps its shape.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)


def identity(x):
    """Return ``x`` untouched (identity activation for the output layer)."""
    return x


def init_network():
    """Build the fixed example parameters of the 3-layer network.

    Returns:
        dict: NumPy arrays keyed by name -- weight matrices ``W1`` (2x3),
        ``W2`` (3x2), ``W3`` (2x2) and bias vectors ``b1`` (3,), ``b2`` (2,),
        ``b3`` (2,).
    """
    network = {}

    # Layer 1: 2 inputs -> 3 units
    network["W1"] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network["b1"] = np.array([0.1, 0.2, 0.3])

    # Layer 2: 3 units -> 2 units
    network["W2"] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network["b2"] = np.array([0.1, 0.2])

    # Output layer: 2 units -> 2 outputs
    network["W3"] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network["b3"] = np.array([0.1, 0.2])

    return network


def forward(network, x):
    """Run one forward pass through the 3-layer network.

    Args:
        network: Mapping holding the weights ``W1``-``W3`` and biases
            ``b1``-``b3``.
        x: Input array fed to the first layer.

    Returns:
        The output layer's affine result passed through ``identity``.
    """
    weights = [network[key] for key in ("W1", "W2", "W3")]
    biases = [network[key] for key in ("b1", "b2", "b3")]

    # Both hidden layers use the sigmoid activation.
    signal = x
    for weight, bias in zip(weights[:-1], biases[:-1]):
        signal = sigmoid(np.dot(signal, weight) + bias)

    # The output layer uses the identity activation.
    return identity(np.dot(signal, weights[-1]) + biases[-1])


# Example input signal
x = np.array([1.0, 0.5])
# Create the weights and biases
network = init_network()
# Turn the input signal into the network output
y = forward(network, x)
print(y)

[0.31682708 0.69627909]


In [5]:
# Implementing the softmax function, step by step
# Softmax maps an input array to a probability distribution whose entries
# always sum to 1.
a = np.array([0.3, 2.9, 4.0])

# Exponentiate every element
exp_a = np.exp(a)
print(exp_a)

# Normalising constant: the sum of the exponentials
sum_exp_a = exp_a.sum()
print(sum_exp_a)

# Each exponential divided by the total
y = exp_a / sum_exp_a
print(y)


def softmax(a):
    """Convert scores into a probability distribution along the last axis.

    The maximum score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing on
    large inputs.

    Args:
        a: Array-like of scores. A 1-D input yields one distribution; for
            N-D input (e.g. a ``(batch, classes)`` matrix) every slice along
            the last axis is normalised independently.

    Returns:
        numpy.ndarray: Same shape as ``a``; values along the last axis are
        non-negative and sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None

    # Shift by the maximum of the *argument* (not some outer variable).
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)


# Print the softmax() result for the array `a` defined earlier in this cell
print(softmax(a))

[ 1.34985881 18.17414537 54.59815003]
74.1221542101633
[0.01821127 0.24519181 0.73659691]
[0.01821127 0.24519181 0.73659691]


In [6]:
# Forward propagation on the MNIST image set
import os
import sys

import numpy as np
from PIL import Image

# Extend the module search path *before* importing the project-local
# ``dataset`` package; appending it afterwards cannot help that import.
sys.path.append(os.path.curdir)

from dataset.mnist import load_mnist


def image_show(image):
    """Display pixel data as an image via PIL.

    Args:
        image: Array-like pixel data; it is cast to ``uint8`` before being
            handed to ``Image.fromarray``.
    """
    pixels = np.uint8(image)
    Image.fromarray(pixels).show()


# Load MNIST with flatten=True and without normalisation
(x_train, y_train), (x_test, y_test) = load_mnist(flatten=True, normalize=False)

# Shapes of the training and test splits
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

# First training sample and its label
label = y_train[0]
image = x_train[0]
print(label)

# Reshape the flat vector into a 28x28 grid so it can be displayed
print(image.shape)
image = image.reshape(28, 28)
print(image.shape)
image_show(image)


(60000, 784)
(60000,)
(10000, 784)
(10000,)
5
(784,)
(28, 28)


In [8]:
import pickle
import sys
from pathlib import Path

from tqdm import tqdm

# Put the parent directory on the module search path before importing the
# project-local ``dataset`` package; appending it after the import has no
# effect on that import.
current_dir = Path().resolve()
sys.path.append(str(current_dir.parent))

from dataset.mnist import load_mnist


def get_data():
    """Load MNIST and return only the test split.

    ``load_mnist`` is called with ``flatten=True``, ``normalize=True`` and
    ``one_hot_label=False``; the training split it returns is discarded.

    Returns:
        tuple: ``(x_test, t_test)`` -- the test inputs and their labels.
    """
    (_, _), (x_test, t_test) = load_mnist(
        flatten=True,
        normalize=True,
        one_hot_label=False,
    )
    return x_test, t_test


def init_network():
    """Load the network parameters stored in ``sample_weight.pkl``.

    The file is opened relative to the current working directory.

    Returns:
        The object unpickled from the file. ``predict`` indexes it with the
        keys ``W1``-``W3`` and ``b1``-``b3``.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading,
    # so only use this with a weight file from a trusted source.
    with open("sample_weight.pkl", "rb") as weight_file:
        return pickle.load(weight_file)


def predict(network, x):
    """Run the 3-layer network forward and return softmax scores.

    Args:
        network: Mapping holding the weights ``W1``-``W3`` and biases
            ``b1``-``b3``.
        x: Input array; it is multiplied with ``W1`` via ``np.dot``.

    Returns:
        The result of ``softmax`` applied to the output layer's affine result.
    """
    w1, w2, w3 = (network[name] for name in ("W1", "W2", "W3"))
    b1, b2, b3 = (network[name] for name in ("b1", "b2", "b3"))

    # Two sigmoid hidden layers followed by a softmax output layer.
    hidden1 = sigmoid(np.dot(x, w1) + b1)
    hidden2 = sigmoid(np.dot(hidden1, w2) + b2)
    return softmax(np.dot(hidden2, w3) + b3)


x, t = get_data()
network = init_network()

# Classify the test samples one at a time and count the correct predictions.
accuracy_cnt = 0
for i in tqdm(range(len(x))):
    image, expected_label = x[i], t[i]

    y = predict(network, image)
    p = np.argmax(y)  # index of the largest score is the predicted label
    accuracy_cnt += int(p == expected_label)
print(f"정확도 : {accuracy_cnt / len(x)}")


100%|██████████| 10000/10000 [00:10<00:00, 978.71it/s]

정확도 : 0.9352





In [9]:
# Exercise: predict in chunks of `batch_size` samples to speed up inference
x, t = get_data()
network = init_network()

batch_size = 1000
accuracy_cnt = 0

for i in tqdm(range(0, len(x), batch_size)):
    x_batch = x[i:i + batch_size]
    t_batch = t[i:i + batch_size]
    y_batch = predict(network, x_batch)
    # For the 2-D score matrix, take the index of the largest value in each row
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t_batch)

# Report the result; without this the accumulated count is never shown.
print(f"정확도 : {accuracy_cnt / len(x)}")

100%|██████████| 10/10 [00:00<00:00, 157.14it/s]
