In [None]:
import numpy as np

In [None]:
def sigmoid(x):
  """Element-wise logistic function 1 / (1 + exp(-x)), evaluated without overflow.

  The textbook form calls exp(-x), which overflows (and emits a RuntimeWarning)
  for large negative x. Here only exp(-|x|) is evaluated, which always lies in
  (0, 1], and the algebraically equivalent expression is picked per element:

    x >= 0:  1 / (1 + exp(-x))
    x <  0:  exp(x) / (1 + exp(x))

  Args:
    x: scalar or array-like of real numbers.

  Returns:
    Values in [0, 1] with the same shape as x (a NumPy scalar for scalar input).
  """
  x = np.asarray(x)
  z = np.exp(-np.abs(x))  # never overflows: the exponent is always <= 0
  out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
  # Indexing with () turns a 0-d result back into a scalar and leaves n-d arrays as they are.
  return out[()]

In [None]:
class NeuralNetwork:
  """Fully connected network with one hidden layer and sigmoid activations.

  Layer naming used throughout: layer 1 is the input (passed through unchanged),
  layer 2 is the hidden layer, layer 3 is the output layer. Z* holds a layer's
  pre-activation values and O* its outputs.

  Training is plain gradient descent on whatever batch is handed to train().

  NOTE(review): feed_forward() reports a binary cross-entropy loss, while
  back_propagation() uses the output delta (O3 - y) * O3 * (1 - O3), which is the
  gradient of a squared-error loss through a sigmoid. The reported loss is
  therefore a monitoring value, not the exact quantity being minimised. This is
  left as-is on purpose; confirm before changing it.
  """

  def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
    """Create the weight matrices, biases and activation placeholders.

    Args:
      input_nodes: number of input features.
      hidden_nodes: number of hidden units.
      output_nodes: number of output units.
      learning_rate: step size applied to every parameter update.
    """
    self.input_nodes = input_nodes
    self.hidden_nodes = hidden_nodes
    self.output_nodes = output_nodes
    self.learning_rate = learning_rate

    # Weights: standard normal scaled by 1/sqrt(fan_in) (Xavier-style initialization).
    # Biases: uniform in [0, 1).
    self.W1 = np.random.randn(self.input_nodes, self.hidden_nodes) / np.sqrt(self.input_nodes)
    self.b1 = np.random.rand(1, self.hidden_nodes)

    self.W2 = np.random.randn(self.hidden_nodes, self.output_nodes) / np.sqrt(self.hidden_nodes)
    self.b2 = np.random.rand(1, self.output_nodes)

    # Placeholders; every forward pass overwrites them.
    self.Z1 = np.zeros([1, input_nodes])
    self.O1 = np.zeros([1, input_nodes])

    self.Z2 = np.zeros([1, self.hidden_nodes])
    self.O2 = np.zeros([1, self.hidden_nodes])

    self.Z3 = np.zeros([1, self.output_nodes])
    self.O3 = np.zeros([1, self.output_nodes])

    # Set by feed_forward(); None until the first training step.
    self.loss = None


  def _forward(self):
    """Propagate self.input_data through the network and cache Z1..Z3 / O1..O3.

    Needs only self.input_data (no target), so it is safe to use for inference.

    Returns:
      The output-layer activations O3, shape (rows of input_data, output_nodes).
    """
    # The input layer passes its values through unchanged.
    self.Z1 = self.input_data
    self.O1 = self.input_data

    self.Z2 = np.dot(self.O1, self.W1) + self.b1
    self.O2 = sigmoid(self.Z2)

    self.Z3 = np.dot(self.O2, self.W2) + self.b2
    self.O3 = sigmoid(self.Z3)
    return self.O3


  def feed_forward(self):
    """Run a forward pass and store the cross-entropy loss in self.loss.

    Requires self.input_data and self.y to be set beforehand (train() does this).
    """
    delta = 1e-7  # keeps log() away from log(0), which would diverge to -inf

    h = self._forward()

    self.loss = -np.mean(self.y * np.log(h + delta) + (1 - self.y) * np.log(1 - h + delta))


  def back_propagation(self):
    """Apply one gradient-descent update using the activations of the last forward pass.

    Both layer deltas are computed before any parameter is modified, so the
    hidden-layer delta is back-propagated through the same W2 that produced the
    forward pass (not through the already-updated weights).
    """
    loss_3 = (self.O3 - self.y) * self.O3 * (1 - self.O3)
    loss_2 = np.dot(loss_3, self.W2.T) * self.O2 * (1 - self.O2)

    # Bias gradients are summed over the batch axis so the biases keep their
    # (1, n) shape for any batch size; for a single sample this is just the delta.
    self.W2 -= np.dot(self.O2.T, loss_3) * self.learning_rate
    self.b2 -= np.sum(loss_3, axis=0, keepdims=True) * self.learning_rate

    self.W1 -= np.dot(self.O1.T, loss_2) * self.learning_rate
    self.b1 -= np.sum(loss_2, axis=0, keepdims=True) * self.learning_rate


  def train(self, input_data, target_data):
    """Perform one training step (forward pass, loss, parameter update).

    Args:
      input_data: 2-D array, shape (batch, input_nodes). Used as given; any
        rescaling must be done by the caller.
      target_data: 2-D array, shape (batch, output_nodes).
    """
    self.input_data = input_data
    self.y = target_data

    self.feed_forward()

    self.back_propagation()


  def predict(self, input_data_list, target_data_list):
    """Classify every row, print the accuracy and return it.

    Each input row is rescaled here with `/ 255.0 * 0.99 + 0.01` before the
    forward pass, so callers pass raw (unscaled) values. Only the forward pass is
    run: no target is needed and self.loss is left untouched.

    Args:
      input_data_list: 2-D array, one sample per row.
      target_data_list: 1-D array of class indices, one per sample.

    Returns:
      Fraction of samples whose arg-max output equals the target.
    """
    total_num = target_data_list.shape[0]
    answer_num = 0

    for idx in range(len(target_data_list)):
      input_data = input_data_list[idx] / 255.0 * 0.99 + 0.01
      self.input_data = np.array(input_data, ndmin=2)

      target_value = target_data_list[idx]

      self._forward()

      predict_value = np.argmax(self.O3)

      if predict_value == target_value:
        answer_num += 1

    accuracy = answer_num / total_num
    print('Accuracy: ', accuracy)
    return accuracy
    

In [None]:
# Network dimensions and optimisation settings.
input_nodes = 784
hidden_nodes = 100
output_nodes = 10
learning_rate = 0.3

# NeuralNetwork draws its initial weights and biases from NumPy's global random
# generator. Seeding right before construction makes a fresh run (and a re-run
# of this cell) start from the same parameters, so accuracy figures obtained
# with different settings can be compared fairly.
SEED = 42
np.random.seed(SEED)

# Create the neural network object
model = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

In [None]:
# Train

epochs = 5

# The first column of each row is used as the class index, the rest as input values.
train_data_list = np.loadtxt('./mnist_train.csv', delimiter=',', dtype=np.float32)

for epoch in range(epochs):
  for step, train_data in enumerate(train_data_list):
    label = int(train_data[0])

    # Rescale the inputs into 0.01..1.0; an input of exactly 0 would stop the
    # corresponding weight updates.
    input_data = train_data[1:] / 255.0 * 0.99 + 0.01

    # Soft one-hot target: 0.99 for the true class, 0.01 everywhere else.
    target_data = np.zeros(output_nodes) + 0.01
    target_data[label] = 0.99

    # The model works on 2-D arrays, so each sample is passed as a single-row batch.
    model.train(
      np.array(input_data, ndmin=2),
      np.array(target_data, ndmin=2),
    )

    # Report the loss of the current sample once every 1000 steps.
    if step % 1000 != 0:
      continue
    print('Epoch: {:2d}  Step: {:7d}  Loss: {:10f}'.format(epoch, step, model.loss))

Epoch:  0  Step:       0  Loss:   0.912552
Epoch:  0  Step:    1000  Loss:   0.205158
Epoch:  0  Step:    2000  Loss:   0.479460
Epoch:  0  Step:    3000  Loss:   0.246177
Epoch:  0  Step:    4000  Loss:   0.098203
Epoch:  0  Step:    5000  Loss:   0.120299
Epoch:  0  Step:    6000  Loss:   0.082670
Epoch:  0  Step:    7000  Loss:   0.361919
Epoch:  0  Step:    8000  Loss:   0.099397
Epoch:  0  Step:    9000  Loss:   0.094805
Epoch:  0  Step:   10000  Loss:   0.096559
Epoch:  0  Step:   11000  Loss:   0.101311
Epoch:  0  Step:   12000  Loss:   0.090518
Epoch:  0  Step:   13000  Loss:   0.149562
Epoch:  0  Step:   14000  Loss:   0.093545
Epoch:  0  Step:   15000  Loss:   0.091422
Epoch:  0  Step:   16000  Loss:   0.095754
Epoch:  0  Step:   17000  Loss:   0.110818
Epoch:  0  Step:   18000  Loss:   0.098914
Epoch:  0  Step:   19000  Loss:   0.244522
Epoch:  0  Step:   20000  Loss:   0.096035
Epoch:  0  Step:   21000  Loss:   0.230054
Epoch:  0  Step:   22000  Loss:   0.106706
Epoch:  0  

In [None]:
# Test

test_data_list = np.loadtxt('./mnist_test.csv', delimiter=',', dtype=np.float32)

# Column 0 is used as the label and the remaining columns as inputs. The inputs
# are passed unscaled because predict() rescales each row itself.
target_data, input_data = test_data_list[:, 0], test_data_list[:, 1:]

model.predict(input_data_list=input_data, target_data_list=target_data)

Accuracy:  0.9623


In [None]:
# Accuracy comparison

# Varying the weight initialization method
# Xavier: 0.9654
# random: 0.8543
# zero: 0.9394

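# Varying the bias update formula (the output-layer error term loss_3)
# loss_3 = (h-y)*y*(1-y) : 0.9672
# loss_3 = h-y : 0.8582
# NOTE(review): back_propagation in this notebook computes loss_3 = (h-y)*h*(1-h),
# where h is the network output; confirm which variant the 0.9672 figure refers to.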