In [7]:
import numpy as np

def sigmoid(x):
  """Logistic function 1 / (1 + exp(-x)), applied element-wise.

  Evaluated as exp(-logaddexp(0, -x)). That is the same expression
  algebraically, but exp(-x) is never formed on its own, so large negative
  inputs give 0.0 without an overflow warning.

  Args:
    x: real scalar or NumPy array.

  Returns:
    NumPy float scalar (scalar input) or ndarray with the shape of x
    (array input); every value lies in [0, 1].
  """
  # logaddexp(0, -x) == log(1 + exp(-x)), computed by NumPy without overflow.
  return np.exp(-np.logaddexp(0, -x))
def numerical_derivative(f, x, delta_x=1e-4):
  """Estimate the derivative of f with respect to each element of x.

  Uses central differences: every element of x is temporarily overwritten
  in place with value + delta_x and then value - delta_x, f(x) is evaluated
  for both, and (f_plus - f_minus) / (2 * delta_x) is stored at the same
  index of the result. The original value is written back afterwards, even
  when f raises, so the caller's array is never left perturbed.

  Args:
    f: callable invoked as f(x). Because x is modified in place, f may also
      read the same array through another reference instead of its argument.
    x: NumPy array that supports item assignment. It should have a
      floating-point dtype; an integer array would truncate the perturbation.
    delta_x: half-width of the finite-difference step.

  Returns:
    Array created with np.zeros_like(x) holding the estimated derivatives.
  """
  grad = np.zeros_like(x)
  it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
  while not it.finished:
    idx = it.multi_index
    tmp_val = x[idx]
    try:
      x[idx] = float(tmp_val) + delta_x
      fx1 = f(x)
      x[idx] = float(tmp_val) - delta_x
      fx2 = f(x)
    finally:
      # Always undo the perturbation, including when f raised.
      x[idx] = tmp_val
    if np.isscalar(fx1):
      grad[idx] = (fx1 - fx2) / (2 * delta_x)
    else:
      # f returned an array: difference the entry at the perturbed index.
      grad[idx] = (fx1[idx] - fx2[idx]) / (2 * delta_x)
    it.iternext()
  return grad

In [20]:
class MNIST_DL:
  """Input -> hidden -> output sigmoid network trained one sample at a time.

  Gradients come from numerical_derivative (finite differences of the
  cross-entropy loss) and are applied as plain gradient-descent steps.

  A data row is read as [label, feature_0, feature_1, ...]: element 0 is the
  integer class index and the remaining elements are divided by 255 and
  mapped into [0.01, 1.0] before they enter the network.
  """

  def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
    """Create randomly initialised weights and biases.

    Args:
      input_nodes: number of feature values per sample.
      hidden_nodes: width of the hidden layer.
      output_nodes: number of classes.
      learning_rate: multiplier applied to each gradient in train().
    """
    self.__input_nodes = input_nodes
    self.__hidden_nodes = hidden_nodes
    self.__output_nodes = output_nodes

    # Zero-mean weights scaled by fan-in. All-positive uniform [0, 1) weights
    # make every pre-activation of a wide layer large and positive, which
    # saturates the sigmoids and leaves the finite-difference gradient at zero.
    self.__W2 = np.random.randn(input_nodes, hidden_nodes) / np.sqrt(input_nodes / 2)
    self.__b2 = np.random.rand(hidden_nodes)
    self.__W3 = np.random.randn(hidden_nodes, output_nodes) / np.sqrt(hidden_nodes / 2)
    self.__b3 = np.random.rand(output_nodes)

    self.__learning_rate = learning_rate

    # Most recent training sample; set by train() and read by the loss.
    self.__input_data = None
    self.__target_data = None

  @staticmethod
  def __normalize(raw_values):
    """Map raw 0..255 values into [0.01, 1.0]."""
    return (raw_values / 255.0 * 0.99) + 0.01

  def __forward(self, input_data):
    """Return the output-layer activations for one normalised input vector."""
    a2 = sigmoid(np.dot(input_data, self.__W2) + self.__b2)
    return sigmoid(np.dot(a2, self.__W3) + self.__b3)

  def __feed_forward(self):
    """Cross-entropy loss of the current parameters on the stored sample."""
    if self.__target_data is None:
      raise RuntimeError("train() must be called before the loss can be evaluated")

    delta = 1e-7  # keeps log() away from zero
    a3 = self.__forward(self.__input_data)
    return -np.sum(self.__target_data * np.log(a3 + delta) +
                   (1 - self.__target_data) * np.log((1 - a3) + delta))

  def loss_val(self):
    """Return the cross-entropy loss on the sample last passed to train().

    Raises:
      RuntimeError: if train() has not been called yet.
    """
    return self.__feed_forward()

  def train(self, training_data):
    """Run one gradient-descent step on a single [label, features...] row.

    Args:
      training_data: 1-D array; element 0 is the class index, the rest are
        the raw feature values.
    """
    # Soft one-hot target: 0.99 for the labelled class, 0.01 elsewhere.
    self.__target_data = np.zeros(self.__output_nodes) + 0.01
    self.__target_data[int(training_data[0])] = 0.99
    self.__input_data = self.__normalize(training_data[1:])

    # numerical_derivative perturbs each parameter array in place, so the
    # loss picks up the change through self and ignores its argument.
    def loss(_):
      return self.__feed_forward()

    # Step against the gradient; each update is applied before the next
    # gradient is estimated.
    self.__W2 -= self.__learning_rate * numerical_derivative(loss, self.__W2)
    self.__b2 -= self.__learning_rate * numerical_derivative(loss, self.__b2)
    self.__W3 -= self.__learning_rate * numerical_derivative(loss, self.__W3)
    self.__b3 -= self.__learning_rate * numerical_derivative(loss, self.__b3)

  def predict(self, input_data):
    """Return the index of the strongest output for a normalised input vector."""
    return np.argmax(self.__forward(input_data))

  def accuracy(self, test_data):
    """Classify every row of test_data and print the percentage that match.

    Args:
      test_data: 2-D array whose rows are [label, features...].

    Returns:
      (matched_list, not_matched_list): row indices predicted correctly and
      incorrectly.
    """
    matched_list = []
    not_matched_list = []

    for index in range(len(test_data)):
      label = int(test_data[index, 0])
      data = self.__normalize(test_data[index, 1:])
      if label == self.predict(data):
        matched_list.append(index)
      else:
        not_matched_list.append(index)

    total = len(test_data)
    # Report 0.0 for an empty set instead of dividing by zero.
    percent = 100 * (len(matched_list) / total) if total else 0.0
    print("Current accuracy =", percent, " %")
    return matched_list, not_matched_list

In [9]:
# Folder containing the MNIST CSV files. It is kept in one place so that only
# this line has to change when the notebook runs on another machine.
MNIST_DATA_DIR = r"C:\Users\skygr\OneDrive\바탕 화면"

# Both files are comma-separated and every value is parsed as float32.
training_data = np.loadtxt(MNIST_DATA_DIR + r"\mnist_train.csv",
                           delimiter=',', dtype=np.float32)
test_data = np.loadtxt(MNIST_DATA_DIR + r"\mnist_test.csv",
                       delimiter=',', dtype=np.float32)

In [None]:
# Network dimensions: each data row must hold input_nodes values after its
# label column, and labels must be valid indices below output_nodes.
input_nodes = 784
hidden_nodes = 100
output_nodes = 10
learning_rate = 1e-4

nn = MNIST_DL(input_nodes, hidden_nodes, output_nodes, learning_rate)

# Train on one randomly drawn row per step. Every step estimates gradients by
# finite differences (two loss evaluations per parameter), so steps are slow.
for step in range(60001):
    # randint's upper bound is exclusive: passing len(training_data) lets the
    # last row be drawn too.
    index = np.random.randint(0, len(training_data))
    nn.train(training_data[index])
    if step % 400 == 0:
        print("step =", step, ", loss_val =", nn.loss_val())

# Bind the result so the cell shows only the printed accuracy rather than
# echoing two long index lists.
matched_list, not_matched_list = nn.accuracy(test_data)