# 신경망 학습

## 단순한 신경망 구현 : Logic Gate

### 필요한 모듈 import

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# newer releases no longer accept the old names. Try the new name first and
# fall back to the legacy one on older installations.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')

### 하이퍼 파라미터(Hyper Parameter)

In [None]:
# Number of gradient-descent iterations run by the logic-gate training loops.
epochs = 1000
# Learning rate: step size multiplied with each gradient before the update.
lr = 0.1

### 유틸 함수들(Util Functions)

In [None]:
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def mean_squared_error(y_pred, y_true):
    """Return the mean of the squared differences between targets and predictions."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def cross_entropy_error(y_pred, y_true):
    """Return the summed cross-entropy between predictions and targets.

    1-D inputs are reshaped to a single row first. A small constant is added
    to the predictions before taking the log so that log(0) never occurs.
    """
    eps = 1e-7
    if y_true.ndim == 1:
        y_true, y_pred = y_true.reshape(1, -1), y_pred.reshape(1, -1)
    weighted_log = y_true * np.log(y_pred + eps)
    return -np.sum(weighted_log)

def cross_entropy_error_for_batch(y_pred, y_true):
    """Return the cross-entropy summed over all entries and divided by the row count.

    1-D inputs are reshaped to a single row, so they count as a batch of one.
    A small constant is added to the predictions before the log to avoid log(0).
    """
    eps = 1e-7
    if y_true.ndim == 1:
        y_true, y_pred = y_true.reshape(1, -1), y_pred.reshape(1, -1)
    n_rows = y_pred.shape[0]
    total = -np.sum(y_true * np.log(y_pred + eps))
    return total / n_rows

def cross_entropy_error_for_bin(y_pred, y_true):
    """Return half the summed binary cross-entropy between predictions and targets.

    Args:
        y_pred: predicted probabilities (scalar or array).
        y_true: binary targets, broadcastable against ``y_pred``.

    Returns:
        0.5 * sum(-y_true * log(y_pred) - (1 - y_true) * log(1 - y_pred)).

    Predictions are clipped to [1e-7, 1 - 1e-7] before the logs are taken, so
    a saturated prediction of exactly 0 or 1 gives a large finite loss instead
    of inf/nan. Predictions strictly inside that interval are unaffected.
    """
    delta = 1e-7
    y_pred = np.clip(y_pred, delta, 1 - delta)
    return 0.5 * np.sum(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def softmax(a):
    """Return the softmax of ``a`` computed along its last axis.

    Args:
        a: scores; a 1-D vector or an array whose last axis indexes the classes
           (e.g. one row per sample).

    Returns:
        Array of the same shape as ``a`` whose entries along the last axis are
        non-negative and sum to 1.

    The maximum along the last axis is subtracted before exponentiating. This
    leaves the result mathematically unchanged but keeps ``exp`` from
    overflowing on large scores. Normalising per last axis (rather than over
    the whole array) keeps the samples of a batch independent of each other.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

def differential(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Each entry along the first axis of ``x`` is perturbed in place by +/- 1e-5,
    ``f(x)`` is evaluated at both points, and the entry is then restored.

    Args:
        f: callable taking ``x`` and returning a scalar value.
        x: array to differentiate with respect to; written to temporarily.

    Returns:
        Array shaped like ``x`` holding the finite-difference estimates.
    """
    h = 1e-5
    gradient = np.zeros_like(x)

    for idx in range(x.shape[0]):
        saved = x[idx]

        x[idx] = saved + h
        forward = f(x)
        x[idx] = saved - h
        backward = f(x)
        # Put the original value back before moving to the next entry.
        x[idx] = saved

        gradient[idx] = (forward - backward) / (2 * h)

    return gradient


### 신경망

In [None]:
class LogicGateNet():
    """Single sigmoid unit with two inputs, trained via numerical gradients."""

    def __init__(self):
        # Re-seeding the global NumPy RNG gives every instance the same
        # starting parameters.
        np.random.seed(1)
        # weights: two standard-normal draws, shape (2,).
        self.weights = np.random.randn(2)
        # bias: one uniform draw, shape (1,).
        self.bias = np.random.rand(1)

    def predict(self, x):
        """Return sigmoid(x . W + b) for inputs ``x`` whose last dimension is 2."""
        column_weights = self.weights.reshape(-1, 1)
        pre_activation = np.dot(x, column_weights) + self.bias
        return sigmoid(pre_activation)

    def loss(self, x, y_true):
        """Return the binary cross-entropy of the predictions for ``x``."""
        return cross_entropy_error_for_bin(self.predict(x), y_true)

    def get_gradient(self, x, t):
        """Return numerical gradients of the loss as ``(grad_W, grad_b)``."""
        # `differential` perturbs self.weights / self.bias in place, so the
        # objective ignores its argument and simply re-evaluates the loss.
        def objective(_params):
            return self.loss(x, t)

        weight_grad = differential(objective, self.weights)
        bias_grad = differential(objective, self.bias)
        return weight_grad, bias_grad

### AND Gate

#### 모델 생성 및 학습

In [None]:
# Train a single-unit network to reproduce the AND gate.
AND = LogicGateNet()

# All four input combinations and the matching AND truth-table targets.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [0], [0], [1]])

# Loss recorded after every update step.
train_loss_list = list()

for i in range(epochs):
  # Numerical gradient of the loss w.r.t. the weights and the bias.
  grad_W, grad_b = AND.get_gradient(X, Y)

  # Gradient-descent step; the parameter arrays are updated in place.
  AND.weights -= lr * grad_W
  AND.bias -= lr * grad_b

  loss = AND.loss(X, Y)
  train_loss_list.append(loss)

  # Report progress every 100 iterations (the printed index is 0-based).
  if i % 100 == 0:
    print('Epoch: {}, loss: {}, Weights: {}, Bias: {}'.format(i, loss, AND.weights, AND.bias))

#### 테스트

In [None]:
# Show the trained AND model's sigmoid outputs for the inputs in X.
print(AND.predict(X))

### OR Gate

#### 모델 생성 및 학습

In [None]:
# Train a single-unit network to reproduce the OR gate.
OR = LogicGateNet()

# All four input combinations and the matching OR truth-table targets.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_2 = np.array([[0], [1], [1], [1]])

# Loss recorded after every update step (replaces the list from the previous run).
train_loss_list = list()

for i in range(epochs):
  # Numerical gradient of the loss w.r.t. the weights and the bias.
  grad_W, grad_b = OR.get_gradient(X, Y_2)

  # Gradient-descent step; the parameter arrays are updated in place.
  OR.weights -= lr * grad_W
  OR.bias -= lr * grad_b

  loss = OR.loss(X, Y_2)
  train_loss_list.append(loss)

  # Report progress every 100 iterations (the printed index is 0-based).
  if i % 100 == 0:
    print('Epoch: {}, loss: {}, Weights: {}, Bias: {}'.format(i, loss, OR.weights, OR.bias))

#### 테스트

In [None]:
# Show the trained OR model's sigmoid outputs for the inputs in X.
print(OR.predict(X))

### NAND Gate

#### 모델 생성 및 학습

In [None]:
# Train a single-unit network to reproduce the NAND gate, using the same
# procedure as the AND and OR cells.
NAND = LogicGateNet()

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# NAND truth table: the output is 0 only when both inputs are 1.
Y_3 = np.array([[1], [1], [1], [0]])

# Loss recorded after every update step.
train_loss_list = list()

for i in range(epochs):
  # Numerical gradient of the loss w.r.t. the weights and the bias.
  grad_W, grad_b = NAND.get_gradient(X, Y_3)

  # Gradient-descent step; the parameter arrays are updated in place.
  NAND.weights -= lr * grad_W
  NAND.bias -= lr * grad_b

  loss = NAND.loss(X, Y_3)
  train_loss_list.append(loss)

  # Report progress every 100 iterations (the printed index is 0-based).
  if i % 100 == 0:
    print('Epoch: {}, loss: {}, Weights: {}, Bias: {}'.format(i, loss, NAND.weights, NAND.bias))


#### 테스트

In [None]:
# Show the NAND model's sigmoid outputs for the inputs in X.
print(NAND.predict(X))

### XOR Gate

#### 모델 생성 및 학습

In [None]:
# Attempt to fit XOR with the same single-unit network.
# NOTE: LogicGateNet is one sigmoid over a linear combination of the inputs,
# i.e. a single linear decision boundary, and XOR is not linearly separable,
# so this model cannot fit all four targets.
XOR = LogicGateNet()

# All four input combinations and the matching XOR truth-table targets.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_4 = np.array([[0], [1], [1], [0]])

# Loss recorded after every update step.
train_loss_list = list()

for i in range(epochs):
  # Numerical gradient of the loss w.r.t. the weights and the bias.
  grad_W, grad_b = XOR.get_gradient(X, Y_4)

  # Gradient-descent step; the parameter arrays are updated in place.
  XOR.weights -= lr * grad_W
  XOR.bias -= lr * grad_b

  loss = XOR.loss(X, Y_4)
  train_loss_list.append(loss)

  # Report progress after every 100th iteration (the printed index is 1-based).
  if i % 100 == 99:
    print('Epoch: {}, loss: {}, Weights: {}, Bias: {}'.format(i+1, loss, XOR.weights, XOR.bias))

#### 테스트

In [None]:
# Show the single-unit XOR model's sigmoid outputs for the inputs in X.
print(XOR.predict(X))

#### 2층 신경망으로 XOR 게이트 구현(1)

- 얕은 신경망, Shallow Neural Network

- 두 논리게이트(NAND, OR)를 통과하고  
  AND 게이트로 합쳐서 구현

- 06 신경망 구조 참고

In [None]:
# Compose XOR from the gate models built earlier:
# XOR(a, b) = AND(NAND(a, b), OR(a, b)).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_5 = np.array([[0], [1], [1], [0]])


# First layer: run the inputs through the NAND and OR models.
s1 = NAND.predict(X)
s2 = OR.predict(X)
# Put the two outputs side by side so each row is (NAND output, OR output),
# which is the two-column input the AND model expects.
X_2 = np.array([s1, s2]).T.reshape(-1, 2)

#### 테스트

In [None]:
# Second layer: feed the combined NAND/OR outputs through the AND model.
print(AND.predict(X_2))

#### 2층 신경망으로 XOR 게이트 구현(2)
- 클래스로 구현

In [None]:
class XORNet():
    """Two-layer sigmoid network for XOR, trained via numerical gradients.

    Parameters live in ``self.params`` under the keys 'W1', 'b1', 'W2', 'b2'.
    All four are 1-D arrays so that ``differential`` can perturb them entry by
    entry.
    """

    def __init__(self):
        # Fixed seed so every instance starts from the same parameters.
        np.random.seed(1)

        def weight_init():
            params = {}
            params['W1'] = np.random.randn(2)
            params['b1'] = np.random.rand(2)
            params['W2'] = np.random.randn(2)
            params['b2'] = np.random.rand(1)
            return params

        self.params = weight_init()

    def predict(self, x):
        """Return the network output for inputs ``x`` of shape (N, 2).

        Returns:
            Array of shape (N, 1) with sigmoid outputs.
        """
        W1 = self.params['W1'].reshape(-1, 1)
        W2 = self.params['W2'].reshape(-1, 1)
        b1, b2 = self.params['b1'], self.params['b2']

        # (N, 2) . (2, 1) -> (N, 1); adding the length-2 bias broadcasts this
        # to (N, 2), giving two hidden units that share weights but have
        # separate biases.
        A1 = np.dot(x, W1) + b1
        Z1 = sigmoid(A1)
        # (N, 2) . (2, 1) -> (N, 1)
        A2 = np.dot(Z1, W2) + b2
        y = sigmoid(A2)

        return y

    def loss(self, x, y_true):
        """Return the binary cross-entropy of the predictions for ``x``."""
        y_pred = self.predict(x)
        return cross_entropy_error_for_bin(y_pred, y_true)

    def get_gradient(self, x, t):
        """Return a dict of numerical loss gradients keyed like ``self.params``."""
        # `differential` perturbs the parameter arrays in place, so the
        # objective ignores its argument and simply re-evaluates the loss.
        def loss_grad(grad):
            return self.loss(x, t)

        grad = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grad[key] = differential(loss_grad, self.params[key])

        return grad

#### 하이퍼 파라미터(Hyper Parameter)
- 재조정

In [None]:
# Learning rate used for the two-layer XOR training below (overrides the earlier value).
lr = 0.3

#### 모델 생성 및 학습

In [None]:
# Train the two-layer XORNet on the XOR truth table.
XOR = XORNet()
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_5 = np.array([[0], [1], [1], [0]])

# Loss recorded after every update step.
train_loss_list = list()

for i in range(epochs):
  # Dict of numerical gradients, keyed like XOR.params.
  grad = XOR.get_gradient(X, Y_5)

  # Gradient-descent step on every parameter array, in place.
  for key in ('W1', 'b1', 'W2', 'b2'):
    XOR.params[key] -= lr * grad[key]

  loss = XOR.loss(X, Y_5)
  # print(loss)
  train_loss_list.append(loss)

  # Report progress after every 100th iteration (the printed index is 1-based).
  if i % 100 == 99:
    print('Epoch: {}, loss: {}'.format(i+1, loss))

#### 테스트

In [None]:
# Show the two-layer XOR model's outputs for the inputs in X.
print(XOR.predict(X))

## 다중 클래스 분류 : MNIST Dataset

### 배치 처리
- 학습 데이터 전체를 한 번에 진행하지 않고  
  일부 데이터(샘플)를 무작위로 추출해서 조금씩 나누어 진행

- 확률적 경사 하강법(Stochastic Gradient Descent) 또는  
  미니 배치 학습법(mini-batch learning)이라고도 부름

#### 신경망 구현 : MNIST

#### 필요한 모듈 임포트

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.datasets import mnist
import time
from tqdm.notebook import tqdm

#### 데이터 로드

In [None]:
# Load the MNIST train and test splits (images and integer labels) through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#### 데이터 확인

In [None]:
# Print the array shapes of the train and test images/labels.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)


In [None]:
# Inspect the first training image: its shape, its value range, and a grayscale plot.
img = x_train[0]
print(img.shape)
print(img.min(), img.max())
plt.imshow(img, cmap='gray')

In [None]:
# Print the label that belongs to the first training image.
label = y_train[0]
print(label)

#### 데이터 전처리 (Data Preprocessing)

In [None]:
def flatten_for_mnist(x):
    """Flatten every sample of ``x`` into a 1-D float64 feature vector.

    Args:
        x: array-like of shape (n_samples, ...), e.g. (n_samples, 28, 28).

    Returns:
        A new float64 array of shape (n_samples, product of the remaining
        dimensions), with each sample laid out in row-major order.

    The reshape is done in one vectorised call instead of a Python loop over
    the samples, and the feature count is taken from the shape rather than
    from ``x[0]`` so an empty batch is handled as well.
    """
    x = np.asarray(x)
    n_samples = x.shape[0]
    n_features = int(np.prod(x.shape[1:]))
    # astype copies, so the caller's array is never aliased by the result.
    return x.reshape(n_samples, n_features).astype(np.float64)

In [None]:
# Scale the pixel values by 1/255.
# NOTE(review): assumes the raw pixels span 0-255 — the min/max printed in the
# inspection cell can confirm this.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Turn every image into a flat feature vector (one row per sample).
x_train = flatten_for_mnist(x_train)
x_test = flatten_for_mnist(x_test)

print(x_train.shape)
print(x_test.shape)

# One-hot encode the integer labels into 10 classes and convert to NumPy arrays.
y_train_ohe = tf.one_hot(y_train, depth=10).numpy()
y_test_ohe = tf.one_hot(y_test, depth=10).numpy()

print(y_train_ohe.shape)
print(y_test_ohe.shape)

In [None]:
# Sanity check: value range of the first scaled image and its one-hot label.
print(x_train[0].max(), x_train[0].min())
print(y_train_ohe[0])

#### 하이퍼 파라미터(Hyper Parameter)

In [None]:
# Number of passes of the training loop; each pass processes one mini-batch.
epochs = 2
# Learning rate: step size multiplied with each gradient before the update.
lr = 0.1
# Number of samples drawn for each mini-batch.
batch_size = 100
# Total number of training samples.
train_size = x_train.shape[0]

#### 사용되는 함수들(Util Functions)

In [None]:
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def mean_squared_error(y_pred, y_true):
    """Return the mean of the squared differences between targets and predictions."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def cross_entropy_error(y_pred, y_true):
    """Return the summed cross-entropy between predictions and targets.

    1-D inputs are reshaped to a single row first. A small constant is added
    to the predictions before taking the log so that log(0) never occurs.
    """
    eps = 1e-7
    if y_true.ndim == 1:
        y_true, y_pred = y_true.reshape(1, -1), y_pred.reshape(1, -1)
    weighted_log = y_true * np.log(y_pred + eps)
    return -np.sum(weighted_log)

def cross_entropy_error_for_batch(y_pred, y_true):
    """Return the cross-entropy summed over all entries and divided by the row count.

    1-D inputs are reshaped to a single row, so they count as a batch of one.
    A small constant is added to the predictions before the log to avoid log(0).
    """
    eps = 1e-7
    if y_true.ndim == 1:
        y_true, y_pred = y_true.reshape(1, -1), y_pred.reshape(1, -1)
    n_rows = y_pred.shape[0]
    total = -np.sum(y_true * np.log(y_pred + eps))
    return total / n_rows

def cross_entropy_error_for_bin(y_pred, y_true):
    """Return half the summed binary cross-entropy between predictions and targets.

    Args:
        y_pred: predicted probabilities (scalar or array).
        y_true: binary targets, broadcastable against ``y_pred``.

    Returns:
        0.5 * sum(-y_true * log(y_pred) - (1 - y_true) * log(1 - y_pred)).

    Predictions are clipped to [1e-7, 1 - 1e-7] before the logs are taken, so
    a saturated prediction of exactly 0 or 1 gives a large finite loss instead
    of inf/nan. Predictions strictly inside that interval are unaffected.
    """
    delta = 1e-7
    y_pred = np.clip(y_pred, delta, 1 - delta)
    return 0.5 * np.sum(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def softmax(a):
    """Return the softmax of ``a`` computed along its last axis.

    Args:
        a: scores; a 1-D vector or an array whose last axis indexes the classes
           (e.g. one row per sample in a mini-batch).

    Returns:
        Array of the same shape as ``a`` whose entries along the last axis are
        non-negative and sum to 1.

    The maximum along the last axis is subtracted before exponentiating. This
    leaves the result mathematically unchanged but keeps ``exp`` from
    overflowing on large scores. Normalising per last axis (rather than over
    the whole array) keeps the samples of a batch independent of each other.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

def differential_1d(f, x):
    """Estimate the gradient of ``f`` with central differences over a 1-D array.

    Each entry of ``x`` is perturbed in place by +/- 1e-5, ``f(x)`` is
    evaluated at both points, and the entry is then restored.

    Args:
        f: callable taking ``x`` and returning a scalar value.
        x: array to differentiate with respect to; written to temporarily.

    Returns:
        Array shaped like ``x`` holding the finite-difference estimates.
    """
    h = 1e-5
    gradient = np.zeros_like(x)

    for idx in range(x.shape[0]):
        saved = x[idx]

        x[idx] = saved + h
        forward = f(x)
        x[idx] = saved - h
        backward = f(x)
        # Put the original value back before moving to the next entry.
        x[idx] = saved

        gradient[idx] = (forward - backward) / (2 * h)

    return gradient

def differential_2d(f, X):
    """Estimate the gradient of ``f`` for a 1-D or 2-D parameter array.

    A 1-D ``X`` is passed straight to ``differential_1d``. Otherwise every row
    of ``X`` is handed to ``differential_1d`` in turn and the per-row results
    are collected into an array shaped like ``X``.
    """
    if X.ndim == 1:
        return differential_1d(f, X)

    gradient = np.zeros_like(X)
    for row_idx, row in enumerate(X):
        gradient[row_idx] = differential_1d(f, row)
    return gradient


#### 2층 신경망으로 구현

In [None]:
class MyModel():
    """Two-layer fully connected classifier (784 -> 64 -> 10).

    The hidden layer uses a sigmoid activation and the output layer goes
    through ``softmax``. Parameters live in ``self.params`` under the keys
    'W1', 'b1', 'W2', 'b2' and are trained with numerical gradients.
    """

    def __init__(self):

        def weight_init(input_nodes, hidden_nodes, output_nodes):
            # Fixed seed so every instance starts from the same parameters.
            np.random.seed(777)

            params = {}
            # Small random weights, zero biases.
            params['W1'] = 0.01 * np.random.randn(input_nodes, hidden_nodes)
            params['b1'] = np.zeros(hidden_nodes)
            params['W2'] = 0.01 * np.random.randn(hidden_nodes, output_nodes)
            params['b2'] = np.zeros(output_nodes)

            return params

        self.params = weight_init(784, 64, 10)

    def predict(self, x):
        """Return the output of ``softmax`` applied to the second-layer scores.

        Args:
            x: inputs whose last dimension is 784.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        A1 = np.dot(x, W1) + b1
        Z1 = sigmoid(A1)
        A2 = np.dot(Z1, W2) + b2
        y = softmax(A2)
        return y

    def loss(self, x, y_true):
        """Return the batch-averaged categorical cross-entropy for ``x``.

        The output is a 10-class softmax compared against one-hot targets, so
        the multi-class cross-entropy is the matching loss. Averaging over the
        batch also keeps the gradient scale independent of the batch size.
        """
        y_pred = self.predict(x)
        return cross_entropy_error_for_batch(y_pred, y_true)

    def accuracy(self, x, y_true):
        """Return the fraction of rows whose argmax prediction matches ``y_true``."""
        y_pred = self.predict(x)
        y_argmax = np.argmax(y_pred, axis=1)
        y_true_argmax = np.argmax(y_true, axis=1)

        accuracy = np.mean(y_argmax == y_true_argmax)
        return accuracy

    def get_gradient(self, x, t):
        """Return a dict of numerical loss gradients keyed like ``self.params``."""
        # `differential_2d` perturbs the parameter arrays in place, so the
        # objective ignores its argument and simply re-evaluates the loss.
        def loss_grad(grad):
            return self.loss(x, t)

        grad = {}
        grad['W1'] = differential_2d(loss_grad, self.params['W1'])
        grad['b1'] = differential_2d(loss_grad, self.params['b1'])
        grad['W2'] = differential_2d(loss_grad, self.params['W2'])
        grad['b2'] = differential_2d(loss_grad, self.params['b2'])

        return grad

#### 모델 생성 및 학습
- 시간 많이 소요

In [None]:
# Train MyModel on MNIST with mini-batch gradient descent and numerical gradients.
model = MyModel()

# Per-iteration history of the batch loss and the train/test accuracy.
train_loss_list = list()
train_acc_list = list()
test_acc_list = list()
# NOTE(review): computed but never used below; the loop runs `epochs`
# mini-batch steps, not `epochs` full passes over the training set.
iter_per_epoch = max(train_size / batch_size, 1)

start_time = time.time()

for i in tqdm(range(epochs)):

    # Draw `batch_size` random training indices (np.random.choice samples
    # with replacement by default).
    batch_idx = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_idx]
    y_batch = y_train_ohe[batch_idx]

    # Numerical gradient of the batch loss w.r.t. every parameter array.
    grad = model.get_gradient(x_batch, y_batch)

    # Gradient-descent step on every parameter array, in place.
    for key in grad.keys():
        model.params[key] -= lr * grad[key]

    loss = model.loss(x_batch, y_batch)
    train_loss_list.append(loss)

    # Accuracy is evaluated on the full train and test sets after every step.
    train_accuracy = model.accuracy(x_train, y_train_ohe)
    test_accuracy = model.accuracy(x_test, y_test_ohe)
    train_acc_list.append(train_accuracy)
    test_acc_list.append(test_accuracy)

    # The value labelled "Epoch" is the 1-based mini-batch step index.
    print('Epoch: {}, Train Loss: {}, Train Accuracy: {}, Test Accuracy: {}'.format(i+1, loss, train_accuracy, test_accuracy))

end_time = time.time()

# Report the total wall-clock training time in seconds.
print('총 학습 소요시간: {:.3f}s'.format(end_time - start_time))

### 모델의 결과
- 모델은 학습이 잘 될 수도, 잘 안 될 수도 있음

- 만약, 학습이 잘 되지 않았다면,  
  학습이 잘 되기 위해서 어떠한 조치를 취해야 하는가?
  - 다양한 학습관련 기술이 존재