<a href="https://colab.research.google.com/github/SoYeoni621/mulcam/blob/master/02_17_Thurs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

In [None]:
import pickle
def load_mnist(normalize=True, flatten=True, one_hot_label=False):
  def _change_one_hot_label(X):
    T = np.zeros((X.size, 10))
    for idx, row  in enumerate(T):
      row[X[idx]] = 1

      return T

  with open('/content/drive/MyDrive/딥러닝/mnist.pkl', 'rb') as f:
      dataset = pickle.load(f)


  if normalize:
      for key in ('train_img', 'test_img'):
        dataset[key] = dataset[key].astype(np.float32)
        dataset[key] /= 255.0
    
  if one_hot_label:
    dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
    dataset['test_label'] = _change_one_hot_label(dataset['test_label'])

  if not flatten:
    for key in ('train_img', 'test_img'):
      dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

  return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])

In [None]:
def sigmoid(x):
  return 1 / (1 + np.exp(-x))

In [None]:
def softmax(x):
  exp_x = np.exp(x)
  sum_exp_x = np.sum(exp_x)
  y = exp_x / sum_exp_x

  return y

In [None]:
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]

    if y.size == t.size:
        t = t.argmax(axis=1)

    delta = 1e-7
    return -np.sum(np.log(y[np.arange(batch_size), t] + delta))/batch_size

In [None]:
def _numerical_gradient_no_batch(f, x):
  h = 1e-4
  grad = np.zeros_like(x) #x와 같은 배열 생성

  for idx in range(x.size):
    tmp_val = x[idx]

    #f(x+h)
    x[idx] = float(tmp_val) + h
    fxh1 = f(x)

    #f(x-h)
    x[idx] = float(tmp_val) - h
    fxh2 = f(x)

    grad[idx] = (fxh1 - fxh2) / (2*h)  #미분/기울기
    x[idx] = tmp_val   # 값 복원
 
  return grad

In [None]:
def numerical_gradient(f, X):
  if X.ndim == 1:
    return _numerical_gradient_no_batch(f, X)
  else:
    grad = np.zeros_like(X)

    for idx, x in enumerate(X):
      grad[idx] = _numerical_gradient_no_batch(f, x)

    return grad

In [None]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # 파라미터 랜덤 초기화
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)



    # forward 연산
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        #활성화 함수
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        #소트프맥스 함수 적용
        y = softmax(a2)

        return y

    # loss(cross entropy 값) 값 구하기
    def loss(self, x, t):
        #x는 입력, t라벨
        y = self.predict(x)
        #y 소프트 맥스 통과한 값(예측값)
        return cross_entropy_error(y, t)

    #정확도 계산
    def accuracy(self, x, t):
        y = self.predict(x)
        #y, t 는 원핫 형식 -> 라벨 형식
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        
        accuracy = np.sum(y == t)/float(x.shape[0]) #x.shape[0] 는 전체 갯수
        return accuracy

    #각 파라미터의 기울기 구하기
    def numerical_gradient(self, x, t):
        #목적 함수 - Cross Entropy 
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        #목적함수에 대해 각 파라메터 별로 편미분
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

In [None]:
 # 데이터 로딩 - 원핫 형식으로 
(x_train, y_train), (x_test, y_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

In [None]:
y_train.shape

(60000, 10)

In [None]:
# 2층 신경망 객체 생성
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

In [None]:
print(network.params['W1'].shape)
print(network.params['b1'].shape)
print(network.params['W2'].shape)
print(network.params['b2'].shape)

(784, 50)
(50,)
(50, 10)
(10,)


In [None]:
x = np.random.rand(50, 784)
y = network.predict(x)
np.argmax(y[0])

2

In [None]:
iters_num = 10000 #반복 횟수
train_size = x_train.shape[0] #훈련 데이터 크기
batch_size = 100 #미니배치 사이즈
learning_rate = 0.01 #학습률

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size/batch_size, 1) #1에폭당 반복 횟수, 최소 한번


In [41]:
for i in range(iters_num):
  batch_mask = np.random.choice(train_size, batch_size) # 랜덤하게 0~train_size-1 안에 있는 숫자를 batch_size만큼 가져옴
  x_batch = x_train[batch_mask] # 랜덤하게 batch_size만큼 훈련 데이터에서 선택
  y_batch = y_train[batch_mask] # 랜덤하게 batch_size만큼 라벨에서 선택

  # 기울기 계산
  grad = network.numerical_gradient(x_batch, y_batch)
  
  # 매개 변수 갱신 (ex. w = w - learning_rate * 기울기(미분값))
  for key in ('W1', 'b1', 'W2', 'b2'):
    network.params[key] -= learning_rate * grad[key]

  # 학습 경과 기록(loss구하기)
  loss = network.loss(x_batch, y_batch)
  train_loss_list.append(loss) # loss 값을 train_loss_list 에 추가

  # 파라미터 업데이트 후의 정확도 계산
  train_acc = network.accuracy(x_train, y_train)
  test_acc = network.accuracy(x_test, y_test)

  train_acc_list.append(train_acc)
  test_acc_list.append(test_acc)

  print(f"loss {loss}, train_accuracy {train_acc}, test_accuracy {test_acc}")

loss 6.561431094367921, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 6.450112874964179, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 6.343897447789523, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 6.241609376852648, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 6.143685654285284, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 6.049802581520046, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.961747698171604, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.87721068195389, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.797147011717912, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.722746718637222, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.651386565414426, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.5870811622884675, train_accuracy 0.9999833333333333, test_accuracy 0.9999
loss 5.525788806005039, train_accuracy 0

KeyboardInterrupt: ignored