In [2]:
import numpy as np

In [None]:
#numerical gradiend
def numerical_gradient_no_batch(f, x):
    h = 1e-4
    grads = np.zeros_like(x)

    for idx in range(x.shape[0]):
        temp = x[idx]

        x[idx] = x + h
        fx1 = f(x)

        x[idx] = x - h
        fx2 = f(x)

        grads[idx] = (fx1 - fx2) / 2*h
        x[idx] = temp

def numerical_gradient(f, x):
    if x.ndim == 1:
        return numerical_gradient_no_batch(f, x)
    
    grads = np.zeros_like(x)
    for idx, x1 in enumerate(x):
        grads[idx] = numerical_gradient_no_batch(f, x1)
    return grads


In [None]:
#gradient descent
def gradient_descent(f, init_x, lr, iter):
    
    x = init_x
    x_history = []

    for _ in iter:
        grad = numerical_gradient(f, init_x)
        x = x - lr * grad
        x_history.append(x)
    
    return x, x_history

In [None]:
class Mullayer:
    def __init__(self):
        self.x1 = None
        self.x2 = None
    
    def forward(self, x1, x2):
        self.x1 = x1
        self.x2 = x2

        return x1 * x2
    
    def backward(self, dout):
        dx1 = dout * self.x2
        dx2 = dout * self.x1
        
        return dx1, dx2
    


In [None]:
class Addlayer:
    def __init__(self):
        pass
    def forward(self, x1, x2):
        return x1 + x2
    def backward(self, dout):
        dx = dout * 1
        dy = dout * 1
        return dout
    

In [None]:
class Twolayer:
    def __init__(self, input_size, hidden_size, output_size, weight_init = 0.01):
        self.params = {}
        self.params['W1'] = np.zeros((input_size, hidden_size))
        self.params['b1'] = np.zeros((1, hidden_size))

        self.params['W2'] = np.zeros((hidden_size, output_size))
        self.params['b2'] = np.zeros((1, output_size))
    

    def predict(self, x):
        x.reshape(1, x.shape[0])
        q1 = np.dot(x, self.params['W1']) + self.params['b1']

        z1 = sigmoid(q1)
        q2 = np.dot(z1, self.params['W2']) + self.params['b2']

        y = softmax(q2)

        return y

    def loss(self, x, t): 
        y = predict(x) #이미 softmax 처리 되어있음
        
        return cross_entrophy(y, t)


    def accuracy(self, x, t): 
        y = predict(x)
        max_y = np.max(y, axis = 1)
        max_t = np.max(t, axis = 1)

        return np.sum(max_y == max_t) / x.shape[0]


    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads  = {}
        grads['W1'] = numercial_gradient(loss_W, self.params['W1'])
        grads['W2'] = numercial_gradient(loss_W, self.params['W2'])
        grads['W3'] = numercial_gradient(loss_W, self.params['W3'])
        grads['W4'] = numercial_gradient(loss_W, self.params['W4'])

    def gradient(self, x, t):


        #forward
        a1 = np.dot(x, self.params['W1']) + self.params['b1']
        z1 = sigmoid(q1)
        a2 = np.dot(z1, self.params['W2']) + self.params['b2']
        y = softmax(q2)

        W1 = self.params['W1']
        W2 = self.params['W2']
        b1 = self.params['b1']
        b2 = self.params['b2']

        #backward
        batch_size = t.shape[0]
        dy = (y-t) / batch_size

        da2 = dy
        db2 = np.sum(da2, axis = 0) 

        dz1 = np.dot(da2, W2.T)
        dw2 = np.dot(z1.t, da2)

        da1 = dz1 * z1 * (1-z1)

        dx = np.dot(da1, W1.T)
        dW1 = np.dot(x.T, da1)

        db1 = np.sum(da1, axis = 0)
        #저장도 해야함



In [None]:
def softmax(x):
    col_sum = np.sum(x, axis=1)
    np.sum

def sigmoid(x):

def sigmoid_grad(x):

def cross_entrophy(y, t): #y: softmax 처리 완료, t: one hot encoding일 수도/아닐수도
    if y.ndim == 1:
        y.reshape(1, y.reshape[0])
        t.reshape(1, t.reshape[0])

    if t.size == x.size:          #t가 one hot encoding 되어 있으면
        flat_t = np.argmax(t, axis =1)

    extract = y[np.rarange(y.shape[0], t)] + 1e-8
    minus_log_col = -np.log(extract)
    return np.sum(minus_log_col) /batch_size

In [6]:
mtrx = np.random.randn(4,3)
print(mtrx.size)

12


In [None]:
#change_into_one hot
def one_hot(t):
    mtrx = np.zeros(t.size, 10)

    for idx, row in enumerate(mtrx):
        row[mtrx[idx]] +=1

In [None]:
#training process

net1 = Twolayer(input_size= 1, hidden_size= 1, output_size= 1)

iter_num = 3
train_size = 3
batch_size = 3
lr = 3

train_loss = []
train_acc = []
test_acc = []

iter_per_epoch = max(train_size/batch_size, 1)

for i in iter_num:
    #batch 뽑기
    batch_mask = np.random.rand(train_size, batch_size)

    batch_x = train_x[batch_mask]
    batch_t = train_t[batch_mask]
    #각 batch별로 parameter update

    grads = {}
    grads = net1.gradient(batch_x, batch_t)

    for keys in grads.keys():
        params[keys] = params[keys] - lr * grads[keys]

    #epoch 아닐때도 loss는 계속 append한다!
    #loss는 현재 batch에서, acc는 전체에서
    loss = net1.loss(batch_x, batch_t)
    loss_list.append(loss)

    
    #epoch마다 한번씩 결과 보고
    if i % iter_per_epoch % 0 == 0:

        train_acc = net1.accuracy(train_x, train_t)
        test_acc = net1.accuracy(test_x, test_t)

        #append acc


In [None]:
#relu
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        mask = (x>=0)
        self.mask = mask
        forward = x.copy()
        result = forward[mask]

        return result
    
    def backward(self, dout):
        dout1 = dout[self.mask]

        return dout1


In [None]:
#sigmoid
class sigmoid:
    def __init__(self):
        self.out = None
    
    def forward(self, x):
        out = 1 / np.exp(-x)
        self.out = out
        #dout update
        return out

    def backward(self, dout):
        out = self.out
        dx = dout* out * (1-out)

        return dx


In [None]:
#affine
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b

    def forward(self, x):
        

---------------------------------------------------------------------------------------------------------------------------------------------------------------------
Former

In [4]:
#two layer input
class two_layer:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01): #여기서 input size는 feature의 개수
        self.params = {}
        self.params['W1'] = np.random.rand(input_size, hidden_size)
        self.params['W2'] = np.random.rand(hidden_size, output_size)
        self.params['b1'] = np.random.rand(1, hidden_size) #이렇게하면 의도와는 다르게 broadcasting되어 버림
        self.params['b2'] = np.random.rand(1, output_size) #이렇게하면 의도와는 다르게 broadcasting되어 버림


    def loss(self, x, t):
        y_predict = self.predict(x)
        CE_loss = cross_entrophy_loss(y_predict, t)

        return CE_loss

    def predict(self, X): 
        a1 = np.dot(X, self.params['W1']) + self.params['b1']
        z1 = sigmoid(a1)
        a2= np.dot(z1, self.params['W2']) + self.params['b2']
        y = softmax(a2)

        return y
    
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        self.grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        self.grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        self.grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        self.grads['b2'] = numerical_gradient(loss_W, self.params['b2'])


    def gradient(self, x, t): #backward
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        
        batch_norm = x.shape[0]

        #forward
        a1 = np.dot(x, self.params['W1']) + self.params['b1']
        z1 = sigmoid(a1)
        a2= np.dot(z1, self.params['W2']) + self.params['b2']
        y = softmax(a2)

        y = self.predict(x)
        dy = (y-t) / batch_norm

        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_backward(a1) * dz1  #처음에 여기 dz1안 곱했음
        
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis = 0)

        return dy


In [5]:

def sigmoid(x):
    return 1/1+np.exp(-x)
    
def sigmoid_backward(x):
    y = sigmoid(x)
    return np.dot(y,(1-y))

def softmax(x): 
    exp_x = np.exp(x)
    exp_sum = np.sum(exp_x, axis = 1)

    return x/exp_sum

def cross_entrophy_loss(y, t):
    
    return -t/y

In [6]:
def _numerical_gradient_no_batch(f, x):
  h = 1e-4  # 0.0001
  grad = np.zeros_like(x)  # an array with the same shape as 'x'

  for idx in range(x.size):
    tmp_val = x[idx]

    # f(x + h)
    x[idx] = tmp_val + h  #현재 x보다 아주 조금 옆에
    fxh1 = f(x) #거기서의 f값
 
    # f(x - h)
    x[idx] = tmp_val - h 
    fxh2 = f(x)

    grad[idx] = (fxh1 - fxh2) / (2 * h)
    x[idx] = tmp_val  # restore the original value

  return grad


def numerical_gradient(f, X): #X: multidimensional인 경우
  if X.ndim == 1: #1 data example
    return _numerical_gradient_no_batch(f, X)
  else:
    grad = np.zeros_like(X)

    for idx, x in enumerate(X):  #i 
      grad[idx] = _numerical_gradient_no_batch(f, x)

    return grad


In [7]:
try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')
import os.path
import gzip
import pickle
import os
import numpy as np


url_base = 'https://github.com/WegraLee/deep-learning-from-scratch/raw/master/dataset/'
key_file = {
  'train_img':'train-images-idx3-ubyte.gz',
  'train_label':'train-labels-idx1-ubyte.gz',
  'test_img':'t10k-images-idx3-ubyte.gz',
  'test_label':'t10k-labels-idx1-ubyte.gz'
}

dataset_dir = '/content'  # Colab base dir
save_file = dataset_dir + "/mnist.pkl"

train_num = 60000
test_num = 10000
img_dim = (1, 28, 28)
img_size = 784


def _download(file_name):
  file_path = dataset_dir + "/" + file_name

  if os.path.exists(file_path):
      return

  print("Downloading " + file_name + " ... ")
  urllib.request.urlretrieve(url_base + file_name, file_path)
  print("Done")

def download_mnist():
  for v in key_file.values():
    _download(v)

def _load_label(file_name):
  file_path = dataset_dir + "/" + file_name

  print("Converting " + file_name + " to NumPy Array ...")
  with gzip.open(file_path, 'rb') as f:
    labels = np.frombuffer(f.read(), np.uint8, offset=8)
  print("Done")

  return labels

def _load_img(file_name):
  file_path = dataset_dir + "/" + file_name

  print("Converting " + file_name + " to NumPy Array ...")
  with gzip.open(file_path, 'rb') as f:
    data = np.frombuffer(f.read(), np.uint8, offset=16)
  data = data.reshape(-1, img_size)
  print("Done")

  return data

def _convert_numpy():
  dataset = {}
  dataset['train_img'] =  _load_img(key_file['train_img'])
  dataset['train_label'] = _load_label(key_file['train_label'])
  dataset['test_img'] = _load_img(key_file['test_img'])
  dataset['test_label'] = _load_label(key_file['test_label'])

  return dataset

def init_mnist():
  download_mnist()
  dataset = _convert_numpy()
  print("Creating pickle file ...")
  with open(save_file, 'wb') as f:
    pickle.dump(dataset, f, -1)
  print("Done!")

def _change_one_hot_label(X):
  T = np.zeros((X.size, 10))
  for idx, row in enumerate(T):
    row[X[idx]] = 1

  return T


def load_mnist(normalize=True, flatten=True, one_hot_label=False):
  """Read MNIST

  Parameters
  ----------
  normalize : Image pixel values become 0.0~1.0
  one_hot_label :
    encodes labels as one hot vectores
    an example of an one-hot : [0,0,1,0,0,0,0,0,0,0]
  flatten : makes the images 1-D vectors

  Returns
  -------
  (Train Images, Train Labels), (Test Images, Test Images)
  """
  if not os.path.exists(save_file):
      init_mnist()

  with open(save_file, 'rb') as f:
      dataset = pickle.load(f)

  if normalize:
      for key in ('train_img', 'test_img'):
          dataset[key] = dataset[key].astype(np.float32)
          dataset[key] /= 255.0

  if one_hot_label:
      dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
      dataset['test_label'] = _change_one_hot_label(dataset['test_label'])

  if not flatten:
        for key in ('train_img', 'test_img'):
          dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

  return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])

In [8]:
# read datar
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) 

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) #we have 10 classes

# hyper-parameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# iteration per epoch
iter_per_epoch = max(train_size / batch_size, 1) #reasoning: 

for i in range(iters_num):
  # get minibatch
  batch_mask = np.random.choice(train_size, batch_size) #1(0)부터 train_size까지 숫자 중 임의로 batch size만큼 뽑기
  x_batch = x_train[batch_mask]
  t_batch = t_train[batch_mask]

  # gradient calculation
  #grad = network.numerical_gradient(x_batch, t_batch)
  grad = network.gradient(x_batch, t_batch)

  # update parameters
  for key in ('W1', 'b1', 'W2', 'b2'):
    network.params[key] -= learning_rate * grad[key]

  # record training process
  loss = network.loss(x_batch, t_batch)
  train_loss_list.append(loss)

  # log accuracy at the end of each epoch
  if i % iter_per_epoch == 0: #0-iters_num까지 계속 update되는데 그 과정에서 iter_per_epoch번마다 한번씩 성능을 확인함(이때를 epoch이라고 정의)
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(f"train acc, test acc | {train_acc:.4f}, {test_acc:.4f}")

# plot the result
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

Downloading train-images-idx3-ubyte.gz ... 


FileNotFoundError: [Errno 2] No such file or directory: '/content/train-images-idx3-ubyte.gz'