In [82]:
import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

In [83]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All learnable parameters are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.

    Args:
        input_size: Number of input features (rows of ``W1``).
        hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
        output_size: Number of outputs (columns of ``W2``).
        weight_init_std: Scale factor applied to the standard-normal draws
            used for the initial weights.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights with scaled Gaussian noise and biases with zeros.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output.

        Args:
            x: Input array whose last axis has length ``input_size``
                (it is multiplied by ``W1`` via ``np.dot``).

        Returns:
            The result of ``softmax`` applied to the second affine layer.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)

        a2 = np.dot(z1, W2) + b2
        return softmax(a2)

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``.

        Args:
            x: Input batch.
            t: Target labels; the accepted encoding is whatever
                ``cross_entropy_error`` supports (TODO confirm in common.functions).
        """
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` whose predicted class matches ``t``.

        Args:
            x: 2-D input batch, one sample per row.
            t: Targets, either one-hot/score rows (2-D, reduced with argmax
                along axis 1) or a 1-D array of class indices.

        Returns:
            Number of correct predictions divided by ``x.shape[0]``.
        """
        predicted = np.argmax(self.predict(x), axis=1)

        # Accept class-index labels as well as one-hot rows: argmax over
        # axis 1 is only defined (and only needed) for 2-D targets.
        t = np.asarray(t)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(predicted == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Compute the loss gradient for every parameter by numerical differentiation.

        Args:
            x: Input batch.
            t: Target labels.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'`` holding the
            value returned by the module-level ``numerical_gradient`` helper
            for the corresponding parameter array.
        """
        # The argument W is deliberately ignored: the loss reads the weights
        # from self.params, and the helper is handed those same array objects.
        # Presumably the helper perturbs the array in place -- TODO confirm
        # against common.gradient.
        loss_W = lambda W: self.loss(x, t)

        # Inside a method body the bare name resolves to the imported
        # module-level function, not to this method.
        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

In [84]:
# Build a 784-100-10 network and list the shape of each parameter array.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
for name, param in net.params.items():
    print("{}: {}".format(name, param.shape))

W1: (784, 100)
b1: (100,)
W2: (100, 10)
b2: (10,)


In [88]:
# Forward-pass smoke test: push 100 random rows of 784 features through `net`.
x = np.random.rand(100, 784)
y = net.predict(x)
# Prints the full prediction array (one row per input row).
print(y)

[[ 0.09922376  0.10399276  0.10001807  0.10083684  0.09511914  0.0996111
   0.10010926  0.10483831  0.09634949  0.09990128]
 [ 0.09932867  0.10414784  0.09984373  0.10072486  0.09492603  0.09978659
   0.10037575  0.10485692  0.09632202  0.0996876 ]
 [ 0.09937822  0.10383741  0.09950445  0.10090879  0.09500235  0.09980419
   0.10023586  0.1050393   0.09640312  0.09988632]
 [ 0.09909568  0.10393555  0.09943899  0.10111711  0.09489182  0.09995432
   0.10021687  0.10471837  0.09671256  0.09991872]
 [ 0.09909241  0.10395002  0.10014611  0.10088891  0.09497097  0.09954
   0.10017637  0.10466547  0.09631165  0.1002581 ]
 [ 0.09955006  0.10401362  0.09977349  0.10072325  0.09505486  0.09972271
   0.1001212   0.10475412  0.096475    0.09981169]
 [ 0.09913723  0.10397266  0.09955865  0.10084118  0.09510791  0.09960533
   0.09997699  0.10545016  0.09645417  0.09989573]
 [ 0.09951554  0.1039435   0.10009295  0.10045854  0.09479408  0.0997051
   0.10041437  0.10485246  0.09633911  0.09988435]
 [ 0.

In [91]:
# Gradient smoke test: random inputs and random (not one-hot) targets,
# since only the shapes of the returned gradients are inspected here.
x = np.random.rand(100, 784)
t = np.random.rand(100, 10)

grads = net.numerical_gradient(x, t)

for name, g in grads.items():
    print("{}: {}".format(name, g.shape))

In [None]:
from dataset.mnist import load_mnist

# Load MNIST, requesting the normalize and one-hot-label options from load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyperparameters
iters_num = 10000      # number of parameter updates
batch_size = 100       # samples drawn per mini-batch
learning_rate = 0.1    # gradient-descent step size
train_size = x_train.shape[0]

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
train_loss_list = []   # one loss value per iteration, in order

# Mini-batch gradient descent
for i in range(iters_num):
    # Draw batch_size row indices (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradient of the loss with respect to every parameter.
    # NOTE: the original cell kept `network.gradient(x_batch, t_batch)` as a
    # commented-out alternative; TwoLayerNet as defined in this notebook has
    # no such method.
    grad = network.numerical_gradient(x_batch, t_batch)

    # Step each parameter against its gradient.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Report progress every 50 iterations.
    if not i % 50:
        print("{}: {}".format(i, loss))