In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys,os
sys.path.append(os.pardir)
from dataset.mnist import load_mnist
from gradient_simplenet import simpleNet
from common import functions
from common import gradient


def AND(x1, x2):
    """Perceptron AND gate.

    Computes 0.5*x1 + 0.5*x2 - 0.7 and returns 0 when that value is
    less than or equal to zero, otherwise 1.
    """
    inputs = np.array([x1, x2])
    weights = np.array([0.5, 0.5])
    bias = -0.7
    activation = (weights * inputs).sum() + bias
    return 0 if activation <= 0 else 1
   
def NAND(x1, x2):
    """Perceptron NAND gate.

    Computes -0.5*x1 - 0.5*x2 + 0.7 and returns 0 when that value is
    less than or equal to zero, otherwise 1.
    """
    inputs = np.array([x1, x2])
    weights = np.array([-0.5, -0.5])
    bias = 0.7
    activation = (weights * inputs).sum() + bias
    return 0 if activation <= 0 else 1

def OR(x1, x2):
    """Perceptron OR gate.

    Computes 0.5*x1 + 0.5*x2 - 0.2 and returns 0 when that value is
    less than or equal to zero, otherwise 1.
    """
    inputs = np.array([x1, x2])
    weights = np.array([0.5, 0.5])
    bias = -0.2
    activation = (weights * inputs).sum() + bias
    return 0 if activation <= 0 else 1

def XOR(x1, x2):
    """XOR gate built from two layers of gates: AND(NAND(x1, x2), OR(x1, x2))."""
    # Arguments are evaluated left to right, so NAND runs before OR.
    return AND(NAND(x1, x2), OR(x1, x2))
    
def step_function(x):
    """Step activation: 1 where x is strictly greater than 0, else 0, as int64."""
    is_positive = x > 0
    return np.array(is_positive, dtype=np.int64)
    
    
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise via NumPy."""
    neg_exp = np.exp(-x)
    denominator = 1 + neg_exp
    return 1 / denominator
    
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    floor = 0
    return np.maximum(floor, x)
    
def identify_function(x):
    """Identity activation: hand the input back unchanged (same object)."""
    output = x
    return output

def softmax(x):
    """Softmax over the last axis of ``x``.

    For a scalar or 1-D input the whole input is treated as a single score
    vector. For inputs with two or more dimensions each slice along the
    last axis (e.g. each row of a ``(batch, classes)`` array) is normalised
    independently, so every such slice sums to 1.

    Args:
        x: array-like of scores.

    Returns:
        Array of the same shape as ``x`` holding the softmax probabilities.
    """
    x = np.asarray(x)
    if x.ndim <= 1:
        # Subtract the maximum before exponentiating to avoid overflow.
        exp_a = np.exp(x - np.max(x))
        return exp_a / np.sum(exp_a)

    # Batched input: shift and normalise each score vector separately;
    # keepdims lets the per-row statistics broadcast back over the row.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=-1, keepdims=True)

# Sum-of-squares error
def mean_squared_error(y, t):
    """Return half the sum of squared differences between y and t.

    Note that, despite the name, no averaging is performed: the result is
    0.5 * sum((y - t)**2) over all elements.
    """
    residual = y - t
    squared = residual ** 2
    return 0.5 * np.sum(squared)
    
# Cross-entropy error
def cross_entorypy_error(y, t):
    """Cross-entropy error between predictions ``y`` and targets ``t``.

    Computes ``-sum(t * log(y)) / batch_size``. A 1-D ``y`` is treated as a
    batch of one sample (both ``y`` and ``t`` are reshaped to a single row).

    Predicted values below 1e-7 are floored to 1e-7 before taking the log.
    Without this, a prediction of exactly 0 gives ``log(0) = -inf`` and the
    product ``0 * -inf`` turns the whole loss into NaN. Values of ``y`` at
    or above 1e-7 are unaffected.

    Args:
        y: ndarray of predicted probabilities, 1-D or with the batch on axis 0.
        t: ndarray of targets with the same number of elements as ``y``.

    Returns:
        The summed cross-entropy divided by the batch size.
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    eps = 1e-7  # lower bound that keeps log() finite
    safe_y = np.maximum(y, eps)
    return -np.sum(t * np.log(safe_y) / batch_size)
    
def numerical_diff(f, x):
    """Approximate df/dx at ``x`` with a central difference.

    Uses ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.

    Args:
        f: callable of one argument.
        x: point at which to differentiate.

    Returns:
        The central-difference estimate of the derivative of ``f`` at ``x``.
    """
    h = 1e-4
    # Parenthesise the denominator: "/ 2*h" would divide by 2 and then
    # multiply by h.
    return (f(x + h) - f(x - h)) / (2 * h)

# Compute the gradient
def numerical_gredient(f,x):
    """Numerically estimate the gradient of ``f`` at ``x``.

    Every element of ``x`` is perturbed in turn by ``+h`` and ``-h``
    (``h = 1e-4``) and the central difference of ``f(x)`` is stored at the
    matching position of the result. Works for arrays of any number of
    dimensions.

    ``x`` is modified in place while ``f`` is evaluated and each element is
    restored afterwards, so ``f`` sees the perturbation even if it reads the
    same array through another reference.

    Args:
        f: callable taking the array ``x`` and returning a scalar.
        x: ndarray at which to evaluate the gradient. It should have a
            floating dtype; with an integer dtype the ``+/- h`` writes are
            truncated when stored.

    Returns:
        ndarray shaped like ``x`` (created with ``np.zeros_like(x)``)
        holding the partial derivatives.
    """
    h = 1e-4
    grad = np.zeros_like(x)
    # ndindex yields a full index tuple for every element, so the same loop
    # serves 1-D vectors and N-D weight matrices alike.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]

        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value

    return grad
    
# Gradient descent
def gradient_descent(f, init_x, lr = 0.01, step_num=100):
    """Minimise ``f`` by plain gradient descent from ``init_x``.

    Repeats ``x -= lr * grad`` for ``step_num`` steps, where ``grad`` comes
    from ``numerical_gredient(f, x)``.

    The iteration runs on a copy of ``init_x``, so the caller's array is not
    modified. Non-floating input (e.g. an integer array or a list of ints)
    is converted to float first, since an in-place float update on an
    integer array would fail.

    Args:
        f: callable taking an ndarray and returning a scalar.
        init_x: starting point (array-like).
        lr: learning rate.
        step_num: number of update steps.

    Returns:
        ndarray with the position reached after ``step_num`` updates.
    """
    x = np.array(init_x)  # np.array copies, leaving init_x untouched
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    for _ in range(step_num):
        grad = numerical_gredient(f, x)
        x -= lr * grad
    return x

def function_2(x):
    """Sum of squares of the first two components: x[0]**2 + x[1]**2."""
    first, second = x[0], x[1]
    return first**2 + second**2

# Numerical gradient of the simpleNet loss with respect to its weights,
# for one fixed input x and one-hot target t.
net = simpleNet()
x = np.array([0.6, 0.9])
p = net.predict(x)  # prediction for x; not used again in this cell
t = np.array([0,0,1])
def f(W):
    # NOTE(review): W is ignored; the loss is read from net directly.
    # Presumably gradient.numerical_gradient perturbs net.W in place so the
    # change is visible here -- confirm against common/gradient.py.
    return net.loss(x, t)

dW = gradient.numerical_gradient(f, net.W)
dW

[[ 0.1454517   0.18298851 -0.32844021]
 [ 0.21817756  0.27448277 -0.49266032]]


array([[ 0.1892606 ,  0.10340364, -0.29266425],
       [ 0.28389091,  0.15510547, -0.43899637]])

In [2]:
# Scratch check: zeros_like on a 4-element list of ints yields 4 integer zeros.
np.zeros_like([1,2,3,5])

array([0, 0, 0, 0])

In [3]:
# Scratch check: draw 100 random indices from range(784). Values can repeat
# (the recorded output contains 22 three times). Unseeded, so each run differs.
np.random.choice(784, 100)

array([697, 188, 488, 120, 547, 558, 380, 710, 390, 758, 478, 511,  22,
       442, 346,  27, 391, 475, 625,  81, 243, 284, 742, 118, 361, 212,
       248, 225, 278,  18, 628, 282, 711, 144, 738, 444, 331, 334, 641,
       597,  30, 206, 332, 540, 191, 608, 505, 194, 517,  22, 719, 698,
       350,  51, 107, 776, 684, 500, 764, 172, 325, 167,  11, 562, 688,
       222, 708, 581, 387, 271,  22,   5, 245, 395, 423, 481, 180,   4,
       413, 568, 190, 462, 426, 454, 723, 664, 530, 614, 564, 765, 772,
       129, 540, 751, 419, 708, 140, 760, 502, 647])

In [4]:
from twolayler import TwoLayerNet

# Mini-batch training of TwoLayerNet on MNIST using numerically estimated
# gradients.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

train_loss_list = []

for i in range(iters_num):
    # Draw a mini-batch (np.random.choice samples with replacement by default)
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    # NOTE(review): the recorded run of this cell ended in KeyboardInterrupt;
    # presumably the numerical gradient is too slow for 10000 iterations.
    grad = network.numerical_gradient(x_batch, t_batch)

    # Update the parameters using the gradient
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the training progress
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    print(i)  # prints the iteration index on every step




KeyboardInterrupt: 

In [5]:
# Scratch check of bias broadcasting: the length-3 vector B is added to each
# row of the 2x3 matrix X_dot_W.
X_dot_W = np.array([[0,0,0],[10,10,10]])
B = np.array([1,2,3])
X_dot_W + B

array([[ 1,  2,  3],
       [11, 12, 13]])

In [1]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Mini-batch SGD training of TwoLayerNet on MNIST using network.gradient,
# logging train/test accuracy once per epoch.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Per-iteration loss and per-epoch accuracy histories
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch. Integer division keeps this an int, so the
# "i % iter_per_epoch == 0" check below still fires once per epoch when
# train_size is not an exact multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a mini-batch (np.random.choice samples with replacement by default)
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient and take one SGD step on every parameter
    grad = network.gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the current mini-batch after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate on the full training and test sets
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        # Keep each history in its own list
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.09381666666666667 0.0971
0.906 0.9089
0.9230166666666667 0.9234
0.93625 0.9348
0.9430666666666667 0.9399
0.9506166666666667 0.9477
0.9549 0.9525
0.9605833333333333 0.9565
0.9627833333333333 0.9588
0.9659 0.961
0.9661833333333333 0.9613
0.9709833333333333 0.9643
0.9712166666666666 0.9662
0.9748833333333333 0.9679
0.9753166666666667 0.969
0.9765333333333334 0.968
0.9787833333333333 0.9695
