In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [21]:
randn = np.random
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def init_network():
    network = {}
    network['W1'] = np.array(randn.rand(2,3))
    network['b1'] = np.array(randn.rand(1,3))
    network['W2'] = np.array(randn.rand(3,2))
    network['b2'] = np.array(randn.rand(2,2))
    network['W3'] = np.array(randn.rand(2,2))
    network['b3'] = np.array(randn.rand(1,2))

    return network


def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    a1 = np.dot(x,W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1,W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2,W3) + b3
    y = sigmoid(a3)
    
    return y

In [23]:
network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(network)
print(y)

{'W1': array([[ 0.44086074,  0.26322219,  0.55679132],
       [ 0.32954143,  0.74092043,  0.6592731 ]]), 'b1': array([[ 0.73412453,  0.59604638,  0.19726939]]), 'W2': array([[ 0.6836049 ,  0.2342354 ],
       [ 0.16382168,  0.98132462],
       [ 0.84908849,  0.97544239]]), 'b2': array([[ 0.12282336,  0.26629992],
       [ 0.89653928,  0.21117206]]), 'W3': array([[ 0.31218155,  0.73675731],
       [ 0.3847653 ,  0.1640022 ]]), 'b3': array([[ 0.05644193,  0.09966613]])}
[[ 0.65578308  0.69784651]
 [ 0.66184192  0.71203309]]


In [3]:
randn = np.random
np.array(randn.rand(2,3))

array([[ 0.29832443,  0.12845232,  0.28920862],
       [ 0.38659211,  0.94855943,  0.37966442]])

## Identity function and Softmax function

### Softmax function
$$y_k = \frac{\exp(a_k)}{\sum_{i=1}^n\exp(a_i)}$$

In [24]:
a = np.array([0.3, 2.9, 4.0])

exp_a = np.exp(a) #log func
print(exp_a)

sum_exp_a = np.sum(exp_a)
print(sum_exp_a)

y = exp_a / sum_exp_a
print(y)

[  1.34985881  18.17414537  54.59815003]
74.1221542102
[ 0.01821127  0.24519181  0.73659691]


In [25]:
def softmax_with_overflow(a):
    exp_a = np.exp(a) #log func
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

In [26]:
# To prevent from overflow problem, we need to make a caution
def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c) #log func
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

$$
y_k = \frac{\exp(a_k)}{\sum_{i=1}^n\exp(a_i)}\\
= \frac{C\exp(a_k)}{C\sum_{i=1}^n\exp(a_i)}\\
= \frac{\exp(a_k+logC)}{\sum_{i=1}^n\exp(a_i+logC)}\\
= \frac{\exp(a_k+C')}{\sum_{i=1}^n\exp(a_i+C')}
$$
normally, $C$ is selected for maximum number in the array.

## 2 Layer Neural Network

X is array of N by M.  

$$
X= (N,m)\\
W1=(m,n)\\
b1=(n,1)\\
a1 = X*W1 + b1\\
z1 = sigmoid(a1)\\
$$

In [None]:
import sys, os
sys.path.append(os.pardir)

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) #Input_size by hidden_size
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) #hidden_size by output_size
        self.params['b2'] = np.zeros(output_size)
        
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a3)
        
        return y
    
    #x: input data, y = label
    def loss(self, x, t):
        y = self.predict(x)
        
        return cross_entropy_error(y, t)

    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        
        accu = np.sum(y == t) / float(x.shape[0])
        return accu
    
    
    # x: input data, t: label
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads


network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape)
print(net.params['b1'].shape)
print(net.params['W2'].shape)
print(net.params['b2'].shape)

