<a href="https://colab.research.google.com/github/Qualot/deep-learning-from-scratch/blob/colab/ch04/TwoLayeresNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import matplotlib.pyplot as plt

# Common functions

In [6]:
def sigmoid(x):
  """Logistic sigmoid, applied element-wise.

  Args:
    x: scalar or NumPy array of pre-activations.

  Returns:
    1 / (1 + exp(-x)), with the same shape as ``x``.
  """
  denominator = 1 + np.exp(-x)
  return 1 / denominator

In [7]:
def softmax(a):
  """Softmax over the last axis.

  A 1-D input is treated as a single score vector. For inputs with two or
  more dimensions the normalization is done independently along the last
  axis, so each row of a (batch, classes) array sums to 1 on its own.

  Args:
    a: array-like of scores.

  Returns:
    Array of the same shape as ``a`` holding the softmax probabilities.
  """
  a = np.asarray(a)
  # Reduce per row for batched input; 0-D / 1-D input reduces over everything.
  axis = -1 if a.ndim > 1 else None
  c = np.max(a, axis=axis, keepdims=True)
  exp_a = np.exp(a - c)  # subtracting the max prevents overflow in exp
  sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
  return exp_a / sum_exp_a

In [8]:
def cross_entropy_error(y, t):
  """Cross-entropy between predictions ``y`` and targets ``t``, averaged over rows.

  A 1-D ``y`` is promoted to a batch of one row (and ``t`` is reshaped to
  match) so the same formula serves single samples and batches.

  Args:
    y: NumPy array of predicted values, 1-D or 2-D.
    t: NumPy array of target values, multiplied element-wise with log(y).

  Returns:
    -sum(t * log(y + 1e-7)) divided by the number of rows in ``y``.
  """
  if y.ndim == 1:
    y = y[np.newaxis, :]
    t = t.reshape(1, t.size)

  n_rows = y.shape[0]
  # The small offset keeps log() finite when a prediction is exactly 0.
  log_y = np.log(y + 1e-7)
  total = np.sum(t * log_y)
  return -total / n_rows

In [9]:
def _numerical_gradient_wo_batch(f, x):
  """Central-difference gradient of ``f`` with respect to a 1-D array ``x``.

  Each element of ``x`` is perturbed in place by +h and -h, ``f(x)`` is
  evaluated for both, and the element is restored afterwards. Because the
  perturbation is written into ``x`` itself, ``f`` may also be a closure that
  reads the same array through another reference.

  Args:
    f: callable taking ``x`` and returning a scalar.
    x: 1-D NumPy array with a floating (or complex) dtype; modified in place
      during the computation and restored before returning.

  Returns:
    Array shaped like ``x`` holding (f(x+h) - f(x-h)) / (2h) per element.

  Raises:
    TypeError: if ``x`` has an integer or boolean dtype, where writing
      ``x[idx] +/- h`` back into the array would be truncated and the result
      would be meaningless.
  """
  if not np.issubdtype(x.dtype, np.inexact):
    raise TypeError(
        "numerical gradient requires a floating-point array, got dtype %s" % x.dtype)

  h = 1e-4
  grad = np.zeros_like(x)

  for idx in range(x.size):
    tmp_val = x[idx]
    try:
      # f(x+h)
      x[idx] = tmp_val + h
      fxh1 = f(x)

      # f(x-h)
      x[idx] = tmp_val - h
      fxh2 = f(x)
    finally:
      # Always put the original value back, even if f raised.
      x[idx] = tmp_val

    grad[idx] = (fxh1 - fxh2) / (2*h)

  return grad

In [10]:
def numerical_gradient(f, x):
  """Numerical gradient of ``f`` for a vector or a stack of vectors.

  A 1-D ``x`` is differentiated directly. Otherwise ``x`` is iterated along
  its first axis and each row is differentiated separately, with ``f`` being
  called on that row.

  Args:
    f: callable returning a scalar.
    x: NumPy array; perturbed in place by the per-vector helper.

  Returns:
    Array shaped like ``x`` holding the estimated gradient.
  """
  if x.ndim != 1:
    grad = np.zeros_like(x)
    for row_idx, row in enumerate(x):
      grad[row_idx] = _numerical_gradient_wo_batch(f, row)
    return grad

  return _numerical_gradient_wo_batch(f, x)

In [11]:
class TwoLayerNet:
  """Network with one hidden layer: affine -> sigmoid -> affine -> softmax.

  All learnable arrays live in ``self.params`` under the keys
  'W1', 'b1', 'W2' and 'b2'.
  """

  def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    """Create the parameters: scaled Gaussian weights and zero biases."""
    # W1 is drawn before W2 so a seeded RNG yields the same weights as before.
    w1 = weight_init_std * np.random.randn(input_size, hidden_size)
    w2 = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params = {
        'W1': w1,
        'b1': np.zeros(hidden_size),
        'W2': w2,
        'b2': np.zeros(output_size),
    }

  def predict(self, x):
    """Run the forward pass on input ``x`` and return the softmax output."""
    p = self.params
    hidden = sigmoid(np.dot(x, p['W1']) + p['b1'])
    scores = np.dot(hidden, p['W2']) + p['b2']
    return softmax(scores)

  def loss(self, x, t):
    """Cross-entropy error of the prediction for input ``x`` against teacher ``t``."""
    return cross_entropy_error(self.predict(x), t)

  def accuracy(self, x, t):
    """Fraction of rows whose argmax prediction equals the argmax of ``t``.

    Both the prediction and ``t`` are reduced with argmax along axis 1, so
    they must be 2-D.
    """
    predicted = np.argmax(self.predict(x), axis=1)
    expected = np.argmax(t, axis=1)

    n_correct = np.sum(predicted == expected)
    return n_correct / float(x.shape[0])

  def numerical_gradient(self, x, t):
    """Estimate the loss gradient for every parameter by finite differences.

    Args:
      x: input data.
      t: teacher data.

    Returns:
      Dict with keys 'W1', 'b1', 'W2', 'b2' mapping to gradient arrays.
    """
    def loss_W(W):
      # W is ignored on purpose: the loss reads self.params, which is the
      # very array the module-level numerical_gradient perturbs in place.
      return self.loss(x, t)

    # Inside a method body the bare name resolves to the module-level function.
    return {
        name: numerical_gradient(loss_W, self.params[name])
        for name in ('W1', 'b1', 'W2', 'b2')
    }

# Practice part (argmax() for accuracy)

In [12]:
# Scratch check of the argmax comparison used by TwoLayerNet.accuracy:
# two unrelated random (10, 5) matrices stand in for predictions and labels.
a = np.random.randn(10, 5)
print(a)
b = np.random.randn(10, 5)
print(b)

# NOTE: a and b are rebound here from float matrices to 1-D index arrays
# (column index of the largest value in each row).
a = np.argmax(a, axis=1)
b = np.argmax(b, axis=1)
print(a)
print(b)
# Element-wise boolean match; summing it would give the number of agreeing rows.
print(a==b)

[[-4.40098944e-01  1.01506697e+00  4.75240090e-01 -8.09481248e-01
  -7.35222725e-02]
 [ 1.92537672e+00  1.46082539e+00 -7.96724455e-01 -3.40468130e-01
  -5.56172532e-02]
 [ 8.27299851e-02  1.35465382e-01 -8.21453570e-01  1.23620687e+00
   4.46699079e-01]
 [-8.90099041e-01 -1.66670761e-01  1.85060837e+00 -4.28194338e-01
  -4.94758980e-01]
 [-7.44370484e-01  1.17304773e+00 -2.36839692e+00  1.17359748e+00
  -6.31464740e-01]
 [-1.13404709e+00  2.80192855e-01 -6.51578067e-01 -1.79151394e-01
  -4.82956648e-01]
 [-2.73989067e-01 -3.13937246e-01 -1.69592515e+00  6.74475137e-01
  -3.84430554e-01]
 [ 2.00465336e+00 -2.51342138e+00 -3.85966695e-01  1.12496311e+00
  -4.83004326e-01]
 [-5.34233647e-02 -4.75246927e-01  8.93721287e-04  4.58918434e-01
   4.09413575e-01]
 [ 1.04926737e-01  2.14891061e+00  9.72473357e-01  6.57825160e-01
  -5.83275721e-01]]
[[ 0.54708588 -0.03016445 -2.39357691  0.36102322 -0.31093912]
 [-0.05700301 -0.99902865  0.39380328 -0.62916598 -0.05526616]
 [ 0.56392866 -0.566364

In [13]:
# Scratch check of how enumerate() walks a 2-D array: it yields
# (row index, row) pairs, the same pattern numerical_gradient uses for 2-D input.
alist = np.random.randn(10, 2)
print(alist)
for key, avec in enumerate(alist):
  print(f"{key} th ... {avec}")

[[-0.78311628  0.5000375 ]
 [-0.597761    1.78072889]
 [-0.28579855 -0.43017572]
 [-0.81465686 -0.82184514]
 [-2.27492803 -0.62649002]
 [-0.2670409   1.2524933 ]
 [ 0.59981983  0.01029244]
 [ 0.29699048  0.04323425]
 [ 0.10527408  0.36435918]
 [ 0.30785953 -0.33464247]]
0 th ... [-0.78311628  0.5000375 ]
1 th ... [-0.597761    1.78072889]
2 th ... [-0.28579855 -0.43017572]
3 th ... [-0.81465686 -0.82184514]
4 th ... [-2.27492803 -0.62649002]
5 th ... [-0.2670409  1.2524933]
6 th ... [0.59981983 0.01029244]
7 th ... [0.29699048 0.04323425]
8 th ... [0.10527408 0.36435918]
9 th ... [ 0.30785953 -0.33464247]


# Practice part (TwoLayerNet)

In [18]:
# Build a network with 784 inputs, 100 hidden units and 10 outputs,
# then print the shape of each parameter array as a sanity check.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape)
print(net.params['b1'].shape)
print(net.params['W2'].shape)
print(net.params['b2'].shape)

(784, 100)
(100,)
(100, 10)
(10,)


In [19]:
# Dummy batch: 100 random inputs of 784 features each (not real data).
x = np.random.rand(100, 784)
# Forward pass; with the parameter shapes above the result should be (100, 10).
y = net.predict(x)

In [21]:
# Dummy teacher data: uniform random values, so the rows are NOT one-hot labels.
t = np.random.rand(100, 10)
# WARNING: slow. The numerical gradient perturbs every parameter element and
# evaluates the full-batch loss twice per element; with 784*100 + 100 + 100*10 + 10
# = 79,510 elements that is 159,020 forward passes over the 100-sample batch.
grads = net.numerical_gradient(x, t)