In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def numerical_diff(f, x):
  delta = 0.0001

  if x.ndim == 1:
    x.reshape(1, -1)
  grad = np.zeros_like(x)
  for i in range(x.shape[1]):
    x[:,i] = x[:,i] + delta
    f1 = f(x)
    x[:,i] = x[:,i] - 2*delta
    f2 = f(x)
    grad[:,i] = (f1 - f2) / (2 * delta)
    x[:,i] = x[:,i] + delta
  if x.shape[0] == 1:
    x.reshape(-1)
    grad.reshape(-1)
  return grad

In [15]:
def softmax(z):
  if z.ndim == 1:
    c = np.max(z)
    exp_z = np.exp(z - c)
    sum_exp_z = np.sum(exp_z)
    return exp_z / sum_exp_z
  C = np.max(z, axis=1)
  print(z - C.reshape(z.shape[0], 1))
  exp_z = np.exp(z - C.reshape(z.shape[0], -1))
  sum_exp_z = np.sum(exp_z, axis=1)
  return (exp_z.T / sum_exp_z).T

def cross_entropy_error_with_label(y, t):
  delta = 0.0001
  if y.ndim == 1:
    return -np.sum(np.log(y[t]) + delta)
  return -np.sum(np.log(y[:,t] + delta), axis=1) / y.shape[0]

def cross_entropy_error_with_onehot(y, t):
  delta = 0.0001
  if y.ndim == 1:
    return -np.sum(t * np.log(y))
  return -np.sum(t.reshape(-1, 1) * np.log(y), axis=1) / y.shape[0]

In [14]:
a = np.array([1,2])
b = np.array([
              [1,2,3],
              [4,5,6],
])

a.reshape(-1, 1) * b

array([[ 1,  2,  3],
       [ 8, 10, 12]])

In [79]:
def numerical_gradient(f, x):
  delta = 0.0001
  grad = np.zeros_like(x)
  for idx, ele in np.ndenumerate(x):
    x[idx] += delta
    f1 = f(x)
    x[idx] -= 2 * delta
    f2 = f(x)
    grad[idx] = (f1 - f2) / (2. * delta)
    x[idx] += delta
  return grad

In [39]:
class SimpleNet:
  def __init__(self):
    self.W = np.random.rand(2,3)
  
  def predict(self, x):
    return np.dot(x, self.W)
  
  def loss(self, x, t):
    z = self.predict(x)
    y = softmax(z)
    loss = cross_entropy_error_with_onehot(y, t)
    return loss

In [40]:
net = SimpleNet()
net.W = np.array([
                  [0.47355232, 0.9977393, 0.84668094],
                  [0.85557411, 0.03563661, 0.69422093]
])
print(net.W)

[[0.47355232 0.9977393  0.84668094]
 [0.85557411 0.03563661 0.69422093]]


In [41]:
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
np.argmax(p)

[1.05414809 0.63071653 1.1328074 ]


2

In [42]:
t = np.array([0, 0, 1])
net.loss(x, t)

0.9280685387482351

In [82]:
# def f(W):
#   return net.loss(x, t)

f = lambda w : net.loss(x, t)

dW = numerical_gradient(f, net.W)
print(dW)

[[ 0.21925597  0.14355932 -0.36281529]
 [ 0.32888396  0.21533898 -0.54422294]]
