# 梯度下降法
1、梯度 是由 函数的全部变量的偏导数汇总而成的向量
例如：$f(x_{1},x_{2}) = x_{1}^{2} + x_{2}^{2}$
那么在点（3，4）处，f(x1,x2) 的梯度是：$\nabla f(x_{1},x_{2}) = \left(\frac{\partial f}{\partial x_{1}}, \frac{\partial f}{\partial x_{2}}\right) = (2x_{1},2x_{2}) = (6,8)$
这个向量（6，8） 就是函数在这个点的梯度

In [3]:
import numpy as np

In [4]:
def function_2(x):
    """Return the sum of the squared elements of ``x``."""
    squares = x ** 2
    total = np.sum(squares)
    return total

In [43]:
## 求 函数f 在 x点处的梯度数值解
# def numerical_gradient(f,x):
#     h = 1e-4
#     grad = np.zeros_like(x)
#     # 求x1的偏导数的时候，x2的值直接带入。求x2的偏导数，x1的值直接带入
#     for idx in range(x.size):
#         tmp_val = x[idx]
#         
#         x[idx] = tmp_val + h
#         fxh1 = f(x)
#         
#         x[idx] = tmp_val - h
#         fxh2 = f(x)
#         
#         grad[idx] = (fxh1 - fxh2) / (2*h)
#         x[idx] = tmp_val
#     
#     return grad

def numerical_gradient(f, W, eps=1e-5):
    """Estimate the gradient of ``f`` at ``W`` with central differences.

    Each element of ``W`` is perturbed in place by ``+eps`` and ``-eps`` in
    turn, ``f(W)`` is evaluated for both, and the element is then restored.
    Perturbing in place is deliberate: it lets ``f`` be a closure that reads
    the very same array through another reference.

    Args:
        f: Callable taking ``W`` and returning a scalar.
        W: Floating-point (or complex) ndarray of any shape. It is modified
            temporarily during the call and restored before returning, even
            when ``f`` raises.
        eps: Step used for the finite difference.

    Returns:
        An ndarray with the same shape and dtype as ``W`` holding
        ``(f(W + eps*e_i) - f(W - eps*e_i)) / (2 * eps)`` for every element.

    Raises:
        TypeError: If ``W`` has an integer/bool dtype, where ``orig +/- eps``
            would be truncated when stored back and the result would be
            meaningless.
    """
    if not np.issubdtype(W.dtype, np.inexact):
        raise TypeError(
            "numerical_gradient requires a floating-point array, got dtype "
            f"{W.dtype}"
        )

    grad = np.zeros_like(W)
    for idx in np.ndindex(W.shape):
        orig = W[idx]
        try:
            W[idx] = orig + eps
            loss_plus = f(W)

            W[idx] = orig - eps
            loss_minus = f(W)
        finally:
            # Always undo the perturbation so a failing f cannot leave the
            # caller's array in a modified state.
            W[idx] = orig

        grad[idx] = (loss_plus - loss_minus) / (2 * eps)
    return grad

In [44]:
def gradient_decent(f, init_x, lr=0.01, step_num = 100):
    """Minimise ``f`` by plain gradient descent using numerical gradients.

    Args:
        f: Callable taking an array and returning a scalar.
        init_x: Starting point (array-like). It is copied, so the caller's
            array is left untouched.
        lr: Learning rate, i.e. the factor applied to the gradient each step.
        step_num: Number of update steps to perform.

    Returns:
        A new array holding the position after ``step_num`` updates.
    """
    # Work on a private copy: the in-place update below would otherwise
    # overwrite the caller's starting point.
    x = np.array(init_x)
    if not np.issubdtype(x.dtype, np.inexact):
        # Integer inputs cannot absorb fractional updates in place.
        x = x.astype(float)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad

    return x

# Run gradient descent on function_2 from the starting point (-3.0, 4.0)
# with learning rate 0.1 and print the resulting point.
init_x = np.array([-3.0, 4.0])
print(gradient_decent(function_2, init_x=init_x, lr=0.1))

[-6.11110793e-10  8.14814392e-10]


## 神经网络的梯度
神经网络的梯度是：损失函数关于权重参数的梯度

In [12]:
def softmax(z):
    """Return the softmax of ``z`` computed along its last axis.

    A 1-D input is treated as a single score vector. For inputs with more
    dimensions (e.g. a batch of shape ``(batch, classes)``) every vector
    along the last axis is normalised independently, so each one sums to 1.

    Args:
        z: Array-like of scores.

    Returns:
        An array with the same shape as ``z`` containing the probabilities.
    """
    z = np.asarray(z)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if z.ndim > 0 else None
    # Subtracting the maximum leaves the result unchanged but keeps np.exp
    # from overflowing on large scores.
    max_value = np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(z - max_value)
    sum_exp_z = np.sum(exp_z, axis=axis, keepdims=True)
    return exp_z / sum_exp_z

def cross_entropy_error(y, t):
    """Return the cross-entropy between ``y`` and ``t``, averaged over rows.

    Args:
        y: Array of predicted values. A 1-D array is treated as a single
            sample and reshaped to one row.
        t: Array of target values, multiplied element-wise with ``log(y)``.
            When ``y`` is 1-D it is reshaped to one row as well.

    Returns:
        ``-sum(t * log(y + 1e-7))`` divided by the number of rows of ``y``.
    """
    epsilon = 1e-7  # keeps log() finite when an entry of y is 0
    if y.ndim == 1:
        y = y[np.newaxis, :]
        t = t.reshape(1, t.size)

    n_rows = y.shape[0]
    weighted_log = t * np.log(y + epsilon)
    return -np.sum(weighted_log) / n_rows

In [46]:
class simple_net:
    """Single-layer network with one 2x3 weight matrix ``W``."""

    def __init__(self):
        # Weights are drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the matrix product of ``x`` and the weights."""
        return np.matmul(x, self.W)

    def loss(self, x, t):
        """Return the cross-entropy of the softmax of ``predict(x)`` against ``t``."""
        scores = self.predict(x)
        probabilities = softmax(scores)
        return cross_entropy_error(probabilities, t)

# Create a network with random weights and print them, then compute the
# scores for one sample input and print the index of the largest score.
net = simple_net()
print(net.W)
x = np.array([0.6, 0.9])
p = net.predict(x)
print(np.argmax(p))

[[-0.09958317 -2.37724888  0.12666433]
 [-0.5149041   0.91928847  0.21565623]]
2


In [47]:
# Target vector for the sample input: 1 at index 1, 0 elsewhere.
t = np.array([0, 1, 0])
#print(net.loss(x, t))

In [51]:
def f(W):
    """Return the loss of ``net`` on the module-level ``x`` and ``t`` using weights ``W``.

    ``W`` is installed as ``net.W`` for the duration of the call and the
    previous weights are put back afterwards, so the result really depends
    on the argument. When ``W`` is ``net.W`` itself, the result is the same
    as evaluating ``net.loss(x, t)`` directly.
    """
    saved_W = net.W
    net.W = W
    try:
        return net.loss(x, t)
    finally:
        # Put the network's own weights back, even if loss() raises.
        net.W = saved_W


## 解释
怎么理解用数值方法求损失函数关于 W 的梯度：
对于 W 里面的每个元素，依次把它加上、减去一个微小量 eps=1e-5（其余元素保持不变），分别求出新的损失函数值 loss_plus 和 loss_minus，然后通过中心差分公式 (loss_plus - loss_minus) / (2 * eps) 得到损失函数对该元素的偏导数（即梯度中对应分量）的近似值

In [50]:
# Numerically estimate the gradient of f with respect to net.W and print it.
dw = numerical_gradient(f, net.W)
print(dw)

[[ 0.1450132  -0.46557578  0.32056259]
 [ 0.21751979 -0.69836367  0.48084388]]
