In [1]:
import  numpy as np

In [43]:
def function(x):
    '''Sum of squares of `x`.

    A 1-D array collapses to a single scalar; any other array is summed
    along axis 1 (one value per row for a 2-D array).
    '''
    reduce_axis = None if x.ndim == 1 else 1
    return np.sum(x ** 2, axis=reduce_axis)

def _numerical_gradient_no_batch(f, x):
    '''Central-difference gradient of `f` with respect to a 1-D array `x`.

    Each element of `x` is perturbed IN PLACE by +h and -h, `f(x)` is
    evaluated both times, and the element is then put back. Because the
    perturbation happens in place, `f` may ignore its argument and read
    the same memory through another reference.

    Parameters:
        f: callable evaluated as `f(x)`; must return a scalar.
        x: 1-D floating-point ndarray.

    Returns:
        ndarray shaped like `x` holding the estimated partial derivatives.

    Raises:
        TypeError: if `x` does not have a floating-point dtype.
    '''
    if not np.issubdtype(x.dtype, np.floating):
        # Writing value +/- h into an integer array truncates the step and
        # silently yields a meaningless gradient, so refuse up front.
        raise TypeError(
            "numerical gradient requires a floating-point array, got dtype "
            + str(x.dtype)
        )

    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    for idx in range(x.size):
        saved = x[idx]
        try:
            x[idx] = float(saved) + h
            f_plus = f(x) # f(x+h)

            x[idx] = float(saved) - h
            f_minus = f(x) # f(x-h)
        finally:
            # Put the element back even when f raises, so the caller's
            # array is never left in a perturbed state.
            x[idx] = saved

        grad[idx] = (f_plus - f_minus) / (2*h)

    return grad


def numerical_gradient(f, X):
    '''Numerical gradient of `f` with respect to every element of `X`.

    A 1-D `X` is differentiated directly. For higher-dimensional input the
    array is walked along its first axis and each sub-array is handled
    recursively, so `f` ends up being evaluated on the 1-D rows of `X`.
    The rows are perturbed in place and restored afterwards.

    Parameters:
        f: callable returning a scalar.
        X: floating-point ndarray with at least one dimension.

    Returns:
        ndarray shaped like `X` holding the estimated partial derivatives.

    Raises:
        TypeError: if `X` does not have a floating-point dtype.
    '''
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)

    grad = np.zeros_like(X)
    for idx, sub in enumerate(X):
        # Recursing (instead of calling the 1-D helper directly) keeps the
        # 2-D behaviour unchanged and also covers arrays with more axes.
        grad[idx] = numerical_gradient(f, sub)
    return grad

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    '''Minimise `f` by plain gradient descent.

    Parameters:
        f: objective function, passed on to `numerical_gradient`.
        init_x: starting point (array-like). It is copied, so the caller's
            object is left untouched.
        lr: learning rate, i.e. the factor applied to the gradient per step.
        step_num: number of update steps to perform.

    Returns:
        A new floating-point ndarray holding the point reached after
        `step_num` updates.
    '''
    # np.array copies by default; without the copy the in-place update
    # below would overwrite the caller's starting point.
    x = np.array(init_x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer start points cannot absorb fractional in-place updates.
        x = x.astype(float)

    for _ in range(step_num):
        x -= lr * numerical_gradient(f, x)
    return x

def sigmoid(x):
    '''Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.'''
    decay = np.exp(-x)
    return 1 / (1 + decay)

def softmax(x):
    '''Softmax over the last axis of `x`.

    For a 1-D input this is the usual softmax of the vector. For a batch
    (2-D or higher) every row along the last axis is normalised on its
    own, so each row sums to 1 instead of the whole array summing to 1.

    Parameters:
        x: array-like of scores.

    Returns:
        Array of the same shape as `x` with non-negative entries that sum
        to 1 along the last axis.
    '''
    x = np.asarray(x)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if x.ndim > 0 else None

    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)

def cross_entropy_error(y,t):
    '''Cross-entropy of outputs `y` against targets `t`, summed over all elements.'''
    eps = 1e-7  # small offset so that log() is never evaluated at exactly 0
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)

In [26]:
# Numerical gradient of `function` evaluated at the point (3, 4).
numerical_gradient(function, np.array([3.0, 4.0]))

array([6., 8.])

In [27]:
# Run 100 gradient-descent steps on `function`, starting from (-3, 4) with learning rate 0.1.
init_x = np.array([-3.0, 4.0])
result = gradient_descent(function, init_x=init_x, lr=0.1, step_num=100)
# Show the point that was reached together with the function value there.
result,function(result)

(array([-6.11110793e-10,  8.14814391e-10]), 1.0373788922158197e-18)

In [28]:
class simpleNet:
    '''Single linear layer (2 inputs, 3 outputs) with a softmax cross-entropy loss.'''

    def __init__(self):
        # 2x3 weight matrix drawn from the standard normal distribution.
        self.w = np.random.randn(2,3)

    def predict(self, x):
        '''Return the raw scores, i.e. the product of `x` and the weights.'''
        scores = np.dot(x, self.w)
        return scores

    def loss(self, x, t):
        '''Cross-entropy between the softmax of the scores for `x` and the target `t`.'''
        return cross_entropy_error(softmax(self.predict(x)), t)

In [29]:
# Build a network and print its randomly initialised weight matrix.
net = simpleNet()
print(net.w)

[[ 0.35045158 -0.48679263  0.22397527]
 [-0.49633575 -0.11827549 -1.05972292]]


In [30]:
# Feed one two-element input through the network and show the raw scores.
x = np.array([0.6, 0.9])
p = net.predict(x)
p

array([-0.23643122, -0.39852352, -0.81936547])

In [31]:
# Index of the largest score. The value is not echoed, because only the
# last expression of a cell is displayed.
p.argmax()
# One-hot target with the entry at index 2 set.
t = np.array([0,0,1])
# Loss of the current weights for input `x` against target `t`.
net.loss(x, t)

1.4619880397509868

In [44]:
def f(w):
    # `w` is intentionally unused: the loss is always computed from `net`,
    # `x` and `t`. This still works with numerical_gradient because that
    # routine perturbs the array it is handed (here net.w) in place.
    return net.loss(x, t)

dw = numerical_gradient(f, net.w)
dw


array([[ 0.24910513,  0.21182971, -0.46093484],
       [ 0.37365769,  0.31774457, -0.69140226]])