In [1]:
%load_ext autoreload
%autoreload 2

In [27]:
import numpy as np
from source.digits import get_X_y, show_image, TwoLayerClassifier
import matplotlib.pyplot as plt

In [3]:
# Load the feature matrix and label array through the project helper.
X, y = get_X_y()

# Show both shapes as a sanity check; the recorded output is
# ((1797, 64), (1797, 10)), i.e. 1797 rows in each array.
# NOTE(review): the 10 label columns are presumably one-hot digit classes — confirm in source.digits.
X.shape, y.shape

((1797, 64), (1797, 10))

In [87]:
# 64 and 10 match the column counts of X and y shown in the recorded shape output;
# 16 is presumably the hidden-layer width — TODO confirm the argument order in source.digits.
nn = TwoLayerClassifier(64, 16, 10)

In [95]:
# Evaluate the loss on the first two samples only; the call returns a
# (loss, grads) pair. `reg` is not passed here, so whatever default
# TwoLayerClassifier.loss defines applies — verify it against any later call
# that passes an explicit `reg`.
loss, grads = nn.loss(X[:2], y[:2])

In [96]:
def rel_error(x, y):
    """
    Return the largest elementwise relative error between x and y.

    For each element the error is |x - y| / max(1e-8, |x| + |y|); the 1e-8
    floor avoids a division by zero when both entries are zero. The maximum
    over all elements is returned.
    """
    abs_diff = np.abs(x - y)
    # Combined magnitude of both operands, floored at 1e-8.
    magnitude = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / magnitude)

In [98]:
# Gradient check: compare the analytic gradients returned by nn.loss with a
# centered finite-difference estimate of the same loss.
#
# The analytic gradients are recomputed here with reg=0 so that both sides of
# the comparison differentiate the same function; reusing gradients from a
# call made with a different (default) `reg` would compare two different losses.
#
# NOTE(review): eval_numerical_gradient is defined in a cell further down this
# notebook; on a fresh kernel that cell must run first (or be moved above).
# NOTE(review): the check perturbs nn.params[param_name] in place, so it only
# works if nn.loss reads those same arrays — confirm in TwoLayerClassifier.


def _loss_without_reg(_unused_param):
    """Loss on the first two samples with reg=0; the argument is ignored
    because the parameter array is perturbed in place by the caller."""
    return nn.loss(X[:2], y[:2], reg=0)[0]


_, grads_without_reg = nn.loss(X[:2], y[:2], reg=0)

for param_name in grads_without_reg:
    param_grad_num = eval_numerical_gradient(_loss_without_reg, nn.params[param_name])
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads_without_reg[param_name])))

W2 max relative error: 1.000000e+00
b2 max relative error: 1.000000e+00
W1 max relative error: 1.000000e+00
b1 max relative error: 1.000000e+00


In [90]:


def eval_numerical_gradient(f, x, verbose=False, h=0.00001):
    """
    Estimate the gradient of f at x with centered finite differences.

    Every entry of x is perturbed in place by +h and then -h, f is evaluated
    at each perturbed point, and (f(x + h) - f(x - h)) / (2 * h) is stored as
    the partial derivative for that entry.

    Parameters
    ----------
    f : callable
        Function of a single argument returning a scalar. It is always called
        with the array object `x` itself.
    x : numpy.ndarray
        Point at which to evaluate the gradient. It must be writable: entries
        are temporarily overwritten and then restored, including when f raises.
        It should hold floating-point values, because the perturbed value is
        written back into x and an integer array would truncate the +/- h step.
    verbose : bool, optional
        When true, print each index together with its estimated slope.
    h : float, optional
        Step size of the perturbation.

    Returns
    -------
    numpy.ndarray
        Array created with np.zeros_like(x) holding the estimated partials.
    """

    # The centered formula does not need the value at the unperturbed point;
    # the call is kept so f is invoked in the same sequence as before.
    f(x)
    grad = np.zeros_like(x)

    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        try:
            x[ix] = oldval + h  # increment by h
            fxph = f(x)  # evaluate f(x + h)
            x[ix] = oldval - h  # decrement by h
            fxmh = f(x)  # evaluate f(x - h)
        finally:
            # Always restore the entry, so a failing f cannot leave the
            # caller's array permanently perturbed.
            x[ix] = oldval

        # compute the partial derivative with the centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()  # step to next dimension

    return grad