In [None]:
def L_i(x, y, W):
    '''
    Multiclass SVM (hinge) loss for one example, written with an explicit
    loop over the classes (no vectorization).
    Args:
        - x is the feature vector of the example (e.g. a 3073 x 1 column in
          CIFAR-10, the last entry being the constant of the bias trick)
        - y is an integer giving the index of the correct class
          (e.g. between 0 and 9 in CIFAR-10)
        - W is the weight matrix with one row per class
          (e.g. 10 x 3073 in CIFAR-10)
    Returns:
        the sum over every class j other than y of
        max(0, score_j - score_y + margin), with margin fixed at 1.0
    '''
    margin = 1.0
    class_scores = W.dot(x)  # one score per row of W
    target_score = class_scores[y]
    total = 0.0
    for j, score in enumerate(class_scores):
        # only the incorrect classes contribute a hinge term
        if j != y:
            total += max(0, score - target_score + margin)
    return total

def L_i_vectorized(x, y, W):
    '''
    Multiclass SVM (hinge) loss for one example with the loop over classes
    replaced by array operations. "Half-vectorized": the work for a single
    example is loop-free, while looping over the examples is left to the caller.
    Args:
        - x is the feature vector of the example (bias entry included)
        - y is an integer giving the index of the correct class
        - W is the weight matrix with one row per class
    Returns:
        the hinge margins summed over all incorrect classes
    '''
    margin = 1.0
    class_scores = W.dot(x)
    # hinge term for every class in a single array expression
    hinge = np.maximum(0, class_scores - class_scores[y] + margin)
    # at index y the score difference cancels and leaves just the margin;
    # the correct class must not contribute, so zero that entry out
    hinge[y] = 0
    return hinge.sum()

### Computing the gradient

#### Computing the gradient numerically with finite differences

In [1]:
def eval_numerical_gradient(f, x, h=1e-5):
    '''
    a naive implementation of numerical gradient of f at x, using the
    forward difference (f(x + h) - f(x)) / h one coordinate at a time
    Args:
        - f should be a function that takes a single argument
        - x is the point (numpy array) to evaluate the gradient at; each entry
          is perturbed in place while f is evaluated and then restored
        - h is the finite-difference step size (defaults to 1e-5)
    Returns:
        an array with the same shape as x holding the estimated partial
        derivatives; an empty x gives an empty gradient
    NOTE: assumes x has a floating-point dtype; with an integer array the
    perturbation would be truncated when written back into x
    '''
    fx = f(x)  # baseline value, evaluated once
    grad = np.zeros(x.shape)

    # np.ndindex yields every index tuple of x (and nothing for an empty array)
    for ix in np.ndindex(x.shape):
        old_value = x[ix]
        x[ix] = old_value + h  # increment by h
        try:
            fxh = f(x)  # evaluate f(x + h)
        finally:
            # restore the entry even when f raises, so the caller's array
            # is never left perturbed
            x[ix] = old_value

        # compute the partial derivative
        grad[ix] = (fxh - fx) / h  # the slope

    return grad


# The centered difference [f(x + h) - f(x - h)] / (2h) can be used instead of the forward difference; it is usually more accurate.

In [None]:
def L(x, y, W):
    '''
    Placeholder: the body is not implemented, so every call returns None.
    NOTE(review): CIFAR10_loss_func passes the whole training set here, so
    this is presumably meant to return the loss over all examples - confirm.
    '''
    return None

In [2]:
def CIFAR10_loss_func(W):
    '''
    Loss as a function of the weights W alone: the module-level X_train and
    Y_train are held fixed, so this can be handed to code that expects a
    single-argument function.
    '''
    loss = L(X_train, Y_train, W)
    return loss

# random starting weights (10 x 3073), scaled down to small values
W = np.random.rand(10, 3073) * 0.001
# numerical gradient of the loss with respect to W
df = eval_numerical_gradient(CIFAR10_loss_func, W)

loss_original = CIFAR10_loss_func(W)
print('Original loss:%f' % (loss_original))

# step along the negative gradient with several step sizes and compare losses
for step_size_log in range(-10, 0):
    step_size = 10 ** step_size_log
    W_new = W - step_size * df
    loss_new = CIFAR10_loss_func(W_new)
    # %e for the step size: %f keeps six decimals, so 1e-10 .. 1e-7 would
    # all be printed as 0.000000
    print('For step size %e new loss: %f' % (step_size, loss_new))

NameError: name 'L' is not defined