In [1]:
import numpy as np

# Problem sizes: N is the batch size, D_in the input dimension,
# H the hidden dimension, and D_out the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Draw every random number from a privately seeded generator so that
# Restart & Run All reproduces the same data, the same initial weights,
# and therefore the same loss trace, without touching NumPy's global RNG.
SEED = 0
rng = np.random.RandomState(SEED)

# Random input batch and random regression targets.
x = rng.randn(N, D_in)
y = rng.randn(N, D_out)

# Randomly initialised weights of the two linear layers (no bias terms).
w1 = rng.randn(D_in, H)
w2 = rng.randn(H, D_out)

learning_rate = 1e-6
num_steps = 500  # number of full-batch gradient-descent updates
for t in range(num_steps):
    # Forward pass: linear -> ReLU -> linear.
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Loss is the sum of squared errors over the whole batch (not the mean).
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop: gradients of the loss with respect to w2 and w1,
    # obtained by applying the chain rule from the output backwards.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    # ReLU gate: zero the gradient wherever the pre-activation was negative.
    # Work on a copy so grad_h_relu itself is left untouched; entries where
    # h == 0 keep their gradient because the mask is strictly `h < 0`.
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Plain gradient-descent step, applied in place to both weight matrices.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 35042659.45876932
1 29917194.07491207
2 27674649.737347737
3 24176838.01553967
4 18573776.142158907
5 12458817.181059983
6 7590421.805249261
7 4488812.233942021
8 2739988.9926420357
9 1792252.5587464152
10 1268247.3041380001
11 959611.2491052318
12 762358.8491834875
13 625553.4289658963
14 524129.78458791197
15 445265.09836533555
16 382112.16423713113
17 330281.97813076957
18 287095.68521322444
19 250818.11167396893
20 220023.5994094009
21 193687.66821836866
22 171076.96337671037
23 151561.79815520465
24 134645.61589180818
25 119917.17747244009
26 107068.77026746338
27 95810.80261127299
28 85921.20136915901
29 77214.8433384845
30 69525.61932533709
31 62725.27231272056
32 56694.23582661555
33 51333.27153027103
34 46555.099748193046
35 42287.018877447095
36 38468.31362502039
37 35044.981968910004
38 31970.47189118515
39 29206.556604677804
40 26715.09319273643
41 24467.142241700312
42 22439.02486572019
43 20603.70208633667
44 18940.65426579515
45 17431.093112783932
46 16058.024243167558

390 0.00045850600417983315
391 0.0004385307097121824
392 0.00041942642890262276
393 0.00040115191223214045
394 0.00038367860358749144
395 0.00036697247820301085
396 0.0003509959722580374
397 0.00033571656488660603
398 0.0003210989549761677
399 0.0003071293149953274
400 0.00029376600615209245
401 0.0002809880006092491
402 0.0002687660162400109
403 0.0002570817167206927
404 0.00024590397838220864
405 0.0002352153139837101
406 0.00022499232802311052
407 0.0002152160030261166
408 0.00020586615498248504
409 0.00019692327895416112
410 0.00018837372136865828
411 0.00018019537521186084
412 0.00017237148021456304
413 0.00016489245146365642
414 0.00015773646556697756
415 0.0001508912736016579
416 0.00014434373047497284
417 0.00013808677312731414
418 0.00013209736749576537
419 0.00012636870284682238
420 0.00012088999373479234
421 0.00011565226382460116
422 0.00011063887665789989
423 0.00010584371032784222
424 0.00010125985559837672
425 9.68737803536795e-05
426 9.26780376105385e-05
427 8.866521615