In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# Train a two-layer (one hidden layer, no biases) ReLU network with plain
# numpy, computing the forward pass, the loss and the backward pass by hand.

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Seed numpy's global random generator so that the random data, the initial
# weights, and therefore every printed loss are identical on each
# "Restart Kernel -> Run All". Note: this resets the global numpy RNG state.
SEED = 0
np.random.seed(SEED)

# Create random input and output data
x = np.random.randn(N, D_in)   # inputs,  shape (N, D_in)
y = np.random.randn(N, D_out)  # targets, shape (N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)   # input  -> hidden, shape (D_in, H)
w2 = np.random.randn(H, D_out)  # hidden -> output, shape (H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)               # hidden pre-activations, shape (N, H)
    h_relu = np.maximum(h, 0)   # ReLU: clamp negative pre-activations to 0
    y_pred = h_relu.dot(w2)     # predictions, shape (N, D_out)

    # Compute and print loss: sum of squared differences over the whole batch
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of the loss with respect to w1 and w2
    grad_y_pred = 2.0 * (y_pred - y)          # d(loss)/d(y_pred)
    grad_w2 = h_relu.T.dot(grad_y_pred)       # d(loss)/d(w2), shape (H, D_out)
    grad_h_relu = grad_y_pred.dot(w2.T)       # d(loss)/d(h_relu), shape (N, H)
    # ReLU passes the gradient through only where its input was not negative;
    # copy first so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)                 # d(loss)/d(w1), shape (D_in, H)

    # Update weights: one in-place gradient-descent step
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 33094954.524653867
1 27840669.702111617
2 23708094.862486206
3 18653487.97964617
4 13224644.154102813
5 8596094.910186276
6 5364063.5071746865
7 3368226.578798359
8 2208999.2994029173
9 1537063.6407210007
10 1134912.2445813266
11 880115.236752158
12 707932.6647817334
13 584345.3011341053
14 491009.28747736034
15 417809.7006676921
16 358782.76828366343
17 310253.13965269504
18 269820.15192040923
19 235804.52100132586
20 206906.4155962657
21 182209.1490176625
22 160982.33403170155
23 142657.5500734312
24 126752.35441994242
25 112893.73180782486
26 100778.40553844236
27 90149.54041162867
28 80804.0589174183
29 72574.84354950256
30 65298.01924738342
31 58848.52403269629
32 53121.0660383487
33 48021.95934473272
34 43474.04441689045
35 39408.76929704969
36 35768.594898306386
37 32504.537607983417
38 29574.997055239262
39 26940.82856963714
40 24568.72936613148
41 22428.34717780584
42 20495.405803804628
43 18746.736924627032
44 17163.49076187225
45 15728.701554101652
46 14426.13323647097
47 

408 1.0935794261634345e-05
409 1.0366516555380977e-05
410 9.826783999814874e-06
411 9.315188932642273e-06
412 8.830330214996862e-06
413 8.370728007547827e-06
414 7.935121986753704e-06
415 7.5222424338739184e-06
416 7.1307730450918945e-06
417 6.759717114302953e-06
418 6.408080725339318e-06
419 6.0746770068395785e-06
420 5.75864030394476e-06
421 5.45918491632408e-06
422 5.175222646932016e-06
423 4.906030972065616e-06
424 4.650931645764229e-06
425 4.40906649656798e-06
426 4.179816362002325e-06
427 3.962553407766079e-06
428 3.7565282592100795e-06
429 3.5612464318641746e-06
430 3.3761492082208147e-06
431 3.200648470435376e-06
432 3.0342829738229466e-06
433 2.8766246330734712e-06
434 2.727142758621824e-06
435 2.585416612263609e-06
436 2.4510965216285227e-06
437 2.32375090367139e-06
438 2.20301318962191e-06
439 2.088598511111305e-06
440 1.9801026517746687e-06
441 1.8772585364698013e-06
442 1.7797662410497061e-06
443 1.6873277391274285e-06
444 1.5997008330664047e-06
445 1.5166508514657133e-06
