In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Seed NumPy's global RNG before any random draw so that the data, the initial
# weights, and therefore every printed loss are identical on each fresh run
# (Restart Kernel -> Run All).
SEED = 0
np.random.seed(SEED)

# Create random input and output data
x = np.random.randn(N, D_in)   # inputs,  shape (N, D_in)
y = np.random.randn(N, D_out)  # targets, shape (N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)   # input  -> hidden, shape (D_in, H)
w2 = np.random.randn(H, D_out)  # hidden -> output, shape (H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)                # hidden pre-activations, shape (N, H)
    h_relu = np.maximum(h, 0)    # ReLU: clamp negative pre-activations to zero
    y_pred = h_relu.dot(w2)      # predictions, shape (N, D_out)

    # Compute and print loss (sum of squared errors over the whole batch)
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of the loss with respect to w1 and w2,
    # applying the chain rule backwards through the forward pass.
    grad_y_pred = 2.0 * (y_pred - y)       # d(loss)/d(y_pred)
    grad_w2 = h_relu.T.dot(grad_y_pred)    # d(loss)/d(w2), shape (H, D_out)
    grad_h_relu = grad_y_pred.dot(w2.T)    # d(loss)/d(h_relu), shape (N, H)
    # ReLU passes the gradient through only where its input was not negative;
    # copy first so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)              # d(loss)/d(w1), shape (D_in, H)

    # Update weights: one step of plain gradient descent, in place
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 33389364.42165538
1 29460768.46855653
2 30189139.42039709
3 30227243.932131357
4 26470860.199081637
5 19172868.419840846
6 11615607.522667553
7 6322995.074544705
8 3404667.416776197
9 1966693.7044924544
10 1265580.3715541763
11 902749.871771727
12 694330.3936531977
13 560260.0564308325
14 465222.2456291064
15 393088.5545109387
16 335805.59507813386
17 289078.2828913465
18 250351.4485688766
19 217790.1794408948
20 190298.71648690276
21 166885.02712898306
22 146841.853359633
23 129613.38647623788
24 114740.02545837007
25 101847.30147616447
26 90635.19695220166
27 80855.21916383487
28 72305.95177775834
29 64804.488088365346
30 58198.691077372925
31 52366.34722841518
32 47206.51233632679
33 42625.06211110517
34 38564.82142909207
35 34948.476547253296
36 31720.56488034049
37 28835.093780034334
38 26250.576568326826
39 23932.25758376127
40 21847.743468867666
41 19971.77114291107
42 18280.491583148654
43 16754.781595002933
44 15375.426229946583
45 14126.429651311224
46 12994.239053314184
47

388 0.0012805399519807649
389 0.00122732604009665
390 0.0011763196704074653
391 0.001127436403971519
392 0.001080591664107496
393 0.001035690179192808
394 0.0009926599382063341
395 0.0009514163372514865
396 0.0009118958776145067
397 0.0008740140865151735
398 0.0008377098448058356
399 0.0008029210498883535
400 0.0007695790087798953
401 0.0007376168640052115
402 0.0007069909600732579
403 0.0006776360066713676
404 0.0006495022747007727
405 0.0006225355933549293
406 0.0005966954650396265
407 0.0005719268370320156
408 0.0005481878013273153
409 0.0005254358543498275
410 0.0005036305790067602
411 0.0004827296258855561
412 0.0004626973409055158
413 0.0004434995057956684
414 0.0004250989494184208
415 0.00040746367000236365
416 0.00039056009733764063
417 0.00037436168833874107
418 0.00035883158961887867
419 0.00034394890912416165
420 0.0003296831732193823
421 0.00031601159214322274
422 0.00030290540472736714
423 0.00029034424079493884
424 0.00027830553614421557
425 0.00026676643011199787
426 0.0