In [1]:
%matplotlib inline


PyTorch: Tensors and autograd
-------------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation computes the forward pass using operations on PyTorch
Tensors, and uses PyTorch autograd to compute gradients.


A PyTorch Tensor represents a node in a computational graph. If ``x`` is a
Tensor that has ``x.requires_grad=True`` then ``x.grad`` is another Tensor
holding the gradient of some scalar value with respect to ``x``.



In [2]:
import torch

# Train a two-layer, bias-free ReLU network on random data with plain gradient
# descent, using autograd for the backward pass and a manual weight update.

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Seed the global RNG immediately before any random tensor is created so the
# data, the initial weights and the printed loss curve are the same on every
# Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs.
# requires_grad is left at its default (False), so autograd does not compute
# gradients with respect to these Tensors during the backward pass.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Create random Tensors for weights.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear layer -> ReLU (clamp at 0) -> linear layer.
    # The intermediate activations are not kept in named variables because
    # autograd records what it needs for the backward pass.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print the loss (sum of squared errors).
    # loss is a 0-dimensional (scalar) Tensor; loss.item() returns the
    # Python number it holds.
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call w1.grad and w2.grad will be Tensors holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but the update itself must not
    # be tracked by autograd.
    # torch.optim.SGD can be used to perform the same update.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # backward() accumulates into .grad, so zero the gradients after the
        # update; otherwise the next iteration would add to stale values.
        w1.grad.zero_()
        w2.grad.zero_()

0 32798656.0
1 33781912.0
2 40981376.0
3 45741272.0
4 39798520.0
5 24309458.0
6 10909502.0
7 4398286.5
8 2077015.125
9 1269201.625
10 933312.125
11 749510.8125
12 624568.875
13 529389.3125
14 453148.9375
15 390422.5625
16 338267.46875
17 294451.84375
18 257414.8125
19 225881.1875
20 198895.390625
21 175652.140625
22 155607.765625
23 138231.5625
24 123107.15625
25 109926.2265625
26 98393.796875
27 88251.0546875
28 79323.53125
29 71448.109375
30 64473.734375
31 58282.625
32 52772.95703125
33 47856.37109375
34 43462.1796875
35 39525.546875
36 35996.30859375
37 32821.109375
38 29966.099609375
39 27393.0234375
40 25070.84765625
41 22969.890625
42 21067.201171875
43 19340.84375
44 17772.013671875
45 16345.4873046875
46 15052.5751953125
47 13876.5888671875
48 12802.853515625
49 11821.9130859375
50 10924.7431640625
51 10102.578125
52 9348.9599609375
53 8657.5029296875
54 8022.34765625
55 7438.65087890625
56 6901.81494140625
57 6408.32421875
58 5953.98583984375
59 5534.53466796875
60 5147.57226

401 0.0005675730062648654
402 0.0005527881439775229
403 0.0005377176566980779
404 0.0005248093511909246
405 0.0005101532442495227
406 0.0004967206041328609
407 0.0004849989200010896
408 0.0004724879690911621
409 0.000460449984529987
410 0.0004496993205975741
411 0.00043695670319721103
412 0.00042679053149186075
413 0.0004164253477938473
414 0.00040606240509077907
415 0.00039667318924330175
416 0.00038725879858247936
417 0.000378115480998531
418 0.0003690006269607693
419 0.00036061694845557213
420 0.00035245652543380857
421 0.0003441825392656028
422 0.0003375065280124545
423 0.00032927756546996534
424 0.00032215352985076606
425 0.000315786455757916
426 0.000308642367599532
427 0.00030182438786141574
428 0.0002948288747575134
429 0.00028906919760629535
430 0.00028285611188039184
431 0.0002776266192086041
432 0.00027133969706483185
433 0.000264850037638098
434 0.0002593512472230941
435 0.0002545689349062741
436 0.0002494208456482738
437 0.0002444143174216151
438 0.00023999436234589666
439