In [None]:
%matplotlib inline


PyTorch: optim
--------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation uses the nn package from PyTorch to build the network.

Rather than manually updating the weights of the model as we have been doing,
we use the optim package to define an Optimizer that will update the weights
for us. The optim package defines many optimization algorithms that are commonly
used for deep learning, including SGD+momentum, RMSProp, Adam, etc.



In [1]:
import torch

# Seed PyTorch's global random number generator before anything random is
# drawn. Without this, the random data and the initial weights (and therefore
# every printed loss value) differ on each "Restart Kernel -> Run All".
torch.manual_seed(0)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Use the nn package to define our model and loss function.
# The model is Linear -> ReLU -> Linear, i.e. a single hidden layer of width H.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# reduction='sum' adds up the squared errors instead of averaging them.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use Adam; the optim package contains many other
# optimization algorithms. The first argument to the Adam constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward()
    # is called. Check out the docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters.
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters.
    optimizer.step()

0 706.1762084960938
1 688.6934814453125
2 671.6663208007812
3 655.0911865234375
4 638.9898071289062
5 623.3514404296875
6 608.1075439453125
7 593.29443359375
8 578.8784790039062
9 564.8502197265625
10 551.140869140625
11 537.8231201171875
12 524.9578857421875
13 512.573486328125
14 500.6503601074219
15 489.0698547363281
16 477.7608642578125
17 466.77069091796875
18 456.1504821777344
19 445.80792236328125
20 435.6931457519531
21 425.90069580078125
22 416.3762512207031
23 407.019775390625
24 397.8765563964844
25 388.9461669921875
26 380.275146484375
27 371.8204040527344
28 363.5655212402344
29 355.50537109375
30 347.6270446777344
31 339.88189697265625
32 332.2909851074219
33 324.86199951171875
34 317.5907287597656
35 310.44476318359375
36 303.4221496582031
37 296.5668640136719
38 289.8286437988281
39 283.2106628417969
40 276.7201232910156
41 270.34051513671875
42 264.078857421875
43 257.92071533203125
44 251.88290405273438
45 245.96578979492188
46 240.16796875
47 234.47625732421875
48 22