A fully-connected ReLU network with one hidden layer, trained to predict y from x by minimizing squared Euclidean distance.

This implementation uses the nn package from PyTorch to build the network.

Rather than manually updating the weights of the model as we have been doing, we use the optim package to define an Optimizer that will update the weights for us. The optim package defines many optimization algorithms that are commonly used for deep learning, including SGD+momentum, RMSProp, Adam, etc.

In [1]:
import torch

In [2]:
# Problem dimensions:
#   N     - number of samples in the batch
#   D_in  - size of each input vector
#   H     - width of the hidden layer
#   D_out - size of each output vector
N = 64
D_in = 1000
H = 100
D_out = 10

# Random Tensors standing in for the inputs (x) and the targets (y).
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Build the network with the nn package: Linear -> ReLU -> Linear, chained
# together by Sequential so that calling model(x) runs them in order.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=D_in, out_features=H),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=H, out_features=D_out),
)

# Squared-error loss; reduction='sum' adds up the per-element errors instead
# of averaging them.
loss_fn = torch.nn.MSELoss(reduction='sum')

In [3]:
# Let the optim package take care of the weight updates. Adam is used here,
# but torch.optim provides many other optimization algorithms. The optimizer
# is handed model.parameters(), i.e. the Tensors it is responsible for
# updating.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    # Reset the gradients of every parameter the optimizer manages (the
    # learnable weights of the model). By default .backward() accumulates
    # into the existing gradient buffers rather than overwriting them, so
    # they must be cleared before each backward pass. Check out the docs of
    # torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Forward pass: feed x through the model to get the predicted y.
    y_pred = model(x)

    # Measure how far the prediction is from the target and report it.
    loss = loss_fn(y_pred, y)
    print(f"{t} {loss.item()}")

    # Backward pass: compute the gradient of the loss with respect to the
    # model parameters.
    loss.backward()

    # Apply one optimizer update to the parameters using those gradients.
    optimizer.step()

0 691.4864501953125
1 674.3720092773438
2 657.6836547851562
3 641.4412841796875
4 625.6710205078125
5 610.3277587890625
6 595.3897705078125
7 581.016357421875
8 567.12890625
9 553.6511840820312
10 540.5231323242188
11 527.73046875
12 515.2595825195312
13 503.1619873046875
14 491.4029846191406
15 480.03912353515625
16 468.9888916015625
17 458.2541809082031
18 447.7975158691406
19 437.65850830078125
20 427.7842102050781
21 418.1620788574219
22 408.75872802734375
23 399.5650634765625
24 390.53277587890625
25 381.6871032714844
26 373.15423583984375
27 364.84881591796875
28 356.72552490234375
29 348.7978515625
30 341.1108093261719
31 333.59112548828125
32 326.2301330566406
33 319.0254211425781
34 311.9583435058594
35 305.0216979980469
36 298.23583984375
37 291.6277770996094
38 285.1474304199219
39 278.7840881347656
40 272.5206604003906
41 266.37255859375
42 260.31390380859375
43 254.35731506347656
44 248.5095672607422
45 242.7652130126953
46 237.14337158203125
47 231.63031005859375
48 226.2