## Initial Setup

In [0]:
import torch


# Seed PyTorch's global RNG so the random data drawn below (and any later
# random operations in this kernel) are identical on every fresh run.
SEED = 0
torch.manual_seed(SEED)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10


# Random stand-in data sampled from a standard normal distribution:
# x holds the inputs with shape (N, D_in), y the targets with shape (N, D_out).
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)


## Defining Model

In [0]:
# Two-layer fully connected network: Linear -> ReLU -> Linear.
# Sequential calls the modules in the order they are listed, so the first
# Linear maps D_in -> H and the second maps H -> D_out.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=D_in, out_features=H),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=H, out_features=D_out),
)


# Squared-error criterion; reduction="sum" adds the per-element squared
# errors together instead of averaging them.
loss_fn = torch.nn.MSELoss(reduction="sum")


## Execution

In [3]:
# Step size for the hand-written gradient-descent update below.
learning_rate = 1e-4

for t in range(500):
    # Forward: run the whole batch through the network.
    y_pred = model(x)

    # Measure how far the predictions are from the targets and log it
    # together with the iteration index.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Clear the gradients left over from the previous iteration; PyTorch
    # accumulates into .grad on every backward pass rather than overwriting.
    model.zero_grad()

    # Backward: populate .grad on every learnable parameter of the model.
    loss.backward()

    # Plain gradient-descent step, applied in place. It runs under no_grad
    # so that the update itself is not recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

0 657.38818359375
1 607.0773315429688
2 563.5285034179688
3 525.4381103515625
4 491.5259094238281
5 460.9882507324219
6 433.66900634765625
7 408.7548522949219
8 385.74261474609375
9 364.38043212890625
10 344.58233642578125
11 326.1445617675781
12 308.7620849609375
13 292.38494873046875
14 276.8096008300781
15 262.13555908203125
16 248.208251953125
17 235.03579711914062
18 222.5055389404297
19 210.56817626953125
20 199.24131774902344
21 188.4724578857422
22 178.1813507080078
23 168.40882873535156
24 159.1022186279297
25 150.24867248535156
26 141.8421173095703
27 133.88917541503906
28 126.37588500976562
29 119.2607192993164
30 112.53780364990234
31 106.18180084228516
32 100.15411376953125
33 94.43775939941406
34 89.03731536865234
35 83.94618225097656
36 79.141357421875
37 74.61363220214844
38 70.34686279296875
39 66.33379364013672
40 62.551536560058594
41 58.98817443847656
42 55.64091491699219
43 52.48760986328125
44 49.51953887939453
45 46.73021697998047
46 44.1064453125
47 41.635002136