In [7]:
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

In [2]:
# ================================================================== #
#                     1. Basic autograd example 1                    #
# ================================================================== #

# Three scalar leaf tensors (values 1, 2, 3) that autograd will track.
x, w, b = (torch.tensor(value, requires_grad=True) for value in (1., 2., 3.))



In [3]:
# Build a computational graph.
# x, w and b all have requires_grad=True, so autograd records the multiply
# and the add; a later y.backward() can then differentiate y with respect
# to each of them (dy/dx = w, dy/dw = x, dy/db = 1).
y = w * x + b    # y = 2 * x + 3

In [5]:
# Backpropagate from y; this fills in .grad on x, w and b.
y.backward()

# Show the gradients, one per line:
#   x.grad = dy/dx = w = 2
#   w.grad = dy/dw = x = 1
#   b.grad = dy/db = 1
print(x.grad, w.grad, b.grad, sep='\n')

tensor(2.)
tensor(1.)
tensor(1.)


In [8]:
# ================================================================== #
#                    2. Basic autograd example 2                     #
# ================================================================== #

# Create tensors of shape (10, 3) and (10, 2).
# Seed the global RNG first: the inputs, the targets and the layer weights
# initialised later are all drawn from it, so without a seed every
# Restart & Run All prints different weights, losses and gradients.
torch.manual_seed(0)
x = torch.randn(10, 3)    # inputs: 10 samples, 3 features
y = torch.randn(10, 2)    # targets: 10 samples, 2 outputs

In [9]:
# Fully connected layer mapping 3 input features to 2 output features;
# print its randomly initialised weight matrix and bias vector.
linear = nn.Linear(in_features=3, out_features=2)
print('w: ', linear.weight)
print('b: ', linear.bias)

w:  Parameter containing:
tensor([[-0.3080,  0.2030,  0.3855],
        [ 0.4901,  0.1285, -0.3597]], requires_grad=True)
b:  Parameter containing:
tensor([0.4372, 0.1053], requires_grad=True)


In [10]:
# Mean-squared-error objective, minimised with plain SGD (learning rate 0.01)
# over the linear layer's weight and bias.
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(params=linear.parameters(), lr=0.01)

In [11]:
# Forward pass.
# Push the (10, 3) inputs through the 3 -> 2 linear layer; pred therefore
# has shape (10, 2), matching the targets y.
pred = linear(x)

In [13]:
# Mean squared error between predictions and targets; .item() turns the
# 0-dim loss tensor into a plain Python float for printing.
loss = criterion(input=pred, target=y)
print('loss: ', loss.item())

loss:  2.804651975631714


In [14]:
# Backpropagate the loss to fill in .grad on the layer's parameters.
loss.backward()

# Show the gradient of the loss with respect to the weight and the bias.
print('dL/dw: ', linear.weight.grad)
print('dL/db: ', linear.bias.grad)

dL/dw:  tensor([[ 0.1134,  0.3589, -0.1381],
        [ 1.6961,  1.4472, -0.7204]])
dL/db:  tensor([ 0.4731, -0.0966])


In [15]:
# 1-step gradient descent.
# Updates linear.weight and linear.bias in place using the gradients
# computed by loss.backward() and the learning rate given to SGD (0.01).
optimizer.step()


In [16]:

# optimizer.step() could instead be written by hand as the two in-place
# updates below (0.01 is the same learning rate that was given to SGD):
# linear.weight.data.sub_(0.01 * linear.weight.grad.data)
# linear.bias.data.sub_(0.01 * linear.bias.grad.data)

# Re-run the forward pass with the updated parameters and report the new loss.
pred = linear(x)
loss = criterion(input=pred, target=y)
print('loss after 1 step optimization: ', loss.item())

loss after 1 step optimization:  2.7463717460632324
