In [19]:
import torch
from torchvision.models import resnet18, ResNet18_Weights
model = resnet18(weights=ResNet18_Weights.DEFAULT)
# single image with 3 channels, height and width of 64
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)


In [20]:
prediction = model(data) # forward pass through the neural net

In [21]:
loss = (prediction - labels).sum()
loss.backward() # backpropagation to calculate weight gradients
# stores the gradients for each model parameter in the parameters .grad attribute

In [22]:
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [23]:
optim.step() # do gradient descent, the optimizer will adjust each parameter by its gradient stored in .grad

In [24]:
# differentiation in autograd
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)
Q = 3*a**3 - b**2

In [25]:
external_grad = torch.tensor([1., 1.]) # we need to explicitly pass a gradient argument in q.backward because its a vector
# gradient is a tensor of the same shape as Q and it represents the gradient of Q wrt itself
Q.backward(gradient=external_grad)
# check if gradients are correct
print(9*a**2 == a.grad)
print(-2*b**2)

tensor([True, True])
tensor([-72., -32.], grad_fn=<MulBackward0>)


In [None]:
# torch autograd tracks operations on all tensors which have requires_grad set to True, for tensors that dont require gradients,
# setting this attribute to false excludes it from the gradient computation DAG
x = torch.rand(5, 5)
y = torch.rand(5, 5)
z = torch.rand((5, 5), requires_grad=True)

# the output tensor of an operation will require gradients if only a single input tensor has requires_grad=True
a = x + y
print(f"Does `a` require gradients?: {a.requires_grad}")
b = x + z
print(f"Does `b` require gradients?: {b.requires_grad}")

Does `a` require gradients?: False
Does `b` require gradients?: True


In [28]:
# in NN, parameters that dont compute gradients are called frozen parameters
from torch import nn, optim

model = resnet18(weights=ResNet18_Weights.DEFAULT)
# freeze all parameters in the network
for param in model.parameters():
    param.requires_grad = False

In [29]:
# In resnet, the classifier is the last linear layer model.fc
# we can simply replace it with a new linear layer (unfrozen by default) that acts as our classifier
model.fc = nn.Linear(512, 10)

In [30]:
# all parameters in the model, except for the parameters of model.fc are frozen
# the only parameters that compute the gradients are the weights and bias of model.fc
optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
