# Autograd Mechanics

### This note presents an overview of how autograd works and how it records operations.

In [8]:
import torch.nn as nn
import torch.optim as optim
import torch
import torchvision

## Excluding subgraphs from backward

### Every Tensor has a flag, `requires_grad`, that allows fine-grained exclusion of subgraphs from gradient computation and can increase efficiency.

In [9]:
# requires_grad propagation: the result of an operation requires grad as soon as
# any one of its inputs does, and only stays grad-free when every input is grad-free.

x = torch.randn(5, 5)  # tensors do not require grad unless explicitly asked to
y = torch.randn(5, 5)  # same here: requires_grad is False
z = torch.randn(5, 5, requires_grad=True)  # the only input that requires grad

# Neither operand requires grad, so their sum does not either.
a = torch.add(x, y)
print(a.requires_grad)

# One operand (z) requires grad, which is enough for the result to require it too.
b = torch.add(a, z)
print(b.requires_grad)

False
True


In [10]:
# This is especially useful when you want to freeze part of your model, or you know in advance
# that you're not going to use gradients w.r.t. some parameters.
# NOTE: torchvision >= 0.13 deprecates `pretrained=True` in favour of the `weights=` argument;
# the call is left as-is here so the cell keeps working on older torchvision releases.

model = torchvision.models.resnet18(pretrained=True)
# Freeze every pretrained parameter so no gradients are computed for the backbone.
for param in model.parameters():
    param.requires_grad = False
# Replace the last fully-connected layer with a fresh one producing 100 outputs.
# The input width is read from the layer being replaced rather than hard-coded, so the cell
# keeps working if the backbone is swapped for one with a different feature size.
# Parameters of a newly constructed module have requires_grad=True by default.
model.fc = nn.Linear(model.fc.in_features, 100)

# Optimize only the classifier
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)

## How autograd encodes the history 