# AUTOGRAD MECHANICS
https://pytorch.org/docs/stable/notes/autograd.html#requires-grad

This note presents an overview of how autograd works and how it records operations. It’s not strictly necessary to understand all of this, but we recommend getting familiar with it, as it will help you write more efficient, cleaner programs and can aid you in debugging.

In [1]:
import torch, torchvision

In [3]:
# Build two tensors that do not track gradients and one that does.
x = torch.randn(5, 5)  # requires_grad=False by default
y = torch.randn(5, 5)
z = torch.randn((5, 5), requires_grad=True)
# Neither x nor y requires grad, so their sum does not either
# (the cell output below shows False).
a = x + y
a.requires_grad

False

In [4]:
# z was created with requires_grad=True, so the sum a + z requires grad too
# (the cell output below shows True).
b = a + z
b.requires_grad

True

In [7]:
# Load a pretrained ResNet-18 and turn off gradient tracking for every
# parameter it currently has.
model = torchvision.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# Replace the last fully-connected layer with a new one that has 100 outputs.
# Parameters of newly constructed modules have requires_grad=True by default.
# The input width is read from the layer being replaced rather than
# hard-coded as 512, so this line stays correct if the backbone is swapped.
model.fc = torch.nn.Linear(model.fc.in_features, 100)

# Optimize only the classifier: only the new layer's parameters are handed
# to the optimizer.
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)

# Bare expression so the notebook displays the module structure.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [10]:
# define a train function to be used in different threads 
def train_fn():
    """Run one forward and one backward pass on a fresh 5x5 tensor of ones.

    Takes no arguments and returns nothing; it is written as a standalone
    function so it can be passed as the target of a thread.
    """
    inp = torch.ones(5, 5, requires_grad=True)
    # forward pass: (inp + 3) * (inp + 4) * 0.5
    plus_three = inp + 3
    plus_four = inp + 4
    out = plus_three * plus_four * 0.5
    # backward pass: reduce to a scalar, then backpropagate
    total = out.sum()
    total.backward()
    # an optimizer update could follow here

import threading
# Users write their own threading code to drive train_fn; here ten threads
# are created, each running train_fn once.
threads = [threading.Thread(target=train_fn) for _ in range(10)]
for p in threads:
    p.start()

# Block until every thread has finished.
for p in threads:
    p.join()
