In [1]:
import torch, torchvision

In [3]:
# Load a ResNet-18 with pretrained weights.
# NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favor of
# `weights=` (e.g. `ResNet18_Weights.IMAGENET1K_V1`) — confirm the installed
# torchvision version before switching.
model = torchvision.models.resnet18(pretrained=True)
# Bare last expression so the notebook displays the module structure.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# One random "image" in the (batch, channels, height, width) layout that the
# model's Conv2d layers expect. The leading 1 is the batch size: a batch
# holding a single 3-channel, 64x64-pixel image.
data = torch.rand(1, 3, 64, 64)

# Random target values for that single sample: one row of 1000 entries.
labels = torch.rand(size=(1, 1000))

In [8]:
# Forward pass: run the input batch through the network.
prediction = model(data)
# Bare last expression so the notebook displays the raw output tensor.
prediction

tensor([[-6.6885e-01, -3.7314e-01, -3.9696e-01, -1.3377e+00, -3.9721e-01,
         -2.5575e-02, -6.9607e-01,  6.7276e-01,  4.0998e-01, -7.0392e-01,
         -7.7152e-01, -7.1203e-01, -2.9793e-01, -5.3480e-01, -1.2263e+00,
         -2.1339e-01, -5.0849e-01, -4.1842e-02, -2.0528e-01, -4.0935e-01,
         -1.1893e+00, -4.5670e-01, -1.3280e+00,  3.4494e-01, -1.1563e+00,
         -1.0931e+00, -8.4165e-01, -1.1085e+00, -8.9042e-01, -1.3943e-01,
         -7.2850e-01, -7.9842e-01, -5.2842e-01, -5.8157e-01, -4.1097e-01,
         -2.3199e-01,  4.4488e-01, -8.2976e-01, -3.8191e-01,  2.1901e-01,
         -6.1787e-01, -8.7953e-01, -8.4999e-01, -4.2746e-01, -2.5749e-01,
         -4.1676e-01, -7.0803e-01, -3.2501e-01, -1.0683e+00, -1.0845e+00,
         -7.0636e-01,  9.9255e-01, -3.7412e-01, -5.7979e-01, -2.2219e-01,
         -1.2825e+00, -4.4568e-01, -1.5992e+00, -4.4202e-01, -4.7013e-01,
          7.4405e-01,  5.3861e-02, -7.5765e-02, -3.3908e-02, -6.8063e-01,
         -3.6045e-01, -2.9525e-01, -5.

In [9]:
# Toy scalar "loss": the signed sum of (prediction - labels).
# backward() then propagates it through the graph and stores the resulting
# gradients in the .grad attribute of the tensors that require grad.
loss = torch.sum(prediction - labels)
loss.backward()

In [12]:
# Build an SGD (stochastic gradient descent) optimizer over all model
# parameters, with learning rate 0.01 and momentum 0.9.
# NOTE(review): the name `optim` is rebound to the `torch.optim` module by a
# later cell (`from torch import nn, optim`), so re-running cells out of
# order can leave `optim` pointing at the wrong object.
optim = torch.optim.SGD(model.parameters(), lr = 1e-2, 
        momentum = 0.9)

In [13]:
# Apply one gradient-descent update: each registered parameter is adjusted
# using the gradient stored in its .grad by the earlier backward() call.
optim.step()

### Differentiation in Autograd

In [9]:
# requires_grad=True tells autograd to record every operation performed on
# these tensors, so gradients with respect to them can be computed later.
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

$Q = 3a^3 - b^2$

In [10]:
# Q = 3a^3 - b^2, computed elementwise from a and b so autograd tracks it.
# In the neural-network analogy, a and b play the role of the parameters
# and Q the role of the error.
Q = 3 * a.pow(3) - b.pow(2)
Q

tensor([-12.,  65.], grad_fn=<SubBackward0>)

$\frac{\partial Q}{\partial a} = 9a^2 \qquad \frac{\partial Q}{\partial b} = -2b \qquad \frac{dQ}{dQ} = 1$

In [11]:
# Q is a vector, not a scalar, so backward() needs an explicit `gradient`
# argument with the same shape as Q. It represents dQ/dQ, which is 1 for
# every element. Deriving it from Q (instead of hard-coding [1., 1.]) keeps
# the shape, dtype and device in sync if a and b are changed.
external_grad = torch.ones_like(Q)
Q.backward(gradient=external_grad)

In [12]:
# Compare the hand-derived gradients (9a^2 and -2b) with the values autograd
# stored in .grad during Q.backward().
print(f"A gradients - Calculated {9*a**2} | Observed {a.grad}")
print(f"B gradients - Calculated {-2*b} | Observed {b.grad}")

# Fail loudly when the two disagree (for example because .grad is missing,
# stale, or accumulated from running cells out of order) instead of relying
# on the reader to spot the mismatch in the printout.
assert a.grad is not None and torch.allclose(a.grad, (9*a**2).detach()), "a.grad does not match 9a^2"
assert b.grad is not None and torch.allclose(b.grad, (-2*b).detach()), "b.grad does not match -2b"

A gradients - Calculated tensor([36., 81.], grad_fn=<MulBackward0>) | Observed tensor([36., 81.])
B gradients - Calculated tensor([-12.,  -8.], grad_fn=<MulBackward0>) | Observed tensor([-12.,  -8.])


#### Freezing the autograd

In [26]:
from torch import nn, optim

# Start again from a pretrained ResNet-18 and freeze it: with requires_grad
# switched off on a parameter, autograd no longer computes a gradient for it.
model = torchvision.models.resnet18(pretrained=True)

for param in model.parameters():
    param.requires_grad_(False)

In [30]:
# Suppose we want to keep the model structure but replace the classifier,
# which lives in the `fc` layer.
#
# The input width is read from the layer being replaced instead of being
# hard-coded as 512, so the cell keeps working if the backbone is swapped.
# The 10 outputs are presumably the number of classes of the new task.
# A freshly constructed nn.Linear has requires_grad=True on its weight and
# bias, so this new layer is not frozen.
model.fc = nn.Linear(model.fc.in_features, 10)

In [31]:
# Optimize only the classifier: register just the parameters that still
# require gradients (the new `fc` layer) rather than every frozen weight.
# If nothing is trainable, SGD raises on the empty parameter list instead of
# silently building an optimizer that updates nothing.
# `torch.optim` is spelled out in full because the bare name `optim` is bound
# to two different objects in this notebook (an SGD instance earlier, the
# module after `from torch import nn, optim`).
optimizer = torch.optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=1e-2,
    momentum=0.9,
)