In [None]:
# CHAIN RULE: x --> [a(x)] --> y --> [b(y)] --> z
# local gradient of a: dy/dx   (since y = a(x))
# local gradient of b: dz/dy   (since z = b(y))

# first calculate dy/dx, then calculate dz/dy
# at last, calculate the final gradient: dz/dx = dy/dx * dz/dy


# COMPUTATIONAL GRAPH: each node has inputs, an operator (a function), and an output

# 3 important things: [1] Forward pass: compute the loss, [2] Compute local gradients, [3] Backward pass: compute dLoss/dWeights using the chain rule.


"""let, x(i/p) * w(weights)  = y' (o/p) and  y --> actual y
      [1]  loss = s**2 = (y' - y)**2 = (wx - y)**2, where s = y' - y
      [2]  calculate local (intermediate) gradients at each node (dloss/ds, ds/dy', dy'/dw)
      [3]  use chain rule: dloss/dy' = dloss/ds * ds/dy', then dloss/dw = dloss/dy' * dy'/dw [final gradient]

      so, finally we have to minimize loss by updating weights."""

In [12]:
import torch

# One training sample: the input x and the target value y.
x, y = torch.tensor(1.0), torch.tensor(2.0)

# Trainable weight; requires_grad=True asks autograd to track operations on it.
w = torch.tensor(1.0, requires_grad=True)

# Forward pass: linear prediction, then the squared-error loss.
y_hat = torch.mul(w, x)
residual = y_hat - y
loss = residual.pow(2)

print(f"loss: {loss.item()}")

loss: 1.0


In [15]:
# backward pass and compute gradients
# Only one backward() call is needed for this step, so the graph is not
# retained. Gradients accumulate in w.grad across backward() calls, so
# backpropagating again through the same (stale) graph would report an
# inflated gradient. To take another step, re-run the forward pass first so
# the loss is rebuilt from the updated w.
loss.backward()
print(f"Gradient of loss w: {w.grad.item()}")

# update weights using gradient descent
LEARNING_RATE = 0.1  # step size of the gradient-descent update
with torch.no_grad():  # the in-place update itself must not be tracked by autograd
    w -= LEARNING_RATE * w.grad
    w.grad.zero_()  # reset gradients to zero for next iteration

print(f"Updated weight: {w.item()}")
# Note: In a real-world scenario, you would typically use an optimizer from PyTorch's `torch.optim` module to handle weight updates.
# This is a simple example to illustrate the concept of backpropagation and gradient descent.

Gradient of loss w: -6.0
Updated weight: 1.600000023841858
