In [268]:
# function that returns the derivative of x ** 2
def dy_dx(x):
    """Return dy/dx evaluated at ``x`` for y = x ** 2 (power rule: 2x)."""
    slope = 2 * x
    return slope

dy_dx(3)  

6

In [269]:
import math
# function that returns dz/dx if y = x ** 2 and z = sin(y)

def dz_dx(x):
    """Return dz/dx evaluated at ``x`` for y = x ** 2 and z = sin(y).

    Chain rule: dz/dx = dy/dx * dz/dy = 2x * cos(x ** 2).
    """
    y = x ** 2
    dz_dy = math.cos(y)   # derivative of sin(y) with respect to y
    dy_dx_at_x = 2 * x    # derivative of x ** 2 with respect to x
    return dy_dx_at_x * dz_dy
dz_dx(3)

-5.466781571308061

In [270]:
# function that returns du/dx if y = x ** 2, z = sin(y) and u = e ** z

def du_dx(x):
    """Return du/dx evaluated at ``x`` for y = x ** 2, z = sin(y) and u = e ** z.

    Chain rule: du/dx = dy/dx * dz/dy * du/dz = 2x * cos(y) * exp(z).
    """
    y = x ** 2
    z = math.sin(y)
    return 2 * x * math.cos(y) * math.exp(z)

du_dx(2)

-1.226664708936842

## Using Autograd To Calculate Derivatives

In [271]:
import torch

1. y = x ** 2

In [272]:
x = torch.tensor(3.0, requires_grad=True)
x

tensor(3., requires_grad=True)

In [273]:
y = x ** 2

In [274]:
y

tensor(9., grad_fn=<PowBackward0>)

In [275]:
y.backward()   # backpropagates from y: computes dy/dx and stores it in x.grad

In [276]:
x.grad

tensor(6.)

2. y = x ** 2, z = sin(y)

In [277]:
x = torch.tensor(3.0, requires_grad=True)

In [278]:
y = x ** 2
y

tensor(9., grad_fn=<PowBackward0>)

In [279]:
z = torch.sin(y)
z

tensor(0.4121, grad_fn=<SinBackward0>)

In [280]:
z.backward()

In [281]:
x.grad

tensor(-5.4668)

3. y = x ** 2, z = sin(y), u = e ** z

In [282]:
x = torch.tensor(2.0, requires_grad=True)

In [283]:
y = x ** 2
y

tensor(4., grad_fn=<PowBackward0>)

In [284]:
z = torch.sin(y)
z

tensor(-0.7568, grad_fn=<SinBackward0>)

In [285]:
u = torch.exp(z)
u

tensor(0.4692, grad_fn=<ExpBackward0>)

In [286]:
u.backward()

In [287]:
x.grad

tensor(-1.2267)

## Manually Calculating The Gradients Of The Neural Network

In [288]:
import torch

In [289]:
# Inputs
x = torch.tensor(6.7) # Input Feature
y = torch.tensor(0.0) # True label(binary)

w = torch.tensor(1.0) # Weight
b = torch.tensor(0.0) # Bias

In [290]:
# Binary Cross Entropy Loss Function
def binary_cross_entropy_loss(prediction, target, epsilon=1e-8):
    """Binary cross-entropy between a predicted probability and a 0/1 target.

    Computes -(t * log(p) + (1 - t) * log(1 - p)) element-wise.

    Args:
        prediction: Tensor of predicted probabilities.
        target: Tensor of true labels, broadcastable against ``prediction``.
        epsilon: Smallest distance from 0 and 1 that ``prediction`` is clamped
            to, so that neither log() receives 0.

    Returns:
        Tensor holding the element-wise loss.
    """
    # In float32, 1 - 1e-8 rounds to exactly 1.0, so the upper clamp would be a
    # no-op and a saturated prediction of 1.0 would yield log(0) = -inf (and
    # 0 * -inf = nan). Never let epsilon drop below the dtype's machine epsilon.
    if prediction.is_floating_point():
        epsilon = max(epsilon, torch.finfo(prediction.dtype).eps)
    prediction = torch.clamp(prediction, epsilon, 1 - epsilon)
    return -(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))

In [291]:
# Forward Pass

z = w * x + b     # Weighted Sum(linear part)
y_pred = torch.sigmoid(z)    # Predicted Probability

# Compute Loss
loss = binary_cross_entropy_loss(y_pred, y)
loss

tensor(6.7012)

In [292]:
# Derivatives (chain rule applied by hand to the forward pass above)

# 1. dL/dy_pred: derivative of the loss with respect to the prediction (y_pred)

dloss_dy_pred = (y_pred - y)/(y_pred * (1 - y_pred))

# 2. dy_pred/dz: derivative of the prediction (y_pred) with respect to z (sigmoid derivative)
dy_pred_dz = y_pred * (1 - y_pred)


# 3. dz/dw and dz/db: derivatives of z = w * x + b with respect to w and b
dz_dw = x   # dz/dw = x
dz_db = 1   # dz/db = 1 (bias contributes directly to z)

# 4. Chain rule: multiply the three local derivatives to get dL/dw and dL/db
dL_dw = dloss_dy_pred * dy_pred_dz * dz_dw
dL_db = dloss_dy_pred * dy_pred_dz * dz_db

In [293]:
print(f"Manual Calculation of Gradient Of Loss w.r.t. Weight(dL/dw): {dL_dw}")
print(f"Manual Calculation of Gradient Of Loss w.r.t. Bias(dL/db): {dL_db}")

Manual Calculation of Gradient Of Loss w.r.t. Weight(dL/dw): 6.691762447357178
Manual Calculation of Gradient Of Loss w.r.t. Bias(dL/db): 0.998770534992218


## Calculating The Gradients Of Neural Network Using Autograd

In [294]:
# Inputs
x = torch.tensor(6.7) # Input Feature
y = torch.tensor(0.0) # True label(binary)

w = torch.tensor(1.0, requires_grad=True) # Weight
b = torch.tensor(0.0, requires_grad=True) # Bias

In [295]:
z = w * x + b
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [296]:
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [297]:
loss = binary_cross_entropy_loss(y_pred, y)
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [298]:
loss.backward()

In [299]:
w.grad

tensor(6.6918)

In [300]:
b.grad

tensor(0.9988)

## Calculating A Gradient Of Vector Input Tensor

In [301]:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

In [302]:
y = (x ** 2).mean()
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [303]:
y.backward()

In [304]:
x.grad

tensor([0.6667, 1.3333, 2.0000])

## Clearing Gradients

In [305]:
# clearing grad

x = torch.tensor(2.0 , requires_grad=True)
x

tensor(2., requires_grad=True)

In [306]:
y = x ** 2
y

tensor(4., grad_fn=<PowBackward0>)

In [307]:
y.backward()

In [308]:
x.grad   # each backward() call adds into x.grad, so gradients accumulate across iterations of a loop unless cleared

tensor(4.)

In [309]:
x.grad.zero_()  # clears the gradient of previous pass

tensor(0.)

## Disable Gradient Tracking

In [310]:
x = torch.tensor(2.0, requires_grad = True)
x

tensor(2., requires_grad=True)

In [311]:
y = x ** 2
y

tensor(4., grad_fn=<PowBackward0>)

In [312]:
y.backward()

In [313]:
x.grad

tensor(4.)

In [314]:
# option 1 - requires_grad_(False)
x.requires_grad_(False)
x

tensor(2.)

In [315]:
y = x ** 2
y

tensor(4.)

In [316]:
#y.backward()  # Raises a RuntimeError: x no longer requires grad, so y has no grad_fn to backpropagate through

In [317]:
# option 2 - detach()
x = torch.tensor(2.0, requires_grad = True)
x

tensor(2., requires_grad=True)

In [318]:
z = x.detach()
z

tensor(2.)

In [319]:
y = x ** 2
y

tensor(4., grad_fn=<PowBackward0>)

In [320]:
y1 = z ** 2
y1

tensor(4.)

In [None]:
y.backward()
#y1.backward() # Raises a RuntimeError: y1 was computed from the detached tensor z, so it has no grad_fn

In [322]:
# option 3 - torch.no_grad()
x = torch.tensor(2.0, requires_grad = True)
x

tensor(2., requires_grad=True)

In [None]:
with torch.no_grad():
    y = x ** 2

y

tensor(4.)

In [324]:
# y.backward()   # Raises a RuntimeError: y was computed under torch.no_grad(), so it has no grad_fn