# LEC-3

In [None]:
import torch

In [None]:
# Derivative by hand (without autograd): for y = x^2, dy/dx = 2x
def dy_dx(x):
  """Return the analytic derivative of y = x^2 evaluated at x."""
  slope = 2*x
  return slope

dy_dx(3)

6

In [None]:
# Chain of functions: y = x^2 and z = sin(y). By the chain rule
# dz/dx = dz/dy * dy/dx, which is already tedious to derive by hand.
import math

def dz_dx(x):
  """Return dz/dx at x for z = sin(x^2), i.e. 2x * cos(x^2)."""
  dy_dx_value = 2*x             # dy/dx for y = x^2
  dz_dy_value = math.cos(x**2)  # dz/dy for z = sin(y), evaluated at y = x^2
  return dy_dx_value*dz_dy_value

dz_dx(3)

-5.466781571308061

# Using Autograd

In [None]:
import torch

## Example 1: y = x^2

In [None]:
# requires_grad=True tells autograd to record operations on x so that
# derivatives with respect to x can be requested later
x = torch.tensor(3.0, requires_grad=True)
y = x.pow(2)  # y = x^2
print(x, y)
y.backward()  # backward pass: computes dy/dx = 2x
x.grad  # dy/dx is stored on the input tensor

tensor(3., requires_grad=True) tensor(9., grad_fn=<PowBackward0>)


tensor(6.)

## Example 2: y = x^2, z = sin(y)

In [None]:
x = torch.tensor(3.0, requires_grad=True)
y = x.pow(2)  # inner function: y = x^2
z = y.sin()   # outer function: z = sin(y)
print(x, y, z)
z.backward()  # autograd applies the chain rule to obtain dz/dx
x.grad  # same value as the hand-derived dz_dx(3) computed without autograd

tensor(3., requires_grad=True) tensor(9., grad_fn=<PowBackward0>) tensor(0.4121, grad_fn=<SinBackward0>)


tensor(-5.4668)

## Example 3: CGPA -> placed?

### A. Without using autograd

In [26]:
# Inputs
x = torch.tensor(6.7) # Input Feature = CGPA
# True Label (binary) = Placed? Stored as a float (0.0), like in the autograd
# version of this example, so the loss arithmetic stays in floating point.
y = torch.tensor(0.0)

w = torch.tensor(1.0) # Weight
b = torch.tensor(0.0) # Bias

# Binary Cross-Entropy Loss for Scalar
def binary_cross_entropy_loss(prediction, target):
  """Binary cross-entropy between a predicted probability and a 0/1 target.

  Args:
    prediction: tensor of predicted probabilities in [0, 1].
    target: tensor of true labels (0 or 1), broadcastable with prediction.

  Returns:
    Tensor holding -(t*log(p) + (1-t)*log(1-p)), computed elementwise.
  """
  # Clamp with the machine epsilon of the working dtype. A fixed 1e-8 is below
  # float32 resolution near 1.0 (1 - 1e-8 rounds to exactly 1.0), so it would
  # not keep log(1 - prediction) away from log(0) for a saturated prediction.
  if prediction.is_floating_point():
    eps_dtype = prediction.dtype
  else:
    eps_dtype = torch.get_default_dtype()
  epsilon = torch.finfo(eps_dtype).eps
  prediction = torch.clamp(prediction, epsilon, 1-epsilon)
  return -(target * torch.log(prediction) + (1-target) * torch.log(1-prediction))

# Forward pass: linear score, then a sigmoid to turn it into a probability
z = x * w + b
y_pred = z.sigmoid()

# Loss of the predicted probability against the true label
loss = binary_cross_entropy_loss(y_pred, y)
print(loss)

tensor(6.7012)


In [27]:
# Derivatives (chain rule by hand): dL/dw = dL/d(y_pred) * d(y_pred)/dz * dz/dw, likewise for b
one_minus_pred = 1-y_pred

# 1. dL/d(y_pred): how the loss changes with the prediction
dloss_dy_pred = (y_pred - y) / (y_pred * one_minus_pred)

# 2. d(y_pred)/dz: derivative of the sigmoid, expressed through its own output
dy_pred_dz = y_pred * one_minus_pred

# 3. dz/dw and dz/db for z = w*x + b
dz_dw = x # the weight is multiplied by the input
dz_db = 1 # the bias is added to z directly

# Multiply the shared upstream factors once, then branch out to each parameter
dL_dz = dloss_dy_pred * dy_pred_dz
dL_dw = dL_dz * dz_dw
dL_db = dL_dz * dz_db

print(f"Manual Gradients: dL/dw = {dL_dw}, dL/db = {dL_db}")

Manual Gradients: dL/dw = 6.691762447357178, dL/db = 0.998770534992218


### B. Using Autograd

In [28]:
# Data: plain tensors, no gradient needed
x = torch.tensor(6.7)
y = torch.tensor(0.0)
# Parameters: tracked by autograd so that dL/dw and dL/db can be requested
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)

# Same forward pass as the manual version; the graph is recorded as it runs
z = w * x + b
y_pred = z.sigmoid()
loss = binary_cross_entropy_loss(y_pred, y)
print(loss)

tensor(6.7012, grad_fn=<NegBackward0>)


In [29]:
# Backward pass from the loss: autograd fills w.grad and b.grad
loss.backward()
# Labelled as autograd results so they are not confused with the hand-computed ones
print(f"Autograd Gradients: dL/dw = {w.grad}, dL/db = {b.grad}")

Manual Gradients: dL/dw = 6.6917619705200195, dL/db = 0.9987704753875732


# Using Vector Inputs in Autograd

In [35]:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x.pow(2).mean()  # scalar output: mean of the squared elements
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [36]:
# y = (x1^2 + x2^2 + x3^2) / 3, so each gradient component is 2*x_i / 3
y.backward()
x.grad

tensor([0.6667, 1.3333, 2.0000])

# Clearing Gradients

In [44]:
# Scalar input tracked by autograd
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [61]:
# Forward pass: y = x^2
y = x**2
y

tensor(4., grad_fn=<PowBackward0>)

In [62]:
# Backward pass: computes dy/dx and stores it in x.grad
y.backward()

In [63]:
x.grad
# Gradients accumulate when the forward pass, backward pass and x.grad are run repeatedly:
# the first run gives x.grad = 4, the second 8, the next 12, and so on.
# That is why the previous gradient has to be cleared manually.

tensor(4.)

In [66]:
x.grad.zero_() # Resets the stored gradient to zero; do this in every epoch once the gradient has been used

tensor(0.)

# Stopping Gradient Tracking after Training

In [67]:
# Start again from a fresh tracked input
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [68]:
# Forward pass: y = x^2
y = x**2
y

tensor(4., grad_fn=<PowBackward0>)

In [69]:
# Backward pass: computes dy/dx and stores it in x.grad
y.backward()

In [70]:
# dy/dx = 2x evaluated at x = 2
x.grad

tensor(4.)

In [81]:
# option 1 - requires_grad_(False): switch tracking off on the tensor itself
x.requires_grad_(False)
print(x)  # x is now printed without requires_grad=True
# y.backward() # Left commented out: y.backward() can't be called anymore

# option 2 - detach(): z carries the same value as x but is not tracked
x = torch.tensor(2.0, requires_grad=True)
y = x.pow(2)   # built from x -> tracked, has a grad_fn
z = x.detach()
y1 = z.pow(2)  # built from z -> untracked
print(y, y1)
y.backward()
# y1.backward() # Can't call y1.backward() anymore

# option 3 - torch.no_grad()
x = torch.tensor(2.0, requires_grad=True)
with torch.no_grad():
  # Nothing computed inside this context is recorded, even though x requires grad
  y_no_grad = x**2
# y_no_grad.backward() # Can't call backward(): y_no_grad has no grad_fn

# Outside the context tracking is active again, so backward() works as usual
y = x**2
y.backward()
print(x.grad)

tensor(2.)
tensor(4., grad_fn=<PowBackward0>) tensor(4.)
tensor(4.)
