In [30]:
import torch

In [31]:
# here we choose the option of requires_grad- for letting out tensor know that we will be using this for derivations
# it is bydefault false so need to make it true

x = torch.tensor(3.0 , requires_grad = True)

In [6]:
y = x**2

In [7]:
x

tensor(3., requires_grad=True)

In [8]:
#pytorch makes a computation graph - moving in forward direction- u can calculate y with the help of x
#
y

tensor(9., grad_fn=<PowBackward0>)

here
- The backward function is used for calculating the dy/dx
- the grad function is used for displaying the derivated value
- but one thing to keep in mind - backward function works with the final value while the grad attribute works with the starting vale

In [9]:
y.backward()

In [11]:
x.grad

tensor(6.)

2ND QUESTION
- y = x^2 , z = sin(y)
- chain rule has to be applied

In [13]:
# firstly trying by importing the function
import math

def func(x):
  # so firstly we will have to calculate the derivative of the given function and than put it into another function
  return 2*x*math.cos(x**2)

In [26]:
func(3)

-5.466781571308061

In [15]:
x = torch.tensor(3.0 , requires_grad = True)

In [16]:
y = x**2

In [19]:
# pytorch can also be used for creating trignometric functions
z = torch.sin(y)

In [20]:
x

tensor(3., requires_grad=True)

In [21]:
y

tensor(9., grad_fn=<PowBackward0>)

In [22]:
z

tensor(0.4121, grad_fn=<SinBackward0>)

In [23]:
z.backward()

In [25]:
x.grad

tensor(-5.4668)

EXAMPLE 3:
-Neural networks

In [27]:
import torch
x = torch.tensor(6.7)  # input feature
y = torch.tensor(0.0)  # category label binary

w = torch.tensor(1.0)  # weight
b = torch.tensor(0.0)  # bias

In [28]:
# BINARY CROSS - ENTROPY LOSS FOR SCALER
def binary_cross_entropy(prediction , target):
  epsilon = 1e-8
  prediction = torch.clamp(prediction , epsilon ,1 - epsilon )
  return -(target * torch.log(prediction) + (1- target) * torch.log(1 - prediction))

In [29]:
# forward pass
z = w*x + b # weighted sum (linear part)
y_pred = torch.sigmoid(z)

# compute binary cross - entropy loss
loss = binary_cross_entropy(y_pred , y)

In [32]:
# Derivation
# 1. Dl/d(y_pred): loss with respect to the prediction (y_pred)
dloss_dy_pred = (y_pred - y) / (y_pred*(1-y_pred))

# 2. dy_pred/dz: Prediction (y_pred ) with respect to z ( sigmoid derivation)
dy_pred_dz = y_pred * (1-y_pred)

In [33]:
dz_dw = x
dz_db = 1

In [34]:
dl_dw = dloss_dy_pred * dy_pred_dz * dz_dw
dl_db = dloss_dy_pred * dy_pred_dz * dz_db

In [35]:
loss

tensor(6.7012)

In [36]:
print("gradient of loss w.r.t. weight " , dl_dw)
print("gradient of loss w.r.t. bias " , dl_dw)

gradient of loss w.r.t. weight  tensor(2.9963, grad_fn=<MulBackward0>)
gradient of loss w.r.t. bias  tensor(2.9963, grad_fn=<MulBackward0>)


Solving this through AUTOGRAD

In [49]:
x = torch.tensor(6.7)
y = torch.tensor(0.0)

In [50]:
w = torch.tensor(1.0 , requires_grad=True)
b = torch.tensor(0.0 , requires_grad=True)

In [51]:
w

tensor(1., requires_grad=True)

In [52]:
b

tensor(0., requires_grad=True)

In [53]:
z = w*x + b
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [54]:
#sigmoid
y_pred = torch.sigmoid(z)

In [55]:
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [56]:
loss = binary_cross_entropy(y_pred , y)
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [57]:
loss.backward()

In [58]:
print(w.grad)
print(b.grad)

tensor(6.6918)
tensor(0.9988)


Vector input Tensor

In [59]:
x = torch.tensor((1.0 , 2.0 , 3.0) , requires_grad = True)

In [60]:
x

tensor([1., 2., 3.], requires_grad=True)

In [61]:
y = (x**2).mean()

In [62]:
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [63]:
y.backward()

In [64]:
x.grad

tensor([0.6667, 1.3333, 2.0000])

CLEARING GRADIENTS

the gradients are not clearned - new gradients will be added to the old ones


In [65]:
x = torch.tensor(2.0 , requires_grad=True)

In [70]:
y = x**2

In [71]:
y.backward()

In [72]:
x.grad

tensor(8.)

In [73]:
x.grad.zero_()

tensor(0.)

HOW TO DISSABLE GRADIENT TARCKING

In [74]:
# backward tracking can disabled - we enable it for calc derivatives
# but once we are done with training and odnt require backtracking so u can diable tracking

# option 1 : requires_grad(False)
x.requires_grad_(False)

tensor(2.)

In [75]:
x

tensor(2.)

In [76]:
#Option 2 - DETACH - value remains the same but it is detachhed from the computational graph
x = torch.tensor(2.0 , requires_grad=True)
x

tensor(2., requires_grad=True)

In [78]:
z = x.detach()
z

tensor(2.)

In [80]:
y1 =z**2

In [82]:
y1.backward()
# it wont work

In [83]:
# option -3 - no grad function
x = torch.tensor(2.9 , requires_grad = True)

with torch.no_grad():
  y = x**2

In [None]:
y.backward()
# it wont work but if u remove the with torch.no_grad than it will work just fine