In [42]:
import torch

In [43]:
!nvidia-smi

Tue Jan  6 05:48:25 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 13.0     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  |   00000000:47:00.0 Off |                   On |
| N/A   29C    P0             49W /  400W |                  N/A   |     N/A      Default |
|                                         |                        |              Enabled |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [44]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [45]:
device

'cuda'

### AutoGrad in PyTorch

#### Why AutoGrad is needed?
*Calculation of derivatives of nested functions in Neural Networks is very difficult if we have to calculate it everytime...


#### What is AutoGrad?
* AutoGrad ia a core component of PyTorch that provides automatic differentiation for tensor operations.It enables gradient computation<br>
  


In [48]:
x = torch.tensor(3.0, requires_grad=True)
# we have to set the requires_grad parameter to True

In [49]:
y = x ** 3

In [50]:
y

tensor(27., grad_fn=<PowBackward0>)

In [51]:
# for dy/dx
y.backward()

In [53]:
x.grad

tensor(27.)

In [56]:
x = torch.tensor(3.0, requires_grad=True)
y = x ** 2

In [57]:
z = torch.sin(y)

In [58]:
z

tensor(0.4121, grad_fn=<SinBackward0>)

In [59]:
z.backward()

In [61]:
x.grad

tensor(-5.4668)

In [73]:
import torch

#Inputs

X = torch.tensor(6.7)
y = torch.tensor(0.0) # true label

w = torch.tensor(1.0, requires_grad=True) # weight
b = torch.tensor(0.0, requires_grad=True) # bias

In [74]:
#Loss(BCE) for scalar

def binary_cross_entropy(prediction, target):
    epsilon = 1e-8
    prediction = torch.clamp(prediction, epsilon, 1-epsilon) #this bounds our prediction to stay between (epsilon, 1-epsilon)
    return -(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))

In [75]:
# Forward pass

z = w * X + b
y_pred = torch.sigmoid(z)

#calculating loss

loss = binary_cross_entropy(y_pred, y)



In [76]:
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [79]:
# Derivatives using AutoGrad

loss.backward()




In [80]:
w.grad

tensor(6.6918)

In [81]:
b.grad

tensor(0.9988)

In [83]:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
x

tensor([1., 2., 3.], requires_grad=True)

In [90]:
y = (x ** 2).mean()
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [91]:
y.backward()

In [94]:
x.grad

# how 3 gradients.. so y = (x1^2 + x2^2 + x3^2)/3
# three gradients are dy/dx1 , dy/dx2, dy/dx3

tensor([0.6667, 1.3333, 2.0000])

In [95]:
#CLearing Grad

# if we do backward() multiple times these gradient start accumulating ..ie. they start adding up...
#so we need to clear them before we do again...a

x.grad.zero_()

tensor([0., 0., 0.])

In [96]:
x.grad

tensor([0., 0., 0.])

In [100]:
#disable gradient tracking.... 


x = torch.tensor(2.0, requires_grad =True)
y = 5 * (x ** 2) + 45



In [101]:
y.backward()

In [102]:
x.grad

tensor(20.)

In [105]:
#Suppose now we want to make prediction so we dont need gradient tracking...

x.requires_grad_(False)


# or using a detach function like z = x.detach()
''' or using torch.no_grad()
with torch.no_grad():
    y = 5 * (x ** 2) + 45
'''

' or using torch.no_grad()\nwith torch.no_grad():\n    y = 5 * (x ** 2) + 45\n'