# Code example 1 — super basic gradient (1 variable)

In [3]:
import torch

In [None]:
w = torch.tensor(2.0, requires_grad=True)
y = w**2 + 3*w

# backward works best on scalar outputs (y is scalar here)
y.backward()

#) .item()
# Meaning: convert 1-value tensor → normal Python number
# Usage: logging loss value

print(w.item())
print(y.item())
print(f" dy/dw(gradient) = {w.item()}")

2.0
10.0
 dy/dw(gradient) = 2.0


# Code example 2 — gradient with multiple tensors + matrix style

In [20]:
import torch

# x: input data (no need gradient usually)
x = torch.tensor([1.0, 2.0, 3.0])

 # parameters (need gradients)
w = torch.tensor([0.1, 0.2, 0.3], requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)

# prediction: y_hat = x·w + b
y_hat = (x*w).sum() + b
#print(y_hat)

# suppose target is 1.0
target = torch.tensor(1.0)

# # loss: squared error -- to protect the negative loss
loss = (target - y_hat) ** 2
print(loss)

loss.backward()

print("y_hat:", y_hat.item())
print("loss:", loss.item())
print("grad w:", w.grad)     # vector gradient
print("grad b:", b.grad.item())



tensor(0.1600, grad_fn=<PowBackward0>)
y_hat: 1.4000000953674316
loss: 0.16000007092952728
grad w: tensor([0.8000, 1.6000, 2.4000])
grad b: 0.8000001907348633


# Code example 3 — WHY we zero gradients (very important)

In [30]:
import torch
w = torch.tensor(2.0, requires_grad=True)

for step in range(3):
    loss = (w-1.0) **3
    loss.backward()
    print(f"Step {step}: grad =", w.grad.item())

  # reset gradient (very important)
    w.grad = None
    

Step 0: grad = 3.0
Step 1: grad = 3.0
Step 2: grad = 3.0


# Manual training loop using Autograd (no optimizer)

In [37]:
# w = torch.tensor(2.0, requires_grad=True)
# lr = 0.1

# for step in range(10):
#     loss = (w-5) ** 2
#     loss.backward()

#     with torch.no_grad():
#         w = w - lr * w.grad

#     w.grad = None
#     print(step, "w:", w.item(), "loss:", loss.item())

In [38]:
import torch

torch.manual_seed(0)
torch.set_printoptions(precision=4, sci_mode=False)

print("Torch version:", torch.__version__)
print("="*60)

# ============================================================
# 1) requires_grad, leaf tensors, grad_fn, is_leaf
# ============================================================
print("\n1) requires_grad, leaf tensors, grad_fn, is_leaf")

w = torch.tensor(2.0, requires_grad=True)   # leaf tensor (created by you)
b = torch.tensor(-1.0, requires_grad=True)  # leaf tensor

x = torch.tensor(3.0)  # no grad by default

y = w * x + b          # y is NOT leaf; it is a result of ops => has grad_fn
print("w.requires_grad:", w.requires_grad)
print("y.requires_grad:", y.requires_grad)
print("w.is_leaf:", w.is_leaf)
print("y.is_leaf:", y.is_leaf)
print("y.grad_fn:", y.grad_fn)  # shows how y was created

Torch version: 2.8.0

1) requires_grad, leaf tensors, grad_fn, is_leaf
w.requires_grad: True
y.requires_grad: True
w.is_leaf: True
y.is_leaf: False
y.grad_fn: <AddBackward0 object at 0x1182fcdc0>


In [47]:
x = torch.tensor(3.0, requires_grad=True)
y = x ** 2 
z = torch.sin(y)

z.backward()
x.grad

tensor(-5.4668)

In [43]:
x

tensor(3., requires_grad=True)

In [44]:
y

tensor(9., grad_fn=<PowBackward0>)

In [45]:
z

tensor(0.4121, grad_fn=<SinBackward0>)

In [None]:
x = torch.tensor(6.7)
y = torch.tensor(0.0) # label

w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)



In [50]:
w

tensor(1., requires_grad=True)

In [51]:
b

tensor(0., requires_grad=True)

In [52]:
z = w * x + b

In [53]:
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [54]:
y_pred = torch.sigmoid(z)

In [55]:
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [56]:
loss = -(y* torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred))

In [57]:
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [58]:
loss.backward()

In [59]:
w.grad

tensor(6.6918)

In [60]:
b.grad

tensor(0.9988)

In [None]:
x = torch.tensor([1.0, 0.99, 0.89, 0.52])
y = torch.tensor(0.5)

w = torch.tensor([0.1, 0.2, 0.3, 0.4], requires_grad=True)
b = torch.tensor([0.001, 0.002, 0.003, 0.004], requires_grad=True)


z  = x * w + b
y_pred = torch.sigmoid(z)

loss_vec = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred))
loss = loss_vec.mean()     # ✅ scalar

print(z)
print(y_pred)
print(loss)

loss.backward()




tensor([0.1010, 0.2000, 0.2700, 0.2120], grad_fn=<AddBackward0>)
tensor([0.5252, 0.5498, 0.5671, 0.5528], grad_fn=<SigmoidBackward0>)
tensor(0.6984, grad_fn=<MeanBackward0>)


In [67]:
w.grad

tensor([0.0063, 0.0123, 0.0149, 0.0069])

In [68]:
b.grad

tensor([0.0063, 0.0125, 0.0168, 0.0132])

In [None]:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = (x**2).mean()

y.backward()



In [71]:
x.grad

tensor([0.6667, 1.3333, 2.0000])

In [None]:
z = torch.tensor([2.0, 1.0, 0.1], requires_grad=True)
target = torch.tensor([1., 0., 0.])
softmax = torch.softmax(z, dim=0)
loss = -(target * torch.log(softmax)).sum()
print(softmax)
print("loss:", loss)
print("sum:", softmax.sum())

tensor([0.6590, 0.2424, 0.0986], grad_fn=<SoftmaxBackward0>)
loss: tensor(0.4170, grad_fn=<NegBackward0>)
sum: tensor(1.0000, grad_fn=<SumBackward0>)


In [83]:
loss.backward()

In [84]:
z.grad

tensor([-0.3410,  0.2424,  0.0986])