In [1]:
import torch

# 1. Define Training Data (y = 3x + 2)
# Features (x)
X = torch.tensor([[1.0], [2.0], [3.0]], dtype=torch.float32)
# Labels (y)
Y = torch.tensor([[5.0], [8.0], [11.0]], dtype=torch.float32)

# 2. Initialize Model Parameters Manually
# We use torch.randn (random normal) or torch.rand (random uniform) for initialization
# REQUIRES_GRAD=TRUE is the key to tracking gradients
W = torch.randn(1, 1, requires_grad=True) # Weight (slope)
B = torch.rand(1, requires_grad=True)     # Bias (y-intercept)

# Hyperparameters
learning_rate = 0.01

In [None]:
# --- 1. FORWARD PASS ---
# Manual Linear Model: Y_pred = X @ W + B
Y_pred = X @ W + B 

# --- 2. LOSS CALCULATION (Mean Squared Error) ---
# We use torch.sum and torch.pow to calculate MSE manually
loss = torch.mean(torch.pow(Y_pred - Y, 2))

# --- 3. BACKWARD PASS (Gradient Computation) ---
# Calculates d(loss)/d(W) and d(loss)/d(B) and stores them in W.grad and B.grad
loss.backward()

# --- 4. PARAMETER UPDATE (Optimization) ---
# We update the tensors using the calculated gradients (W.grad and B.grad).
# We wrap this in torch.no_grad() because the update itself is not part of the forward
# calculation and should not have its gradient tracked.
with torch.no_grad():
    W -= learning_rate * W.grad
    B -= learning_rate * B.grad
    
    # Crucial: Manually zero the gradients after the update 
    # (otherwise they accumulate from one iteration to the next)
    W.grad.zero_()
    B.grad.zero_()

# --- 5. RESULTS ---
print(f"Loss: {loss.item():.4f}")
print(f"Weight (W) Gradient: {W.grad.item():.4f}")
print(f"New Weight (W): {W.item():.4f}")
print(f"New Bias (B): {B.item():.4f}")

Loss: 90.0378
Weight (W) Gradient: 0.0000
New Weight (W): -0.4616
New Bias (B): 0.9742


In [None]:
import torch

# 1. Define Training Data (y = 3x + 2)
X = torch.tensor([[1.0], [2.0], [3.0]], dtype=torch.float32)
Y = torch.tensor([[5.0], [8.0], [11.0]], dtype=torch.float32)

# Hyperparameters
learning_rate = 0.01

# --- Initialize Parameters ---
# Create W and B with the same initial random values for a fair comparison
W_initial = torch.randn(1, 1, requires_grad=True)
B_initial = torch.rand(1, requires_grad=True)



In [7]:
print(X)

tensor([[1.],
        [2.],
        [3.]])


In [9]:
# print(W_initial,'\n',B_initial)
W_step = W_initial.clone().detach().requires_grad_(True)
B_step = B_initial.clone().detach().requires_grad_(True)

print(W_step,'\n',B_step)


optimizer = torch.optim.SGD([W_step,B_step],lr=learning_rate)

print('pytorch using step function')
print(f'initial W: {W_step.item():.4f} and initial B: {B_step.item():.4f}')

Y_pred = X @ W_step + B_step

loss_step = torch.mean(torch.pow(Y_pred - Y,2))

optimizer.zero_grad()
loss_step.backward()

optimizer.step()

# Print results
print(f"Final Loss: {loss_step.item():.4f}")
print(f"Final W: {W_step.item():.4f}, Final B: {B_step.item():.4f}")



# Create fresh, identical copies of initial parameters
W_manual = W_initial.clone().detach().requires_grad_(True)
B_manual = B_initial.clone().detach().requires_grad_(True)

# Print initial state
print("\n--- Method 2: Manual Update (Replacing step()) ---")
print(f"Initial W: {W_manual.item():.4f}, Initial B: {B_manual.item():.4f}")

# 1. Forward Pass
Y_pred_manual = X @ W_manual + B_manual

# 2. Loss Calculation
loss_manual = torch.mean(torch.pow(Y_pred_manual - Y, 2))

# 3. Backward Pass
loss_manual.backward() # Computes and stores new gradients

# 4. Parameter Update (Manual SGD Logic)
with torch.no_grad():
    # Manual update formula: parameter = parameter - lr * gradient
    W_manual -= learning_rate * W_manual.grad
    B_manual -= learning_rate * B_manual.grad
    
    # Manual zeroing of gradients
    W_manual.grad.zero_()
    B_manual.grad.zero_()

# Print results
print(f"Final Loss: {loss_manual.item():.4f}")
print(f"Final W: {W_manual.item():.4f}, Final B: {B_manual.item():.4f}")


tensor([[1.1754]], requires_grad=True) 
 tensor([0.1492], requires_grad=True)
pytorch using step function
initial W: 1.1754 and initial B: 0.1492
Final Loss: 32.4693
Final W: 1.4197, Final B: 0.2592

--- Method 2: Manual Update (Replacing step()) ---
Initial W: 1.1754, Initial B: 0.1492
Final Loss: 32.4693
Final W: 1.4197, Final B: 0.2592
