In [4]:
import torch

# Step 1: Initialize a single data point and parameters
x = torch.tensor(2.0)  # Single input value
true_w, true_b = 3.0, -1.0  # True parameters for synthetic data
y = true_w * x + true_b  # True output (no noise added for simplicity)

# Randomly initialize w and b
# Using torch.randn to initialize w and b with random values
w = torch.randn(1, requires_grad=True)  # Slope (parameter to be learned)
b = torch.randn(1, requires_grad=True)  # Intercept (parameter to be learned)

# Set the learning rate and number of iterations
learning_rate = 0.01  # Step size
n_iter = 100  # Number of iterations for gradient descent

# Step 2: Gradient Descent Loop
for iteration in range(n_iter):
    # Step 3: Forward Pass
    # Compute the predicted value (y_pred) using the current parameters w and b
    y_pred = w * x + b  # Forward pass: y_pred = w * x + b

    # Step 4: Loss Computation
    # Compute the Mean Squared Error (MSE) for the current prediction
    loss = (y_pred - y) ** 2  # Loss = ((w * x + b) - y)^2

    # Step 5: Backward Pass (Automatic Gradient Computation)
    # Compute gradients of loss with respect to w and b
    # `loss.backward()`:
    # - Computes the partial derivatives (gradients) of the loss with respect to all tensors that have `requires_grad=True`.
    # - Stores the gradients in the `.grad` attribute of those tensors.
    loss.backward()

    # Step 6: Update Parameters (Gradient Descent Step)
    # Update w and b using the gradients stored in their `.grad` attribute
    # `w.grad` and `b.grad` contain dL/dw and dL/db, respectively.
    w = w - learning_rate * w.grad  # New w = Current w - (learning_rate * grad_w)
    b = b - learning_rate * b.grad  # New b = Current b - (learning_rate * grad_b)

    # Step 7: Zero Gradients
    # PyTorch accumulates gradients, so we need to zero them after each update
    # This prevents gradients from being added to previous values in subsequent iterations.
    w.grad.zero_()
    b.grad.zero_()

    # Optional: Print loss every 10 iterations
    if iteration % 10 == 0:
        print(f"Iteration {iteration:3d}: Loss = {loss.item():.4f}, w = {w.item():.4f}, b = {b.item():.4f}")

# Step 8: Print Final Parameters
print(f"\nFinal learned parameters: w = {w.item():.4f}, b = {b.item():.4f}")
print(f"True parameters: w = {true_w}, b = {true_b}")


  w.grad.zero_()


AttributeError: 'NoneType' object has no attribute 'zero_'

In [2]:
import torch

# Step 1: Initialize a single data point and parameters
x = torch.tensor(2.0)  # Single input value
true_w, true_b = 3.0, -1.0  # True parameters for synthetic data
y = true_w * x + true_b  # True output (no noise added for simplicity)

# Randomly initialize w and b
# Using torch.randn to initialize w and b with random values
w = torch.randn(1, requires_grad=True)  # Slope (parameter to be learned)
b = torch.randn(1, requires_grad=True)  # Intercept (parameter to be learned)

# Define optimizer
# `params` takes a list of tensors to optimize (in this case, [w, b])
# `lr` sets the learning rate
optimizer = torch.optim.SGD([w, b], lr=0.01)

# Set the number of iterations
n_iter = 100  # Number of iterations for gradient descent

# Step 2: Gradient Descent Loop
for iteration in range(n_iter):
    # Step 3: Forward Pass
    # Compute the predicted value (y_pred) using the current parameters w and b
    y_pred = w * x + b  # Forward pass: y_pred = w * x + b

    # Step 4: Loss Computation
    # Compute the Mean Squared Error (MSE) for the current prediction
    loss = (y_pred - y) ** 2  # Loss = ((w * x + b) - y)^2

    # Step 5: Zero Gradients
    # Zero out gradients before computing them to avoid accumulation
    optimizer.zero_grad()

    # Step 6: Backward Pass
    # Compute gradients of loss with respect to w and b
    loss.backward()

    # Step 7: Update Parameters
    # Use the optimizer to update w and b
    optimizer.step()

    # Optional: Print loss every 10 iterations
    if iteration % 10 == 0:
        print(f"Iteration {iteration:3d}: Loss = {loss.item():.4f}, w = {w.item():.4f}, b = {b.item():.4f}")

# Step 8: Print Final Parameters
print(f"\nFinal learned parameters: w = {w.item():.4f}, b = {b.item():.4f}")
print(f"True parameters: w = {true_w}, b = {true_b}")



Iteration   0: Loss = 14.2339, w = 0.7376, b = 0.1292
Iteration  10: Loss = 1.7305, w = 1.6223, b = 0.5715
Iteration  20: Loss = 0.2104, w = 1.9307, b = 0.7258
Iteration  30: Loss = 0.0256, w = 2.0383, b = 0.7795
Iteration  40: Loss = 0.0031, w = 2.0758, b = 0.7983
Iteration  50: Loss = 0.0004, w = 2.0888, b = 0.8048
Iteration  60: Loss = 0.0000, w = 2.0934, b = 0.8071
Iteration  70: Loss = 0.0000, w = 2.0950, b = 0.8079
Iteration  80: Loss = 0.0000, w = 2.0955, b = 0.8082
Iteration  90: Loss = 0.0000, w = 2.0957, b = 0.8083

Final learned parameters: w = 2.0958, b = 0.8083
True parameters: w = 3.0, b = -1.0
