# Gradient Descent Module Testing Notebook
This notebook tests the combined C++ gradient descent library with Python bindings.
### Setup
Before running any cell, make sure you've built the `gradientdescent` extension module (imported in the first code cell) using:
```bash
cd src/notebooks
python3 setup.py build_ext --inplace
```

In [None]:
import sys
import numpy as np
# Locally built C++ gradient descent library with Python bindings; it must be
# built first (see the Setup section at the top of this notebook).
import gradientdescent as gd

# Reached only when the imports above did not raise.
print("Gradient Descent module loaded successfully!")

## Part 1: Automatic Differentiation

In [None]:
# Two scalar Variables; the second argument to create() is the requires_grad flag
x, y = (gd.Variable.create(v, True) for v in (2.0, 3.0))

# Show how each Variable renders as a string
print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Arithmetic operations: exercise each overloaded binary operator on Variables
z1 = x + y
z2 = x * y
z3 = x - y
z4 = x / y

print(f"x + y = {z1.value}")
print(f"x * y = {z2.value}")
print(f"x - y = {z3.value}")
print(f"x / y = {z4.value}")

# This is a testing notebook, so verify the results instead of relying on a
# visual check of the printed numbers: each forward value must match plain
# Python float arithmetic on the operands' values.
arithmetic_checks = [
    ("x + y", z1.value, x.value + y.value),
    ("x * y", z2.value, x.value * y.value),
    ("x - y", z3.value, x.value - y.value),
    ("x / y", z4.value, x.value / y.value),
]
for expr, got, want in arithmetic_checks:
    assert np.isclose(got, want), f"{expr}: got {got}, expected {want}"

In [None]:
# Simple gradient: f(x) = x^2, df/dx = 2x
x = gd.Variable.create(3.0, True)
# x appears as both operands, so the gradient must combine both contributions
f = x * x

print(f"f(x) = x^2 where x = {x.value}")
print(f"f = {f.value}")

# Backpropagate from f; afterwards x.grad is read as df/dx
f.backward()
expected_grad = 2 * x.value
print(f"df/dx = {x.grad} (expected: {expected_grad})")

# Fail loudly on a wrong forward value or gradient instead of only printing
# the expected value next to the actual one.
# NOTE(review): assumes x.grad is a plain number (it is printed next to
# 2 * x.value above) -- confirm against the bindings.
assert np.isclose(f.value, x.value * x.value), f"f = {f.value}, expected {x.value * x.value}"
assert np.isclose(x.grad, expected_grad), f"df/dx = {x.grad}, expected {expected_grad}"

## Part 2: Linear Regression with Gradient Descent

In [None]:
# Build a reproducible noisy linear dataset for the regression demo
np.random.seed(42)
n_samples, n_features = 100, 2

# Ground-truth coefficients the training is expected to recover: w = [2.5, -1.5]
true_w = np.array([2.5, -1.5])

# Standard-normal feature matrix of shape (n_samples, n_features)
X = np.random.randn(n_samples, n_features)

# Targets are the linear signal X * w plus Gaussian noise scaled by 0.5
noise = 0.5 * np.random.randn(n_samples)
y = np.dot(X, true_w) + noise

print(f"Generated {n_samples} samples with {n_features} features")
print(f"True parameters: {true_w}")

In [None]:
# Convert data to the format expected by our C++ code (plain nested Python lists)
X_list = X.tolist()
y_list = y.tolist()

# Initialize weights with random values: one trainable Variable per feature
w = [gd.Variable.create(np.random.randn(), True) for _ in range(n_features)]
# Print every weight rather than indexing w[0] and w[1], so this cell keeps
# working when n_features is changed (output is unchanged for two features).
initial_weights_text = ", ".join(str(w_j.value) for w_j in w)
print(f"Initial weights: [{initial_weights_text}]")

# Create loss function and optimizer
loss_fn = gd.MSE()
optimizer = gd.Vanilla()

In [None]:
# Training parameters
learning_rate = 0.01
n_epochs = 100

# Per-epoch monitoring history (one entry per epoch, recorded after the step)
losses = []
weights_history = []

for epoch in range(n_epochs):
    # Train one step
    # NOTE(review): presumably updates the Variables in `w` in place, since
    # their values are re-read below -- confirm against the bindings.
    optimizer.train(w, X_list, y_list, loss_fn, learning_rate)

    # Compute current predictions and loss for monitoring only:
    # pred_i = sum_j w[j] * X[i][j], built from Variables so that
    # loss_fn.compute receives the same kind of objects as in the original loop.
    y_pred = []
    for row in X_list:
        pred = gd.Variable.create(0.0)
        for w_j, x_value in zip(w, row):
            pred = pred + w_j * gd.Variable.create(x_value)
        y_pred.append(pred)

    loss = loss_fn.compute(y_pred, y_list)
    losses.append(loss.value)

    # Snapshot every weight (not just the first two) so the cell works for
    # any n_features; identical to the old output when n_features == 2.
    current_weights = [w_j.value for w_j in w]
    weights_history.append(current_weights)

    if epoch % 10 == 0:
        weights_text = ", ".join(f"{value:.4f}" for value in current_weights)
        print(f"Epoch {epoch}: Loss = {loss.value:.6f}, Weights = [{weights_text}]")

final_weights_text = ", ".join(f"{w_j.value:.4f}" for w_j in w)
true_weights_text = ", ".join(f"{target:.4f}" for target in true_w)
print(f"\nFinal weights: [{final_weights_text}]")
print(f"True weights:  [{true_weights_text}]")

In [None]:
# Training Results Summary
print("\n=== Training Results ===")
if losses:
    # losses[0] is recorded after the first training step, so the reduction
    # is measured relative to that value, not to the untrained model.
    first_loss, final_loss = losses[0], losses[-1]
    print(f"Final Loss: {final_loss:.6f}")
    if first_loss != 0:
        print(f"Loss Reduction: {((first_loss - final_loss) / first_loss * 100):.1f}%")
    else:
        # Avoid ZeroDivisionError when the first recorded loss is exactly zero
        print("Loss Reduction: n/a (first recorded loss is 0)")
else:
    # Happens when the training loop ran zero epochs; losses[-1] would raise
    print("No losses recorded (training loop did not run)")

print("\nWeight Convergence:")
# Report every weight against its target instead of hard-coding two of them
for j, (w_j, target) in enumerate(zip(w, true_w)):
    print(f"w[{j}]: {w_j.value:.4f} -> target: {target:.4f} (error: {abs(w_j.value - target):.4f})")

print("\nLoss progression (every 10 epochs):")
for i in range(0, len(losses), 10):
    print(f"Epoch {i:2d}: {losses[i]:.6f}")