# Solution: Momentum Implementation

**Objective**: Compare Vanilla GD vs Momentum.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def f(x):
    """Evaluate the objective x**4 - 2*x**2 at x.

    Works element-wise on anything that supports ``**``, ``*`` and ``-``
    (plain numbers as well as NumPy arrays).
    """
    quartic_term = x**4
    quadratic_term = 2*x**2
    return quartic_term - quadratic_term

def df(x):
    """Evaluate 4*x**3 - 4*x, the derivative of x**4 - 2*x**2, at x.

    Works element-wise on plain numbers and NumPy arrays alike.
    """
    cubic_term = 4*x**3
    linear_term = 4*x
    return cubic_term - linear_term

def gd_momentum(start_x, lr, momentum, epochs, grad_fn=None):
    """Run gradient descent with momentum and record every iterate.

    Each step applies::

        v = momentum * v - lr * grad_fn(x)
        x = x + v

    starting from ``v = 0``.

    Args:
        start_x: Initial position.
        lr: Learning rate that scales the gradient.
        momentum: Fraction of the previous velocity carried into the next
            step; ``0.0`` gives plain gradient descent.
        epochs: Number of update steps to perform.
        grad_fn: Callable returning the gradient at ``x``. When omitted,
            the module-level ``df`` is used.

    Returns:
        A NumPy array of the ``epochs + 1`` visited positions, beginning
        with ``start_x``.
    """
    if grad_fn is None:
        # Looked up at call time so the default always follows the
        # module-level df, exactly as before this parameter existed.
        grad_fn = df

    x = start_x
    v = 0
    path = [x]
    for _ in range(epochs):
        grad = grad_fn(x)
        # The old velocity is damped by `momentum`, then pushed downhill
        # by the scaled gradient.
        v = momentum * v - lr * grad
        x = x + v
        path.append(x)
    return np.array(path)

# Compare vanilla gradient descent with momentum on the same problem.
# Both runs share the start point, learning rate, and 20 epochs, so the
# momentum coefficient is the only thing that differs between them.
start = 0.1 # Slightly off-center: df(0) == 0, so starting exactly at 0 would never move
lr = 0.1

# 1. Vanilla (momentum = 0): the velocity reduces to -lr * grad each step
path_vanilla = gd_momentum(start, lr, 0.0, 20)

# 2. Momentum (beta = 0.9): 90% of the previous velocity is carried forward
path_mom = gd_momentum(start, lr, 0.9, 20)

# Plot
# Faint gray background curve: f sampled at 100 points on [-2, 2].
x_range = np.linspace(-2, 2, 100)
plt.plot(x_range, f(x_range), color='gray', alpha=0.3)

# Overlay each optimizer's iterates as (x, f(x)) markers joined by lines.
# The vanilla path has no explicit color, so it gets matplotlib's default.
plt.plot(path_vanilla, f(path_vanilla), 'o-', label="Vanilla GD", alpha=0.6)
plt.plot(path_mom, f(path_mom), 'o-', label="Momentum", color='red')

plt.title("Momentum vs Vanilla")
plt.legend()
plt.show()

## Conclusion
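Momentum (Red) accelerates down the hill much faster than Vanilla GD (Blue), effectively "rolling" into the minimum at x = 1 in fewer steps. The accumulated velocity also carries it past the minimum, so it overshoots and oscillates around x = 1, whereas Vanilla GD approaches the minimum steadily from one side.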