In this part of the assignment, you have to implement multivariate gradient descent to find the minima (local and global) of the function given below.
Note: you can find different minima by changing your initialisation.  
$f(x, y) = x^4 + x^2y^2 - y^2 + y^4 + 6$

In [8]:
import numpy as np

In [None]:
def objective(point: np.ndarray) -> float:
    """Evaluate f(x, y) = x^4 + x^2 y^2 - y^2 + y^4 + 6 at ``point``.

    ``point`` must unpack into exactly two components, ``(x, y)``.
    """
    x_coord, y_coord = point
    x_squared = x_coord**2
    y_squared = y_coord**2
    # Terms are accumulated left to right in the order of the formula.
    total = x_coord**4 + x_squared * y_squared
    total = total - y_squared
    total = total + y_coord**4
    return total + 6

In [None]:
def gradient(point: np.ndarray) -> np.ndarray:
    """Return the closed-form gradient [df/dx, df/dy] of the objective.

    For f(x, y) = x^4 + x^2 y^2 - y^2 + y^4 + 6 the partial derivatives are
    df/dx = 4x^3 + 2xy^2 and df/dy = 4y^3 - 2y + 2yx^2.  The result is a
    float64 array of length two.
    """
    x_coord, y_coord = point
    partial_x = 4 * x_coord**3 + 2 * x_coord * (y_coord**2)
    partial_y = 4 * y_coord**3 - 2 * y_coord + 2 * y_coord * (x_coord**2)
    return np.array((partial_x, partial_y), dtype=np.float64)

In [None]:
def multivariate_gradient_descent(
    grad_fn,
    start_point,
    learning_rate: float = 0.1,
    tolerance: float = 1e-6,
    max_iter: int = 10_000,
    backtracking: bool = True,
    objective_fn=None,
):
    """Minimise a function with gradient descent and optional step halving.

    Args:
        grad_fn: Callable taking the current point (a float64 array) and
            returning the gradient at that point.
        start_point: Initial guess; copied into a float64 array.
        learning_rate: Initial step size.
        tolerance: Stop as soon as the gradient norm is <= this value.
        max_iter: Maximum number of loop iterations.  Rejected (backtracked)
            steps count as iterations too.
        backtracking: When True, a candidate whose objective value is larger
            than the current one (or is NaN) is rejected and the step size is
            halved.  The halved step size is kept for all later iterations.
        objective_fn: Callable returning the objective value at a point.
            Defaults to the module-level ``objective`` function.

    Returns:
        A dict with the keys
        ``"point"`` (final iterate), ``"value"`` (objective at that point),
        ``"iterations"`` (number of loop iterations executed),
        ``"gradient_norm"`` (gradient norm at the returned point) and
        ``"trajectory"`` (array of the start point plus all accepted points,
        one per row).
    """
    if objective_fn is None:
        # Resolved lazily so callers that pass their own objective do not
        # depend on the module-level function existing.
        objective_fn = objective

    point = np.array(start_point, dtype=np.float64)
    history = [point.copy()]
    step = learning_rate
    last_grad_norm = None
    # Pre-bind so the result is well defined when the loop body never runs
    # (max_iter <= 0).
    iteration = 0
    # The current objective value only changes on accepted steps, so cache it
    # instead of re-evaluating it on every iteration.
    current_value = objective_fn(point) if backtracking else None

    for iteration in range(1, max_iter + 1):
        grad_value = grad_fn(point)
        last_grad_norm = np.linalg.norm(grad_value)
        if last_grad_norm <= tolerance:
            break

        candidate = point - step * grad_value
        if backtracking:
            candidate_value = objective_fn(candidate)
            # A NaN compares False against everything, so test for it
            # explicitly; otherwise an invalid candidate would be accepted.
            if np.isnan(candidate_value) or candidate_value > current_value:
                step *= 0.5
                if step < 1e-8:
                    # Step size has collapsed; no further progress possible.
                    break
                continue
            current_value = candidate_value

        point = candidate
        history.append(point.copy())
    else:
        # Iteration budget exhausted (or empty): the norm computed inside the
        # loop, if any, predates the last accepted step, so measure it at the
        # point that is actually returned.
        last_grad_norm = np.linalg.norm(grad_fn(point))

    return {
        "point": point,
        "value": objective_fn(point),
        "iterations": iteration,
        "gradient_norm": last_grad_norm,
        "trajectory": np.vstack(history),
    }

In [None]:
# Run gradient descent from (1, 1) with an initial step size of 0.2 and
# display the returned result as the cell output.
start_point = np.array([1.0, 1.0])
result = multivariate_gradient_descent(
    grad_fn=gradient,
    start_point=start_point,
    learning_rate=0.2,
)
result

(array([9.94526076e-06, 7.07106781e-01]), 1071)

In [None]:
# Run gradient descent from (-2, -2) with an initial step size of 0.15 and
# display the returned result as the cell output.
start_point = np.array([-2.0, -2.0])
result = multivariate_gradient_descent(
    grad_fn=gradient,
    start_point=start_point,
    learning_rate=0.15,
)
result

(array([-9.97043752e-06, -7.07106781e-01]), 1073)

In [None]:
# Run gradient descent from a start far from the origin, (50, 50), with a
# smaller initial step size of 0.01, and display the returned result.
start_point = np.array([50.0, 50.0])
result = multivariate_gradient_descent(
    grad_fn=gradient,
    start_point=start_point,
    learning_rate=0.01,
)
result

array([99.99999756, 99.99999756])

849