In [40]:
import numpy as np
import sympy as sp

In [41]:
# Minimise f(x, y) = 5x^2 + 2y^2 with a Nesterov-style (lookahead) momentum
# loop, printing a full trace of every iteration.

# Define a symbolic function
x, y = sp.symbols('x y')
f = 5*x**2 + 2*y**2

# Compute symbolic gradients: [df/dx, df/dy]
grad_f = [sp.diff(f, var) for var in (x, y)]

# Convert symbolic gradients to numerical functions callable as g(x, y)
grad_f_num = [sp.lambdify((x, y), g, 'numpy') for g in grad_f]

# Nesterov momentum parameters
learning_rate = 0.1
momentum = 0.9
tolerance = 0.1        # stop once the gradient norm at the lookahead is <= this
max_iterations = 1000  # hard cap so a stalled or diverging run cannot loop forever
decimals = 3           # every intermediate quantity is rounded to this many decimals
point = np.array([1.5, 2.0])
velocity = np.zeros_like(point)

epsilon = np.inf
iteration = 0

# The updates below are quantised by rounding, so convergence of the gradient
# norm is not guaranteed for arbitrary parameter choices; the iteration cap
# keeps the cell from hanging the kernel in that case.
while epsilon > tolerance and iteration < max_iterations:
    iteration += 1
    # Lookahead point: the current iterate advanced by the momentum term
    lookahead = (point + momentum * velocity).round(decimals)
    # Gradient is evaluated at the lookahead, not at the current iterate
    grad = np.array([g(lookahead[0], lookahead[1]) for g in grad_f_num]).round(decimals)
    epsilon = np.linalg.norm(grad).round(decimals)
    # NOTE(review): the gradient step is scaled by (1 - momentum), giving an
    # effective step of learning_rate * (1 - momentum); the usual Nesterov
    # formulation uses learning_rate alone -- confirm this damping is intended.
    velocity = (momentum * velocity - learning_rate * (1 - momentum) * grad).round(decimals)
    # velocity is already rounded, so it is added as-is
    point += velocity

    print(f"Iteration {iteration}")
    print(f"Lookahead: {lookahead}")
    print(f"Grad at lookahead: {grad}")
    print(f"Velocity: {velocity}")
    # "Minimum at" reports the current iterate (the running estimate of the minimiser)
    print(f"Minimum at: {point}")
    print(f"Grad norm: {epsilon}")
    print("="*40)

# Only reached with a large gradient norm when the iteration cap was hit
if epsilon > tolerance:
    print(f"Stopped after {max_iterations} iterations without reaching grad norm <= {tolerance}")

Iteration 1
Lookahead: [1.5 2. ]
Grad at lookahead: [15.  8.]
Velocity: [-0.15 -0.08]
Minimum at: [1.35 1.92]
Grad norm: 17.0
Iteration 2
Lookahead: [1.215 1.848]
Grad at lookahead: [12.15   7.392]
Velocity: [-0.256 -0.146]
Minimum at: [1.094 1.774]
Grad norm: 14.222
Iteration 3
Lookahead: [0.864 1.643]
Grad at lookahead: [8.64  6.572]
Velocity: [-0.317 -0.197]
Minimum at: [0.777 1.577]
Grad norm: 10.855
Iteration 4
Lookahead: [0.492 1.4  ]
Grad at lookahead: [4.92 5.6 ]
Velocity: [-0.334 -0.233]
Minimum at: [0.443 1.344]
Grad norm: 7.454
Iteration 5
Lookahead: [0.142 1.134]
Grad at lookahead: [1.42  4.536]
Velocity: [-0.315 -0.255]
Minimum at: [0.128 1.089]
Grad norm: 4.753
Iteration 6
Lookahead: [-0.155  0.859]
Grad at lookahead: [-1.55   3.436]
Velocity: [-0.268 -0.264]
Minimum at: [-0.14   0.825]
Grad norm: 3.769
Iteration 7
Lookahead: [-0.381  0.587]
Grad at lookahead: [-3.81   2.348]
Velocity: [-0.203 -0.261]
Minimum at: [-0.343  0.564]
Grad norm: 4.475
Iteration 8
Lookahead: [-0

In [43]:
# Evaluate the objective at a hard-coded point; these coordinates match the
# "Minimum at" values printed for iteration 7 of the trace above.
sample_point = {x: -0.343, y: 0.564}
f.subs(sample_point)

1.22443700000000