In [2]:
import numpy as np

# Define the function f(x, y)
def f(x, y):
    return 10*x**4 - 20*x**2*y + x**2 + 10*y**2 - 2*x + 1

# Define the gradient of f(x, y)
def gradient_f(x, y):
    df_dx = 40*x**3 - 40*x*y + 2*x - 2
    df_dy = -20*x**2 + 20*y
    return np.array([df_dx, df_dy])

# Armijo's Rule for step size selection
def armijo_rule(x, y, alpha, beta, gamma, grad):
    while f(x - alpha*grad[0], y - alpha*grad[1]) > f(x, y) - gamma*alpha*np.linalg.norm(grad)**2:
        alpha *= beta
    return alpha

# Gradient Descent function
def gradient_descent(x0, y0, alpha, beta, gamma, tol=1e-6, max_iter=1000):
    x = x0
    y = y0
    iterates = []
    for i in range(max_iter):
        grad = gradient_f(x, y)
        alpha = armijo_rule(x, y, alpha, beta, gamma, grad)
        x_new = x - alpha*grad[0]
        y_new = y - alpha*grad[1]
        if np.linalg.norm([x_new - x, y_new - y]) < tol:
            break
        x = x_new
        y = y_new
        iterates.append((x, y, alpha, f(x, y)))
    return iterates

# Initial point and parameters for Armijo's Rule
x0 = 1.0
y0 = 1.0
alpha = 1.0
beta = 0.5
gamma = 0.1

# Run gradient descent
iterates = gradient_descent(x0, y0, alpha, beta, gamma)

print("Iterates:")
print("Iter  |   x     |    y     |   alpha  |   f(x, y)")
print("-------------------------------------------------")
for i in range(min(10, len(iterates))):
    print(f"{i+1:<6}| {iterates[i][0]:.6f} | {iterates[i][1]:.6f} | {iterates[i][2]:.6f} | {iterates[i][3]:.6f}")

# Output the optimal point if iterates exist
if iterates:
    optimal_point = iterates[-1]
    print("\nOptimal Point:")
    print(f"x* = {optimal_point[0]}, y* = {optimal_point[1]}, f(x*, y*) = {optimal_point[3]}")
else:
    print("\nGradient descent did not converge within the maximum number of iterations.")

# Output the parameters chosen for Armijo's Rule
print("\nParameters for Armijo's Rule:")
print(f"alpha = {alpha}, beta = {beta}, gamma = {gamma}")

Iterates:
Iter  |   x     |    y     |   alpha  |   f(x, y)
-------------------------------------------------

Gradient descent did not converge within the maximum number of iterations.

Parameters for Armijo's Rule:
alpha = 1.0, beta = 0.5, gamma = 0.1
