In [2]:
import numpy as np

def gradient_descent(f, grad_f, x0, eta, tol=1e-4, max_iter=10000):
    """
    Generic Gradient Descent Solver

    Repeats the update x <- x - eta * grad_f(x) until the Euclidean norm of
    the gradient drops below tol, the gradient stops being finite, or
    max_iter updates have been made.

    f: objective function; not used by the solver, kept so existing
       callers (which may pass None) keep working
    grad_f: callable returning the gradient at x (scalar or array-like)
    x0: starting point (can be float or np.array)
    eta: learning rate
    tol: accuracy threshold for the gradient norm
    max_iter: maximum number of update steps

    Returns a tuple (x, n_iter):
      - converged: x is the final iterate and n_iter is the number of
        updates made before the gradient norm fell below tol;
      - budget exhausted: x is the last iterate and n_iter == max_iter;
      - diverged (gradient became inf/NaN): x is filled with NaN and
        n_iter == max_iter, so "n_iter < max_iter" always means converged.
    """
    x = np.array(x0, dtype=float)
    for i in range(max_iter):
        gradient = np.asarray(grad_f(x), dtype=float)

        # Divergence guard: once the gradient is inf/NaN the norm test below
        # can never succeed, so further iterations would only burn the whole
        # budget on NaN arithmetic and emit floating-point warnings.
        if not np.all(np.isfinite(gradient)):
            return np.full_like(x, np.nan), max_iter

        # Check convergence: norm of gradient < tolerance
        if np.linalg.norm(gradient) < tol:
            return x, i

        # Update step: x_{new} = x_{old} - eta * grad
        x = x - eta * gradient

    return x, max_iter

# --- Problem Definitions ---

# a) f(x) = (x-3)^2 + 2
def f_a(x):
    """Objective for problem a: (x - 3)^2 + 2."""
    offset = x - 3
    return offset**2 + 2


def grad_a(x):
    """Derivative of the problem-a objective: 2 * (x - 3)."""
    offset = x - 3
    return 2 * offset

# b) f(x) = x^4 + x^2 + x
def f_b(x):
    """Objective for problem b: x^4 + x^2 + x."""
    quartic_term = x**4
    quadratic_term = x**2
    return quartic_term + quadratic_term + x


def grad_b(x):
    """Derivative of the problem-b objective: 4x^3 + 2x + 1."""
    cubic_term = 4 * x**3
    linear_term = 2 * x
    return cubic_term + linear_term + 1

# c) f(x,y) = 3x^2 + 2xy + 2y^2 - 4x + y
def f_c(v):
    """Objective for problem c, evaluated at the point v = (x, y)."""
    x, y = v[0], v[1]
    return 3*x**2 + 2*x*y + 2*y**2 - 4*x + y


def grad_c(v):
    """Gradient of the problem-c objective at v = (x, y), as a NumPy array."""
    x, y = v[0], v[1]
    # Partial derivatives in the order (d/dx, d/dy).
    return np.array([6*x + 2*y - 4, 2*x + 4*y + 1])

# --- Execution ---

# Each entry: (label, gradient function, starting point, learning rates to try).
problems = [
    ("a", grad_a, 0, [0.1, 0.5]),
    ("b", grad_b, 1, [0.1, 0.5]),
    ("c", grad_c, [0, 0], [0.1, 0.5]),
]

# Run every problem once per learning rate and report the result of each run.
for problem in problems:
    label, grad_func, start_pt, etas = problem
    print(f"--- Problem {label} ---")
    for e in etas:
        # The solver only evaluates the gradient, so None is passed for f.
        sol, iters = gradient_descent(None, grad_func, start_pt, e)
        print(f"Eta: {e} | Minimum at: {sol} | Iterations: {iters}")
    # Blank line separates the output of consecutive problems.
    print()

--- Problem a ---
Eta: 0.1 | Minimum at: 2.9999571825692186 | Iterations: 50
Eta: 0.5 | Minimum at: 3.0 | Iterations: 1

--- Problem b ---
Eta: 0.1 | Minimum at: -0.3854353879474503 | Iterations: 25
Eta: 0.5 | Minimum at: nan | Iterations: 10000

--- Problem c ---
Eta: 0.1 | Minimum at: [ 0.89998207 -0.69997098] | Iterations: 32


  grad_b = lambda x: 4 * x**3 + 2 * x + 1
  x = x - eta * gradient
  df_dx = 6*v[0] + 2*v[1] - 4
  df_dy = 2*v[0] + 4*v[1] + 1
  x = x - eta * gradient


Eta: 0.5 | Minimum at: [nan nan] | Iterations: 10000

