## Define the Multi-Modal Test Function

We'll use: $f(x, y) = \sin(x) \cos(y) + 0.1(x^2 + y^2)$

This function has:
- **Multiple local minima** (many valleys)
- **One global minimum** at roughly $(x, y) \approx (-1.31, 0)$, where $f \approx -0.79$
- **Smooth gradients** (well-defined everywhere)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Seed NumPy's global random number generator so that any later random draws
# are reproducible from run to run.
# NOTE(review): no cell visible in this section draws random numbers — confirm
# the seed is still needed further down the notebook.
np.random.seed(509)

In [8]:
def multi_modal(x):
    """Evaluate f(x, y) = sin(x) * cos(y) + 0.1 * (x**2 + y**2).

    ``x`` is only ever indexed: ``x[0]`` is the x-coordinate and ``x[1]`` the
    y-coordinate. Each entry may be a scalar or a NumPy array; with arrays the
    function is evaluated element-wise.
    """
    first, second = x[0], x[1]
    # Oscillating term that creates the separate valleys.
    ripple = np.sin(first) * np.cos(second)
    # Quadratic bowl that grows with distance from the origin.
    bowl = 0.1 * (first**2 + second**2)
    return ripple + bowl

def multi_modal_grad(x):
    """Return the gradient of the multi-modal function as ``[df/dx, df/dy]``.

    ``x[0]`` is the x-coordinate and ``x[1]`` the y-coordinate. The two partial
    derivatives of sin(x) * cos(y) + 0.1 * (x**2 + y**2) are packed into a
    NumPy array.
    """
    u, v = x[0], x[1]
    df_dx = np.cos(u) * np.cos(v) + 0.2 * u
    df_dy = -np.sin(u) * np.sin(v) + 0.2 * v
    return np.array([df_dx, df_dy])

In [11]:
# Sample the square [-5, 5] x [-5, 5] with 300 points along each axis.
x_range = np.linspace(start=-5, stop=5, num=300)
y_range = np.linspace(start=-5, stop=5, num=300)
X, Y = np.meshgrid(x_range, y_range)

# multi_modal only indexes its argument, so passing the two coordinate grids
# as a pair evaluates the function at every grid point in one call.
Z = multi_modal((X, Y))

In [12]:
Z

array([[5.27201056, 5.24121609, 5.21033827, ..., 4.65677732, 4.69211799,
        4.72798944],
       [5.20777751, 5.17669467, 5.14556336, ..., 4.6548863 , 4.68997349,
        4.72555657],
       [5.1434985 , 5.11212478, 5.08073777, ..., 4.65349339, 4.68832488,
        4.72361708],
       ...,
       [5.1434985 , 5.11212478, 5.08073777, ..., 4.65349339, 4.68832488,
        4.72361708],
       [5.20777751, 5.17669467, 5.14556336, ..., 4.6548863 , 4.68997349,
        4.72555657],
       [5.27201056, 5.24121609, 5.21033827, ..., 4.65677732, 4.69211799,
        4.72798944]])

In [15]:
# 3D surface plot of Z over the (X, Y) grid.
fig = go.Figure(
    data=[
        go.Surface(
            x=X,
            y=Y,
            z=Z,
            colorscale='Viridis',
            opacity=0.9,
            # Draw z-level contour lines coloured by the colorscale and
            # project them onto the z plane.
            contours={
                'z': {
                    'show': True,
                    'usecolormap': True,
                    'highlightcolor': "limegreen",
                    'project': {'z': True},
                },
            },
        )
    ]
)

fig.update_layout(
    title='3D Surface: Multi-Modal Function (Multiple Local Minima!)',
    scene={
        'xaxis_title': 'x',
        'yaxis_title': 'y',
        'zaxis_title': 'f(x,y)',
        # Initial camera position for the 3D scene.
        'camera': {'eye': {'x': 1.5, 'y': 1.5, 'z': 1.3}},
    },
    width=900,
    height=700,
)

fig.show()


In [16]:
# 2D contour view of the same grid values.
fig = go.Figure()

fig.add_trace(
    go.Contour(
        x=x_range,
        y=y_range,
        z=Z,
        colorscale='Turbo',
        showscale=True,
        # Contour levels run from -2 to 4 in steps of 0.2.
        contours={'start': -2, 'end': 4, 'size': 0.2},
        colorbar={'title': "f(x,y)"},
    )
)

fig.update_layout(
    title='Contour Plot: The Valleys Represent Local Minima',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=700,
    # Pin both axes to the sampled square.
    xaxis={'range': [-5, 5]},
    yaxis={'range': [-5, 5]},
)

fig.show()

In [57]:
def lr_schedule_constant(t):
    """Constant learning rate schedule: 0.1 at every iteration ``t``."""
    fixed_rate = 0.1
    return fixed_rate


def lr_schedule_step(t):
    """Step learning rate schedule.

    Returns 0.1 while ``t < 50``, 0.01 while ``t < 100``, and 0.001 from
    then on.
    """
    # (exclusive upper bound on t, rate), checked in order.
    for upper_bound, rate in ((50, 0.1), (100, 0.01)):
        if t < upper_bound:
            return rate
    return 0.001
    
def lr_schedule_exponential(t):
    """Exponential decay learning rate schedule.

    Starts at 0.1 for ``t = 0`` and is multiplied by 0.95 for every further
    iteration.
    """
    decay_factor = 0.95 ** t
    return 0.1 * decay_factor


In [58]:
def gradient_descent(f, grad_f, x0, lr_schedule, max_iter=200, tol=1e-6):
    """Minimise ``f`` by gradient descent with a per-iteration learning rate.

    Args:
        f: Objective; called as ``f(x)`` with the current iterate.
        grad_f: Gradient of the objective; called as ``grad_f(x)``.
        x0: Starting point. It is copied, so the caller's array is not modified.
        lr_schedule: Callable mapping the iteration index ``t`` (starting at
            0) to the step size used for that update.
        max_iter: Maximum number of updates to perform.
        tol: Stop as soon as the gradient norm at the current iterate is
            below this value.

    Returns:
        A dict with:
            'x': the final iterate,
            'f_values': objective value at every recorded iterate,
            'grad_norms': gradient norm at every recorded iterate,
            'trajectory': the recorded iterates, one per row.
        The three histories are aligned: index ``k`` of each refers to the
        iterate after ``k`` updates, so index 0 is the starting point and the
        last index is the returned ``x``.
    """
    # Float copy: protects the caller's array and keeps the history one dtype.
    x = np.array(x0, dtype=float)
    grad = grad_f(x)

    f_values = [f(x)]
    grad_norms = [np.linalg.norm(grad)]
    trajectory = [x]

    for t in range(max_iter):
        # Test convergence at the current iterate *before* stepping, so no
        # extra update is taken and `t` is the number of updates performed.
        if grad_norms[-1] < tol:
            print(f"Stopped after {t} iterations")
            break

        x = x - lr_schedule(t) * grad
        grad = grad_f(x)

        # Record the new iterate in all three histories at once so that they
        # stay index-aligned.
        f_values.append(f(x))
        grad_norms.append(np.linalg.norm(grad))
        trajectory.append(x)

    return {'x': x,
            'f_values': np.array(f_values),
            'grad_norms': np.array(grad_norms),
            'trajectory': np.array(trajectory)}



In [63]:
x0 = np.array([2, -2])

# Initial learning rates to compare (other values tried: [0.0001, 0.1, 30]).
lr_options = [0.1]

# Maps each initial learning rate to the result dict of its run.
res_for_different_lrs = {}

for lr in lr_options:
    print(f"Learning Rate: {lr}")
    # Start the exponential decay (factor 0.95 per iteration) from `lr`, so
    # that each entry of lr_options really produces its own run. For lr = 0.1
    # this is the same schedule as lr_schedule_exponential.
    res_dict = gradient_descent(
        multi_modal,
        multi_modal_grad,
        x0,
        lr_schedule=lambda t, initial_lr=lr: initial_lr * (0.95 ** t),
    )
    res_for_different_lrs[lr] = res_dict

Learning Rate: 0.1


In [64]:
# Pick the run to inspect; the key must be one of the learning rates stored
# in res_for_different_lrs (other values tried: 30, 0.0001).
lr = 0.1

res_dict = res_for_different_lrs[lr]
f_values, grad_norms = res_dict['f_values'], res_dict['grad_norms']

# Objective value per recorded iterate; as the last expression of the cell the
# figure is rendered as the cell output.
px.line(
    x=np.arange(len(f_values)),
    y=f_values,
    title='Gradient Descent Progression',
    labels={'x': 'Iteration', 'y': 'f(x)'},
)


In [65]:
grad_norms = res_dict['grad_norms']

# A notebook cell only renders the value of its last expression, so a bare
# px.line(...) that is followed by another statement is built and then
# discarded. Show each figure explicitly so both appear.
px.line(x=np.arange(len(f_values)), y=f_values,
        labels={'x': 'Iteration', 'y': 'f(x)'}, title='Gradient Descent Progression').show()

# log_y=True draws the raw gradient norms on a logarithmic y-axis.
px.line(x=np.arange(len(grad_norms)), y=grad_norms,
        labels={'x': 'Iteration', 'y': 'log ||∇f(x)||'}, title='Gradient log Norm Progression', log_y=True).show()


In [66]:
# Plot trajectories for different learning rates on the contour plot
fig = go.Figure()

# Background: contour lines of the objective (colour bar hidden so the legend
# has room for the trajectories).
fig.add_trace(go.Contour(
    x=x_range,
    y=y_range,
    z=Z,
    colorscale='Turbo',
    showscale=False,
    contours=dict(start=-2, end=4, size=0.2),
    name='Function'
))

# One path plus one start marker per learning rate.
colors = ['red', 'blue', 'green']
for i, lr in enumerate(lr_options):
    trajectory = res_for_different_lrs[lr]['trajectory']
    # Cycle through the palette so that more than len(colors) learning rates
    # reuse colours instead of raising IndexError.
    color = colors[i % len(colors)]

    # Column 0 of the trajectory holds x, column 1 holds y.
    fig.add_trace(go.Scatter(
        x=trajectory[:, 0],
        y=trajectory[:, 1],
        mode='lines+markers',
        name=f'LR={lr}',
        line=dict(color=color, width=2),
        marker=dict(size=4)
    ))

    # Mark starting point (first row of the trajectory)
    fig.add_trace(go.Scatter(
        x=[trajectory[0, 0]],
        y=[trajectory[0, 1]],
        mode='markers',
        name=f'Start (LR={lr})',
        marker=dict(color=color, size=10, symbol='star')
    ))

fig.update_layout(
    title='Gradient Descent Trajectories with Different Learning Rates',
    xaxis_title='x',
    yaxis_title='y',
    width=900,
    height=700,
    # Keep the view on the sampled square even if a trajectory leaves it.
    xaxis=dict(range=[-5, 5]),
    yaxis=dict(range=[-5, 5])
)

fig.show()