In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class Optimizers:
    """Collection of first-order update rules that share one learning rate.

    Every method computes a single update step and returns the new parameters
    (plus any updated optimizer state). No state is stored on the instance
    apart from ``learning_rate``; the caller must pass the returned state back
    in on the next call.
    """

    def __init__(self, learning_rate=0.01):
        """Store the step size used by every update rule.

        Args:
            learning_rate: Multiplier applied to each update step.
        """
        self.learning_rate = learning_rate

    def gradient_descent(self, params, gradients):
        """Standard gradient descent step.

        Simple but effective for convex problems.

        Args:
            params: Current parameter values.
            gradients: Gradient of the objective at ``params``.

        Returns:
            ``params - learning_rate * gradients``.
        """
        return params - self.learning_rate * gradients

    def momentum(self, params, gradients, velocity, momentum=0.9):
        """Momentum-based gradient descent step.

        Accumulates a decaying sum of past scaled gradients, which can help
        overcome local minima and speed up convergence.

        Args:
            params: Current parameter values.
            gradients: Gradient of the objective at ``params``.
            velocity: Velocity returned by the previous call (zeros initially).
            momentum: Decay factor applied to the previous velocity.

        Returns:
            Tuple ``(new_params, new_velocity)``.
        """
        velocity = momentum * velocity + self.learning_rate * gradients
        return params - velocity, velocity

    def rmsprop(self, params, gradients, cache, decay_rate=0.9, epsilon=1e-8):
        """RMSprop step.

        Scales each component of the step by a running average of recent
        squared gradients.

        Args:
            params: Current parameter values.
            gradients: Gradient of the objective at ``params``.
            cache: Running average of squared gradients from the previous call
                (zeros initially).
            decay_rate: Weight kept from the previous ``cache``.
            epsilon: Small constant added to the denominator to avoid division
                by zero.

        Returns:
            Tuple ``(new_params, new_cache)``.
        """
        cache = decay_rate * cache + (1 - decay_rate) * np.square(gradients)
        update = self.learning_rate * gradients / (np.sqrt(cache) + epsilon)
        return params - update, cache

    def adam(self, params, gradients, moment, velocity, t,
             beta1=0.9, beta2=0.999, epsilon=1e-8):
        """Adam step.

        Combines a momentum-style first-moment estimate with an RMSprop-style
        second-moment estimate, each corrected for its zero initialization.

        Args:
            params: Current parameter values.
            gradients: Gradient of the objective at ``params``.
            moment: First-moment estimate from the previous call (zeros
                initially).
            velocity: Second-moment estimate from the previous call (zeros
                initially).
            t: 1-based step counter (1 on the first call).
            beta1: Decay factor for the first-moment estimate.
            beta2: Decay factor for the second-moment estimate.
            epsilon: Small constant added to the denominator to avoid division
                by zero.

        Returns:
            Tuple ``(new_params, new_moment, new_velocity)``. The returned
            moments are the uncorrected running estimates, ready to be passed
            to the next call.

        Raises:
            ValueError: If ``t`` is less than 1.
        """
        # The bias-correction denominators are 1 - beta**t: exactly zero at
        # t == 0 and negative for t < 0, which would silently yield inf/NaN.
        if t < 1:
            raise ValueError(f"t must be >= 1 (1-based step counter), got {t}")

        moment = beta1 * moment + (1 - beta1) * gradients
        velocity = beta2 * velocity + (1 - beta2) * np.square(gradients)

        # Bias correction
        moment_corrected = moment / (1 - beta1**t)
        velocity_corrected = velocity / (1 - beta2**t)

        update = self.learning_rate * moment_corrected / (np.sqrt(velocity_corrected) + epsilon)
        return params - update, moment, velocity

In [3]:
# Create a simple 2D optimization problem (bowl-shaped function)
def objective_function(x, y):
    """Evaluate the bowl-shaped test surface f(x, y) = x^2 + y^2."""
    x_squared = x**2
    y_squared = y**2
    return x_squared + y_squared

def gradient_function(x, y):
    """Return the gradient of x^2 + y^2 at (x, y) as the array [2x, 2y]."""
    df_dx = 2 * x
    df_dy = 2 * y
    return np.array([df_dx, df_dy])

In [4]:
# Sample the objective on a regular 100 x 100 grid covering [-2, 2] x [-2, 2]
x = np.linspace(start=-2, stop=2, num=100)
y = np.linspace(start=-2, stop=2, num=100)
X, Y = np.meshgrid(x, y, indexing="xy")
Z = objective_function(X, Y)

In [5]:
# Experiment settings: starting position, step size and number of update steps
start_points = np.array((-1.5, 1.5))
learning_rate = 0.1
n_iterations = 50

# One Optimizers instance, configured with the step size above
optimizer = Optimizers(learning_rate)

In [7]:
# One trajectory per optimizer, each seeded with its own copy of the start point
paths = {
    name: [start_points.copy()]
    for name in ('GD', 'Momentum', 'RMSprop', 'Adam')
}

# Latest position of each optimizer (independent copies, same keys as `paths`)
current_points = {name: start_points.copy() for name in paths}

# Optimizer state, all starting at zero (four independent arrays):
#   velocity      -> Momentum
#   cache         -> RMSprop
#   moment        -> Adam
#   velocity_adam -> Adam
velocity, cache, moment, velocity_adam = (
    np.zeros_like(start_points) for _ in range(4)
)

In [8]:
# Run optimization
# Each iteration advances all four optimizers by one step. Every optimizer has
# its own entry in `current_points`, so the trajectories do not influence each
# other. This cell reads and rebinds the module-level state (`current_points`,
# `paths`, `velocity`, `cache`, `moment`, `velocity_adam`), so re-running it
# without first re-running the initialization cell continues from the previous
# end state instead of starting over.
for t in range(n_iterations):
    # Gradient Descent
    # Stateless: only the position is carried between iterations.
    grad = gradient_function(*current_points['GD'])
    current_points['GD'] = optimizer.gradient_descent(current_points['GD'], grad)
    paths['GD'].append(current_points['GD'].copy())
    
    # Momentum
    # `velocity` is returned by the optimizer and fed back in on the next step.
    grad = gradient_function(*current_points['Momentum'])
    current_points['Momentum'], velocity = optimizer.momentum(
        current_points['Momentum'], grad, velocity)
    paths['Momentum'].append(current_points['Momentum'].copy())
    
    # RMSprop
    # `cache` (running average of squared gradients) is carried between steps.
    grad = gradient_function(*current_points['RMSprop'])
    current_points['RMSprop'], cache = optimizer.rmsprop(
        current_points['RMSprop'], grad, cache)
    paths['RMSprop'].append(current_points['RMSprop'].copy())
    
    # Adam
    # Pass t+1: Adam's bias correction divides by (1 - beta**t), which is
    # zero when t == 0, so the step counter must start at 1.
    grad = gradient_function(*current_points['Adam'])
    current_points['Adam'], moment, velocity_adam = optimizer.adam(
        current_points['Adam'], grad, moment, velocity_adam, t+1)
    paths['Adam'].append(current_points['Adam'].copy())

In [10]:
# Report where each optimizer ended up and the objective value at that point
print("\nFinal positions and objective values:")
print("-" * 50)
for method in current_points:
    point = current_points[method]
    final_value = objective_function(point[0], point[1])
    print(f"{method}:")
    print(f"  Final position: ({point[0]:.4f}, {point[1]:.4f})")
    print(f"  Final value: {final_value:.4f}")
    print()


Final positions and objective values:
--------------------------------------------------
GD:
  Final position: (-0.0000, 0.0000)
  Final value: 0.0000

Momentum:
  Final position: (0.0457, -0.0457)
  Final value: 0.0042

RMSprop:
  Final position: (0.0000, -0.0000)
  Final value: 0.0000

Adam:
  Final position: (-0.0764, 0.0764)
  Final value: 0.0117

