### Adagrad optimizer

In [1]:
import numpy as np
class Adagrad:
    """
    Adagrad optimizer.

    Keeps a running element-wise sum of squared gradients (``cache``) and
    divides each step by the square root of that sum, so parameters that have
    accumulated large gradients take smaller steps.
    """

    def __init__(self, learning_rate=0.01, epsilon=1e-8):
        """
        learning_rate: base step size, applied before the per-parameter scaling
        epsilon: small constant added to the denominator to avoid division by zero
        """
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.cache = None  # Running sum of squared gradients; allocated on the first update

    def update(self, weights, gradients):
        """
        Update weights using the Adagrad algorithm.

        weights: current values of model parameters (numpy array or array-like)
        gradients: gradients of the loss w.r.t. the weights (same shape as weights)

        Returns the updated weights. A floating-point numpy array is updated
        in place and returned as the same object; any other input (integer
        array, list, ...) is left untouched and a new float array is returned.
        """
        gradients = np.asarray(gradients)

        # An integer array (or a plain sequence) cannot store the fractional
        # in-place update below, so work on a floating-point copy in that case.
        if not (isinstance(weights, np.ndarray) and np.issubdtype(weights.dtype, np.inexact)):
            weights = np.array(weights, dtype=float)

        # Initialize the cache on first use; it inherits the (floating) dtype of weights
        if self.cache is None:
            self.cache = np.zeros_like(weights)

        # Accumulate the squared gradients
        self.cache += gradients**2

        # Scale each step by the inverse root of its accumulated squared gradient
        weights -= self.learning_rate * gradients / (np.sqrt(self.cache) + self.epsilon)

        return weights

if __name__ == "__main__":
    # Example usage: a single Adagrad step on a small weight vector.

    # Initial weights (parameters of a simple model)
    weights = np.array([0.5, -0.3, 0.8, -0.1])

    # Simulated gradients (gradients of the loss w.r.t. these weights)
    gradients = np.array([0.1, -0.2, 0.3, -0.1])

    # Named `optimizer` rather than `adagrad_optimizer` so the instance is not
    # confused with (or clobbered by) the function of that name in this notebook.
    optimizer = Adagrad(learning_rate=0.1)

    # update() modifies a float array in place and returns it, so `weights`
    # and `updated_weights` refer to the same array after this call.
    updated_weights = optimizer.update(weights, gradients)

    print("Updated weights:", updated_weights)


Updated weights: [ 4.00000010e-01 -2.00000005e-01  7.00000003e-01 -9.99999898e-09]


In [7]:
def adagrad_optimizer(gradient_function, params, learning_rate=0.01, epsilon=1e-8, iterations=1000, log_every=100):
    """
    Adagrad optimizer implementation.

    The caller's ``params`` is never modified: optimization runs on a private
    floating-point copy, which is what gets returned.

    Parameters:
        gradient_function: Callable
            Function to compute gradients w.r.t parameters. It is called with
            the current parameter array on every iteration.
        params: np.ndarray or array-like
            Initial parameters for optimization. Integer input is converted
            to float; floating-point dtypes are preserved.
        learning_rate: float
            Initial learning rate.
        epsilon: float
            Small value to avoid division by zero.
        iterations: int
            Number of optimization steps.
        log_every: int
            Print progress every ``log_every`` iterations (default 100).
            Pass 0 or None to disable logging.

    Returns:
        params: np.ndarray
            Optimized parameters (a new array, not the input object).
    """
    # Copy so the in-place updates below cannot leak into the caller's array,
    # and promote non-float input because `params -= <float step>` cannot be
    # stored in an integer array.
    params = np.array(params)
    if not np.issubdtype(params.dtype, np.inexact):
        params = params.astype(float)

    accumulated_grads = np.zeros_like(params)

    for i in range(iterations):
        gradients = np.asarray(gradient_function(params))
        accumulated_grads += gradients**2
        adjusted_learning_rates = learning_rate / (np.sqrt(accumulated_grads) + epsilon)
        params -= adjusted_learning_rates * gradients

        if log_every and i % log_every == 0:
            # The objective itself is not available here, so the value reported
            # under the "Loss" label is the squared gradient norm, used as a
            # convergence proxy (it shrinks towards 0 at a stationary point).
            grad_sq_norm = np.sum(gradients**2)
            print(f"Iteration {i}, Loss: {grad_sq_norm:.4f}")

    return params

# Example Usage:

def quadratic_gradient(params):
    """
    Gradient of the quadratic f(x) = x^2.

    The derivative is 2x, so the result is simply twice the input; for an
    array input this is applied element-wise by the multiplication.
    """
    gradient = 2 * params
    return gradient

initial_params = np.array([3.0, -1.5])  # Starting points for optimization
# Pass a copy: the optimizer may update the array it is given in place, and
# initial_params should keep the starting values for later comparison.
optimized_params = adagrad_optimizer(quadratic_gradient, initial_params.copy())
print("Optimized Parameters:", optimized_params)

Iteration 0, Loss: 45.0000
Iteration 100, Loss: 38.6792
Iteration 200, Loss: 36.0957
Iteration 300, Loss: 34.2036
Iteration 400, Loss: 32.6686
Iteration 500, Loss: 31.3611
Iteration 600, Loss: 30.2147
Iteration 700, Loss: 29.1900
Iteration 800, Loss: 28.2611
Iteration 900, Loss: 27.4104
Optimized Parameters: [ 2.40558342 -0.93238559]
