# AdaGrad

$$
G_t = G_{t-1} + g_t \odot g_t
$$
$$
\theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{G_t} + \epsilon} \odot g_t
$$

In [4]:
import numpy as np

class AdaGrad:
    """
    AdaGrad optimizer: gradient descent with a per-parameter adaptive step size.

    A running sum of squared gradients ``G`` is kept across calls and each
    update applies::

        G      <- G + grads ** 2
        params <- params - lr / (sqrt(G) + epsilon) * grads

    ``epsilon`` is added outside the square root. One optimizer instance
    tracks one parameter array; its accumulator takes the shape of the
    ``params`` passed to the first ``update_params`` call.
    """

    def __init__(self, lr=0.01, epsilon=1e-8):
        """
        Initialize AdaGrad optimizer.

        :param lr: float, learning rate
        :param epsilon: float, small constant to avoid division by zero
        """
        self.lr = lr
        self.epsilon = epsilon
        # Running sum of squared gradients; allocated lazily on the first update.
        self.gradient_squared_accumulator = None

    def update_params(self, params, grads):
        """
        Update parameters using AdaGrad optimization algorithm.

        A floating-point (or complex) ``numpy.ndarray`` passed as ``params`` is
        updated in place and the same object is returned. Any other input
        (integer/boolean arrays, lists, tuples) cannot hold the fractional
        update, so it is converted to a new float64 array first; the caller's
        object is left untouched and the new array is returned.

        :param params: numpy.array or array-like, parameters to be updated
        :param grads: numpy.array, array-like or scalar, gradients of the
            parameters (must broadcast against ``params``)
        :return: numpy.array, updated parameters
        :raises ValueError: if ``params`` does not have the shape of the
            accumulator created by an earlier call
        """
        params = np.asanyarray(params)
        if not np.issubdtype(params.dtype, np.inexact):
            # Integer/bool storage would make both the accumulator and the
            # in-place subtraction below fail on float gradients.
            params = params.astype(np.float64)

        # Scalars and ndarrays are used as given; only sequences are converted
        # so that ``grads ** 2`` is defined for them.
        if not (isinstance(grads, np.ndarray) or np.isscalar(grads)):
            grads = np.asanyarray(grads)

        accumulator = self.gradient_squared_accumulator
        if accumulator is None:
            # Same shape and (now guaranteed inexact) dtype as the parameters.
            accumulator = np.zeros_like(params)
            self.gradient_squared_accumulator = accumulator
        elif np.shape(accumulator) != params.shape:
            # Fail before touching the accumulator so a bad call cannot
            # corrupt the optimizer state through broadcasting.
            raise ValueError(
                "params has shape {} but this optimizer was initialized for shape {}".format(
                    params.shape, np.shape(accumulator)
                )
            )

        # Accumulate the square of gradients
        accumulator += grads ** 2

        # Per-parameter step size; it only shrinks as squared gradients add up
        adjusted_lr = self.lr / (np.sqrt(accumulator) + self.epsilon)

        # Update parameters
        params -= adjusted_lr * grads

        return params

# Example usage: a single AdaGrad step on two toy parameters
if __name__ == "__main__":
    # Toy parameter vector and matching gradient vector
    params = np.array([1.5, -0.3])
    grads = np.array([0.3, -0.1])

    # Optimizer with a base learning rate of 0.1 (epsilon left at its default)
    optimizer = AdaGrad(lr=0.1)

    # Apply one update step and show the result
    updated_params = optimizer.update_params(params=params, grads=grads)
    print("Updated Parameters:", updated_params)


Updated Parameters: [ 1.4        -0.20000001]
