### Adagrad optimizer

In [1]:
import numpy as np
class Adagrad:
    """Adagrad optimizer for a single parameter array.

    Keeps a running sum of squared gradients (``cache``) and divides each
    coordinate's step by ``sqrt(cache) + epsilon``.
    """

    def __init__(self, learning_rate=0.01, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.epsilon = epsilon  # added to the denominator so it is never exactly zero
        self.cache = None  # running sum of squared gradients; created on the first update

    def update(self, weights, gradients):
        """
        Apply one Adagrad step and return the updated weights.

        weights: current values of the model parameters (array-like)
        gradients: gradients of the loss w.r.t. the weights (array-like)

        Floating-point numpy arrays are updated in place and the same object
        is returned. Any other input (integer arrays, lists) is converted to a
        float array first; the caller's object is left untouched, so the
        return value must be used.
        """
        # Accept lists as well as arrays; ndarrays pass through without a copy.
        gradients = np.asanyarray(gradients)
        weights = np.asanyarray(weights)

        # An integer array cannot receive a fractional in-place update, and a
        # cache derived from it could not accumulate float gradients either.
        if not np.issubdtype(weights.dtype, np.inexact):
            weights = weights.astype(np.float64)

        # Initialize the cache on first use
        if self.cache is None:
            self.cache = np.zeros_like(weights)

        # Accumulate the squared gradients
        self.cache += gradients ** 2

        # Update weights with the per-coordinate scaled step
        weights -= self.learning_rate * gradients / (np.sqrt(self.cache) + self.epsilon)

        return weights

if __name__ == "__main__":
    # Demo: take a single Adagrad step on a four-element weight vector.

    # Optimizer under demonstration
    adagrad_optimizer = Adagrad(learning_rate=0.1)

    # Starting parameters of a toy model
    weights = np.array([0.5, -0.3, 0.8, -0.1])

    # Made-up loss gradients, one entry per weight
    gradients = np.array([0.1, -0.2, 0.3, -0.1])

    # Apply the step and show the result
    updated_weights = adagrad_optimizer.update(weights=weights, gradients=gradients)
    print("Updated weights:", updated_weights)


Updated weights: [ 4.00000010e-01 -2.00000005e-01  7.00000003e-01 -9.99999898e-09]


In [12]:
import numpy as np

class AdagradOptimizer:
    """Adagrad optimizer operating on parallel sequences of gradients and parameters."""

    def __init__(self, learning_rate=0.01, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        # One squared-gradient accumulator per entry; built on the first update.
        self.grad_squared = None

    def update(self, gradients, params):
        """
        Apply one Adagrad step to every entry of ``params`` and return ``params``.

        gradients: sequence of gradient arrays
        params: sequence of parameter arrays, matched to ``gradients`` by position

        Each ``params[i]`` is modified through ``-=``.
        """
        if self.grad_squared is None:
            self.grad_squared = [np.zeros_like(g) for g in gradients]

        # zip stops at the shorter of the two sequences; only the index and
        # the gradient are needed inside the loop.
        for idx, (g, _) in enumerate(zip(gradients, params)):
            self.grad_squared[idx] += g ** 2
            denominator = np.sqrt(self.grad_squared[idx]) + self.epsilon
            params[idx] -= self.learning_rate / denominator * g

        return params

# Linear Regression using Adagrad
# Generate synthetic data
np.random.seed(42)  # fixed seed so the draws below are repeatable
X = np.random.randn(100, 1)  # 100 samples of a single feature
# Targets follow a line with slope 3 and intercept 2, plus Gaussian noise scaled by 0.5
y = 3 * X[:, 0] + 2 + np.random.randn(100) * 0.5

# Prepend a column of ones so that params[0] multiplies a constant 1
X_bias = np.hstack([np.ones((X.shape[0], 1)), X])

# Random starting point for the two parameters, and the optimizer that will move them
params = np.random.randn(2)
optimizer = AdagradOptimizer(learning_rate=0.1)

# Loss function (Mean Squared Error)
def mse_loss(y_true, y_pred):
    """Return the mean of the squared differences between y_true and y_pred."""
    residual = y_true - y_pred
    return np.mean(residual ** 2)

# Gradient computation
def compute_gradients(X, y, params):
    """
    Return the gradient of the mean squared error of ``X @ params`` against ``y``.

    X: design matrix
    y: target values
    params: current parameter vector
    """
    residual = X @ params - y
    doubled_Xt = 2 * X.T
    return doubled_Xt @ residual / len(y)

# Training loop
num_epochs = 300
for epoch in range(num_epochs):
    # Gradient over the whole dataset at the current parameters
    gradients = compute_gradients(X_bias, y, params)

    # The optimizer works on lists, so wrap the single array and unpack the single result
    (params,) = optimizer.update([gradients], [params])

    # Report the loss after the step (epochs are shown 1-based)
    loss = mse_loss(y, X_bias @ params)
    print(f"Epoch {epoch + 1}: Loss = {loss:.4f}, Params = {params}")

Epoch 1: Loss = 6.1367, Params = [0.45778736 0.66078453]
Epoch 2: Loss = 5.7277, Params = [0.52612285 0.73002019]
Epoch 3: Loss = 5.4105, Params = [0.58070821 0.78580217]
Epoch 4: Loss = 5.1472, Params = [0.62715929 0.8336065 ]
Epoch 5: Loss = 4.9204, Params = [0.66807876 0.8759813 ]
Epoch 6: Loss = 4.7203, Params = [0.70492123 0.914354  ]
Epoch 7: Loss = 4.5408, Params = [0.7385964  0.94961856]
Epoch 8: Loss = 4.3778, Params = [0.76971778 0.98237843]
Epoch 9: Loss = 4.2285, Params = [0.79872223 1.01306356]
Epoch 10: Loss = 4.0905, Params = [0.82593388 1.04199308]
Epoch 11: Loss = 3.9624, Params = [0.8516011  1.06941156]
Epoch 12: Loss = 3.8427, Params = [0.87591919 1.09551132]
Epoch 13: Loss = 3.7305, Params = [0.89904505 1.12044684]
Epoch 14: Loss = 3.6249, Params = [0.92110696 1.14434445]
Epoch 15: Loss = 3.5252, Params = [0.94221145 1.16730898]
Epoch 16: Loss = 3.4308, Params = [0.96244813 1.18942861]
Epoch 17: Loss = 3.3412, Params = [0.98189323 1.21077832]
Epoch 18: Loss = 3.2559

In [13]:
# Model output for feature value 2; the leading 1 multiplies params[0]
np.array([1, 2]) @ params

7.396593322390995