## Original Adam Optimizer

In [9]:
import math

class Adam:
    def __init__(self, parameters, lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        self.parameters = parameters
        self.lr = lr
        self.betas = betas
        self.eps = eps
        self.weight_decay = weight_decay
        self.state = {}

        for p in self.parameters:
            self.state[p] = {
                'step': 0,
                'exp_avg': 0,
                'exp_avg_sq': 0,
            }

    def step(self):
        for p in self.parameters:
            if p.grad is None:
                continue

            grad = p.grad
            state = self.state[p]

            exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
            beta1, beta2 = self.betas

            state['step'] += 1

            exp_avg = beta1 * exp_avg + (1 - beta1) * grad
            exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * (grad ** 2)
            state['exp_avg'], state['exp_avg_sq'] = exp_avg, exp_avg_sq

            denom = (exp_avg_sq.sqrt() / math.sqrt(state['step'])) + self.eps

            step_size = self.lr / (1 - beta1 ** state['step'])

            p.data -= step_size * (exp_avg / denom + self.weight_decay * p.data)




0.5
0.8


## Refactored Code Using Facade and Adapter Patterns

**Explanation**

1. Parameter Class:

- Represents a parameter with data and gradient attributes.
- Includes a zero_grad method to reset the gradient.

2. ParameterAdapter Class:

- Adapts the Parameter class to the interface expected by the optimizer.
- Provides properties to access and modify data and grad.

3. Adam Class:

 - Implements the Adam optimizer with the necessary steps for parameter updates.

4. OptimizerFacade Class:

- Simplifies the interface for using the optimizer, providing zero_grad and step methods.


In [7]:
import math

class Parameter:
    def __init__(self, data):
        self.data = data
        self.grad = None

    def zero_grad(self):
        self.grad = None

class ParameterAdapter:
    def __init__(self, parameter):
        self.parameter = parameter

    @property
    def data(self):
        return self.parameter.data

    @data.setter
    def data(self, value):
        self.parameter.data = value

    @property
    def grad(self):
        return self.parameter.grad

    @grad.setter
    def grad(self, value):
        self.parameter.grad = value

    def zero_grad(self):
        self.parameter.zero_grad()

class Adam:
    def __init__(self, parameters, lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        self.parameters = parameters
        self.lr = lr
        self.betas = betas
        self.eps = eps
        self.weight_decay = weight_decay
        self.state = {}

        for p in self.parameters:
            self.state[p] = {
                'step': 0,
                'exp_avg': 0,
                'exp_avg_sq': 0,
            }

    def step(self):
        for p in self.parameters:
            if p.grad is None:
                continue

            grad = p.grad
            state = self.state[p]

            exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
            beta1, beta2 = self.betas

            state['step'] += 1

            exp_avg = beta1 * exp_avg + (1 - beta1) * grad
            exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * (grad ** 2)
            state['exp_avg'], state['exp_avg_sq'] = exp_avg, exp_avg_sq

            denom = (exp_avg_sq ** 0.5) + self.eps

            step_size = self.lr / (1 - beta1 ** state['step'])

            p.data -= step_size * (exp_avg / denom + self.weight_decay * p.data)

class OptimizerFacade:
    def __init__(self, optimizer):
        self.optimizer = optimizer

    def zero_grad(self):
        for param in self.optimizer.parameters:
            param.zero_grad()

    def step(self):
        self.optimizer.step()

# Example usage
params = [Parameter(0.5), Parameter(0.8)]
adapted_params = [ParameterAdapter(p) for p in params]
adam = Adam(adapted_params)
optimizer = OptimizerFacade(adam)

# Simulate gradient calculation
for param in params:
    param.grad = 0.1  # Example gradient

# Optimization step
optimizer.zero_grad()
optimizer.step()

# Output the updated parameter values
for param in params:
    print(param.data)


0.5
0.8


## Results
- This refactored code should now correctly update the parameter values using the Adam optimizer, while adhering to the Facade and Adapter design patterns. ​