## Simple Optimizer 

- SGD + Momentum
- Adam


### 一个基类：SimpleOptimizer
### 两个子类各自实现 step：SGD（带动量）和 Adam

In [1]:
import numpy as np 

In [4]:
class SimpleOptimizer:
    """Base class that holds the state shared by all optimizers.

    Subclasses supply the actual update rule by overriding ``step``.
    """

    def __init__(self, params, lr=0.01):
        """Store the parameters to optimise and the learning rate.

        Args:
            params: Collection of parameters; kept by reference, not copied.
            lr: Learning rate.
        """
        self.params, self.lr = params, lr

    def step(self, grads):
        """Apply one update given ``grads``; must be overridden.

        Left unimplemented here because the base class does not know which
        update rule (e.g. momentum or Adam) should be run.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()



In [5]:
class SGD(SimpleOptimizer):
    """Stochastic gradient descent with momentum.

    Update rule applied to every parameter on each step:

        v = momentum * v + grad
        w = w - lr * v
    """

    def __init__(self, params, lr=0.01, momentum=0.9):
        """Set up the optimizer and its zero-initialised velocity buffers.

        Args:
            params: Sequence of parameters to optimise. They are expected to
                be NumPy arrays so that ``step`` can update them in place.
            lr: Learning rate.
            momentum: Factor applied to the previous velocity.
        """
        super().__init__(params, lr)
        self.momentum = momentum
        # One velocity buffer per parameter, created with np.zeros_like.
        self.vs = [np.zeros_like(p) for p in self.params]

    def step(self, grads):
        """Apply one momentum-SGD update to every parameter.

        Args:
            grads: Gradients, one per parameter, in the same order as
                ``self.params``.
        """
        for i, (param, grad) in enumerate(zip(self.params, grads)):
            # The velocity list is ``self.vs``; there is no ``self.v`` on
            # this class, so using that name raised AttributeError.
            self.vs[i] = self.momentum * self.vs[i] + grad

            # Augmented assignment so the caller's array object is modified
            # rather than a new local being bound.
            param -= self.lr * self.vs[i]
            

In [6]:
class Adam(SimpleOptimizer):
    """Adam optimizer with bias-corrected moment estimates.

    Update rule applied to every parameter on step ``t``:

        m = beta1 * m + (1 - beta1) * grad
        v = beta2 * v + (1 - beta2) * grad**2
        m_hat = m / (1 - beta1**t)
        v_hat = v / (1 - beta2**t)
        w = w - lr * m_hat / (sqrt(v_hat) + eps)
    """

    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        """Store hyper-parameters and create zeroed moment buffers.

        Args:
            params: Sequence of parameters to optimise.
            lr: Learning rate.
            beta1: Decay rate of the first-moment estimate.
            beta2: Decay rate of the second-moment estimate.
            eps: Constant added to the denominator of the update.
        """
        super().__init__(params, lr)

        self.beta1, self.beta2, self.eps = beta1, beta2, eps

        # First- and second-moment buffers, one pair per parameter.
        self.m = [np.zeros_like(p) for p in self.params]
        self.v = [np.zeros_like(p) for p in self.params]
        # Number of steps taken so far; drives the bias correction.
        self.t = 0

    def step(self, grads):
        """Apply one Adam update to every parameter.

        Args:
            grads: Gradients, one per parameter, in the same order as
                ``self.params``.
        """
        self.t += 1

        # The bias-correction denominators depend only on the step count,
        # so they are computed once per call rather than once per parameter.
        m_correction = 1 - self.beta1 ** self.t
        v_correction = 1 - self.beta2 ** self.t

        for idx, (weight, g) in enumerate(zip(self.params, grads)):
            first = self.beta1 * self.m[idx] + (1 - self.beta1) * g
            self.m[idx] = first
            second = self.beta2 * self.v[idx] + (1 - self.beta2) * (g ** 2)
            self.v[idx] = second

            first_hat = first / m_correction
            second_hat = second / v_correction

            denom = np.sqrt(second_hat) + self.eps
            weight -= self.lr * first_hat / denom
        