In [None]:
# default_exp updates

In [None]:
#hide
%load_ext autoreload
%autoreload 2

# updates

The various update rules implemented in `gradless` are collected here. Each update rule is constructed as a class so that it can store any information it needs to update the parameters, but every update returns a numpy array of floats to be used as the step that updates the model parameters.



In [None]:
#hide
from nbdev.showdoc import *



In [None]:
#export
import numpy
import scipy
from abc import ABC, abstractmethod 

In [None]:
#export

class UpdateBase(ABC):
    """Abstract parent of the update rules.

    Concrete rules must override `evaluate`; the base class itself cannot
    be instantiated because `evaluate` is abstract.
    """

    @abstractmethod
    def evaluate(self):
        """Workhorse of the class; the abstract body does nothing and returns None."""
        return None


In [None]:
def get_step(theta, nu, m_t, v_t, G_t, beta1, beta2, t, epsilon):
    """Return the bias-corrected step ``nu * m_hat / (v_hat**0.5 + epsilon)``.

    Parameters
    ----------
    theta, G_t : accepted for signature compatibility; not used here.
    nu : multiplier applied to the corrected first moment.
    m_t, v_t : first- and second-moment accumulators.
    beta1, beta2 : decay rates used in the bias corrections.
    t : exponent of the bias corrections; ``t == 0`` makes both
        denominators zero.
    epsilon : constant added to the square root in the denominator.

    Returns
    -------
    The step (same kind of object as ``m_t`` / ``v_t`` arithmetic yields).
    """
    first_correction = 1 - beta1 ** t
    second_correction = 1 - beta2 ** t

    m_unbiased = m_t / first_correction
    v_unbiased = v_t / second_correction

    denominator = v_unbiased ** .5 + epsilon
    return nu * m_unbiased / denominator

def get_step_adagrad(theta, nu, m_t, v_t, G_t, beta1, beta2, t, epsilon):
    """Return ``m_t * nu / (G_t + epsilon)**0.5``.

    Only ``nu``, ``m_t``, ``G_t`` and ``epsilon`` are read; the remaining
    parameters are accepted but ignored, so the signature matches the
    other ``get_step*`` helpers.
    """
    scale = (G_t + epsilon) ** .5
    return m_t * nu / scale

def get_step_NADAM(theta, nu, m_t, v_t, g_t, beta1, beta2, t, epsilon):
    """Return the Nesterov-style ADAM step.

    The result is
    ``nu / (v_hat**0.5 + epsilon) * (beta1 * m_hat + (1 - beta1) * g_t / (1 - beta1**t))``
    where ``v_hat = v_t / (1 - beta2**t)`` and ``m_hat`` is
    ``m_t / (1 - beta1**(t - 1))`` for ``t >= 2`` and ``0.`` otherwise.

    ``theta`` is accepted but not used.

    NOTE(review): the first moment is corrected with ``t - 1`` rather than
    ``t``; confirm this is the intended variant of the formula.
    """
    # For t < 2 the (t - 1) correction would divide by 1 - beta1**0 == 0,
    # so the momentum term is dropped instead.
    momentum = m_t / (1 - beta1 ** (t - 1)) if t >= 2 else 0.
    second_moment = v_t / (1 - beta2 ** t)

    scale = nu / (second_moment ** .5 + epsilon)
    lookahead = beta1 * momentum
    gradient_term = (1 - beta1) * g_t / (1 - beta1 ** t)
    return scale * (lookahead + gradient_term)


### The standard SPSA step

In [None]:
#export
class StandardSPSA(UpdateBase):
    """Plain SPSA update: the step is simply ``nu * ghat``; no state is kept."""

    def __init__(self):
        """Nothing to set up for this rule."""

    def evaluate(self, ghat, nu, t=0.):
        """Return ``ghat`` scaled by ``nu``.

        ghat -- quantity to scale (presumably the gradient estimate; confirm with caller)
        nu   -- multiplier applied to ``ghat``
        t    -- accepted but not used by this rule
        """
        step = nu * ghat
        return step

### The ADAGRAD step update

In [None]:
#export
class ADAGRAD(UpdateBase):
    """ADAGRAD update rule.

    Keeps a running sum ``G_t`` of the squared ``ghat`` values seen so far
    and returns ``nu * ghat / (G_t + eps)**0.5``.

    Parameters
    ----------
    eps : float, optional
        Constant added to the accumulator before taking the square root,
        which keeps the denominator away from zero. Defaults to ``1e-8``,
        the same default used by the ADAM and NADAM rules in this module.
    """

    def __init__(self, eps=1e-8):
        # `evaluate` reads self.eps, so it has to be defined here; without
        # it the first call to `evaluate` raises AttributeError.
        self.eps = eps
        # Running sum of squared ghat values; allocated on the first call to
        # `evaluate`, once the shape of ghat is known.
        self.G_t = None

    def evaluate(self, ghat, nu, t=0.):
        """Accumulate ``ghat**2`` and return the scaled step.

        ghat -- array (or scalar) whose square is accumulated
        nu   -- multiplier applied to the step
        t    -- accepted but not used by this rule

        Returns ``nu * ghat / (G_t + eps)**0.5`` using the updated ``G_t``.
        """
        if self.G_t is None:
            # numpy.shape also works for plain Python scalars, which have
            # no `.shape` attribute.
            self.G_t = numpy.zeros(numpy.shape(ghat))
        self.G_t += ghat ** 2
        return nu * ghat / (self.G_t + self.eps) ** .5

### The ADAM step update

In [None]:
#export
class ADAM(UpdateBase):
    """ADAM update rule with bias-corrected first and second moments.

    ``m_t`` and ``v_t`` are lists holding every moment estimate computed so
    far (seeded with ``0.``); each call to `evaluate` appends one entry to
    each list.
    """

    def __init__(self, beta1=.9, beta2=.999, eps=1e-8):
        self.beta1, self.beta2, self.eps = beta1, beta2, eps

        # Moment histories; the last entry is the current estimate.
        self.m_t = [0.]
        self.v_t = [0.]

    def evaluate(self, ghat, nu, t):
        """Append new moment estimates and return the proposed step.

        ghat -- value folded into the moment estimates
        nu   -- multiplier applied to the step
        t    -- exponent of the bias corrections; ``t == 0`` makes both
                correction denominators zero

        Returns ``nu * m_hat / (v_hat**0.5 + eps)``.
        """
        b1, b2 = self.beta1, self.beta2

        # Extend the histories with the exponentially weighted averages.
        first_moment = b1 * self.m_t[-1] + (1 - b1) * ghat
        self.m_t.append(first_moment)
        second_moment = b2 * self.v_t[-1] + (1 - b2) * ghat ** 2
        self.v_t.append(second_moment)

        # Bias corrections.
        m_unbiased = first_moment / (1. - b1 ** t)
        v_unbiased = second_moment / (1. - b2 ** t)

        # Proposed step.
        denominator = v_unbiased ** .5 + self.eps
        return nu * m_unbiased / denominator

### The Nesterov-accelerated ADAM step update

In [None]:
#export
class NADAM(UpdateBase):
    """Nesterov-accelerated ADAM update rule.

    ``m_t`` and ``v_t`` are lists holding every moment estimate computed so
    far (seeded with ``0.``); each call to `evaluate` appends one entry to
    each list.
    """

    def __init__(self, beta1=.9, beta2=.999, eps=1e-8):
        self.beta1, self.beta2, self.eps = beta1, beta2, eps

        # Moment histories; the last entry is the current estimate.
        self.m_t = [0.]
        self.v_t = [0.]

    def evaluate(self, ghat, nu, t):
        """Append new moment estimates and return the proposed step.

        ghat -- value folded into the moment estimates
        nu   -- multiplier applied to the step
        t    -- exponent of the bias corrections

        Returns
        ``nu / (v_hat**0.5 + eps) * (beta1 * m_hat + (1 - beta1) * ghat / (1 - beta1**t))``
        where ``m_hat`` is ``m_t[-1] / (1 - beta1**(t - 1))`` for ``t >= 2``
        and ``0.`` otherwise.
        """
        b1, b2 = self.beta1, self.beta2

        # Extend the histories with the exponentially weighted averages.
        first_moment = b1 * self.m_t[-1] + (1 - b1) * ghat
        self.m_t.append(first_moment)
        second_moment = b2 * self.v_t[-1] + (1 - b2) * ghat ** 2
        self.v_t.append(second_moment)

        # For t < 2 the (t - 1) correction would divide by 1 - beta1**0 == 0,
        # so the momentum contribution is dropped instead.
        m_unbiased = first_moment / (1 - b1 ** (t - 1)) if t >= 2 else 0.
        v_unbiased = second_moment / (1 - b2 ** t)

        scale = nu / (v_unbiased ** .5 + self.eps)
        lookahead = b1 * m_unbiased
        gradient_term = (1 - b1) * ghat / (1 - b1 ** t)
        return scale * (lookahead + gradient_term)