In [None]:
# default_exp costs

In [None]:
#hide
%load_ext autoreload
%autoreload 2

# costs - Wrapping objective functions and data 


A collection of classes and functions for structuring models to be optimized.
***
The `GradientDescent` optimizer expects the cost function to be stored within a class with an ```evaluate()``` method, which takes a 1D numpy array of parameter values and returns a float. A generic class `Model` is provided to organize user-defined cost functions and data so they can be easily passed to the optimizer. 

Should this class prove insufficient in some circumstances, a description of the API expected by `GradientDescent` along with an abstract base class is provided.

In [None]:
#hide
from nbdev.showdoc import *
from matplotlib import pyplot

import hypothesis
from hypothesis import given
import hypothesis.strategies as st
import hypothesis.extra.numpy as hypo_numpy
from hypothesis import note
from numpy.testing import *

NameError: name 'st' is not defined

In [None]:
#export
import numpy
import scipy
import scipy.stats

from abc import ABC, abstractmethod

In [None]:
#export
class ModelBase(ABC):
    """A helper class that provides a standard means to create
    classes to store models used by GradientDescent.

    Subclasses must implement ``evaluate(theta)``. The class also provides
    ``sample_rvs`` for refreshing a random variable used by the cost.

    Class attributes (may be overridden per instance):
        RV: object exposing an ``.rvs()`` method, or None (the default)
            when the cost function uses no random variable.
        update_rvs: flag read by the gradient estimator to decide whether
            to redraw the random variable before each gradient evaluation.
    """

    # Random-variable generator; None means "no random variable in the cost".
    RV=None
    # Whether the random variable should be redrawn for every gradient estimate.
    update_rvs=True

    @abstractmethod     #Require that all cost functions have the .evaluate method
    def evaluate(self, theta):
        """Return the value of the cost function at the parameter vector ``theta``.

        Concrete subclasses must override this method.
        """

    def sample_rvs(self):
        """Redraw the random variable used by the cost function.

        It may be desirable to hold some random variables constant during
        gradient evaluations, so the draw is cached on the instance as
        ``self.z`` and only replaced when this method is called.

        Returns:
            The new draw (also stored in ``self.z``), or None when ``RV`` is
            None, in which case ``self.z`` is left untouched.
        """
        if self.RV is not None:
            self.z=self.RV.rvs()
            # Returning the draw makes ``obj.z = obj.sample_rvs()`` safe;
            # previously that pattern silently replaced the draw with None.
            return self.z
        return None

#         raise NotImplementedError("Cost functions must included a boolean attribute 'update_rvs'")
        
        

***


The base class `ModelBase` is relatively simple, consisting of two methods and two attributes

The methods are
1. ```.evaluate``` which must be defined in any particular cost function class written by the user
2. ```.sample_rvs``` which is automatically included in the base class, and draws a random variable if a random variable generator is stored in the RV attribute

The two attributes are
1. ```RV``` which by default is ```None```, but may be used to store an object with an ```.rvs()``` method that generates the random variables used in the evaluation of the cost function (e.g. a frozen distribution such as ```scipy.stats.norm(0, 1)```)
2. ```update_rvs``` is a flag that tells the `SPSAGradient` class to update the random variable each time it evaluates the gradient. By default this is ```True```, though this will only have an effect if a random variable generator is included in the cost.

In [None]:
#export
class Model(ModelBase):
    """A class for passing objective functions and data to the GradientDescent
    optimizer 
    
    Inputs:
    
    cost - the objective function to be minimized. It is called as
        cost(theta), cost(theta, data), cost(theta, z) or
        cost(theta, data, z), depending on which of data and RV are given
    data - the data to which the model will be fit (optional)
    RV - An object with a .rvs() method to generate random variables
        for the cost function
    update_rvs: True/False call RV.rvs() before each gradient evaluation;
        forced to False when no RV is given

    Attributes:

    z - the current draw from RV, or None when no RV is given
    """
    def __init__(self, cost, data=None, RV=None, update_rvs=False):

        self.cost=cost
        self.data=data
        self.RV=RV
        self.z=None

        if RV is None:
            # Nothing to resample, so the flag is meaningless: pin it to False.
            self.update_rvs=False
        else:
            assert type(update_rvs) is bool
            self.update_rvs=update_rvs
            # sample_rvs() stores the draw in self.z itself. Its return value
            # must NOT be assigned to self.z: the base implementation may
            # return None, which would discard the initial draw and leave
            # z=None for good when update_rvs is False.
            self.sample_rvs()

    def evaluate(self, theta):
        """Evaluate the wrapped cost function at ``theta``.

        ``data`` and the current random draw ``z`` are appended to the call,
        in that order, only when they were supplied to the constructor.
        """
        call_args=[theta]
        if self.data is not None:
            call_args.append(self.data)
        if self.RV is not None:
            call_args.append(self.z)
        return self.cost(*call_args)

In [None]:
#hide

## Here's a simple cost function for tests without data

def quadratic(x):
    """Toy cost without data or random variable: the square of ``x``."""
    squared = x ** 2
    return squared

## Here's a simple cost function for tests without data, but with RV

def quadratic_rv(x, z):
    """Toy cost that accepts a random draw ``z`` but ignores it: returns ``x`` squared."""
    squared = x ** 2
    return squared

### Here's simple cost function for tests with data
def MSE(theta, data):
    """Mean squared error of the line ``theta[0]*x + theta[1]`` against ``data``.

    ``data`` is a mapping with keys ``'x'`` and ``'y'``.
    """
    xs = data['x']
    ys = data['y']
    residuals = ys - (theta[0] * xs + theta[1])
    return numpy.mean(residuals ** 2)

### Here's simple cost function for tests with data, but with RV
def MSE_rv(theta, data, z):
    """Mean squared error of ``theta[0]*x + theta[1]`` against ``data``.

    The random draw ``z`` is accepted so the function matches the
    cost(theta, data, z) calling convention, but it is not used.
    """
    xs = data['x']
    ys = data['y']
    residuals = ys - (theta[0] * xs + theta[1])
    return numpy.mean(residuals ** 2)

### Here's some sample data
# Seed each draw explicitly so the tests see the same data on every run.
# The two seeds differ on purpose: an identical seed would make `err` an
# exact rescaling of `x` instead of independent noise.
x=scipy.stats.norm.rvs(0, 5, size=200, random_state=0)
err=scipy.stats.norm.rvs(0, 2, size=200, random_state=1)
slope=2
intercept=5
y=x*slope+intercept +err

data={'x':x,
     'y':y}

### These are just tests about behavior, so we're looking
### to make sure errors aren't raised, not checking results for consistency
### Let's make sure we can construct the model

def test_Model_construction(cost, data):
    """Smoke test: building a Model from a cost and data must not raise."""
    Model(cost, data)
test_Model_construction(MSE, data)


### Let's make sure evaluate correctly calls the cost function 
### in the four possible scenarios

def test_Model_evaluate_call(cost_no_data_no_rv,
                             cost_no_data_with_rv,
                             cost_with_data_no_rv,
                             cost_with_data_with_rv,
                             data,
                             RV):
    """Smoke test: evaluate() must dispatch without error for every
    combination of (data given / not given) x (RV given / not given)."""
    scalar_theta=5
    vector_theta=[5,4]
    # Each entry pairs a model with a parameter value its cost understands.
    cases=(
        (Model(cost_no_data_no_rv), scalar_theta),
        (Model(cost_no_data_with_rv, RV=RV), scalar_theta),
        (Model(cost_with_data_no_rv, data), vector_theta),
        (Model(cost_with_data_with_rv, data, RV=RV), vector_theta),
    )
    for wrapped, theta in cases:
        wrapped.evaluate(theta)

test_Model_evaluate_call(quadratic, quadratic_rv,MSE, MSE_rv,data, scipy.stats.norm(0,1) )    

### Let's check to make sure evaluate returns the output of the cost function
### This would likely fail if the cost was stochastic

#Let's check without data
def test_Model_without_data_evaluate_matches_cost(cost):
    """A data-free Model must return exactly what the bare cost returns."""
    theta=5
    wrapped=Model(cost)
    observed=wrapped.evaluate(theta)
    expected=cost(theta)
    assert (observed==expected), ".evaluate output does not match cost"
test_Model_without_data_evaluate_matches_cost(quadratic)    

# let's check with data
def test_Model_with_data_evaluate_matches_cost(cost, data):
    """A Model holding data must return exactly what cost(theta, data) returns."""
    theta=[5,2]
    wrapped=Model(cost,data)
    observed=wrapped.evaluate(theta)
    expected=cost(theta,data)
    assert (observed==expected), ".evaluate output does not match cost"
test_Model_with_data_evaluate_matches_cost(MSE, data)

### Let's make sure we can update the RV sample

def test_Model_rvs(cost, data, rvs):
    """sample_rvs() must replace the stored draw ``z`` with a fresh one.

    Two explicit draws are compared rather than the constructor's initial
    ``z`` against one draw: if the initial ``z`` were None, "z changed"
    would hold trivially and the test would prove nothing.
    """
    model=Model(cost,data,rvs)
    model.sample_rvs()
    z0=model.z
    model.sample_rvs()
    z1=model.z
    assert z0 is not None and z1 is not None, "sample_rvs did not store a draw in .z"
    assert not numpy.array_equal(z0,z1), "Random variables should not match after update"
test_Model_rvs(MSE_rv, data, scipy.stats.norm([0]*10, [1]*10))    

Let's organize this data as a dictionary

Now we need a function that takes a vector of parameter values and data and uses these to return a float. Note that it does not matter how the data is organized, so long as the function can interpret it internally.


***
In general, though, it shouldn't be necessary to use the base class, as most simple cost functions can be wrapped in an instance of the `Model` class. The `Model` class can be used to wrap both a cost function and data.

The only required argument is a user-defined cost function. If the model is to be fit to data, then the data--organized in a way that the cost function expects--should also be passed to the class.

The ```cost``` function should take up to three arguments, in this order:

1. ```theta```: A 1-d numpy array of model parameters
2. ```data```: (Optional) A variable storing the data
3. ```z```: (Optional) The outcome of a random variable used to compute the objective function.

The ```data``` may be organized however the ```cost``` expects, for example as dictionary, a list, or a numpy array.

The two optional arguments taken by `Model` relate to a random variable that may be passed to the cost function:

1. ```RV``` may be passed an object with a ```.rvs()``` method that generates a random variable when called (e.g. a frozen scipy distribution such as ```scipy.stats.norm([0]*10,[1]*10)```, which can be used to generate 10 samples from a unit normal distribution).
2. ```update_rvs``` stores a True/False value, telling the gradient approximation whether to regenerate the random variable after each gradient estimate.

The current value of the random variable drawn from ```RV``` is stored as the attribute ```z```.

If these options are employed, the gradient approximation holds the random variable ```z``` constant during the forward and backward perturbations used to approximate the gradient. In general, I think it may be best not to use these options--my intuition is that noise in the gradient due to a simulator employed in the objective function ought to be left in the gradient (but I could be wrong). However, there are cases where it absolutely makes sense to eliminate sources of randomness from the gradient evaluation, for example the Monte Carlo integration often employed to compute the objective function used in variational inference.

## Example usage

Okay, let's try out an example, where we are interested in the mean-squared error of a simple linear regression.

Now let's generate 200 data points from a simple linear relationship, with a slope of 2 and an intercept of 5:

$$x \sim normal(0, 5)$$

$$\epsilon \sim normal(0,2)$$

$$y=2x+5+\epsilon$$




In [None]:
# Seed each draw explicitly so the example (and the figure below) is
# reproducible on every run. The two seeds differ on purpose: an identical
# seed would make `err` an exact rescaling of `x` instead of independent noise.
x=scipy.stats.norm.rvs(0, 5, size=200, random_state=0)
err=scipy.stats.norm.rvs(0, 2, size=200, random_state=1)
slope=2
intercept=5
y=x*slope+intercept +err
# pyplot.scatter(x,y)


Let's organize this data as a dictionary

In [None]:
# Bundle the predictor and response arrays under the keys the cost function reads.
data=dict(x=x, y=y)

Now we need a function that takes a vector of parameter values and data and uses these to return a float. Note that it does not matter how the data is organized, so long as the function can interpret it internally.


In [None]:
def MSE(theta, data):
    """Mean squared error of a straight-line fit.

    theta - sequence whose first entry is the slope and second the intercept
    data - mapping with the predictor under 'x' and the response under 'y'
    """
    predictor = data['x']
    response = data['y']
    squared_errors = (response - (theta[0] * predictor + theta[1])) ** 2
    return numpy.mean(squared_errors)

Okay, now we'll wrap the cost function and the data in the `Model` class

In [None]:
# No RV is supplied, so the model carries only the cost function and the data.
mse_cost = Model(cost=MSE, data=data)

Because we didn't pass any random variable generators, both ```update_rvs``` and ```RV``` remain at their default values:

In [None]:
# Show the two random-variable settings of the wrapped model, one per line.
print (f"mse_cost.RV = {mse_cost.RV}",
       f"mse_cost.update_rvs = {mse_cost.update_rvs}",
       sep="\n")

We can evaluate the cost function on the data by passing a proposed parameter value to the ```.evaluate()``` method:

In [None]:
# Cost at (slope, intercept) = (2, 5), the values used to generate the data.
mse_cost.evaluate(theta=[2, 5])

We could further use this to visualize the landscape of the objective function over a grid of different combinations of (slope, intercept) parameters (the parameter value used to generate the data is marked with a red X):

In [None]:
# Evaluate the cost on a regular (slope, intercept) grid.
# The loop variables are deliberately not named `slope` / `intercept`:
# reusing those names would overwrite the notebook-level values that hold
# the true generating parameters.
param_grid=numpy.arange(-10, 10,.2)
X,Y,Z=[],[],[]
for slope_candidate in param_grid:
    for intercept_candidate in param_grid:
        X.append(slope_candidate)
        Y.append(intercept_candidate)
        Z.append(mse_cost.evaluate([slope_candidate,intercept_candidate]))

# Colour by log(MSE) so the basin around the optimum stays visible.
sc=pyplot.scatter(X,Y, c=numpy.log(Z))
# Red X marks the parameters used to generate the data (slope 2, intercept 5).
pyplot.scatter(2, 5, marker='x',s=100, c='r')
cbar=pyplot.colorbar(sc)
cbar.set_label('log (MSE)', size=14)
pyplot.ylabel('Intercept')
pyplot.xlabel('Slope')
pyplot.xlim(-10,10)
# Trailing semicolon keeps the axis-limit tuple out of the cell output.
pyplot.ylim(-10,10);