In [None]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm
import mpl_toolkits.mplot3d.axes3d as p3
from matplotlib import colors

from scipy.optimize import approx_fprime, line_search, rosen

import IPython, ipywidgets as widgets
from ipywidgets import interact, fixed

import warnings

%matplotlib inline

In [None]:
# Toy 2-D objective functions for the optimisation demos.
# Each entry maps a display name to a callable f(x_) where x_[0] and x_[1]
# are the two coordinates. Only indexing and arithmetic are used, so x_ may be
# a pair of scalars (a single point) or a pair of equally shaped arrays
# (e.g. a meshgrid, as used for the background plot).
functions = {
    'square':     lambda x_: x_[0]**2 + x_[1]**2, 
    'Rosenbrock': rosen,
    'sixhumps':   lambda x_: ((4 - 2.1*x_[0]**2 + x_[0]**4 / 3.) * x_[0]**2 + 
                              x_[0] * x_[1] + (-4 + 4*x_[1]**2) * x_[1] **2),
    'Himmelblau': lambda x_: (x_[0]**2 + x_[1] -11)**2 + (x_[0] + x_[1]**2 -7)**2,
    'Beale':      lambda x_:   (1.500 - x_[0] + x_[0]*x_[1])**2 
                             + (2.250 - x_[0] + x_[0]*x_[1]**2)**2
                             + (2.625 - x_[0] + x_[0]*x_[1]**3)**2,
    # NOTE(review): offset .47 and no square root inside sin() -- this differs
    # from the usual Eggholder formula; presumably rescaled on purpose for the
    # small plotting window. Confirm before changing.
    'Eggholder':  lambda x_:  -(x_[1] + .47)*np.sin(np.abs(x_[0]/2 + (x_[1] + .47)))
                             - x_[0] * np.sin(np.abs(x_[0] - (x_[1] + .47))),
    # Styblinski-Tang: 0.5 * sum_i (x_i^4 - 16 x_i^2 + 5 x_i).
    # The per-coordinate terms are ADDED; subtracting the second one would make
    # the function unbounded below in x_[1].
    'Styblinski': lambda x_: 0.5 * (  (x_[0]**4 - 16*x_[0]**2 + 5*x_[0]) 
                                    + (x_[1]**4 - 16*x_[1]**2 + 5*x_[1])),
    'Matlab 1':   lambda x_: np.log(1 + 3*(x_[1] - (x_[0]**3 - x_[0]))**2 + (x_[0] - 4./3.)**2),
    # log(1 + Rosenbrock): the second term penalises x_[0] (not x_[1]), giving
    # the single minimum at (1, 1).
    'Matlab 2':   lambda x_: np.log(1 + 100. * (x_[0]**2 - x_[1])**2 + (1. - x_[0])**2)
}

In [None]:
# Default colormap for the pcolormesh/contour background drawn below.
# Set via rcParams instead of plt.set_cmap(): set_cmap() also looks up the
# "current image", which can implicitly create (and display) an empty figure
# when none exists yet.
plt.rcParams['image.cmap'] = 'coolwarm'
# alternatives that were tried: 'twilight_shifted', 'RdBu_r', 'gist_earth'

def plotOptimization(f, x, s, y, grads, xyrange=1.):
    """Draw the objective landscape and the recorded optimisation history.

    Four stacked panels are produced:
      1. ``f`` on the square [-xyrange, xyrange) x [-xyrange, xyrange)
         (colour mesh plus contours) with the iterate path on top,
      2. the recorded ``s`` vectors (second component against first),
      3. the Euclidean norm of each ``s`` entry on a log scale,
      4. the objective values ``y`` and, on a second log-scaled axis,
         the Euclidean norm of each recorded gradient.

    Parameters
    ----------
    f : callable
        Objective; called once as ``f([xx, yy])`` on the plotting grid.
    x, s, y, grads : sequences
        Per-iteration iterates, step vectors, objective values and gradients.
    xyrange : number
        Half-width of the plotted square.
    """
    # Colour scale is centred (CenteredNorm default centre); alternatives
    # tried here were colors.NoNorm() and colors.TwoSlopeNorm(vcenter=0.).
    centred = colors.CenteredNorm()

    # Background grid with 0.01 spacing along both axes.
    ticks = np.arange(-xyrange, xyrange, .01)
    grid_x, grid_y = np.meshgrid(ticks, ticks)
    surface = f([grid_x, grid_y])

    fig, panels = plt.subplots(4, 1, figsize=(10, 15), height_ratios=[5, 1, 1, 1])
    ax_map, ax_dir, ax_step, ax_obj = panels.ravel()

    # --- panel 1: landscape -------------------------------------------------
    mesh = ax_map.pcolormesh(grid_x, grid_y, surface, norm=centred, alpha=.5, zorder=1)
    plt.colorbar(mesh)
    ax_map.contour(grid_x, grid_y, surface, levels=11, norm=centred, alpha=.75, zorder=3)
    # Freeze the limits so the path drawn next cannot rescale the view.
    ax_map.autoscale(enable=False, axis='both', tight=None)

    # Histories as arrays; iterates/objective are transposed so that row k
    # holds coordinate k across iterations.
    path = np.array(x).transpose()
    gradients = np.array(grads)
    steps = np.array(s)
    objective = np.array(y).transpose()

    # Start point as a red cross, then the full path.
    ax_map.scatter(path[0][0], path[1][0], marker='x', facecolor='r', s=100)
    ax_map.plot(path[0], path[1], 'g', marker='+')

    # --- panel 2: step vectors ----------------------------------------------
    ax_dir.set_title('Update history')
    ax_dir.plot(steps[:, 0], steps[:, 1], label='$\\alpha_t s_t$')
    ax_dir.legend()

    # --- panel 3: step lengths ----------------------------------------------
    ax_step.set_title('Update step sizes')
    step_len = np.linalg.norm(steps, ord=2, axis=-1)
    ax_step.plot(step_len, label='$|\\alpha_t s_t|$')
    ax_step.set_yscale('log')
    ax_step.legend()

    # --- panel 4: objective and gradient norm -------------------------------
    ax_obj.set_title('Objective history')
    obj_lines = ax_obj.plot(objective, label='$f(x_t)$')
    ax_grad = ax_obj.twinx()
    grad_len = np.linalg.norm(gradients, ord=2, axis=-1)
    grad_lines = ax_grad.plot(grad_len, c='r', label='$\\nabla f(x_t)$')
    ax_grad.set_yscale('log')

    # One legend covering the lines of both y-axes.
    ax_obj.legend(handles = obj_lines + grad_lines)
    
    
# when to terminate the iterations
def terminate(i, s, y):
    """Decide whether the optimisation loop should stop after iteration ``i``.

    Parameters
    ----------
    i : int
        Zero-based index of the iteration that just finished.
    s : sequence of array_like
        Step vectors recorded so far; only the last one is inspected.
    y : sequence of float
        Objective values recorded so far; the last two are inspected.

    Returns
    -------
    bool
        ``False`` for the first iteration (``i <= 0``) or while fewer than two
        objective values exist. Otherwise ``True`` if any of these holds:
        the last objective value is NaN/inf, the last step has Euclidean norm
        below 1e-6, the objective changed by less than 1e-5, or the objective
        exceeds 1e50.
    """
    # Nothing to compare against yet.
    if i <= 0 or len(y) < 2:
        return False

    # Diverged to NaN/+-inf: every comparison below would be False for NaN,
    # so without this check the loop would run on until max_iter.
    if not np.isfinite(y[-1]):
        return True

    # too tiny steps
    tiny_step = np.linalg.norm(s[-1], ord=2) < 1e-6

    # too little improvement
    stalled = np.abs(y[-1] - y[-2]) < 1e-5

    # too large values
    blown_up = y[-1] > 1e50

    return bool(tiny_step or stalled or blown_up)

In [None]:
# starting point (x, y) handed to the optimisers in the cells below
x0 = [-0.5, 0.0]

## Steepest Descent

In [None]:
@interact(f = widgets.ToggleButtons(
                options     = functions,
                description = 'Function $f$',
              ),
          # keyword must match the parameter name for the fixed value to bind
          xt = fixed(x0),
          a = widgets.FloatLogSlider(value=1e-1,
                                     base=10, 
                                     min=-10, max=0, 
                                     step=.1, 
                                     continuous_update=False,
                                     description='learning rate $\\alpha$',
                                     style={'description_width':'initial'},
                                     readout_format='.1e'),
          max_iter = (1,15000, 1),
          xyrange = (1,5,1)
         )
def doSD(f, xt=[0,0], a=1e-2, max_iter=150, xyrange=1.):
    """Run steepest descent with a constant step size and plot the result.

    Parameters
    ----------
    f : callable
        Objective; called with a length-2 float array.
    xt : array_like
        Starting point. It is copied into a float array and never mutated.
    a : float
        Constant step size (learning rate) applied to the negative gradient.
    max_iter : int
        Maximum number of iterations; ``terminate`` may stop the loop earlier.
    xyrange : number
        Half-width of the plotted square, forwarded to ``plotOptimization``.
    """
    xt = np.asarray(xt, dtype=float)

    # per-iteration history: iterates, search directions, objective, gradients
    x = list()
    s = list()
    y = list()
    grads = list()
    
    for i in range(0,max_iter):

        # finite-difference gradient at the current iterate
        dy = approx_fprime(xt, f, epsilon=1e-8)
        # dy /= np.linalg.norm(dy, ord=2)

        st = -dy

        if i == 0:
            # iteration 0 only records the starting point: take no step
            st *= 0

        xt = xt + a * st 
        yt = f(xt)

        grads.append(dy)
        s.append(st)   # the direction st is stored; the step taken is a * st
        x.append(xt)
        y.append(yt)

        if terminate(i, s, y):
            break
            
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        plotOptimization(f, x, s, y, grads, xyrange=xyrange)

## Steepest Descent with 1-D Line Search

In [None]:
@interact(f = widgets.ToggleButtons(
                options     = functions,
                description = 'Function $f$',
              ),
          x0 = fixed(x0),
          # Fallback step size, fixed here. To tune it interactively, replace
          # with a widgets.FloatLogSlider (value=1e-1, base=10, min=-10, max=0,
          # step=.1, continuous_update=False, readout_format='.1e').
          a = fixed(1e-1),
          max_iter = (1,15000, 1),
          xyrange = (1,5,1)
         )
def doSD(f, x0=[0,0], a=1e-2, max_iter=150, xyrange=1.):
    """Run steepest descent with a 1-D line search per step and plot the result.

    NOTE: this definition reuses the name ``doSD`` of the fixed-step cell, so
    running this cell rebinds that name to the line-search variant.

    Parameters
    ----------
    f : callable
        Objective; called with a length-2 float array.
    x0 : array_like
        Starting point. It is copied into a float array and never mutated.
    a : float
        Fallback step size used when ``line_search`` returns no (or a zero)
        step length.
    max_iter : int
        Maximum number of iterations; ``terminate`` may stop the loop earlier.
    xyrange : number
        Half-width of the plotted square, forwarded to ``plotOptimization``.
    """
    fd_eps = 1e-8  # one finite-difference step for every gradient evaluation

    def grad(x_):
        return approx_fprime(x_, f, epsilon=fd_eps)

    xt = np.asarray(x0, dtype=float)
    at = 0.

    # per-iteration history: iterates, search directions, objective, gradients
    x = list()
    s = list()
    y = list()
    grads = list()
        
    # line_search warns whenever it fails to converge; silence that (and any
    # plotting warnings) for the whole run
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        
        for i in range(0,max_iter):

            dy = grad(xt)
            # dy /= np.linalg.norm(dy, ord=2)

            st = -dy

            if i == 0:
                # iteration 0 only records the starting point: at is still 0
                st *= 0
            else:
                # gradient and objective at xt are already known; pass them in
                # so line_search does not evaluate them again
                at = line_search(f, grad, xt, st, gfk=dy, old_fval=y[-1])[0] or a

            xt = xt + at * st 
            yt = f(xt)

            grads.append(dy)
            s.append(st)   # the direction st is stored; the step taken is at * st
            x.append(xt)
            y.append(yt)

            if terminate(i, s, y):
                break

        plotOptimization(f, x, s, y, grads, xyrange=xyrange)

## Conjugate Gradients

In [None]:
@interact(f = widgets.ToggleButtons(
                options     = functions,
                description = 'Function $f$',
              ),
          x0 = fixed(x0),
          max_iter = (1,1500, 1),
          xyrange = (1,5,1)
         )
def doCG(f, x0=[0,0], max_iter=150, xyrange=1.):
    """Run nonlinear conjugate gradients (Fletcher-Reeves beta) and plot the result.

    Each step length comes from ``scipy.optimize.line_search``; if that returns
    no (or a zero) step length, a fixed step of 1e-1 is used instead.

    Parameters
    ----------
    f : callable
        Objective; called with a length-2 float array.
    x0 : array_like
        Starting point. It is copied into a float array and never mutated.
    max_iter : int
        Maximum number of iterations; ``terminate`` may stop the loop earlier.
    xyrange : number
        Half-width of the plotted square, forwarded to ``plotOptimization``.
    """
    fd_eps = 1e-8  # one finite-difference step for every gradient evaluation

    def grad(x_):
        return approx_fprime(x_, f, epsilon=fd_eps)

    xt = np.asarray(x0, dtype=float)
    at = 0.

    # per-iteration history: iterates, search directions, objective, gradients
    x = list()
    s = list()
    y = list()
    grads = list()
    
    # line_search warns whenever it fails to converge; silence that (and any
    # plotting warnings) for the whole run
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        for i in range(0,max_iter):
            dy = grad(xt)

            # default / restart direction: steepest descent
            st = -dy

            if i == 0:
                # iteration 0 only records the starting point: at is still 0
                st *= 0
            else:
                if i > 1:
                    # beta = |g_t|^2 / |g_{t-1}|^2
                    g_1 = np.dot(grads[-1], grads[-1])
                    if g_1 > 0.:
                        bt = np.dot(dy, dy) / g_1
                        conj = -dy + bt * s[-1]
                        # accept the conjugate direction only if it points
                        # downhill; otherwise keep the steepest-descent restart
                        if np.dot(conj, dy) < 0.:
                            st = conj

                # gradient and objective at xt are already known; pass them in
                # so line_search does not evaluate them again
                at = line_search(f, grad, xt, st, gfk=dy, old_fval=y[-1])[0] or 1e-1
                
            xt = xt + at * st
            yt = f(xt)

            grads.append(dy)
            s.append(st)   # the direction st is stored (reused for the next beta update)
            x.append(xt)
            y.append(yt)

            if terminate(i, s, y):
                break
                
        plotOptimization(f, x, s, y, grads, xyrange)