## Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import time
import os
import imageio
from matplotlib.animation import FuncAnimation

## Function to minimize

In [2]:
def f(x,y):
    """Objective surface: two Gaussian wells on top of a quadratic bowl.

    The wells are centred at (1, 0) with depth 2 and at (-1, 0) with depth 3;
    the bowl is ``x**2 + y**2``. Only NumPy ufuncs and arithmetic are used, so
    scalars and equally shaped arrays are both accepted.
    """
    y_sq = y * y
    right_well = -2 * np.exp(-((x - 1) * (x - 1) + y_sq) / .2)
    left_well = -3 * np.exp(-((x + 1) * (x + 1) + y_sq) / .2)
    # Summation order is kept left-to-right so results match bit for bit.
    return right_well + left_well + x * x + y_sq

## Gradient


In [3]:
# Starting point handed to every optimizer run in this notebook.
# (A previous start of (0, 1) used to be assigned here and was immediately
# overwritten, so it never had any effect; set x0 = 0 to try it.)
x0 = 3
y0 = 1
# Step width of the finite-difference quotient used by grad_f.
h = 0.01
def grad_f(x,y):
    """Approximate the gradient of ``f`` at ``(x, y)`` with forward differences.

    Each partial derivative is ``(f(shifted point) - f(x, y)) / h`` where ``h``
    is the module-level step width.

    Returns:
        Tuple ``(grad_x, grad_y)`` of the two difference quotients.
    """
    # Both quotients subtract the same base value, so evaluate it only once.
    f_here = f(x, y)
    grad_x = (f(x + h, y) - f_here) / h
    grad_y = (f(x, y + h) - f_here) / h
    return grad_x, grad_y

## Visualize

In [4]:
n = 240
x = np.linspace(-3.0, 3.0, n)
y = np.linspace(-3.0, 3.0, n)
# meshgrid already returns two (n, n) arrays, so no reshape is required.
X, Y = np.meshgrid(x, y)

Z = f(X, Y)

fig, ax = plt.subplots()
ax.set_title("Objective f(x, y)")
ax.set_xlabel("x")
ax.set_ylabel("y")
# Label the axes in data coordinates and put row 0 (y = -3) at the bottom,
# the same way the combined animation cell draws the surface.
ax.imshow(Z,
   extent=[X.min(),X.max(),Y.min(),Y.max()],
   origin="lower")

<matplotlib.image.AxesImage at 0x7f6535a99d30>

## SGD

In [5]:
def get_sgd_path(x0, y0, learning_rate, num_steps, grad_fn=None):
    """Trace plain gradient descent starting at ``(x0, y0)``.

    Every iteration moves the point by ``-learning_rate * gradient``.

    Args:
        x0, y0: Coordinates of the starting point.
        learning_rate: Factor applied to the gradient in every step.
        num_steps: Number of update steps (non-negative integer).
        grad_fn: Optional callable ``grad_fn(x, y) -> (grad_x, grad_y)``.
            Defaults to the notebook's ``grad_f``.

    Returns:
        Tuple ``(X_path, Y_path)`` of float arrays with ``num_steps + 1``
        entries each; index 0 holds the starting point.

    Raises:
        ValueError: If ``num_steps`` is negative.
    """
    if grad_fn is None:
        grad_fn = grad_f
    if num_steps < 0:
        raise ValueError("num_steps must be non-negative")

    # Preallocate instead of np.append in the loop (which copies the whole
    # path every step) and so that zero steps still yield indexable arrays.
    X_path = np.empty(num_steps + 1)
    Y_path = np.empty(num_steps + 1)
    X_path[0] = x0
    Y_path[0] = y0

    x, y = x0, y0
    for step in range(1, num_steps + 1):
        grad_x, grad_y = grad_fn(x, y)
        x = x - learning_rate * grad_x
        y = y - learning_rate * grad_y
        X_path[step] = x
        Y_path[step] = y
    return X_path, Y_path

## Optimize

In [6]:
# Gradient-descent run from the shared start point: step size 0.02, 100 steps.
X_path_SGD, Y_path_SGD = get_sgd_path(
    x0,
    y0,
    learning_rate=0.02,
    num_steps=100,
)

## Visualize path

In [7]:
# Disabled cell: animates the SGD path on its own and saves it as 'SGD.gif'
# through the 'imagemagick' writer. The "Visualize all" cell at the end of the
# notebook animates all four optimizer paths together.
# n = 240
# x = np.linspace(-3.0, 3.0, n)
# y = np.linspace(-3.0, 3.0, n)
# X, Y = np.meshgrid(x, y)
# X=np.reshape(X,(n,n))
# Y=np.reshape(Y,(n,n))

# Z = f(X,Y)

# fig, ax = plt.subplots()

# ax.imshow(Z,
#    extent=[X.min(),X.max(),Y.min(),Y.max()],
#    origin="lower")
# ax.axes.set_autoscale_on(False)    
# line, = ax.plot(X_path_SGD[0],Y_path_SGD[0],'ro')


# def update(i):
    
#     line.set_ydata(Y_path_SGD[i])
#     line.set_xdata(X_path_SGD[i])
#     return line

    
# anim = FuncAnimation(fig, update, frames=np.arange(0, X_path_SGD.size), interval=1)  
    
# anim.save('SGD.gif', dpi=80, writer='imagemagick')

# # plt.imshow

## Momentum

In [8]:
def get_momentum_path(x0, y0, learning_rate, num_steps,momentum, grad_fn=None):
    """Trace gradient descent with momentum starting at ``(x0, y0)``.

    A velocity per coordinate, initialised to zero, is updated as
    ``v = momentum * v - learning_rate * gradient`` and then added to the
    current point.

    Args:
        x0, y0: Coordinates of the starting point.
        learning_rate: Factor applied to the gradient in every step.
        num_steps: Number of update steps (non-negative integer).
        momentum: Factor applied to the previous velocity.
        grad_fn: Optional callable ``grad_fn(x, y) -> (grad_x, grad_y)``.
            Defaults to the notebook's ``grad_f``.

    Returns:
        Tuple ``(X_path, Y_path)`` of float arrays with ``num_steps + 1``
        entries each; index 0 holds the starting point.

    Raises:
        ValueError: If ``num_steps`` is negative.
    """
    if grad_fn is None:
        grad_fn = grad_f
    if num_steps < 0:
        raise ValueError("num_steps must be non-negative")

    # Preallocate instead of np.append in the loop (which copies the whole
    # path every step) and so that zero steps still yield indexable arrays.
    X_path = np.empty(num_steps + 1)
    Y_path = np.empty(num_steps + 1)
    X_path[0] = x0
    Y_path[0] = y0

    x, y = x0, y0
    v_x = 0
    v_y = 0
    for step in range(1, num_steps + 1):
        grad_x, grad_y = grad_fn(x, y)
        v_x = momentum * v_x - learning_rate * grad_x
        v_y = momentum * v_y - learning_rate * grad_y

        x = x + v_x
        y = y + v_y
        X_path[step] = x
        Y_path[step] = y
    return X_path, Y_path

## Optimize

In [9]:
# Momentum run from the shared start point: step size 0.02, 100 steps,
# momentum factor 0.8.
X_path_MOM, Y_path_MOM = get_momentum_path(
    x0,
    y0,
    learning_rate=0.02,
    num_steps=100,
    momentum=0.8,
)

## Visualize

In [10]:
# Disabled cell: animates the momentum path on its own and saves it as
# 'Momentum.gif' through the 'imagemagick' writer. The "Visualize all" cell at
# the end of the notebook animates all four optimizer paths together.
# n = 240
# x = np.linspace(-3.0, 3.0, n)
# y = np.linspace(-3.0, 3.0, n)
# X, Y = np.meshgrid(x, y)
# X=np.reshape(X,(n,n))
# Y=np.reshape(Y,(n,n))

# Z = f(X,Y)

# fig, ax = plt.subplots()

# ax.imshow(Z,
#    extent=[X.min(),X.max(),Y.min(),Y.max()],
#    origin="lower")
# ax.axes.set_autoscale_on(False)    
# line, = ax.plot(X_path_MOM[0],Y_path_MOM[0],'ro')


# def update(i):
    
#     line.set_ydata(Y_path_MOM[i])
#     line.set_xdata(X_path_MOM[i])
#     return line

    
# anim = FuncAnimation(fig, update, frames=np.arange(0, X_path_MOM.size), interval=1)  
    
# anim.save('Momentum.gif', dpi=80, writer='imagemagick')

## RMSPROP


In [11]:
def get_RMSPROP_path(x0, y0, learning_rate, num_steps,decay_rate,eps, grad_fn=None):
    """Trace RMSProp updates starting at ``(x0, y0)``.

    Per coordinate, a running average of the squared gradient is kept as
    ``cache = decay_rate * cache + (1 - decay_rate) * gradient**2`` and each
    step is ``learning_rate * gradient / sqrt(cache + eps)``.

    Args:
        x0, y0: Coordinates of the starting point.
        learning_rate: Factor applied to the normalised gradient.
        num_steps: Number of update steps (non-negative integer).
        decay_rate: Weight of the previous cache value in the running average.
        eps: Constant added to the cache inside the square root.
        grad_fn: Optional callable ``grad_fn(x, y) -> (grad_x, grad_y)``.
            Defaults to the notebook's ``grad_f``.

    Returns:
        Tuple ``(X_path, Y_path)`` of float arrays with ``num_steps + 1``
        entries each; index 0 holds the starting point.

    Raises:
        ValueError: If ``num_steps`` is negative.
    """
    if grad_fn is None:
        grad_fn = grad_f
    if num_steps < 0:
        raise ValueError("num_steps must be non-negative")

    # Preallocate instead of np.append in the loop (which copies the whole
    # path every step) and so that zero steps still yield indexable arrays.
    X_path = np.empty(num_steps + 1)
    Y_path = np.empty(num_steps + 1)
    X_path[0] = x0
    Y_path[0] = y0

    x, y = x0, y0
    cache_x = 0
    cache_y = 0
    for step in range(1, num_steps + 1):
        grad_x, grad_y = grad_fn(x, y)
        cache_x = decay_rate * cache_x + (1 - decay_rate) * grad_x**2
        cache_y = decay_rate * cache_y + (1 - decay_rate) * grad_y**2

        x = x - learning_rate * grad_x / np.sqrt(cache_x + eps)
        y = y - learning_rate * grad_y / np.sqrt(cache_y + eps)
        X_path[step] = x
        Y_path[step] = y
    return X_path, Y_path

## Optimize

In [12]:
# RMSProp run from the shared start point: step size 0.02, 100 steps,
# decay rate 0.99, eps 1e-6.
X_path_RMS, Y_path_RMS = get_RMSPROP_path(
    x0,
    y0,
    learning_rate=0.02,
    num_steps=100,
    decay_rate=0.99,
    eps=1e-6,
)

## Visualize

In [13]:
# Disabled cell: animates the RMSProp path on its own and saves it as
# 'RMSPROP.gif' through the 'imagemagick' writer. The "Visualize all" cell at
# the end of the notebook animates all four optimizer paths together.
# n = 240
# x = np.linspace(-3.0, 3.0, n)
# y = np.linspace(-3.0, 3.0, n)
# X, Y = np.meshgrid(x, y)
# X=np.reshape(X,(n,n))
# Y=np.reshape(Y,(n,n))

# Z = f(X,Y)

# fig, ax = plt.subplots()

# ax.imshow(Z,
#    extent=[X.min(),X.max(),Y.min(),Y.max()],
#    origin="lower")
# ax.axes.set_autoscale_on(False)    
# line, = ax.plot(X_path_RMS[0],Y_path_RMS[0],'ro')


# def update(i):
    
#     line.set_ydata(Y_path_RMS[i])
#     line.set_xdata(X_path_RMS[i])
#     return line

    
# anim = FuncAnimation(fig, update, frames=np.arange(0, X_path_RMS.size), interval=1)  
    
# anim.save('RMSPROP.gif', dpi=80, writer='imagemagick')

## Adam

In [14]:
def get_adam_path(x0, y0, learning_rate, num_steps,beta1,beta2,eps, grad_fn=None, bias_correction=True):
    """Trace Adam updates starting at ``(x0, y0)``.

    Per coordinate, two running averages initialised to zero are kept:
    ``m = beta1 * m + (1 - beta1) * gradient`` and
    ``v = beta2 * v + (1 - beta2) * gradient**2``. With ``bias_correction``
    enabled, step ``t`` (counted from 1) divides them by ``1 - beta1**t`` and
    ``1 - beta2**t`` before use, which compensates for the zero
    initialisation. The step itself is ``learning_rate * m / sqrt(v + eps)``.

    Args:
        x0, y0: Coordinates of the starting point.
        learning_rate: Factor applied to the normalised first moment.
        num_steps: Number of update steps (non-negative integer).
        beta1: Weight of the previous first-moment average.
        beta2: Weight of the previous second-moment average.
        eps: Constant added to the second moment inside the square root.
        grad_fn: Optional callable ``grad_fn(x, y) -> (grad_x, grad_y)``.
            Defaults to the notebook's ``grad_f``.
        bias_correction: When False, the raw running averages are used
            without the ``1 - beta**t`` rescaling.

    Returns:
        Tuple ``(X_path, Y_path)`` of float arrays with ``num_steps + 1``
        entries each; index 0 holds the starting point.

    Raises:
        ValueError: If ``num_steps`` is negative.
    """
    if grad_fn is None:
        grad_fn = grad_f
    if num_steps < 0:
        raise ValueError("num_steps must be non-negative")

    # Preallocate instead of np.append in the loop (which copies the whole
    # path every step) and so that zero steps still yield indexable arrays.
    X_path = np.empty(num_steps + 1)
    Y_path = np.empty(num_steps + 1)
    X_path[0] = x0
    Y_path[0] = y0

    x, y = x0, y0
    m_x = 0
    m_y = 0
    v_x = 0
    v_y = 0
    for step in range(1, num_steps + 1):
        grad_x, grad_y = grad_fn(x, y)
        m_x = beta1 * m_x + (1 - beta1) * grad_x
        m_y = beta1 * m_y + (1 - beta1) * grad_y

        v_x = beta2 * v_x + (1 - beta2) * grad_x**2
        v_y = beta2 * v_y + (1 - beta2) * grad_y**2

        if bias_correction:
            # Both averages start at zero and are therefore biased towards
            # zero during the first steps; rescale them for step number `step`.
            m_scale = 1 - beta1**step
            v_scale = 1 - beta2**step
            m_x_hat, m_y_hat = m_x / m_scale, m_y / m_scale
            v_x_hat, v_y_hat = v_x / v_scale, v_y / v_scale
        else:
            m_x_hat, m_y_hat = m_x, m_y
            v_x_hat, v_y_hat = v_x, v_y

        x = x - learning_rate * m_x_hat / np.sqrt(v_x_hat + eps)
        y = y - learning_rate * m_y_hat / np.sqrt(v_y_hat + eps)

        X_path[step] = x
        Y_path[step] = y
    return X_path, Y_path

## Optimize

In [15]:
# Adam run from the shared start point: step size 0.02, 100 steps,
# beta1 0.9, beta2 0.999, eps 1e-6.
X_path_ADAM, Y_path_ADAM = get_adam_path(
    x0,
    y0,
    learning_rate=0.02,
    num_steps=100,
    beta1=0.9,
    beta2=0.999,
    eps=1e-6,
)

## Visualize

In [16]:
# Disabled cell: animates the Adam path on its own and saves it as 'Adam.gif'
# through the 'imagemagick' writer. The "Visualize all" cell at the end of the
# notebook animates all four optimizer paths together.
# n = 240
# x = np.linspace(-3.0, 3.0, n)
# y = np.linspace(-3.0, 3.0, n)
# X, Y = np.meshgrid(x, y)
# X=np.reshape(X,(n,n))
# Y=np.reshape(Y,(n,n))

# Z = f(X,Y)

# fig, ax = plt.subplots()

# ax.imshow(Z,
#    extent=[X.min(),X.max(),Y.min(),Y.max()],
#    origin="lower")
# ax.axes.set_autoscale_on(False)    
# line, = ax.plot(X_path_ADAM[0],Y_path_ADAM[0],'ro')


# def update(i):
    
#     line.set_ydata(Y_path_ADAM[i])
#     line.set_xdata(X_path_ADAM[i])
#     return line

    
# anim = FuncAnimation(fig, update, frames=np.arange(0, X_path_ADAM.size), interval=1)  
    
# anim.save('Adam.gif', dpi=80, writer='imagemagick')

## Visualize all

In [18]:
from IPython.display import HTML

n = 240
x = np.linspace(-3.0, 3.0, n)
y = np.linspace(-3.0, 3.0, n)
# meshgrid already returns two (n, n) arrays, so no reshape is required.
X, Y = np.meshgrid(x, y)

Z = f(X,Y)

fig, ax = plt.subplots()

# Draw the surface in data coordinates with y increasing upwards.
ax.imshow(Z,
   extent=[X.min(),X.max(),Y.min(),Y.max()],
   origin="lower")
# Keep the view fixed on the surface while the markers move.
ax.set_autoscale_on(False)
line_ADAM, = ax.plot(X_path_ADAM[0],Y_path_ADAM[0],'ro',label='ADAM')
line_RMS, = ax.plot(X_path_RMS[0],Y_path_RMS[0],'bo', label='RMSProp')
line_MOM, = ax.plot(X_path_MOM[0],Y_path_MOM[0],'go', label='Momentum')
line_SGD, = ax.plot(X_path_SGD[0],Y_path_SGD[0],'ko', label='SGD')
ax.legend()

# One (marker, x path, y path) entry per optimizer.
marker_paths = (
    (line_ADAM, X_path_ADAM, Y_path_ADAM),
    (line_RMS, X_path_RMS, Y_path_RMS),
    (line_MOM, X_path_MOM, Y_path_MOM),
    (line_SGD, X_path_SGD, Y_path_SGD),
)
# Only animate frames that exist in every path.
num_frames = min(xs.size for _, xs, _ in marker_paths)


def update(i):
    """Move every optimizer marker to its position at step ``i``.

    Returns:
        Tuple of the four updated line artists.
    """
    for line, xs, ys in marker_paths:
        # Line2D data setters expect sequences, so wrap the single point.
        line.set_data([xs[i]], [ys[i]])
    return tuple(line for line, _, _ in marker_paths)


anim = FuncAnimation(fig, update, frames=np.arange(0, num_frames), interval=100)

# anim.save('All.gif', dpi=80, writer='imagemagick')
HTML(anim.to_jshtml())