In [None]:
# %% Deep learning - Section 6.35
#    Code challenge 2: 2D gradient ascent

#    1) Modify the 2D gradient descent into a gradient 'ascent' algorithm that
#       finds local maxima
#    2) There are at least two ways to do it, try to figure out both of them

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import sympy               as sym
import copy

from mpl_toolkits.mplot3d             import Axes3D
from google.colab                     import files
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Define function

# Use the function peaks as before

def peaks(x,y):
    """Evaluate the 'peaks' test surface on the grid spanned by x and y.

    Parameters
    ----------
    x, y : array-like
        1D coordinate vectors; they are expanded with np.meshgrid, so the
        result has one row per element of y and one column per element of x.

    Returns
    -------
    numpy.ndarray
        Surface height at every (x,y) grid location.
    """

    # Turn the two coordinate vectors into full 2D coordinate grids
    grid_x,grid_y = np.meshgrid(x,y)

    # The surface is the combination of three Gaussian-weighted terms
    first_term  = 3*(1-grid_x)**2 * np.exp(-(grid_x**2) - (grid_y+1)**2)
    second_term = 10*(grid_x/5 - grid_x**3 - grid_y**5) * np.exp(-grid_x**2 - grid_y**2)
    third_term  = 1/3*np.exp(-(grid_x+1)**2 - grid_y**2)

    return first_term - second_term - third_term


In [None]:
# %% Plotting

# Sample the landscape on a regular 201x201 grid covering [-3,3] in both directions
x = np.linspace(-3,3,201)
y = np.linspace(-3,3,201)
z = peaks(x,y)

# Heat map of the surface; rows of z follow y, hence origin='lower' so that
# y increases upwards, and the colour range is clipped to [-5,5]
plt.imshow(z,
           cmap='jet',
           origin='lower',
           vmin=-5,vmax=5,
           extent=[x[0],x[-1],y[0],y[-1]])
plt.show()


In [None]:
# %% Compute derivative with sympy

# Symbolic twin of peaks(): the same expression written with sympy symbols
sx,sy = sym.symbols('sx,sy')

sz = 3*(1-sx)**2 * sym.exp(-(sx**2) - (sy+1)**2) \
     - 10*(sx/5 - sx**3 - sy**5) * sym.exp(-sx**2 - sy**2) \
     - 1/3*sym.exp(-(sx+1)**2 - sy**2)

# Partial derivatives of sz wrapped as callables of (sx,sy) with .lambdify().
# The 'sympy' backend is requested here (not 'numpy'), so the callables are
# evaluated with sympy functions and their result still needs .evalf() to be
# reduced to a number, as in the example below
df_x,df_y = ( sym.lambdify( (sx,sy),sym.diff(sz,wrt),'sympy' ) for wrt in (sx,sy) )

# Example of partial derivative computation
df_x(-1,-1).evalf()


In [None]:
# %% Gradient 'ascent' in 2D
#    Method 1, plus instead of minus (the one implemented below)
#    Method 2, modify sz in the derivative by making it negative

# 1) Random starting point (uniform between -2 and +2)
local_max = np.random.rand(2)*4-2
# Keep an independent copy of the start; slicing a numpy array with [:] only
# gives a view onto the same data, which would silently follow any in-place
# update of local_max
start_pnt = local_max.copy()

print(f'Random starting local maximum: {local_max}')

# 2) Learning parameters
learning_rate   = .01
training_epochs = 1000

# 3) Loop over epochs
# trajectory[i,:] holds the position reached after update i (the starting
# point itself is kept separately in start_pnt)
trajectory = np.zeros((training_epochs,2))

for i in range(training_epochs):
    # The derivative callables are built on the sympy backend; cast each
    # component to a Python float so that gradient, and therefore local_max,
    # stay plain float arrays instead of turning into object arrays
    gradient = np.array([ float(df_x(local_max[0],local_max[1])),
                          float(df_y(local_max[0],local_max[1]))
                          ])
    # Ascent: step along the gradient (descent would subtract it)
    local_max       = local_max + gradient*learning_rate
    trajectory[i,:] = local_max

print(f'Estimated local maximum: {local_max}')


In [None]:
# Plotting

# Landscape as background
plt.imshow(z,cmap='jet',origin='lower',vmin=-5,vmax=5,extent=[x[0],x[-1],y[0],y[-1]])

# Starting point (blue square), end point (green circle) and the path between
# them; only the two markers carry a label, so only they appear in the legend
plt.plot(start_pnt[0],start_pnt[1],'bs',label='Rand start')
plt.plot(local_max[0],local_max[1],'go',label='Local max')
plt.plot(trajectory[:,0],trajectory[:,1],'g')
plt.legend()

plt.suptitle('Random starting point')
plt.title(f'Training epochs: {training_epochs} and learning rate: {learning_rate}')
plt.colorbar()

# Write the figure to disk before showing it
plt.savefig('figure26_code_challenge_2.png')

plt.show()

# Colab helper: hand the saved figure to the browser for download
files.download('figure26_code_challenge_2.png')


In [None]:
# %% Visualise the basins of attraction for the gradient ascent algorithm

# Grid for function visualization
x = np.linspace(-4,4,201)
y = np.linspace(-4,4,201)

# Re-evaluate the landscape on the new grid: the previous z was sampled on
# [-3,3], and drawing it with a [-4,4] extent would stretch the surface and
# misalign it with the trajectories
z = peaks(x,y)

# Gradient ascent parameters
learning_rate   = 0.01
training_epochs = 100

# Generate multiple starting points in a 25x25 grid
start_x      = np.linspace(-2.5,2.5,25)
start_y      = np.linspace(-2.5,2.5,25)
start_points = np.array(np.meshgrid(start_x,start_y)).T.reshape(-1,2)

# Plot function
plt.figure(figsize=(10,8))
plt.imshow(z,extent=[x[0],x[-1],y[0],y[-1]],vmin=-5,vmax=5,origin='lower',cmap='jet')

# Run gradient ascent for each starting point
for start_pnt in start_points:
    local_max  = start_pnt.copy()
    trajectory = np.zeros((training_epochs, 2))

    for i in range(training_epochs):
        # The derivative callables are built on the sympy backend; force a
        # float array so the position does not degrade into an object array
        gradient = np.array([df_x(local_max[0],local_max[1]),
                             df_y(local_max[0],local_max[1])
                             ],dtype=float)
        # Ascent: step along the gradient
        local_max       = local_max + gradient*learning_rate
        trajectory[i,:] = local_max

    # Plot trajectory
    plt.plot(start_pnt[0],start_pnt[1],'bs',markersize=2)
    plt.plot(local_max[0],local_max[1],'ko',markersize=3)
    plt.plot(trajectory[:,0],trajectory[:,1],'g',alpha=0.5)

# The two labels are matched to the first two artists drawn (start and end
# marker of the first trajectory)
plt.legend(['Start point','Local max'])
plt.suptitle('Basins of Attraction for Gradient Ascent')
plt.title(f'Training epochs: {training_epochs}, Learning rate: {learning_rate}')

plt.savefig('figure32_code_challenge_2.png')

plt.show()

# Colab helper: hand the saved figure to the browser for download
files.download('figure32_code_challenge_2.png')
