In [None]:
# %% Deep learning - Section 6.33
#    Code challenge 1: Unfortunate starting value

#    1) Define function and look for minimum as in previous code
#    2) Hard-code starting value of x = 0
#    3) Additional exercises

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import copy

from google.colab                     import files
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Define function

def fx(x):
    """Evaluate f(x) = cos(2*pi*x) + x**2.

    `x` may be a scalar or a NumPy array; the result has the same shape.
    """
    angle = 2*np.pi*x
    return np.cos(angle) + x**2

def df(x):
    """Analytic derivative of cos(2*pi*x) + x**2, i.e. 2*(x - pi*sin(2*pi*x)).

    `x` may be a scalar or a NumPy array; the result has the same shape.
    """
    sine_term = np.pi*np.sin(2*np.pi*x)
    return 2*(x - sine_term)


In [None]:
# %% Plotting

# Evaluation grid: 2001 evenly spaced samples on [-2, 2] (reused by later cells)
x = np.linspace(-2,2,2001)

# One plot call per curve; the legend entries come from the `label` arguments
plt.plot(x,fx(x),label="f(x)")
plt.plot(x,df(x),label="f'(x)")

# Clip the horizontal axis to the extent of the grid
plt.xlim(x[0],x[-1])

plt.title('A function and its derivative')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.legend()
plt.show()


In [None]:
# %% Gradient descent

# Random starting point, drawn from the plotting grid x
local_min   = np.random.choice(x,1).item()
learn_rate  = 0.01
train_epoch = 100

# The value printed here is only the initial guess, not yet a minimum
print(f'Random starting point: {local_min:.8f}')

# Gradient descent: at every epoch step against the derivative, with the step
# size scaled by the learning rate
for i in range(train_epoch):
    gradient  = df(local_min)
    local_min = local_min - gradient*learn_rate

print(f'Estimated local minimum: {local_min:.8f}')


In [None]:
# %% Plotting

# Curves: the function and its derivative (legend entries come from `label`)
plt.plot(x,fx(x),label="f(x)")
plt.plot(x,df(x),label="f'(x)")

# Mark the gradient-descent estimate on both curves; only the marker that sits
# on f(x) carries the "f(x) min" legend entry, the one on f'(x) is unlabelled
plt.plot(local_min,fx(local_min),'ro',label="f(x) min")
plt.plot(local_min,df(local_min),'ro')

plt.xlim(x[[0,-1]])
plt.grid()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.legend()
plt.suptitle('A function and its derivative')
plt.title('Empirical local minimum: %s' %np.round(local_min,4))

# Write the figure to disk before displaying it
plt.savefig('figure7_code_challenge_1.png')

plt.show()

# google.colab helper: hands the saved file to the browser for download
files.download('figure7_code_challenge_1.png')


In [None]:
# %% Gradient descent

# Fixed starting point, just off the local maximum at x = 0
# (set local_min = 0 to start exactly on the maximum)
local_min   = 0.0001
learn_rate  = 0.01
train_epoch = 100

print(f'Fixed starting point: {local_min:.8f}')

# Gradient descent: at every epoch step against the derivative, with the step
# size scaled by the learning rate
for i in range(train_epoch):
    gradient  = df(local_min)
    local_min = local_min - gradient*learn_rate

print(f'Estimated local minimum: {local_min:.8f}')

# Starting exactly at x = 0 gives df(0) = 0, so every update is zero and the
# estimate never leaves the local maximum (there is no learning at all). With
# the small offset used here the derivative is no longer zero, so the estimate
# is able to move away from x = 0.


In [None]:
# %% Plotting

# Curves: the function and its derivative (legend entries come from `label`)
plt.plot(x,fx(x),label="f(x)")
plt.plot(x,df(x),label="f'(x)")

# Mark the gradient-descent estimate on both curves; only the marker that sits
# on f(x) carries the "f(x) min" legend entry, the one on f'(x) is unlabelled
plt.plot(local_min,fx(local_min),'ro',label="f(x) min")
plt.plot(local_min,df(local_min),'ro')

plt.xlim(x[[0,-1]])
plt.grid()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.legend()
plt.suptitle('A function and its derivative')
plt.title('Empirical local minimum: %s' %np.round(local_min,4))

# Write the figure to disk before displaying it
plt.savefig('figure9_code_challenge_1.png')

plt.show()

# google.colab helper: hands the saved file to the browser for download
files.download('figure9_code_challenge_1.png')


In [None]:
# %% Exercise 1
#    The derivative has a multiplicative factor of 2 in it. Is that constant necessary for the accuracy of the g.d. result?
#    Try removing that '2' from the derivative and see whether the model can still find the minimum. Before running the
#    code, think about what you expect to happen. Does reality match your expectations? Why is (or isn't) that factor necessary?

# No, the constant only stretches the derivative along the y axis and changes its steepness, but doesn't change the points where
# the derivative equals zero; however, in the context of discrete numerical computations, a steeper derivative means larger
# steps (removing the '2' is equivalent to halving the learning rate), so it can make it faster to reach the area of a local
# minimum, and potentially allows for a more numerically stable output (not to mention that it's also the actual derivative anyway)


In [None]:
# %% Exercise 2
#    What about the factor of '2' inside the np.sin() function? Is that important? Can you get an accurate result if you
#    remove it?

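# Yes, it is important, and no, the result is not accurate without it. If you remove that factor, the period of the sine in the
# derivative no longer matches the cosine in the function, so the zeros of the "derivative" no longer coincide with the minima of f(x)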


In [None]:
# %% Exercise 3
#    Try setting the initial value to a small but non-zero number, e.g., .0001 or -.0001. Does that help the solution?

# In this case the function is a simple cosine with a superimposed parabola, meaning that each local maximum is technically
# just a single point at the peak of a wave, and an initial value just slightly different from zero is enough to push the
# descent downhill. However, if the loss function had a more extended area with no growth (or not enough training epochs), this
# trick might not be enough

