In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import derivative

## Cost Function

Let's consider a simple cost function, $f(x) = x^2 + x + 1$.

In [None]:
def f(x):
    """Quadratic cost function f(x) = x^2 + x + 1.

    Accepts a plain number or a NumPy array (evaluated element-wise).
    """
    squared_term = x**2
    return squared_term + x + 1

In [None]:
# Sample 500 evenly spaced x values over [-3, 3] to draw the curves with
x_1 = np.linspace(-3, 3, 500)

In [None]:
# Select the style sheet before drawing: it updates the plotting defaults,
# which are picked up when a figure/axes is created, so applying it after
# plt.plot() would leave this first figure in the default style.
plt.style.use('ggplot')

# Cost function f(x) over the sampled x range
plt.plot(x_1, f(x_1))
plt.xlabel('x', fontsize=14)
plt.ylabel('f(x)', fontsize=14)
plt.show()

- The `value of the function` at a given point is the `cost`; the `slope of the curve` at that point tells us how quickly the cost changes there
- `Slope of the function` will be `0` at the point where the `cost is the lowest`
- `Slope of the function` is given by `its derivative`

In [None]:
def df(x):
    """Derivative (slope) of the cost function f: f'(x) = 2x + 1."""
    slope = 2 * x + 1
    return slope

### Comparing the 2 functions.

In [None]:
# Side-by-side view: the cost function (left) and its slope (right)
plt.figure(figsize=[15.0, 5.0])

panel_specs = (
    # (function to draw, line colour, panel title, y-axis label)
    (f, 'blue', 'Cost function', 'f(x)'),
    (df, 'skyblue', 'Slope of the cost function', 'df(x)'),
)
for slot, (curve_function, shade, heading, y_text) in enumerate(panel_specs, start=1):
    plt.subplot(1, 2, slot)
    plt.plot(x_1, curve_function(x_1), color=shade, linewidth=1)
    plt.title(heading, fontsize=17)
    plt.xlabel('x', fontsize=16)
    plt.ylabel(y_text, fontsize=16)

plt.show()

In [None]:
# Settings for the descent: step multiplier and the step size below which we stop
learning_rate = 0.1
precision = 0.00001

# Current position and a slot for the previous position (overwritten by the descent loop)
starting_point = 3
temporary_value = 0

# History of visited x values and the slope at each one, seeded with the start
x_list = [starting_point]
slope_list = [df(starting_point)]

In [None]:
# Restart from the recorded initial point so that re-running this cell
# reproduces the same descent instead of continuing from the previous
# result and appending duplicate history.
starting_point = x_list[0]
del x_list[1:], slope_list[1:]

# Step against the slope until two successive positions differ by less
# than `precision` (the range is only an upper bound on the step count).
for i in range(100000000):
    temporary_value = starting_point
    starting_point = starting_point - learning_rate * df(temporary_value)
    x_list.append(starting_point)
    slope_list.append(df(starting_point))
    if abs(starting_point - temporary_value) < precision:
        print("Local minimum is at: ", starting_point)
        # i is zero-based, so i + 1 update steps have been taken
        print("Number of steps: ", i + 1)
        break

In [None]:
# Report where the descent on f stopped, plus the cost and slope there
for label, value in (
    ("The local minimum occurs at: ", starting_point),
    ("The cost is: ", f(starting_point)),
    ("The slope is: ", df(starting_point)),
):
    print(label, value)

In [None]:
plt.figure(figsize=[20, 5])

# x positions visited by the descent, as an array so f can be applied element-wise
values = np.array(x_list)
dot_style = {'color': 'red', 's': 100, 'alpha': 0.6}

# Panels 1 and 2: the cost curve, first on its own and then with the descent steps
for slot in (1, 2):
    plt.subplot(1, 3, slot)
    plt.plot(x_1, f(x_1), color='blue', linewidth=1)
    plt.title('Cost function', fontsize=17)
    plt.xlabel('x', fontsize=16)
    plt.ylabel('f(x)', fontsize=16)
    plt.xlim(-3, 3)
    plt.ylim(0, 8)
    if slot == 2:
        plt.scatter(x_list, f(values), **dot_style)

# Panel 3: the slope curve with the slope recorded at every step
plt.subplot(1, 3, 3)
plt.plot(x_1, df(x_1), color='skyblue', linewidth=1)
plt.title('Slope of the cost function', fontsize=17)
plt.xlabel('x', fontsize=16)
plt.ylabel('df(x)', fontsize=16)
plt.xlim(-2, 3)
plt.ylim(-3, 6)
plt.scatter(x_list, slope_list, **dot_style)

plt.show()

In [None]:
def g(x):
    """Quartic cost function g(x) = x^4 - 4x^2 + 5."""
    quartic_term = x**4
    quadratic_term = 4 * (x**2)
    return quartic_term - quadratic_term + 5

In [None]:
def dg(x):
    """Derivative (slope) of g: g'(x) = 4x^3 - 8x."""
    cubic_term = 4 * x**3
    linear_term = 8 * x
    return cubic_term - linear_term

In [None]:
# 1000 evenly spaced x values over [-2, 2] for drawing g and dg
x_2 = np.linspace(-2, 2, 1000)

In [None]:
# Preview only the first few samples instead of dumping all 1000 values
x_2[:5]

In [None]:
plt.figure(figsize=[15.0, 5.0])

# Plotting cost function -> g(x)
# No descent points are drawn here: gradient descent has not been run on g
# yet, and x_list holds the steps of the earlier descent on f (starting at
# x = 3), which do not belong on this curve and would stretch the axes.
plt.subplot(1, 2, 1)
plt.plot(x_2, g(x_2), color='blue', linewidth=1)
plt.title('Cost function', fontsize=17)
plt.xlabel('x', fontsize=16)
plt.ylabel('g(x)', fontsize=16)

# Plotting derivative of the cost function -> slope dg(x)
plt.subplot(1, 2, 2)

plt.plot(x_2, dg(x_2), color='skyblue', linewidth=1)
plt.title('Slope of the cost function', fontsize=17)
plt.xlabel('x', fontsize=16)
plt.ylabel('dg(x)', fontsize=16)

plt.show()

In [None]:
def gradient_descent(derivative_function, initial_guess, learning_rate=0.02, precision=0.001, max_iteration=500,
                     cost_function=None):
    """Minimise a one-dimensional function by gradient descent.

    Parameters
    ----------
    derivative_function : callable
        Returns the slope of the cost function at a given x.
    initial_guess : number
        x value the descent starts from.
    learning_rate : float, optional
        Multiplier applied to the slope to obtain each step.
    precision : float, optional
        The descent stops once a step moves x by less than this amount.
    max_iteration : int, optional
        Upper bound on the number of update steps.
    cost_function : callable, optional
        The function being minimised. When given, its value at the final x
        is printed on convergence. (Previously the cost was always computed
        with the unrelated global ``f``, which reported a wrong value when
        descending on any other function.)

    Returns
    -------
    tuple
        ``(final_x, x_values, slopes)`` where ``x_values`` contains the
        initial guess followed by every x visited, and ``slopes`` holds the
        slope at each of those x values.
    """
    new_x = initial_guess
    x_history = [new_x]
    slope_history = [derivative_function(new_x)]
    for step in range(1, max_iteration + 1):
        previous_x = new_x
        # The slope at previous_x is the last one recorded; reuse it
        new_x = previous_x - learning_rate * slope_history[-1]
        x_history.append(new_x)
        slope_history.append(derivative_function(new_x))
        if abs(new_x - previous_x) < precision:
            print("Local minimum is at: ", new_x)
            # step counts from 1, so it is the number of updates performed
            print("Number of steps: ", step)
            if cost_function is not None:
                print("Cost at minimum: ", cost_function(new_x))
            break
    return new_x, x_history, slope_history

In [None]:
# Descend on g from just left of the local maximum at x = 0
local_minimum, list_x, derivative_list = gradient_descent(dg, -0.1)
# list_x also contains the initial guess, so the number of steps taken is one less than its length
print(F"Number of steps: {len(list_x) - 1}")

In [None]:
# Report the result of the descent on g (not the earlier descent on f)
print("The local minimum occurs at: ", local_minimum)
print("The cost is: ", g(local_minimum))
print("The slope is: ", dg(local_minimum))

In [None]:
# Descend on g starting just right of x = 0, then draw the visited points
local_minimum, list_x, derivative_list = gradient_descent(dg, 0.1)

plt.figure(figsize=[15.0, 5.0])

dot_style = {'color': 'red', 's': 100, 'alpha': 0.6}
panel_specs = (
    # (curve y-values, line colour, panel title, y-axis label, y-values of the descent steps)
    (g(x_2), 'blue', 'Cost function', 'g(x)', g(np.array(list_x))),
    (dg(x_2), 'skyblue', 'Slope of the cost function', 'dg(x)', derivative_list),
)
for slot, (curve, shade, heading, y_text, step_heights) in enumerate(panel_specs, start=1):
    plt.subplot(1, 2, slot)
    plt.plot(x_2, curve, color=shade, linewidth=1)
    plt.title(heading, fontsize=17)
    plt.xlabel('x', fontsize=16)
    plt.ylabel(y_text, fontsize=16)
    plt.scatter(list_x, step_heights, **dot_style)

plt.show()

In [None]:
# 1000 evenly spaced x values over [-2.5, 2.5] for drawing h and dh
x_3 = np.linspace(-2.5, 2.5, 1000)

In [None]:
def h(x):
    """Quintic cost function h(x) = x^5 - 2x^4 + 2."""
    quintic_term = x**5
    quartic_term = 2 * x**4
    return quintic_term - quartic_term + 2

In [None]:
def dh(x):
    """Derivative (slope) of h: h'(x) = 5x^4 - 8x^3."""
    quartic_term = 5 * x**4
    cubic_term = 8 * x**3
    return quartic_term - cubic_term

In [None]:
# Descend on h from x = 0.2 with a larger iteration budget, then draw the visited points
local_minimum, list_x, derivative_list = gradient_descent(derivative_function=dh, initial_guess=0.2, max_iteration=700)

plt.figure(figsize=[15.0, 5.0])

dot_style = {'color': 'red', 's': 100, 'alpha': 0.6}
panel_specs = (
    # (x-limits, y-limits, curve y-values, line colour, title, y-axis label, y-values of the descent steps)
    ((-1.2, 2.5), (-1, 4), h(x_3), 'blue', 'Cost function', 'h(x)', h(np.array(list_x))),
    ((-1, 2), (-4, 5), dh(x_3), 'skyblue', 'Slope of the cost function', 'dh(x)', derivative_list),
)
for slot, (x_range, y_range, curve, shade, heading, y_text, step_heights) in enumerate(panel_specs, start=1):
    plt.subplot(1, 2, slot)
    # Limits are fixed before drawing so the view stays on the region of interest
    plt.xlim(*x_range)
    plt.ylim(*y_range)
    plt.plot(x_3, curve, color=shade, linewidth=1)
    plt.title(heading, fontsize=17)
    plt.xlabel('x', fontsize=16)
    plt.ylabel(y_text, fontsize=16)
    plt.scatter(list_x, step_heights, **dot_style)

plt.show()

## Learning Rate

In [None]:
# Same descent on g, but with a much smaller learning rate
local_minimum, list_x, derivative_list = gradient_descent(dg, initial_guess=1.9, learning_rate=0.003)


plt.figure(figsize=[15.0, 5.0])

# Plotting cost function -> g(x), with the points visited by the descent
plt.subplot(1, 2, 1)
plt.plot(x_2, g(x_2), color='blue', linewidth=1)
plt.title('Cost function', fontsize=17)
plt.xlabel('x', fontsize=16)
plt.ylabel('g(x)', fontsize=16)
plt.scatter(list_x, g(np.array(list_x)), color='red', s=100, alpha=0.6)

# Plotting derivative of the cost function -> slope dg(x)
plt.subplot(1, 2, 2)

plt.plot(x_2, dg(x_2), color='skyblue', linewidth=1)
plt.title('Slope of the cost function', fontsize=17)
plt.xlabel('x', fontsize=16)
plt.ylabel('dg(x)', fontsize=16)
plt.scatter(list_x, derivative_list, color='red', s=100, alpha=0.6)

plt.show()

# list_x also contains the initial guess, so the number of steps taken is one less than its length
print(F"Number of Steps: {len(list_x) - 1}")

In [None]:
# Gradient descent, fixed amount of times
max_iter_limit = 5

# low_gamma is the tuple returned by gradient_descent:
# (final x, list of visited x values, list of slopes at those x values)
low_gamma = gradient_descent(dg, initial_guess=3, learning_rate=0.003, max_iteration=max_iter_limit)

# Plotting reduction in cost per iteration
plt.figure(figsize=[15.0, 5.0])

# NOTE(review): the axis labels below say "Number of Iterations" vs "Cost", but
# this line draws g(x) against x over x_2 — presumably the cost at each
# iteration (g applied to low_gamma[1]) was intended; confirm.
plt.plot(x_2, g(x_2), color='blue', linewidth=1)
plt.title('Cost function', fontsize=17)
plt.xlabel('Number of Iterations', fontsize=16)
plt.ylabel('Cost', fontsize=16)
# NOTE(review): list_x is not assigned in this cell, so these points come from
# an earlier gradient_descent call rather than from low_gamma — confirm.
plt.scatter(list_x, g(np.array(list_x)), color='red', s=100, alpha=0.6)