In [87]:
import numpy as np

In [88]:
import matplotlib.pyplot as plt
from matplotlib import rc

# Default figure size: a large square canvas.
plt.rcParams["figure.figsize"] = [12, 12]
# Render all text through LaTeX with a serif font.
# If matplotlib cannot find a working LaTeX installation, comment out the next two lines.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "serif"
 

def fix_scaling(ax=None):
    """Pad the shorter axis so that both axes span ranges of equal length.

    The current x and y limits are read from ``ax``. Whichever range is
    shorter is extended symmetrically about its centre until it is as long as
    the other one; the longer range is left untouched.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes to adjust. When ``None`` (the default) the current pyplot axes,
        ``plt.gca()``, are used.
    """
    # Test for None explicitly: truthiness is the wrong question to ask about
    # "was an axes object supplied?".
    if ax is None:
        ax = plt.gca()

    x_lo, x_hi = ax.get_xlim()
    y_lo, y_hi = ax.get_ylim()
    width = x_hi - x_lo
    height = y_hi - y_lo
    half_gap = (width - height) / 2

    if width > height:
        ax.set_ylim((y_lo - half_gap, y_hi + half_gap))
    else:
        # half_gap <= 0 here, so adding it to the lower bound and subtracting
        # it from the upper bound widens the x range.
        ax.set_xlim((x_lo + half_gap, x_hi - half_gap))


In [126]:
# 1.1: f, grad, grad^2 and eigenvalues
# Coefficients of the objective: 1-based alphabet positions of the letters 'M' and 'P'.
a, b = (ord(letter) - ord('A') + 1 for letter in 'MP')  # a = 13, b = 16


def func(x):
    """Objective f(x) = a*x0^2 + b*(x0 - x1)^2 - x0 - 2*x1.

    ``x`` is anything indexable with ``x[0]`` and ``x[1]``; ``a`` and ``b``
    are the module-level coefficients.
    """
    x0, x1 = x[0], x[1]
    return a * x0 ** 2 + b * (x0 - x1) ** 2 - x0 - 2 * x1


def f_grad(x):
    """Gradient of ``func`` at ``x``, returned as a length-2 numpy array."""
    x0, x1 = x[0], x[1]
    d_dx0 = 2 * a * x0 + 2 * b * (x0 - x1) - 1
    d_dx1 = 2 * b * (x1 - x0) - 2
    return np.array([d_dx0, d_dx1])


# Stationarity condition grad f(x) = 0 written as the linear system A x = rhs.
f_grad_coefficients_a = [[2 * (a + b), -2 * b], [-2 * b, 2 * b]]
f_grad_coefficients_b = [1, 2]
# Hessian of f; it is constant because f is quadratic.
f_grad2 = np.array([[2 * (a + b), -2 * b], [-2 * b, 2 * b]])
eigenvalues_f_grad2 = np.linalg.eigvals(f_grad2)
# np.linalg.eigvals makes no promise about the order of its result, while the
# method cells below treat lambdas[0] as the largest eigenvalue and lambdas[1]
# as the smallest. Sort in descending order to make that assumption hold.
lambdas = sorted(eigenvalues_f_grad2, reverse=True)
# Closed-form solution of grad f(x) = 0.
min_point = [3 / (2 * a), 3 / (2 * a) + 1 / b]
f_min = func(min_point)
# Number of steps every method below performs.
iterations_number = 25

In [90]:
from matplotlib.animation import FuncAnimation
from IPython.display import HTML


def animate_trajectory(trajectory):
    """Animate one optimisation trajectory over level curves of ``func``.

    Frame ``t`` shows the first ``t + 1`` points of ``trajectory`` joined by a
    black line with round markers, the minimiser ``min_point`` as a green dot
    and the level curves ``func = 1, 6, 20``.

    Parameters
    ----------
    trajectory : sequence of points
        Each point must be indexable with ``[0]`` (x) and ``[1]`` (y).

    Returns
    -------
    matplotlib.animation.FuncAnimation
        One frame per trajectory point, 600 ms between frames.
    """
    fig, ax = plt.subplots()
    n = len(trajectory)

    # The contour grid does not depend on the frame, so build it once instead
    # of on every redraw.
    delta = 0.025
    X, Y = np.meshgrid(np.arange(-2, 4, delta), np.arange(-3, 3, delta))
    # func only indexes x[0] and x[1] and applies arithmetic to them, so it
    # evaluates element-wise on the whole grid in a single call.
    Z = func([X, Y])

    xs = [u[0] for u in trajectory]
    ys = [u[1] for u in trajectory]

    def step(t):
        ax.cla()
        ax.plot([min_point[0]], [min_point[1]], 'o', color='green')
        ax.contour(X, Y, Z, [1, 6, 20], colors=['blue', 'purple', 'red'])

        # Slice up to t + 1: frame 0 shows the starting point and the final
        # frame includes the last iterate (slicing up to t never drew it).
        ax.plot(xs[:t + 1], ys[:t + 1], '-o', color='black')

        fix_scaling(ax)
        ax.axis('off')

    return FuncAnimation(fig, step, frames=range(n), interval=600)

In [91]:
# 1.2: Gradient descent with optimal step
# Constant step: 2 divided by the sum of the Hessian eigenvalues.
x_start = np.array([2, 2.8])
alpha = 2.0 / (sum(lambdas))
cur_x = x_start.copy()
trajectory_opt_step = [cur_x.copy()]
for i in range(iterations_number):
    descent_direction = f_grad(cur_x)
    cur_x = cur_x - alpha * descent_direction
    trajectory_opt_step.append(cur_x.copy())

base_animation = animate_trajectory(trajectory_opt_step)
HTML(base_animation.to_html5_video())



In [92]:
# 1.2: Chebyshev's method
# Pick the extreme eigenvalues explicitly instead of relying on their order in `lambdas`.
lam_max, lam_min = max(lambdas), min(lambdas)
phi = (lam_max + lam_min) / (lam_max - lam_min)
# gamma_k = T_{k-1}(phi) / T_k(phi) for the Chebyshev polynomials T_k, so
# gamma_1 = 1 / phi and gamma_{k+1} = 1 / (2 * phi - gamma_k).
cur_gamma = 1.0 / phi
prev_gamma = 0
# Limit of the gamma sequence printed below.
print(phi - np.sqrt(phi ** 2 - 1))
print('phi', phi)
trajectory_chebyshev = []
x_start = np.array([2, 2.8])
trajectory_chebyshev.append(x_start.copy())
cur_x = x_start.copy()
prev_x = x_start.copy()
for i in range(iterations_number):
    if i == 0:
        # First step: no momentum and step 2 / (L + mu), i.e. the residual
        # polynomial 1 - 2 * lambda / (L + mu). The general formula below
        # would give 4 / (L + mu) here, which is twice too long.
        alpha = 2.0 / (lam_max + lam_min)
        beta = 0.0
    else:
        alpha = 4 * cur_gamma / (lam_max - lam_min)
        beta = cur_gamma * prev_gamma
    next_x = cur_x - alpha * f_grad(cur_x) + beta * (cur_x - prev_x)
    prev_x, cur_x = cur_x, next_x
    prev_gamma, cur_gamma = cur_gamma, 1.0 / (2 * phi - cur_gamma)
    print('gamma', cur_gamma)
    trajectory_chebyshev.append(cur_x.copy())

base_animation = animate_trajectory(trajectory_chebyshev)
HTML(base_animation.to_html5_video())

0.4677379368737228
phi 1.302843624078247
gamma 0.5440295567117078
gamma 0.48504657400191953
gamma 0.4715556068444575
gamma 0.4685746561468968
gamma 0.4679210649634828
gamma 0.4677780048454034
gamma 0.46774670305987737
gamma 0.46773985473707763
gamma 0.4677383564618995
gamma 0.46773802867072917
gamma 0.46773795695696047
gamma 0.467737941267509
gamma 0.46773793783498996
gamma 0.4677379370840276
gamma 0.46773793691973303
gamma 0.46773793688378884
gamma 0.467737936875925
gamma 0.46773793687420456
gamma 0.4677379368738282
gamma 0.4677379368737459
gamma 0.46773793687372783
gamma 0.46773793687372395
gamma 0.46773793687372306
gamma 0.46773793687372284
gamma 0.4677379368737228




In [93]:
# 1.2: Heavy ball method
# Pick the extreme eigenvalues explicitly instead of relying on their order in `lambdas`.
lam_max, lam_min = max(lambdas), min(lambdas)
sqrt_max, sqrt_min = np.sqrt(lam_max), np.sqrt(lam_min)
# Polyak's parameters for a quadratic with spectrum in [lam_min, lam_max]:
#   alpha = 4 / (sqrt(L) + sqrt(mu))^2
#   beta  = ((sqrt(L) - sqrt(mu)) / (sqrt(L) + sqrt(mu)))^2
# The momentum is the SQUARE of the ratio; the un-squared ratio is the
# momentum of Nesterov's scheme, not of the heavy ball method.
alpha = 4.0 / (sqrt_max + sqrt_min) ** 2
beta = ((sqrt_max - sqrt_min) / (sqrt_max + sqrt_min)) ** 2
trajectory_heavy_ball = []
x_start = np.array([2, 2.8])
trajectory_heavy_ball.append(x_start.copy())
cur_x = x_start.copy()
prev_x = x_start.copy()
for i in range(iterations_number):
    # prev_x == cur_x on the first pass, so the momentum term vanishes there.
    next_x = cur_x - alpha * f_grad(cur_x) + beta * (cur_x - prev_x)
    prev_x, cur_x = cur_x, next_x
    trajectory_heavy_ball.append(cur_x.copy())

base_animation = animate_trajectory(trajectory_heavy_ball)
HTML(base_animation.to_html5_video())



In [94]:
# 1.2: Nesterov method
# Pick the extreme eigenvalues explicitly instead of relying on their order in `lambdas`.
lam_max, lam_min = max(lambdas), min(lambdas)
sqrt_max, sqrt_min = np.sqrt(lam_max), np.sqrt(lam_min)
# NOTE(review): the textbook step is 1 / lam_max; the extra "+ 1" in the
# denominator is kept exactly as it was - confirm that it is intentional.
alpha = 1 / (lam_max + 1)
beta = (sqrt_max - sqrt_min) / (sqrt_max + sqrt_min)
trajectory_nesterov = []
x_start = np.array([2, 2.8])
trajectory_nesterov.append(x_start.copy())
cur_x = x_start.copy()
cur_y = x_start.copy()

for i in range(iterations_number):
    prev_x = cur_x
    # Gradient step from the extrapolated point y, then extrapolate again.
    cur_x = cur_y - alpha * f_grad(cur_y)
    cur_y = cur_x + beta * (cur_x - prev_x)
    trajectory_nesterov.append(cur_x.copy())

base_animation = animate_trajectory(trajectory_nesterov)
HTML(base_animation.to_html5_video())



In [117]:
# 1.3: ||x_k - x*||
fig, ax = plt.subplots()
# Raw string plus math mode: the text is typeset by LaTeX (usetex=True), where
# "_" and "\|" are only valid inside $...$; the same string is also valid
# matplotlib mathtext if usetex is switched off.
ax.set_title(r"$\|x_k - x^*\|$")
for trajectory, label in [
    (trajectory_opt_step, 'Optimal step size'),
    (trajectory_heavy_ball, 'Heavy ball'),
    (trajectory_chebyshev, 'Chebyshev'),
    (trajectory_nesterov, 'Nesterov'),
]:
    # Euclidean distance from every iterate to the minimiser.
    distances = [np.linalg.norm(point - min_point) for point in trajectory]
    ax.plot(range(len(trajectory)), distances, label=label)
ax.set_xlabel(r"iteration $k$")
ax.legend()
plt.show()

In [125]:
# 1.3: |f(x_k) - f(x*)|
fig, ax = plt.subplots()
# Raw string plus math mode: the text is typeset by LaTeX (usetex=True), where
# "_" is only valid inside $...$; the same string is also valid matplotlib
# mathtext if usetex is switched off.
ax.set_title(r"$|f(x_k) - f(x^*)|$")
for trajectory, label in [
    (trajectory_opt_step, 'Optimal step size'),
    (trajectory_heavy_ball, 'Heavy ball'),
    (trajectory_chebyshev, 'Chebyshev'),
    (trajectory_nesterov, 'Nesterov'),
]:
    # The gap is a scalar, so a plain absolute value is all that is needed.
    gaps = [abs(func(point) - f_min) for point in trajectory]
    ax.plot(range(len(trajectory)), gaps, label=label)
ax.set_xlabel(r"iteration $k$")
ax.legend()
plt.show()

In [127]:
# 1.3: point of minimum returned by solving f_grad = 0
min_point_2 = np.linalg.solve(f_grad_coefficients_a, f_grad_coefficients_b)
# Compare with a tolerance: the closed-form value and the solver's result are
# produced by different floating-point operations, so exact equality is not
# something the check should depend on.
np.testing.assert_allclose(min_point_2, min_point)

In [142]:
def animate_trajectory_many(trajectories, labels):
    """Animate several optimisation trajectories on one set of axes.

    Frame ``t`` shows the first ``t + 1`` points of every trajectory over the
    level curves ``func = 1, 6, 20``, with the minimiser ``min_point`` as a
    green dot and a legend built from ``labels``.

    Parameters
    ----------
    trajectories : sequence of trajectories
        Each trajectory is a sequence of points indexable with ``[0]`` (x) and
        ``[1]`` (y). The number of frames is the length of the FIRST one.
    labels : sequence of str
        ``labels[i]`` is the legend entry for ``trajectories[i]``.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        600 ms between frames.
    """
    fig, ax = plt.subplots()
    n = len(trajectories[0])

    # The contour grid does not depend on the frame, so build it once instead
    # of on every redraw.
    delta = 0.025
    X, Y = np.meshgrid(np.arange(-2, 4, delta), np.arange(-3, 3, delta))
    # func only indexes x[0] and x[1] and applies arithmetic to them, so it
    # evaluates element-wise on the whole grid in a single call.
    Z = func([X, Y])

    def step(t):
        ax.cla()
        ax.plot([min_point[0]], [min_point[1]], 'o', color='green')
        ax.contour(X, Y, Z, [1, 6, 20], colors=['blue', 'purple', 'red'])

        for i, trajectory in enumerate(trajectories):
            # Slice up to t + 1 so that the last iterate is drawn in the final frame.
            shown = trajectory[:t + 1]
            # One call draws both line and markers, so they share one colour
            # from the cycle and that colour matches the legend entry.
            ax.plot([u[0] for u in shown], [u[1] for u in shown], '-o', label=labels[i])

        # Attach the legend to these axes explicitly, not to pyplot's "current" ones.
        ax.legend()
        fix_scaling(ax)
        ax.axis('off')

    return FuncAnimation(fig, step, frames=range(n), interval=600)

In [143]:
# 1.4: draw all trajectories on one plot
# labels[i] must describe trajectories[i]: keep both lists in the same order.
trajectories = [trajectory_opt_step, trajectory_chebyshev, trajectory_heavy_ball, trajectory_nesterov]
labels = ['Optimal step size', 'Chebyshev', 'Heavy ball', 'Nesterov']
base_animation = animate_trajectory_many(trajectories, labels)
HTML(base_animation.to_html5_video())