# Chapter 9 - Gradient Descent

In [None]:
import sys
sys.path.append("../")
from utils import *

## Gradient Descent Algorithm and Animation Implementation

In [None]:
def VanillaGradientDescent(f, f_grad, init=None, eta=lambda t: .1, delta=1e-5, max_iter=None):
    """Run plain gradient descent until consecutive iterates are closer than `delta`.

    Parameters
    ----------
    f : callable
        Objective function. Kept for interface symmetry with `f_grad`; it is
        not evaluated by the algorithm itself.
    f_grad : callable
        Maps the current iterate (1-D array) to the gradient at that point.
    init : array-like, optional
        Starting point. When omitted, a fresh point is sampled uniformly from
        [-1, 1]^2 on every call (sampling in the signature would freeze a single
        "random" point at definition time and reuse it for all calls).
    eta : callable
        Learning-rate schedule; `eta(t)` is the step size of iteration t = 1, 2, ...
    delta : float
        Stop once the Euclidean distance between consecutive iterates drops below it.
    max_iter : int, optional
        Upper bound on the number of iterations. `None` (default) means no bound,
        in which case a diverging or oscillating run does not terminate.

    Returns
    -------
    steps : ndarray of shape (T + 1, d)
        All iterates, including the starting point.
    gradients : ndarray of shape (T, d)
        Gradient used at each iteration.
    deltas : ndarray of shape (T,)
        Distance moved at each iteration.
    """
    if init is None:
        init = np.random.uniform(-1, 1, 2)

    steps, gradients, deltas = [np.asarray(init)], [], []

    t = 1
    last_delta = delta  # seeds the stopping test so the first iteration always runs
    while last_delta >= delta and (max_iter is None or t <= max_iter):
        g, eta_t = f_grad(steps[-1]), eta(t)
        step = steps[-1] - eta_t * g

        gradients.append(g)
        steps.append(step)
        last_delta = np.sum((steps[-1] - steps[-2])**2)**.5
        deltas.append(last_delta)
        t += 1

    return np.array(steps), np.array(gradients), np.array(deltas)

In [None]:
def Animate_GradientDescent(f, f_grad, init, eta, delta, axis_range, frame_time=500):
    """Animate gradient descent on a 2-D objective, side by side as a 3-D surface and a contour plot.

    Parameters
    ----------
    f : callable
        Objective. Must accept an (m, 2) array of points and return the m values
        (a bare scalar is tolerated when m == 1).
    f_grad : callable
        Gradient of `f` at a single point, passed on to `VanillaGradientDescent`.
    init, eta, delta
        Starting point, learning-rate schedule and stopping threshold, forwarded
        unchanged to `VanillaGradientDescent`.
    axis_range : 1-D array
        Grid values used for both axes when drawing the surface and the contour.
    frame_time : int
        Passed to `AnimationButtons.play` (presumably the per-frame duration in
        milliseconds -- confirm against `utils`).

    Returns
    -------
    The figure built by `make_subplots`, with one animation frame per iterate.
    """
    def as_array(x):
        # Some objectives collapse a single-point evaluation to a bare scalar
        return np.array([x]) if np.isscalar(x) else x

    def grid_values(fun, vals):
        xx, yy = np.meshgrid(vals, vals)
        return fun(np.c_[xx.ravel(), yy.ravel()]).reshape(len(vals), len(vals))

    steps, _, _ = VanillaGradientDescent(f, f_grad, init, eta, delta)

    # Evaluate the objective on the grid once and share it between both views
    grid_z = grid_values(f, axis_range)
    surface = go.Surface(x=axis_range, y=axis_range, z=grid_z, opacity=.4, colorscale="Electric", showscale=False)
    contour = go.Contour(x=axis_range, y=axis_range, z=grid_z, opacity=.4, colorscale="Electric")

    # Objective value of every iterate, computed once instead of re-evaluating
    # a growing prefix of the path for each frame
    z = as_array(f(steps))
    n_steps = steps.shape[0]

    frames = []
    for i in range(1, n_steps + 1):
        frames.append(go.Frame(data=[
            # 3D visualization of progress: path so far, then the current iterate
            go.Scatter3d(x=steps[:i,0], y=steps[:i,1], z=z[:i],
                         marker=dict(size=3, color="black"), showlegend=False),
            go.Scatter3d(x=[steps[i-1,0]], y=[steps[i-1,1]], z=[z[i-1]],
                         marker=dict(size=5, color="orange"), showlegend=False),
            surface,

            # 2D visualization of progress: path so far, then the current iterate
            go.Scatter(x=steps[:i,0], y=steps[:i,1],
                       marker=dict(size=3, color="black"), showlegend=False),
            go.Scatter(x=[steps[i-1,0]], y=[steps[i-1,1]],
                       marker=dict(size=5, color="orange"), showlegend=False),
            contour],
            traces=[0, 1, 2, 3, 4, 5],
            layout=go.Layout(title=rf"$\text{{Iteration }} {i}/{n_steps}$" )))

    # The first frame seeds the static figure; traces 0-2 go to the 3-D scene, 3-5 to the 2-D axes
    return make_subplots(rows=1, cols=2, specs=[[{'type':'scene'}, {}]],
                         subplot_titles=('3D Visualization Of Function', '2D Visualization Of Function'))\
        .add_traces(data=frames[0]["data"], rows=[1, 1, 1, 1, 1, 1], cols=[1, 1, 1, 2, 2, 2])\
        .update(frames = frames)\
        .update_layout(width=900, height = 330, title = frames[0].layout.title,
                       updatemenus = [dict(type="buttons", buttons=[AnimationButtons.play(frame_time,0),
                                                                    AnimationButtons.pause()])])

## Gradient Descent Over Gaussian Function

In [None]:
from numpy.linalg import solve, det

def negative_gaussian(mu=np.zeros(2), cov=np.eye(2)):
    """Build the negated multivariate Gaussian density and its gradient.

    The objective is f(x) = -N(x; mu, cov), whose unique minimiser is `mu`.

    Parameters
    ----------
    mu : array-like of shape (d,)
        Mean of the Gaussian. The default array is only read, never mutated.
    cov : array-like of shape (d, d)
        Covariance matrix of the Gaussian. The default array is only read.

    Returns
    -------
    (_evaluate, _gradient) : tuple of callables
        `_evaluate(x)` returns -pdf(x) as computed by scipy's `multivariate_normal`;
        `_gradient(x)` returns the gradient of the objective at a single point x
        of shape (d,).
    """
    from scipy.stats import multivariate_normal

    mu = np.asarray(mu)
    # Freeze the distribution and the normalising constant once instead of
    # rebuilding them on every evaluation
    density = multivariate_normal(mu, cov)
    normalizer = np.sqrt((2*np.pi)**mu.shape[0] * det(cov))

    def _evaluate(x: np.ndarray):
        return - density.pdf(x)

    def _gradient(x: np.ndarray):
        # z = cov^{-1} (x - mu); grad f(x) = N(x; mu, cov) * z.
        # Differentiating the exponent -(x-mu)^T cov^{-1} (x-mu) / 2 already
        # cancels the 1/2, so no extra factor of 2 belongs in the denominator.
        z = solve(cov, x - mu)
        return np.exp(-z @ (x - mu) / 2) * z / normalizer

    return _evaluate, _gradient


# Demo: descend the negated Gaussian with covariance diag(5, 10), starting from
# the corner (-5, -5) of the plotted window with a constant step size of 300.
Animate_GradientDescent(
    *negative_gaussian(cov=np.diag([5., 10.])),
    init=np.array([-5, -5]),
    eta=lambda t: 300,
    delta=0.01,
    axis_range=np.linspace(-5, 5, 50),
)


## Gradient Descent For RSS

In [None]:
from scipy.stats import ortho_group

def residual_sum_of_squares(X: np.ndarray, y: np.ndarray):
    """Build the residual-sum-of-squares objective ||Xw - y||^2 and its gradient.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Design matrix.
    y : ndarray of shape (n,)
        Response vector.

    Returns
    -------
    (_evaluate, _gradient) : tuple of callables
        `_evaluate(w)` accepts either a single weight vector of shape (d,), returning
        a scalar, or a batch of shape (m, d), returning the m objective values.
        `_gradient(w)` returns 2 X^T (Xw - y) for a single weight vector of shape (d,).
    """
    def _evaluate(w: np.ndarray):
        w = np.asarray(w)
        # Treat a single weight vector as a batch of one so that the same
        # column-wise reduction serves both call patterns
        W = np.atleast_2d(w)
        residuals = X @ W.T - y[..., np.newaxis]
        rss = np.sum(residuals**2, axis=0)
        return rss[0] if w.ndim == 1 else rss

    def _gradient(w: np.ndarray):
        return 2 * X.T @ (X @ w.T - y)

    return _evaluate, _gradient


# Synthetic simple-regression data: one feature drawn uniformly from [-3, 3]
# plus a constant column, so `w` holds the true [slope, intercept].
# Responses are the linear model corrupted by standard-normal noise.
n = 20
w = np.random.random(2)
X = np.hstack([np.random.uniform(-3, 3, (n, 1)), np.ones((n, 1))])
y = np.random.normal(loc=0, scale=1, size=n).__radd__(X @ w)

In [None]:
def Animate_GradientDescent_regression(f, f_grad, X, y, init, eta, delta, axis_range, frame_time=500):
    """Animate gradient descent on a 2-parameter regression objective together with the fitted line.

    The top row shows the objective as a 3-D surface and as a contour plot with the
    descent path; the bottom row shows the data and the line defined by the current
    iterate, read as (slope, intercept).

    Parameters
    ----------
    f : callable
        Objective. Must accept an (m, 2) array of points and return the m values
        (a bare scalar is tolerated when m == 1).
    f_grad : callable
        Gradient of `f` at a single point, passed on to `VanillaGradientDescent`.
    X : ndarray
        Design matrix; only its first column is used here, as the x-axis of the data plot.
    y : ndarray
        Responses plotted against the first column of `X`.
    init, eta, delta
        Starting point, learning-rate schedule and stopping threshold, forwarded
        unchanged to `VanillaGradientDescent`.
    axis_range : 1-D array
        Grid values used for both axes when drawing the surface and the contour.
    frame_time : int
        Passed to `AnimationButtons.play` (presumably the per-frame duration in
        milliseconds -- confirm against `utils`).

    Returns
    -------
    The figure built by `make_subplots`, with one animation frame per iterate.
    """
    def as_array(x):
        # Some objectives collapse a single-point evaluation to a bare scalar
        return np.array([x]) if np.isscalar(x) else x

    def grid_values(fun, vals):
        xx, yy = np.meshgrid(vals, vals)
        return fun(np.c_[xx.ravel(), yy.ravel()]).reshape(len(vals), len(vals))

    steps, _, _ = VanillaGradientDescent(f, f_grad, init, eta, delta)

    # Evaluate the objective on the grid once and share it between both views
    grid_z = grid_values(f, axis_range)
    surface = go.Surface(x=axis_range, y=axis_range, z=grid_z, opacity=.4, colorscale="Electric", showscale=False)
    contour = go.Contour(x=axis_range, y=axis_range, z=grid_z, opacity=.4, colorscale="Electric")

    # Frame-invariant pieces: the data scatter and the x-extent of the fitted line
    data_points = go.Scatter(x=X[:, 0], y=y, marker=dict(size=5, color="black"), mode = 'markers', showlegend=False)
    x_lo, x_hi = X[:, 0].min(), X[:, 0].max()

    # Objective value of every iterate, computed once instead of re-evaluating
    # a growing prefix of the path for each frame
    z = as_array(f(steps))
    n_steps = steps.shape[0]

    frames = []
    for i in range(1, n_steps + 1):
        slope, intercept = steps[i-1,0], steps[i-1,1]
        frames.append(go.Frame(data=[
            # 3D visualization of progress: path so far, then the current iterate
            go.Scatter3d(x=steps[:i,0], y=steps[:i,1], z=z[:i],
                         marker=dict(size=3, color="black"), showlegend=False),
            go.Scatter3d(x=[slope], y=[intercept], z=[z[i-1]],
                         marker=dict(size=5, color="orange"), showlegend=False),
            surface,

            # 2D visualization of progress: path so far, then the current iterate
            go.Scatter(x=steps[:i,0], y=steps[:i,1],
                       marker=dict(size=3, color="black"), showlegend=False),
            go.Scatter(x=[slope], y=[intercept],
                       marker=dict(size=5, color="orange"), showlegend=False),
            contour,

            # Visualization of regression line and data
            data_points,
            go.Scatter(x=[x_lo, x_hi],
                       y=[x_lo*slope + intercept, x_hi*slope + intercept],
                       marker=dict(size=3, color="red"), mode='lines', showlegend=False)],
            traces=[0, 1, 2, 3, 4, 5, 6, 7],
            layout=go.Layout(title=rf"$\text{{Iteration }} {i}/{n_steps}$" )))

    # The first frame seeds the static figure; traces 0-2 go to the 3-D scene,
    # 3-5 to the 2-D axes and 6-7 to the full-width bottom panel
    return make_subplots(rows=2, cols=2, specs=[[{'type':'scene'}, {}], [{'colspan':2}, None]],
                         subplot_titles=("3D Visualization Of Function", "2D Visualization Of Function", "Fitted Model"))\
        .add_traces(frames[0]["data"], rows=[1, 1, 1, 1, 1, 1, 2, 2], cols=[1, 1, 1, 2, 2, 2, 1, 1])\
        .update(frames = frames)\
        .update_layout(width=900, height = 500, title = frames[0].layout.title,
                       updatemenus = [dict(type="buttons", buttons=[AnimationButtons.play(frame_time,0),
                                                                    AnimationButtons.pause()])])\
        .update_yaxes(range=[-3, 3], row=2, col=1)


# Demo: fit the synthetic regression data by gradient descent on the RSS,
# starting from (slope, intercept) = (4.5, -4) with a constant step size of 0.01.
Animate_GradientDescent_regression(
    *residual_sum_of_squares(X, y),
    X,
    y,
    init=np.array([4.5, -4]),
    eta=lambda t: 0.01,
    delta=0.0001,
    axis_range=np.linspace(-5, 5, 50),
    frame_time=1000,
)

In [None]:
def non_convex_function():
    """Build the non-convex objective f(x, y) = sin(x*y) / sqrt(x^2 + y^2) and its gradient.

    Returns
    -------
    (_evaluate, _gradient) : tuple of callables
        `_evaluate(x)` accepts a batch of points of shape (m, 2) (an array or a
        sequence of length-2 arrays) and returns the m values, or a single point
        of shape (2,) and returns a scalar.
        `_gradient(x)` returns the gradient at a single point of shape (2,).

    Both callables divide by the distance from the origin, so they are undefined
    at (0, 0).
    """
    def _evaluate(x: np.ndarray):
        pts = np.asarray(x)
        # A single point is evaluated as a batch of one and unwrapped afterwards
        single_point = pts.ndim == 1
        pts = np.atleast_2d(pts)
        z = np.sin(pts[:, 0] * pts[:, 1]) / np.sqrt(pts[:, 0]**2 + pts[:, 1]**2)

        return z[0] if single_point else z


    def _gradient(x: np.ndarray):
        X, Y = x[0], x[1]
        # Quotient rule on sin(XY) / r with r = sqrt(X^2 + Y^2), over the common denominator r^3
        a = np.array([(Y*np.cos(X*Y)*(X**2 + Y**2) - X*np.sin(X*Y)) / (X**2 + Y**2)**(1.5),
                     (X*np.cos(X*Y)*(X**2 + Y**2) - Y*np.sin(X*Y)) / (X**2 + Y**2)**(1.5)])
        return a

    return _evaluate, _gradient


# Demo: descend the non-convex objective from a random start in [-5, 5]^2
# with a constant step size of 0.2.
Animate_GradientDescent(
    *non_convex_function(),
    init=np.random.uniform(low=-5, high=5, size=2),
    eta=lambda t: 2 * 0.1,
    delta=0.001,
    axis_range=np.linspace(-5, 5, 50),
)
