# Code Written by:
**Shweta Tiwari**
*20 Oct 2023*

## Algorithm: RMSprop

In [1]:
import time

In [2]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install --upgrade bokeh==2.4.3



In [3]:
import numpy as np
from bokeh.plotting import figure, show, output_notebook

# Algorithm

In [4]:
%%time
def gradient_descent(F, dF, x, steps=100, lr=0.001):
    """Minimise ``F`` with fixed-step gradient descent.

    Parameters
    ----------
    F : callable
        Objective; called as ``F(x)`` after every update to record the loss.
    dF : callable
        Gradient of the objective; called as ``dF(x)``.
    x : array_like
        Starting point. It is copied, so the caller's array is never modified.
    steps : int
        Number of updates to perform.
    lr : float
        Step size applied to the gradient.

    Returns
    -------
    (x, loss)
        The final iterate and the list of ``F(x)`` values, one per step.
    """
    # Work on a private copy: the in-place update below must not leak into
    # the caller's array.
    x = np.array(x)
    # Integer/bool inputs cannot absorb fractional updates in place, so
    # promote them to float; float and complex dtypes are kept as given.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    loss = []
    for _ in range(steps):
        x -= lr * dF(x)
        loss.append(F(x))

    return x, loss

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 10.3 µs


In [5]:
%%time
def rmsprop(F, dF, x, steps=100, lr=0.001, decay=.9, eps=1e-8):
    """Minimise ``F`` with RMSprop.

    Each step divides the gradient by the square root of an exponential
    moving average of its squared values before applying the learning rate.

    Parameters
    ----------
    F : callable
        Objective; called as ``F(x)`` after every update to record the loss.
    dF : callable
        Gradient of the objective; called as ``dF(x)``.
    x : array_like
        Starting point. It is copied, so the caller's array is never modified.
    steps : int
        Number of updates to perform.
    lr : float
        Learning rate.
    decay : float
        Weight given to the previous mean-square estimate; the new squared
        gradient gets ``1 - decay``.
    eps : float
        Added to the denominator so the division stays finite.

    Returns
    -------
    (x, loss)
        The final iterate and the list of ``F(x)`` values, one per step.
    """
    # Work on a private copy: the in-place update below must not leak into
    # the caller's array.
    x = np.array(x)
    # Integer/bool inputs cannot absorb fractional updates in place, so
    # promote them to float; float and complex dtypes are kept as given.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    loss = []
    dx_mean_sqr = np.zeros(x.shape, dtype=float)

    for _ in range(steps):
        dx = dF(x)
        # running average of the element-wise squared gradient
        dx_mean_sqr = decay * dx_mean_sqr + (1 - decay) * dx ** 2
        x -= lr * dx / (np.sqrt(dx_mean_sqr) + eps)
        loss.append(F(x))

    return x, loss

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 9.78 µs


In [6]:
%%time
def rmsprop_momentum(F, dF, x, steps=100, lr=0.001, decay=.9, eps=1e-8, mu=.9):
    """Minimise ``F`` with RMSprop combined with a momentum term.

    The RMSprop-scaled gradient step is accumulated into a velocity
    (``momentum = mu * momentum + step``) and the velocity is what gets
    subtracted from ``x``.

    Parameters
    ----------
    F : callable
        Objective; called as ``F(x)`` after every update to record the loss.
    dF : callable
        Gradient of the objective; called as ``dF(x)``.
    x : array_like
        Starting point. It is copied, so the caller's array is never modified.
    steps : int
        Number of updates to perform.
    lr : float
        Learning rate.
    decay : float
        Weight given to the previous mean-square estimate; the new squared
        gradient gets ``1 - decay``.
    eps : float
        Added to the denominator so the division stays finite.
    mu : float
        Fraction of the previous velocity carried into the next step.

    Returns
    -------
    (x, loss)
        The final iterate and the list of ``F(x)`` values, one per step.
    """
    # Work on a private copy: the in-place update below must not leak into
    # the caller's array.
    x = np.array(x)
    # Integer/bool inputs cannot absorb fractional updates in place, so
    # promote them to float; float and complex dtypes are kept as given.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    loss = []
    dx_mean_sqr = np.zeros(x.shape, dtype=float)
    momentum = np.zeros(x.shape, dtype=float)

    for _ in range(steps):
        dx = dF(x)
        # running average of the element-wise squared gradient
        dx_mean_sqr = decay * dx_mean_sqr + (1 - decay) * dx ** 2
        # velocity: decayed previous velocity plus the RMSprop-scaled step
        momentum = mu * momentum + lr * dx / (np.sqrt(dx_mean_sqr) + eps)
        x -= momentum
        loss.append(F(x))

    return x, loss

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 9.06 µs


# Run

## Function

In [7]:
%%time
def F(x):
    """Return the sum of squared entries of ``A @ x - I``.

    ``A`` is the notebook-level matrix and ``I`` is the identity with
    ``len(A)`` rows, so the value is zero exactly when ``A @ x`` equals
    the identity.
    """
    identity = np.eye(len(A), dtype=float)
    return np.sum(np.square(A @ x - identity))

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 8.82 µs


In [8]:
%%time
def dF(x):
    """Return ``2 * A.T @ (A @ x - I)`` for the notebook-level matrix ``A``.

    ``I`` is the identity with ``len(A)`` rows; this is the gradient of the
    squared-residual objective ``sum((A @ x - I) ** 2)`` with respect to ``x``.
    """
    residual = A @ x - np.eye(len(A), dtype=float)
    doubled_transpose = 2 * A.T
    return doubled_transpose @ residual

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 8.58 µs


In [9]:
%%time
# 5x5 float matrix read by F and dF as the global ``A``.
A = np.array(
    [
        [2.0, 5.0, 1.0, 4.0, 6.0],
        [3.0, 5.0, 0.0, 0.0, 0.0],
        [1.0, 1.0, 0.0, 3.0, 8.0],
        [6.0, 6.0, 2.0, 2.0, 1.0],
        [8.0, 3.0, 5.0, 1.0, 4.0],
    ],
    dtype=float,
)

CPU times: user 33 µs, sys: 5 µs, total: 38 µs
Wall time: 41.5 µs


## Optimization

In [10]:
%%time
# Plain gradient descent from an all-zero start; display A @ X (rounded)
# next to the last recorded loss.
X, loss1 = gradient_descent(F, dF, np.zeros_like(A), steps=300)
np.round(A @ X, 2), loss1[-1]

CPU times: user 11.9 ms, sys: 981 µs, total: 12.8 ms
Wall time: 18.8 ms


(array([[ 0.79, -0.01,  0.18,  0.19, -0.08],
        [-0.01,  0.8 ,  0.  ,  0.2 , -0.07],
        [ 0.18,  0.  ,  0.85, -0.15,  0.07],
        [ 0.19,  0.2 , -0.15,  0.66,  0.13],
        [-0.08, -0.07,  0.07,  0.13,  0.95]]),
 0.5469198476714345)

In [11]:
%%time
# RMSprop from an all-zero start; display A @ X (rounded) next to the
# last recorded loss.
X, loss2 = rmsprop(F, dF, np.zeros_like(A), steps=300)
np.round(A @ X, 2), loss2[-1]

CPU times: user 16.5 ms, sys: 967 µs, total: 17.5 ms
Wall time: 24.8 ms


(array([[ 0.84, -0.05,  0.1 ,  0.09, -0.04],
        [-0.04,  0.82,  0.03,  0.18, -0.03],
        [ 0.12,  0.03,  0.9 , -0.09,  0.04],
        [ 0.15,  0.2 , -0.12,  0.74,  0.07],
        [-0.08, -0.09,  0.04,  0.09,  0.99]]),
 0.32394591074449003)

In [12]:
%%time
# RMSprop with momentum from an all-zero start; display A @ X (rounded)
# next to the last recorded loss.
X, loss3 = rmsprop_momentum(F, dF, np.zeros_like(A), steps=300)
np.round(A @ X, 2), loss3[-1]

CPU times: user 19.7 ms, sys: 0 ns, total: 19.7 ms
Wall time: 21.2 ms


(array([[ 0.99,  0.01,  0.  , -0.01,  0.  ],
        [-0.  ,  1.  ,  0.  , -0.  ,  0.  ],
        [-0.  ,  0.01,  1.  , -0.01,  0.  ],
        [-0.01,  0.01,  0.  ,  0.99,  0.  ],
        [-0.01,  0.01,  0.  , -0.01,  1.  ]]),
 0.0006230388777241962)

In [13]:
%%time
output_notebook()

# Loss history of the three optimisers on shared axes.
# `legend_label` replaces the deprecated `legend` glyph keyword.
plot = figure(
    title='Loss per optimisation step',
    x_axis_label='step',
    y_axis_label='F(X)',
)
plot.line(x=list(range(len(loss1))), y=loss1, color='steelblue', legend_label='gd')
plot.line(x=list(range(len(loss2))), y=loss2, color='green', legend_label='rmsprop')
plot.line(x=list(range(len(loss3))), y=loss3, color='red', legend_label='rmsprop+momentum')

show(plot)



CPU times: user 101 ms, sys: 3.74 ms, total: 105 ms
Wall time: 109 ms


# The End