In [1]:
# imports
import numpy as np
import matplotlib.pyplot as plt
import descent
from ipywidgets import interact
%matplotlib inline



# Helper utilities

Demonstrations of some useful helper functions and utilities in `descent`:

## Numerical gradient checks

Checks the given (analytic) objective and gradient function with a numerical comparison:

$$ \frac{f(x + \delta x) - f(x)}{\delta x} \approx \frac{\partial f}{\partial x} $$

In [2]:
def f_df(x):
    objective = 0.5 * np.linalg.norm(x)**2
    gradient = x.copy()
    gradient[4] = 5.     # Error! incorrect gradient here
    return objective, gradient

In [3]:
x0 = np.random.randn(10)
descent.check_grad(f_df, x0)

------------------------------------
Numerical  | Analytic   | Error          
------------------------------------
-0.7005    | -0.7005    | 0.000000 | [32m✔[0m
0.4443     | 0.4443     | 0.000000 | [32m✔[0m
-1.1194    | -1.1194    | 0.000000 | [32m✔[0m
0.5405     | 0.5405     | 0.000000 | [32m✔[0m
0.0658     | 5.0000     | 0.974004 | [31m✗[0m
0.6500     | 0.6500     | 0.000000 | [32m✔[0m
-1.2726    | -1.2726    | 0.000000 | [32m✔[0m
0.1944     | 0.1944     | 0.000000 | [32m✔[0m
1.2779     | 1.2779     | 0.000000 | [32m✔[0m
-0.4541    | -0.4541    | 0.000000 | [32m✔[0m


## Function wrapping

In [4]:
A = np.random.randn(10,5)
def f_df(theta):
    objective = 0.5 * np.linalg.norm(A.dot(theta['w']) - theta['b']) ** 2
    gradient = dict()
    gradient['w'] = A.T.dot(A.dot(theta['w']) - theta['b'])
    gradient['b'] = theta['b'] - A.dot(theta['w'])
    return objective, gradient

In [5]:
theta_init = {'w': np.random.randn(5,), 'b': np.random.randn(10,)}

In [6]:
descent.check_grad(f_df, theta_init)

------------------------------------
Numerical  | Analytic   | Error          
------------------------------------
2.0733     | 2.0733     | 0.000000 | [32m✔[0m
1.1768     | 1.1768     | 0.000000 | [32m✔[0m
0.0067     | 0.0067     | 0.000000 | [32m✔[0m
3.5532     | 3.5532     | 0.000000 | [32m✔[0m
-1.1525    | -1.1525    | 0.000000 | [32m✔[0m
-1.5507    | -1.5507    | 0.000000 | [32m✔[0m
-0.5462    | -0.5462    | 0.000000 | [32m✔[0m
1.7602     | 1.7602     | 0.000000 | [32m✔[0m
0.0614     | 0.0614     | 0.000000 | [32m✔[0m
6.0380     | 6.0380     | 0.000000 | [32m✔[0m
11.2097    | 11.2097    | 0.000000 | [32m✔[0m
-14.0068   | -14.0068   | 0.000000 | [32m✔[0m
-5.5207    | -5.5207    | 0.000000 | [32m✔[0m
5.0117     | 5.0117     | 0.000000 | [32m✔[0m
21.1382    | 21.1382    | 0.000000 | [32m✔[0m


In [7]:
opt = descent.GradientDescent(f_df, theta_init)
opt.display.every = 1000
opt.run(maxiter=1e4)

+-------------------+--------------------+-------------------+
|Iteration          | Objective          | Iteration runtime |
+-------------------+--------------------+-------------------+
|              1000 |            0.10456 |         358.105 μs|
|              2000 |           0.014125 |         468.969 μs|
|              3000 |          0.0019097 |         662.804 μs|
|              4000 |         0.00025819 |         347.137 μs|
|              5000 |         3.4907e-05 |         351.906 μs|
|              6000 |         4.7194e-06 |         362.873 μs|
|              7000 |         6.3806e-07 |         350.952 μs|
|              8000 |         8.6266e-08 |         357.151 μs|
|              9000 |         1.1663e-08 |         386.953 μs|
+-------------------+--------------------+-------------------+
➛ Final objective: 1.5800150781506989e-09
➛ Total runtime: 4.0832 s
➛ Per iteration runtime: 408.32 μs +/- 100.442 μs
➛ All done!



## Interrupts

In [8]:
opt = descent.GradientDescent(f_df, theta_init)
opt.display.every = 1000
opt.run(maxiter=1e5)

+-------------------+--------------------+-------------------+
|Iteration          | Objective          | Iteration runtime |
+-------------------+--------------------+-------------------+
|              1000 |            0.10456 |         416.994 μs|
|              2000 |           0.014125 |         399.113 μs|
|              3000 |          0.0019097 |         488.043 μs|
|              4000 |         0.00025819 |         383.139 μs|
|              5000 |         3.4907e-05 |         408.888 μs|
|              6000 |         4.7194e-06 |         385.046 μs|
+-------------------+--------------------+-------------------+
➛ Final objective: 1.6638919918461295e-06
➛ Total runtime: 2.81102 s
➛ Per iteration runtime: 431.006 μs +/- 96.423 μs
➛ All done!



In [None]:
opt

In [10]:
opt.run(10000)

+-------------------+--------------------+-------------------+
|Iteration          | Objective          | Iteration runtime |
+-------------------+--------------------+-------------------+
|              7000 |         6.3806e-07 |         348.091 μs|
|              8000 |         8.6266e-08 |         375.032 μs|
|              9000 |         1.1663e-08 |          474.93 μs|
|             10000 |         1.5769e-09 |         370.026 μs|
|             11000 |         2.1319e-10 |         494.003 μs|
|             12000 |         2.8823e-11 |         344.992 μs|
|             13000 |         3.8969e-12 |         365.973 μs|
|             14000 |         5.2686e-13 |         378.132 μs|
|             15000 |         7.1232e-14 |         355.959 μs|
|             16000 |         9.6305e-15 |         414.133 μs|
+-------------------+--------------------+-------------------+
➛ Final objective: 3.3953898314088257e-15
➛ Total runtime: 7.05847 s
➛ Per iteration runtime: 427.216 μs +/- 94.8643 μ