@@ -11,7 +11,6 @@ kernelspec:
   name: python3
 ---
 
-
 # Optimal Savings II: Alternative Algorithms
 
 ```{include} _admonition/gpu.md
@@ -65,7 +64,6 @@ We will use the following imports:
 import quantecon as qe
 import jax
 import jax.numpy as jnp
-from collections import namedtuple
 import matplotlib.pyplot as plt
 import time
 ```
@@ -171,7 +169,6 @@ def get_greedy(v, params, sizes, arrays):
     return jnp.argmax(B(v, params, sizes, arrays), axis=-1)
 
 get_greedy = jax.jit(get_greedy, static_argnums=(2,))
-
 ```
 
 We define a function to compute the current rewards $r_\sigma$ given policy $\sigma$,
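
The rest of this sentence and the function body fall outside the hunk. For orientation, here is a hedged sketch of what `compute_r_σ` plausibly looks like under the CRRA consumption setup this lecture uses (it reuses the `jnp` import above; the details are assumptions, not the committed code):

```python
def compute_r_σ(σ, params, sizes, arrays):
    # Sketch: r_σ[i, j] = u(R w[i] + y[j] - w[σ[i, j]]) with CRRA utility
    β, R, γ = params
    w_size, y_size = sizes
    w_grid, y_grid, Q = arrays

    # Reshape grids so they broadcast over the (w, y) state space
    w = jnp.reshape(w_grid, (w_size, 1))
    y = jnp.reshape(y_grid, (1, y_size))

    # Consumption implied by the savings choice σ[i, j]
    c = R * w + y - w_grid[σ]
    return c**(1 - γ) / (1 - γ)
```
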
@@ -248,7 +245,6 @@ def T_σ(v, σ, params, sizes, arrays):
 T_σ = jax.jit(T_σ, static_argnums=(3,))
 ```
 
-
 The function below computes the value $v_\sigma$ of following policy $\sigma$.
 
 This lifetime value is a function $v_\sigma$ that satisfies
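
The displayed equation falls outside the hunk; given the operators defined above, it is the standard policy-value relation, reproduced here for reference:

$$
v_\sigma = r_\sigma + \beta P_\sigma v_\sigma,
\qquad \text{equivalently} \qquad
(I - \beta P_\sigma) v_\sigma = r_\sigma.
$$
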
@@ -325,11 +321,8 @@ def get_value(σ, params, sizes, arrays):
     return jax.scipy.sparse.linalg.bicgstab(partial_L_σ, r_σ)[0]
 
 get_value = jax.jit(get_value, static_argnums=(2,))
-
 ```
 
-
-
 ## Iteration
 
 
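Note that `get_value` hands `bicgstab` a callable rather than a matrix, so $I - \beta P_\sigma$ is never formed explicitly. A minimal self-contained illustration of that matrix-free pattern (the two-state chain and the numbers are invented for the example):

```python
import jax.numpy as jnp
from jax.scipy.sparse.linalg import bicgstab

β = 0.96                          # illustrative discount factor
P = jnp.array([[0.9, 0.1],        # toy transition matrix
               [0.2, 0.8]])
r = jnp.array([1.0, 2.0])         # toy rewards

# Matrix-free linear operator v ↦ (I - βP)v
L = lambda v: v - β * (P @ v)

v, info = bicgstab(L, r)          # solve (I - βP)v = r
print(jnp.allclose(v - β * (P @ v), r, atol=1e-4))  # True
```
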
@@ -374,7 +367,6 @@ iterate_policy_operator = jax.jit(iterate_policy_operator,
                                   static_argnums=(4,))
 ```
 
-
 ## Solvers
 
 Now we define the solvers, which implement VFI, HPI and OPI.
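
The three methods are closely related: each OPI step computes a greedy policy and then applies the policy operator $m$ times, so $m = 1$ reproduces VFI's update while large $m$ approaches HPI's exact policy evaluation. A plain-Python sketch of one such step, built from `get_greedy` and `T_σ` above (the committed `opi_loop` below uses compiled JAX control flow instead):

```python
def opi_step(v, m, params, sizes, arrays):
    # Greedy policy for the current value estimate...
    σ = get_greedy(v, params, sizes, arrays)
    # ...then m applications of the policy operator T_σ
    for _ in range(m):
        v = T_σ(v, σ, params, sizes, arrays)
    return v
```
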
@@ -395,7 +387,6 @@ def value_function_iteration(model, tol=1e-5):
 
 For OPI we will use a compiled JAX `lax.while_loop` operation to speed execution.
 
-
 ```{code-cell} ipython3
 def opi_loop(params, sizes, arrays, m, tol, max_iter):
     """
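
For readers new to it, `lax.while_loop(cond_fun, body_fun, init_state)` threads a single state value through `body_fun` for as long as `cond_fun(state)` stays true. A tiny free-standing example, unrelated to the model:

```python
import jax
import jax.numpy as jnp
from jax import lax

def fixed_point_demo():
    tol = 1e-6

    def cond(state):
        _, err = state
        return err > tol              # keep iterating until converged

    def body(state):
        x, _ = state
        x_new = 0.5 * x + 1.0         # a contraction with fixed point 2.0
        return x_new, jnp.abs(x_new - x)

    x, _ = lax.while_loop(cond, body, (0.0, jnp.inf))
    return x

print(jax.jit(fixed_point_demo)())    # ≈ 2.0
```
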
@@ -436,7 +427,6 @@ def optimistic_policy_iteration(model, m=10, tol=1e-5, max_iter=10_000):
     return σ_star
 ```
 
-
 Here's HPI.
 
 ```{code-cell} ipython3
@@ -457,9 +447,9 @@ def howard_policy_iteration(model, maxiter=250):
     return σ
 ```
 
-## Plots
+## Tests
 
-Create a model for consumption, perform policy iteration, and plot the resulting optimal policy function.
+Let's create a consumption model, solve it with all three algorithms, plot the resulting optimal policy in each case, and compare the time each solver takes.
 
 ```{code-cell} ipython3
 model = create_consumption_model()
@@ -470,55 +460,82 @@ w_size, y_size = sizes
 w_grid, y_grid, Q = arrays
 ```
 
+```{code-cell} ipython3
+print("Starting HPI.")
+start_time = time.time()
+σ_star_hpi = howard_policy_iteration(model)
+elapsed = time.time() - start_time
+print(f"HPI completed in {elapsed} seconds.")
+```
+
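A caveat on wall-clock timings like these: JAX dispatches work asynchronously, so it is safer to block on the result before stopping the clock. A hedged variant of the cell above:

```python
start_time = time.time()
σ = howard_policy_iteration(model)
σ.block_until_ready()             # wait for the device computation to finish
elapsed = time.time() - start_time
```
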
 ```{code-cell} ipython3
 ---
 mystnb:
   figure:
-    caption: Optimal policy function
-    name: optimal-policy-function
+    caption: Optimal policy function (HPI)
+    name: optimal-policy-function-hpi
 ---
-σ_star = howard_policy_iteration(model)
 
 fig, ax = plt.subplots()
 ax.plot(w_grid, w_grid, "k--", label="45")
-ax.plot(w_grid, w_grid[σ_star[:, 1]], label="$\\sigma^*(\cdot, y_1)$")
-ax.plot(w_grid, w_grid[σ_star[:, -1]], label="$\\sigma^*(\cdot, y_N)$")
+ax.plot(w_grid, w_grid[σ_star_hpi[:, 1]], label="$\\sigma^{*}_{HPI}(\cdot, y_1)$")
+ax.plot(w_grid, w_grid[σ_star_hpi[:, -1]], label="$\\sigma^{*}_{HPI}(\cdot, y_N)$")
 ax.legend()
 plt.show()
 ```
 
-## Tests
-
-Here's a quick test of the timing of each solver.
-
-```{code-cell} ipython3
-model = create_consumption_model()
-```
-
 ```{code-cell} ipython3
-print("Starting HPI.")
+print("Starting VFI.")
 start_time = time.time()
-out = howard_policy_iteration(model)
+σ_star_vfi = value_function_iteration(model)
 elapsed = time.time() - start_time
-print(f"HPI completed in {elapsed} seconds.")
+print(f"VFI completed in {elapsed} seconds.")
 ```
 
 ```{code-cell} ipython3
-print("Starting VFI.")
-start_time = time.time()
-out = value_function_iteration(model)
-elapsed = time.time() - start_time
-print(f"VFI completed in {elapsed} seconds.")
+---
+mystnb:
+  figure:
+    caption: Optimal policy function (VFI)
+    name: optimal-policy-function-vfi
+---
+
+fig, ax = plt.subplots()
+ax.plot(w_grid, w_grid, "k--", label="45")
+ax.plot(w_grid, w_grid[σ_star_vfi[:, 1]], label="$\\sigma^{*}_{VFI}(\cdot, y_1)$")
+ax.plot(w_grid, w_grid[σ_star_vfi[:, -1]], label="$\\sigma^{*}_{VFI}(\cdot, y_N)$")
+ax.legend()
+plt.show()
 ```
 
 ```{code-cell} ipython3
 print("Starting OPI.")
 start_time = time.time()
-out = optimistic_policy_iteration(model, m=100)
+σ_star_opi = optimistic_policy_iteration(model, m=100)
 elapsed = time.time() - start_time
 print(f"OPI completed in {elapsed} seconds.")
 ```
 
+```{code-cell} ipython3
+---
+mystnb:
+  figure:
+    caption: Optimal policy function (OPI)
+    name: optimal-policy-function-opi
+---
+
+fig, ax = plt.subplots()
+ax.plot(w_grid, w_grid, "k--", label="45")
+ax.plot(w_grid, w_grid[σ_star_opi[:, 1]], label="$\\sigma^{*}_{OPI}(\cdot, y_1)$")
+ax.plot(w_grid, w_grid[σ_star_opi[:, -1]], label="$\\sigma^{*}_{OPI}(\cdot, y_N)$")
+ax.legend()
+plt.show()
+```
+
+The three plots above show that all of the solvers produce the same optimal policy.
+
+Now let's plot the time taken by each algorithm, to compare their speed directly.
+
 ```{code-cell} ipython3
 def run_algorithm(algorithm, model, **kwargs):
     start_time = time.time()
@@ -530,7 +547,6 @@ def run_algorithm(algorithm, model, **kwargs):
 ```
 
 ```{code-cell} ipython3
-model = create_consumption_model()
 σ_pi, pi_time = run_algorithm(howard_policy_iteration, model)
 σ_vfi, vfi_time = run_algorithm(value_function_iteration, model, tol=1e-5)
 
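The diff ends before the comparison plot promised in the text above. A minimal sketch of what it might look like, assuming OPI is timed the same way (the `σ_opi` and `opi_time` names simply extend the pattern already in the cell):

```python
# Time OPI with the same helper, then compare all three solvers
σ_opi, opi_time = run_algorithm(optimistic_policy_iteration, model, m=100)

fig, ax = plt.subplots()
ax.bar(["HPI", "VFI", "OPI"], [pi_time, vfi_time, opi_time])
ax.set_ylabel("execution time (seconds)")
plt.show()
```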