From 494a7f120e63ea3d999d46772b5c3b06b18280e9 Mon Sep 17 00:00:00 2001
From: kp992
Date: Mon, 1 Apr 2024 20:02:29 +0530
Subject: [PATCH 1/2] Add visual comparison of HPI, VFI and OPI outputs using
 plots.

---
 lectures/opt_invest.md | 66 +++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/lectures/opt_invest.md b/lectures/opt_invest.md
index 5c0a8231..f3a741f6 100644
--- a/lectures/opt_invest.md
+++ b/lectures/opt_invest.md
@@ -346,7 +346,6 @@ def value_function_iteration(model, tol=1e-5):
 
 For OPI we will use a compiled JAX `lax.while_loop` operation to speed execution.
 
-
 ```{code-cell} ipython3
 def opi_loop(params, sizes, arrays, m, tol, max_iter):
     """
@@ -389,7 +388,6 @@ def optimistic_policy_iteration(model, m=10, tol=1e-5, max_iter=10_000):
 
 Here's HPI
 
-
 ```{code-cell} ipython3
 def howard_policy_iteration(model, maxiter=250):
     """
@@ -408,62 +406,85 @@ def howard_policy_iteration(model, maxiter=250):
     return σ
 ```
 
-
 ```{code-cell} ipython3
 :tags: [hide-output]
 
 model = create_investment_model()
+constants, sizes, arrays = model
+β, a_0, a_1, γ, c = constants
+y_size, z_size = sizes
+y_grid, z_grid, Q = arrays
+```
+
+```{code-cell} ipython3
+:tags: [hide-output]
+
 print("Starting HPI.")
 qe.tic()
-out = howard_policy_iteration(model)
+σ_star_hpi = howard_policy_iteration(model)
 elapsed = qe.toc()
-print(out)
+print(σ_star_hpi)
 print(f"HPI completed in {elapsed} seconds.")
 ```
 
+Here's the plot of the Howard policy, as a function of $y$ at the highest and lowest values of $z$.
+
+```{code-cell} ipython3
+fig, ax = plt.subplots(figsize=(9, 5))
+ax.plot(y_grid, y_grid, "k--", label="45")
+ax.plot(y_grid, y_grid[σ_star_hpi[:, 1]], label="$\\sigma^{*}_{HPI}(\cdot, z_1)$")
+ax.plot(y_grid, y_grid[σ_star_hpi[:, -1]], label="$\\sigma^{*}_{HPI}(\cdot, z_N)$")
+ax.legend(fontsize=12)
+plt.show()
+```
+
 ```{code-cell} ipython3
 :tags: [hide-output]
 
 print("Starting VFI.")
 qe.tic()
-out = value_function_iteration(model)
+σ_star_vfi = value_function_iteration(model)
 elapsed = qe.toc()
-print(out)
+print(σ_star_vfi)
 print(f"VFI completed in {elapsed} seconds.")
 ```
 
+Here's the plot of the VFI policy, as a function of $y$ at the highest and lowest values of $z$.
+
+```{code-cell} ipython3
+fig, ax = plt.subplots(figsize=(9, 5))
+ax.plot(y_grid, y_grid, "k--", label="45")
+ax.plot(y_grid, y_grid[σ_star_vfi[:, 1]], label="$\\sigma^{*}_{VFI}(\cdot, z_1)$")
+ax.plot(y_grid, y_grid[σ_star_vfi[:, -1]], label="$\\sigma^{*}_{VFI}(\cdot, z_N)$")
+ax.legend(fontsize=12)
+plt.show()
+```
+
 ```{code-cell} ipython3
 :tags: [hide-output]
 
 print("Starting OPI.")
 qe.tic()
-out = optimistic_policy_iteration(model, m=100)
+σ_star_opi = optimistic_policy_iteration(model, m=100)
 elapsed = qe.toc()
-print(out)
+print(σ_star_opi)
 print(f"OPI completed in {elapsed} seconds.")
 ```
 
-Here's the plot of the Howard policy, as a function of $y$ at the highest and lowest values of $z$.
+Here's the plot of the OPI policy, as a function of $y$ at the highest and lowest values of $z$.
 
 ```{code-cell} ipython3
-model = create_investment_model()
-constants, sizes, arrays = model
-β, a_0, a_1, γ, c = constants
-y_size, z_size = sizes
-y_grid, z_grid, Q = arrays
-```
-
-```{code-cell} ipython3
-σ_star = howard_policy_iteration(model)
-
 fig, ax = plt.subplots(figsize=(9, 5))
 ax.plot(y_grid, y_grid, "k--", label="45")
-ax.plot(y_grid, y_grid[σ_star[:, 1]], label="$\\sigma^*(\cdot, z_1)$")
-ax.plot(y_grid, y_grid[σ_star[:, -1]], label="$\\sigma^*(\cdot, z_N)$")
+ax.plot(y_grid, y_grid[σ_star_opi[:, 1]], label="$\\sigma^{*}_{OPI}(\cdot, z_1)$")
+ax.plot(y_grid, y_grid[σ_star_opi[:, -1]], label="$\\sigma^{*}_{OPI}(\cdot, z_N)$")
 ax.legend(fontsize=12)
 plt.show()
 ```
 
+The three plots above confirm that all three solvers produce the same optimal policy.
+
+
 Let's plot the time taken by each of the solvers and compare them.
 
 ```{code-cell} ipython3
@@ -471,7 +492,6 @@ m_vals = range(5, 600, 40)
 ```
 
 ```{code-cell} ipython3
-model = create_investment_model()
 print("Running Howard policy iteration.")
 qe.tic()
 σ_pi = howard_policy_iteration(model)

From 968bd669b60fa16fe98702fbd2f29ae41e1494eb Mon Sep 17 00:00:00 2001
From: kp992
Date: Mon, 1 Apr 2024 20:18:17 +0530
Subject: [PATCH 2/2] Add visual comparison of three outputs

---
 lectures/opt_savings_2.md | 86 ++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 35 deletions(-)

diff --git a/lectures/opt_savings_2.md b/lectures/opt_savings_2.md
index 0cd7ed61..ba74fa51 100644
--- a/lectures/opt_savings_2.md
+++ b/lectures/opt_savings_2.md
@@ -11,7 +11,6 @@ kernelspec:
   name: python3
 ---
 
-
 # Optimal Savings II: Alternative Algorithms
 
 ```{include} _admonition/gpu.md
@@ -65,7 +64,6 @@ We will use the following imports:
 import quantecon as qe
 import jax
 import jax.numpy as jnp
-from collections import namedtuple
 import matplotlib.pyplot as plt
 import time
 ```
@@ -171,7 +169,6 @@ def get_greedy(v, params, sizes, arrays):
     return jnp.argmax(B(v, params, sizes, arrays), axis=-1)
 
 get_greedy = jax.jit(get_greedy, static_argnums=(2,))
-
 ```
 
 We define a function to compute the current rewards $r_\sigma$ given policy $\sigma$,
@@ -248,7 +245,6 @@ def T_σ(v, σ, params, sizes, arrays):
 
 T_σ = jax.jit(T_σ, static_argnums=(3,))
 ```
 
-
 The function below computes the value $v_\sigma$ of following policy $\sigma$.
 
 This lifetime value is a function $v_\sigma$ that satisfies
@@ -325,11 +321,8 @@ def get_value(σ, params, sizes, arrays):
     return jax.scipy.sparse.linalg.bicgstab(partial_L_σ, r_σ)[0]
 
 get_value = jax.jit(get_value, static_argnums=(2,))
-
 ```
 
-
-
 ## Iteration
@@ -374,7 +367,6 @@ iterate_policy_operator = jax.jit(iterate_policy_operator,
                                   static_argnums=(4,))
 ```
 
-
 ## Solvers
 
 Now we define the solvers, which implement VFI, HPI and OPI.
@@ -395,7 +387,6 @@ def value_function_iteration(model, tol=1e-5):
 
 For OPI we will use a compiled JAX `lax.while_loop` operation to speed execution.
 
-
 ```{code-cell} ipython3
 def opi_loop(params, sizes, arrays, m, tol, max_iter):
     """
@@ -436,7 +427,6 @@ def optimistic_policy_iteration(model, m=10, tol=1e-5, max_iter=10_000):
     return σ_star
 ```
 
-
 Here's HPI.
 
 ```{code-cell} ipython3
@@ -457,9 +447,9 @@ def howard_policy_iteration(model, maxiter=250):
     return σ
 ```
 
-## Plots
+## Tests
 
-Create a model for consumption, perform policy iteration, and plot the resulting optimal policy function.
+Let's create a consumption model, solve it with each of the three algorithms, plot the resulting optimal policy functions, and compare the time taken by each solver.
 
 ```{code-cell} ipython3
 model = create_consumption_model()
 # Unpack
 constants, sizes, arrays = model
 β, R, γ = constants
 w_size, y_size = sizes
 w_grid, y_grid, Q = arrays
 ```
 
+```{code-cell} ipython3
+print("Starting HPI.")
+start_time = time.time()
+σ_star_hpi = howard_policy_iteration(model)
+elapsed = time.time() - start_time
+print(f"HPI completed in {elapsed} seconds.")
+```
+
 ```{code-cell} ipython3
 ---
 mystnb:
   figure:
-    caption: Optimal policy function
-    name: optimal-policy-function
+    caption: Optimal policy function (HPI)
+    name: optimal-policy-function-hpi
 ---
-σ_star = howard_policy_iteration(model)
 
 fig, ax = plt.subplots()
 ax.plot(w_grid, w_grid, "k--", label="45")
-ax.plot(w_grid, w_grid[σ_star[:, 1]], label="$\\sigma^*(\cdot, y_1)$")
-ax.plot(w_grid, w_grid[σ_star[:, -1]], label="$\\sigma^*(\cdot, y_N)$")
+ax.plot(w_grid, w_grid[σ_star_hpi[:, 1]], label="$\\sigma^{*}_{HPI}(\cdot, y_1)$")
+ax.plot(w_grid, w_grid[σ_star_hpi[:, -1]], label="$\\sigma^{*}_{HPI}(\cdot, y_N)$")
 ax.legend()
 plt.show()
 ```
 
-## Tests
-
-Here's a quick test of the timing of each solver.
-
-```{code-cell} ipython3
-model = create_consumption_model()
-```
-
 ```{code-cell} ipython3
-print("Starting HPI.")
+print("Starting VFI.")
 start_time = time.time()
-out = howard_policy_iteration(model)
+σ_star_vfi = value_function_iteration(model)
 elapsed = time.time() - start_time
-print(f"HPI completed in {elapsed} seconds.")
+print(f"VFI completed in {elapsed} seconds.")
 ```
 
 ```{code-cell} ipython3
-print("Starting VFI.")
-start_time = time.time()
-out = value_function_iteration(model)
-elapsed = time.time() - start_time
-print(f"VFI completed in {elapsed} seconds.")
+---
+mystnb:
+  figure:
+    caption: Optimal policy function (VFI)
+    name: optimal-policy-function-vfi
+---
+
+fig, ax = plt.subplots()
+ax.plot(w_grid, w_grid, "k--", label="45")
+ax.plot(w_grid, w_grid[σ_star_vfi[:, 1]], label="$\\sigma^{*}_{VFI}(\cdot, y_1)$")
+ax.plot(w_grid, w_grid[σ_star_vfi[:, -1]], label="$\\sigma^{*}_{VFI}(\cdot, y_N)$")
+ax.legend()
+plt.show()
 ```
 
 ```{code-cell} ipython3
 print("Starting OPI.")
 start_time = time.time()
-out = optimistic_policy_iteration(model, m=100)
+σ_star_opi = optimistic_policy_iteration(model, m=100)
 elapsed = time.time() - start_time
 print(f"OPI completed in {elapsed} seconds.")
 ```
 
+```{code-cell} ipython3
+---
+mystnb:
+  figure:
+    caption: Optimal policy function (OPI)
+    name: optimal-policy-function-opi
+---
+
+fig, ax = plt.subplots()
+ax.plot(w_grid, w_grid, "k--", label="45")
+ax.plot(w_grid, w_grid[σ_star_opi[:, 1]], label="$\\sigma^{*}_{OPI}(\cdot, y_1)$")
+ax.plot(w_grid, w_grid[σ_star_opi[:, -1]], label="$\\sigma^{*}_{OPI}(\cdot, y_N)$")
+ax.legend()
+plt.show()
+```
+
+The three plots above confirm that all three solvers produce the same optimal policy.
+
+Now let's plot the execution times of the three algorithms to compare them.
+
 ```{code-cell} ipython3
 def run_algorithm(algorithm, model, **kwargs):
     start_time = time.time()
@@ -530,7 +547,6 @@ def run_algorithm(algorithm, model, **kwargs):
 ```
 
 ```{code-cell} ipython3
-model = create_consumption_model()
 σ_pi, pi_time = run_algorithm(howard_policy_iteration, model)
 σ_vfi, vfi_time = run_algorithm(value_function_iteration, model, tol=1e-5)
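+# Editorial sketch (not part of the original patch): a numerical check of the
+# "same output" claim made above. The policies σ_pi and σ_vfi computed just
+# above are arrays of optimal grid indices, so exact equality is the right
+# test; jnp is jax.numpy, imported at the top of the lecture.
+print("HPI and VFI policies agree:", bool(jnp.array_equal(σ_pi, σ_vfi)))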