In [None]:
# === Environment Setup ===
import os, sys, math, time, random, json, textwrap, warnings
from typing import Callable
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from numba import njit, prange
from scipy.optimize import minimize_scalar
from scipy.stats import norm
from IPython.display import Image, Markdown, display

# --- Configuration ---
# Plot appearance: seaborn whitegrid theme with larger fonts and a higher DPI.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.dpi': 130,
    'font.size': 12,
    'axes.titlesize': 'x-large',
    'axes.labelsize': 'large',
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
})
# Array printing: 4 decimals, no scientific notation, lines up to 120 characters.
np.set_printoptions(precision=4, suppress=True, linewidth=120)

# --- Utility Functions ---
def note(msg, **kwargs):
    """Display ``msg`` in the notebook as an info-styled Markdown alert box.

    The message is re-wrapped to a width of 100 characters before it is
    embedded in the HTML snippet. Extra keyword arguments are accepted but
    not used.
    """
    wrapped = textwrap.fill(msg, width=100)
    html = f"<div class='alert alert-info'>📝 {wrapped}</div>"
    display(Markdown(html))
def sec(title):
    """Print ``title`` in upper case between two 100-character ``=`` rules."""
    rule = "=" * 100
    banner = "\n".join(["", rule, "| " + title.upper() + " |", rule])
    print(banner)

# Display a confirmation message once the setup statements above have run.
note("Environment initialized.")

# Part 3: Dynamic Models
## Chapter 3.1: Introduction to Stochastic Dynamic Programming

### Table of Contents
1.  [The Principle of Optimality: A Cake Eating Problem](#1.-The-Principle-of-Optimality:-A-Cake-Eating-Problem)
2.  [The Bellman Equation for Infinite Horizon Problems](#2.-The-Bellman-Equation-for-Infinite-Horizon-Problems)
    *   [2.1 The Bellman Operator and Contraction Mappings](#2.1-The-Bellman-Operator-and-Contraction-Mappings)
    *   [2.2 The State-Action Value Function (Q-Function)](#2.2-The-State-Action-Value-Function-(Q-Function))
3.  [A Canonical Model: The Stochastic Growth Model](#3.-A-Canonical-Model:-The-Stochastic-Growth-Model)
    *   [3.1 Discretizing the Income Process](#3.1-Discretizing-the-Income-Process)
    *   [3.2 Building the Model Primitives](#3.2-Building-the-Model-Primitives)
4.  [Numerical Solution Algorithms](#4.-Numerical-Solution-Algorithms)
    *   [4.1 Value Function Iteration (VFI)](#4.1-Value-Function-Iteration-(VFI))
    *   [4.2 Policy Function Iteration (PFI)](#4.2-Policy-Function-Iteration-(PFI))
    *   [4.3 Howard's Policy Improvement Algorithm](#4.3-Howard's-Policy-Improvement-Algorithm)
5.  [Analysis of the Solution](#5.-Analysis-of-the-Solution)
    *   [5.1 The Value and Policy Functions](#5.1-The-Value-and-Policy-Functions)
    *   [5.2 Visualizing VFI Convergence](#5.2-Visualizing-VFI-Convergence)
6.  [Simulating the Stationary Distribution](#6.-Simulating-the-Stationary-Distribution)
7.  [Chapter Summary](#7.-Chapter-Summary)
8.  [Exercises](#8.-Exercises)

### Introduction: The Logic of Sequential Decisions
Dynamic programming (DP) is the essential framework for analyzing problems that involve a sequence of decisions over time. The core insight, developed by Richard Bellman, is the **principle of optimality**: an optimal policy has the property that whatever the initial state and initial decision are, the remaining decisions must constitute an optimal policy with regard to the state resulting from the first decision.

This allows us to break down a single, complex, infinite-horizon problem into a sequence of smaller, simpler, two-period problems. This recursive structure is formalized in the **Bellman equation**.

This chapter will:
1.  Illustrate the core concepts with a simple **cake eating problem** solved via backward induction.
2.  Introduce the **Bellman operator** and the **Contraction Mapping Theorem** that guarantees a unique solution exists for infinite horizon problems.
3.  Implement the core solution algorithms—**Value Function Iteration (VFI)**, **Policy Function Iteration (PFI)**, and **Howard's Policy Improvement**—from scratch.
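4.  Solve a canonical household savings problem with uninsurable income risk (the Bewley-Huggett-Aiyagari model), which this chapter refers to as the **stochastic growth model**.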
5.  Simulate the model to find the **stationary distribution** of wealth.

### 1. The Principle of Optimality: A Cake Eating Problem
To build intuition, we start with one of the simplest DP problems: the "cake eating" problem. An agent has a cake of size $x_t$ at time $t$. They must decide how much to eat, $c_t$, and how much to leave, $x_{t+1}$. The problem is finite, lasting for $T+1$ periods ($t = 0, 1, \dots, T$).
The agent's problem is to maximize lifetime utility: $ \max_{c_0, ..., c_T} \sum_{t=0}^{T} \beta^t u(c_t) $, subject to the resource constraint $x_{t+1} = x_t - c_t$. We solve this using **backward induction**.

- **Period T:** The agent eats whatever is left. The value function is $V_T(x_T) = u(x_T)$.
- **Period T-1:** The agent chooses $c_{T-1}$ to maximize current plus discounted future utility. This gives the Bellman equation:
$$ V_{T-1}(x_{T-1}) = \max_{0 \le c_{T-1} \le x_{T-1}} \{ u(c_{T-1}) + \beta V_T(x_{T-1} - c_{T-1}) \} $$

In [None]:
sec("Solving the Finite Horizon Cake Eating Problem")

# Last period index, discount factor, and the grid of cake sizes.
T = 10
BETA_CE = 0.95
X_GRID = np.linspace(0.01, 10, 100)


@njit
def u_log(c):
    """Return the log utility of consumption ``c``."""
    return np.log(c)

# One row per period 0..T and one column per cake-size grid point.
V_storage = np.zeros((T + 1, X_GRID.size))
C_storage = np.zeros_like(V_storage)

# Terminal period: the whole remaining cake is eaten.
V_storage[T] = u_log(X_GRID)
C_storage[T] = X_GRID

# Walk backwards from period T-1 to period 0.
for t in reversed(range(T)):
    V_next = V_storage[t + 1]
    for i, x in enumerate(X_GRID):
        def objective(c):
            """Negated Bellman objective, so that a minimizer maximizes value."""
            # Next period's value is linearly interpolated on the cake grid.
            continuation = np.interp(x - c, X_GRID, V_next)
            return -(u_log(c) + BETA_CE * continuation)

        # Consumption is searched over the interval (1e-6, x).
        res = minimize_scalar(objective, method='bounded', bounds=(1e-6, x))
        V_storage[t, i], C_storage[t, i] = -res.fun, res.x

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: value functions at the first, second-to-last and last period.
ax1.plot(X_GRID, V_storage[0, :], label='V_0(x)')
ax1.plot(X_GRID, V_storage[T-1, :], label=f'V_{T-1}(x)')
ax1.plot(X_GRID, V_storage[T, :], label=f'V_T(x)')
ax1.set_title('Value Functions')
ax1.set(xlabel='Cake Size (x)', ylabel='Value')
ax1.legend()

# Right panel: the matching consumption policies.
ax2.plot(X_GRID, C_storage[0, :], label='c_0(x)')
ax2.plot(X_GRID, C_storage[T-1, :], label=f'c_{T-1}(x)')
ax2.plot(X_GRID, C_storage[T, :], label=f'c_T(x)')
ax2.set_title('Policy Functions (Consumption)')
ax2.set(xlabel='Cake Size (x)', ylabel='Consumption (c)')
ax2.legend()

plt.tight_layout()
plt.show()

### 2. The Bellman Equation for Infinite Horizon Problems
For an infinite horizon stochastic problem, the Bellman equation takes a more general, recursive form:
$$ V(s) = \max_{a \in \Gamma(s)} \{ u(s, a) + \beta E[V(s')] \} $$
where $s$ is the state, $a$ is the action, $u(s,a)$ is the reward, and $s'$ is the next period's state. The solution is a **value function** $V(s)$ and a **policy function** $\sigma(s) = \arg\max \{...\}$.

#### 2.1 The Bellman Operator and Contraction Mappings
The **Bellman Operator**, $T$, takes a candidate value function $v$ and returns a new function $Tv$:
$$ (Tv)(s) = \max_{a \in \Gamma(s)} \{ u(s, a) + \beta E[v(s')] \} $$
A solution to the Bellman equation is a **fixed point** of this operator, i.e., $TV = V$. If $\beta < 1$ and the utility function is bounded, the Bellman operator is a **contraction mapping** on the space of bounded continuous functions. This is because applying the operator shrinks the distance between any two functions $v_1$ and $v_2$ under the sup-norm:
$$ ||Tv_1 - Tv_2||_\infty = \sup_s |(Tv_1)(s) - (Tv_2)(s)| \le \beta \sup_s |v_1(s) - v_2(s)| = \beta ||v_1 - v_2||_\infty $$
The **Contraction Mapping Theorem** then guarantees that $T$ has a unique fixed point $V^*$, and for any initial guess $v_0$, the sequence $v_{k+1} = T v_k$ will converge to $V^*$. This justifies **Value Function Iteration (VFI)**.

#### 2.2 The State-Action Value Function (Q-Function)
An alternative representation, central to reinforcement learning, is the **state-action value function**, or **Q-function**. It represents the value of being in state $s$ and taking action $a$:
$$ Q(s, a) = u(s, a) + \beta E[V(s')] $$
The value function is then simply the maximum over all possible actions: $V(s) = \max_a Q(s, a)$. Substituting this into the first equation gives a Bellman equation for Q:
$$ Q(s, a) = u(s, a) + \beta E[\max_{a'} Q(s', a')] $$

### 3. A Canonical Model: The Stochastic Growth Model
We solve a version of the Bewley-Huggett-Aiyagari model of household savings with uninsurable income risk. The Bellman equation is:
$$ V(a, y) = \max_{0 < a' \le (1+r)a+y} \left\{ u((1+r)a+y-a') + \beta \sum_{y'} P(y'|y) V(a', y') \right\} $$

#### 3.1 Discretizing the Income Process
We assume log-income, $z_t = \log(y_t)$, follows a stationary AR(1) process: $z_{t+1} = \rho z_t + \sigma \epsilon_{t+1}$, with $|\rho| < 1$ and $\epsilon_{t+1} \sim N(0, 1)$ i.i.d. We use **Tauchen's method** to approximate this with a finite-state Markov chain.

In [None]:
# Print the banner that opens the income-discretization cell.
sec("Discretizing the Income Process")
def tauchen(rho, sigma_u, m=3, n=7):
    """Discretize the AR(1) process z' = rho * z + sigma_u * eps, eps ~ N(0, 1).

    Implements Tauchen's method: the grid spans ``m`` unconditional standard
    deviations on each side of zero, and the transition probabilities are
    the normal probability mass of the bin around each grid point.

    This function is intentionally not ``@njit``-compiled:
    ``scipy.stats.norm.cdf`` cannot be used in Numba's nopython mode, and the
    function runs once on a small grid.

    Parameters
    ----------
    rho : float
        Persistence of the AR(1) process; must satisfy ``|rho| < 1``.
    sigma_u : float
        Standard deviation of the innovation; must be positive.
    m : float, optional
        Half-width of the grid in unconditional standard deviations.
    n : int, optional
        Number of grid points; must be at least 2.

    Returns
    -------
    z_grid : np.ndarray, shape (n,)
        Evenly spaced grid for z, symmetric around zero.
    P : np.ndarray, shape (n, n)
        Transition matrix with ``P[i, j] = Prob(z' = z_grid[j] | z = z_grid[i])``.
        Every row sums to one.

    Raises
    ------
    ValueError
        If ``n < 2``, ``|rho| >= 1`` or ``sigma_u <= 0``.
    """
    if n < 2:
        raise ValueError("n must be at least 2")
    if not abs(rho) < 1:
        raise ValueError("rho must satisfy |rho| < 1")
    if not sigma_u > 0:
        raise ValueError("sigma_u must be positive")

    z_max = m * sigma_u / np.sqrt(1 - rho**2)
    z_grid = np.linspace(-z_max, z_max, n)
    step = (z_grid[-1] - z_grid[0]) / (n - 1)

    # upper_cdf[i, j] = Prob(z' <= z_grid[j] + step/2 | z = z_grid[i]).
    cond_mean = rho * z_grid[:, None]
    upper_cdf = norm.cdf((z_grid[None, :] + step / 2 - cond_mean) / sigma_u)

    P = np.empty((n, n))
    # The two boundary bins absorb the tails; otherwise rows would not sum to 1.
    P[:, 0] = upper_cdf[:, 0]
    P[:, -1] = 1.0 - upper_cdf[:, -2]
    # Interior bins: each bin's lower edge is the previous bin's upper edge.
    P[:, 1:-1] = upper_cdf[:, 1:-1] - upper_cdf[:, :-2]
    return z_grid, P

# Persistence and innovation scale passed to the Tauchen discretization.
rho_y = 0.95
sigma_y = 0.1

# Discretize log-income, then exponentiate the grid to get income levels.
z_grid, P_TRANS = tauchen(rho_y, sigma_y)
Y_STATES = np.exp(z_grid)

#### 3.2 Building the Model Primitives
To solve the model, we need to create the reward matrix `R` and the transition probability matrix `Q`. The state is the tuple `(assets, income)`, and the action is the choice of next period's assets `a'`. `R[s, a]` is the utility from being in state `s` and choosing action `a`. `Q[s, a, s']` is the probability of transitioning to state `s'` given state `s` and action `a`.

In [None]:
sec("Building the Model Primitives")

# --- 1. Define Model Parameters ---
sgm_params = dict(
    beta=0.96,                           # Discount factor
    gamma=2.0,                           # CRRA coefficient
    r=0.03,                              # Risk-free interest rate
    a_grid=np.linspace(0.01, 50, 150),   # Asset grid
    y_states=Y_STATES,                   # Income states from Tauchen
    P_trans=P_TRANS,                     # Transition matrix from Tauchen
)

# Number of asset grid points and number of income states.
n_a, n_y = (len(sgm_params[key]) for key in ('a_grid', 'y_states'))

# --- 2. Create Reward and Transition Matrices ---
@njit
def u_crra(c, gamma):
    """Return CRRA utility of consumption ``c`` with risk aversion ``gamma``.

    Computes ``c**(1 - gamma) / (1 - gamma)``. At ``gamma == 1`` that formula
    divides by zero, so log utility (its limiting case) is returned instead.
    """
    if gamma == 1.0:
        return np.log(c)
    return (c**(1 - gamma)) / (1 - gamma)

# Reward array indexed [assets, income, next assets]. Entries keep a large
# negative penalty wherever the implied consumption is not strictly positive.
R = np.full((n_a, n_y, n_a), -1e10)
for i, j, k in np.ndindex(n_a, n_y, n_a):
    a = sgm_params['a_grid'][i]
    y = sgm_params['y_states'][j]
    a_prime = sgm_params['a_grid'][k]
    # Budget constraint: consumption is cash on hand minus savings.
    c = (1 + sgm_params['r']) * a + y - a_prime
    if c > 0:
        R[i, j, k] = u_crra(c, sgm_params['gamma'])

# Transition array indexed [assets, income, next assets, next income]. Only
# the current income index changes the entries: Q[i, j, k, :] is row j of
# the income transition matrix.
Q = np.zeros((n_a, n_y, n_a, n_y))
for j in range(n_y):
    Q[:, j, :, :] = sgm_params['P_trans'][j, :]

note("Built R and Q matrices for the stochastic growth model.")

### 4. Numerical Solution Algorithms
For pedagogical clarity, we now implement the core solution algorithms directly in the notebook, using Numba's `@njit` decorator to ensure high performance.

#### 4.1 Value Function Iteration (VFI)
VFI directly implements the Bellman operator as an update rule.

In [None]:
@njit
def value_function_iteration(R, Q, beta, tol=1e-6, max_iter=1000):
    """Solves a discrete DP model using value function iteration.

    The action index is the index of next period's first state component,
    so the continuation value of action ``a`` from state ``(s_i, s_j)`` is
    ``sum_l Q[s_i, s_j, a, l] * V[a, l]``. The sum is written as explicit
    loops because Numba's ``@`` only supports 1-D and 2-D arrays. All array
    sizes are read from ``R``, not from notebook globals.

    Parameters
    ----------
    R : np.ndarray, shape (n_s1, n_s2, n_a)
        Reward matrix.
    Q : np.ndarray, shape (n_s1, n_s2, n_a, n_s2)
        State transition probability matrix.
    beta : float
        Discount factor.
    tol : float, optional
        Tolerance for convergence (sup-norm distance between iterates).
    max_iter : int, optional
        Maximum number of iterations.

    Returns
    -------
    V : np.ndarray, shape (n_s1, n_s2)
        The most recent iterate: the converged value function, or the last
        iterate if ``max_iter`` was reached first.
    history : list
        Flattened value function after each iteration, including the last.
    """
    n_s1, n_s2, n_act = R.shape
    V = np.zeros((n_s1, n_s2))
    history = []
    for it in range(max_iter):
        V_new = np.empty_like(V)
        for s_i in range(n_s1):
            for s_j in range(n_s2):
                best = -np.inf
                for a in range(n_act):
                    # Expected continuation value of choosing action a.
                    ev = 0.0
                    for s_next in range(n_s2):
                        ev += Q[s_i, s_j, a, s_next] * V[a, s_next]
                    q = R[s_i, s_j, a] + beta * ev
                    if q > best:
                        best = q
                V_new[s_i, s_j] = best

        diff = np.max(np.abs(V - V_new))
        # Adopt the new iterate before testing, so the freshest one is returned.
        V = V_new
        history.append(V.flatten())
        if diff < tol:
            break
    return V, history

note("VFI algorithm compiled.")

#### 4.2 Policy Function Iteration (PFI)
PFI alternates between evaluating a policy and improving upon it.

In [None]:
@njit
def policy_function_iteration(R, Q, beta, tol=1e-6, max_iter=100):
    """Placeholder for exact policy function iteration; always returns None.

    No algorithm is implemented here and the arguments are not used.
    """
    # Left unimplemented: it is harder to write efficiently with Numba
    # because of the matrix inversion. This course focuses on VFI and
    # Howard's PFI instead.
    return None
note("PFI is conceptually important but often less practical to implement from scratch than VFI.")

#### 4.3 Howard's Policy Improvement Algorithm
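Howard's PFI is a more practical variant that avoids the costly matrix inversion of the policy evaluation step. Instead, it approximates the value of the current policy by applying the policy-specific Bellman operator (the Bellman update with the action held fixed at the current policy, so no maximization is needed) a fixed, small number of times.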

In [None]:
@njit
def _hpi_evaluate_step(R, Q, beta, V, policy):
    """Apply the Bellman update once with actions fixed at ``policy``."""
    n_s1, n_s2, n_act = R.shape
    V_next = np.empty_like(V)
    for s_i in range(n_s1):
        for s_j in range(n_s2):
            a = policy[s_i, s_j]
            ev = 0.0
            for s_next in range(n_s2):
                ev += Q[s_i, s_j, a, s_next] * V[a, s_next]
            V_next[s_i, s_j] = R[s_i, s_j, a] + beta * ev
    return V_next


@njit
def _hpi_improve(R, Q, beta, V):
    """Return the greedy policy for ``V`` (first maximizer on ties)."""
    n_s1, n_s2, n_act = R.shape
    policy = np.empty((n_s1, n_s2), dtype=np.int32)
    for s_i in range(n_s1):
        for s_j in range(n_s2):
            best_a = 0
            best_q = -np.inf
            for a in range(n_act):
                ev = 0.0
                for s_next in range(n_s2):
                    ev += Q[s_i, s_j, a, s_next] * V[a, s_next]
                q = R[s_i, s_j, a] + beta * ev
                if q > best_q:
                    best_q = q
                    best_a = a
            policy[s_i, s_j] = best_a
    return policy


@njit
def howard_policy_iteration(R, Q, beta, m=20, max_iter=200, tol=1e-6):
    """Solves a discrete DP model using Howard's Policy Iteration.

    Each outer iteration applies ``m`` fixed-policy Bellman updates (partial
    policy evaluation) and then makes the policy greedy with respect to the
    result. Once the policy stops changing, the fixed-policy update is
    repeated until successive value functions differ by less than ``tol``,
    so that the returned ``V`` is the value of the returned policy rather
    than a partial evaluation.

    The action index is the index of next period's first state component,
    so the continuation value of action ``a`` is
    ``sum_l Q[s_i, s_j, a, l] * V[a, l]``. Array sizes are read from ``R``.

    Parameters
    ----------
    R : np.ndarray, shape (n_s1, n_s2, n_a)
        Reward matrix.
    Q : np.ndarray, shape (n_s1, n_s2, n_a, n_s2)
        State transition probability matrix.
    beta : float
        Discount factor.
    m : int, optional
        Number of fixed-policy updates to run in each evaluation phase.
    max_iter : int, optional
        Maximum number of policy improvement iterations.
    tol : float, optional
        Sup-norm tolerance for the final evaluation of the policy.

    Returns
    -------
    V : np.ndarray, shape (n_s1, n_s2)
        Value of the returned policy, evaluated to within ``tol`` unless the
        final pass hits its cap of ``max_iter * m`` updates.
    policy : np.ndarray of int32, shape (n_s1, n_s2)
        Action index chosen in each state.
    """
    n_s1, n_s2, n_act = R.shape
    V = np.zeros((n_s1, n_s2))
    policy = np.zeros((n_s1, n_s2), dtype=np.int32)
    for outer in range(max_iter):
        # Partial policy evaluation: m updates with the policy held fixed.
        for step in range(m):
            V = _hpi_evaluate_step(R, Q, beta, V, policy)

        # Policy improvement.
        policy_new = _hpi_improve(R, Q, beta, V)
        if np.array_equal(policy, policy_new):
            break
        policy = policy_new

    # Finish evaluating the final policy; capped so the loop always ends.
    for step in range(max_iter * m):
        V_next = _hpi_evaluate_step(R, Q, beta, V, policy)
        gap = np.max(np.abs(V_next - V))
        V = V_next
        if gap < tol:
            break
    return V, policy

note("Howard's PFI algorithm compiled.")

### 5. Analysis of the Solution
Now we solve the model and analyze the results.

In [None]:
sec("Solving the SGM and Comparing Runtimes")
BETA = sgm_params['beta']
print("Solving with VFI...")
%time V_star_vfi, v_history = value_function_iteration(R, Q, BETA)

print("\nSolving with Howard's PFI...")
%time V_star_hpi, policy_indices_hpi = howard_policy_iteration(R, Q, BETA)

# Get the final policy from VFI for comparison
EV = Q @ V_star_vfi
policy_indices_vfi = np.empty((n_a, n_y), dtype=np.int32)
for s_i in range(n_a):
    for s_j in range(n_y):
        q_s = R[s_i, s_j, :] + BETA * EV[s_i, s_j, :, :].sum(axis=1)
        policy_indices_vfi[s_i, s_j] = np.argmax(q_s)

#### 5.1 The Value and Policy Functions
- **Value Function:** $V(a,y)$ is increasing and concave in assets $a$, reflecting diminishing marginal utility.
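- **Policy Function:** $\sigma(a,y)$ shows the optimal level of assets to hold next period. Because income is persistent, a household with high current income chooses a higher $a'$ than a household with low income at the same asset level. The model also generates **precautionary savings**: because income risk cannot be insured, households hold more assets than they would if income were certain, as a buffer against future bad shocks.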

In [None]:
sec("Analyzing the Solution")

# --- 1. Interactive 3D Value Function Plot ---
import plotly.graph_objects as go
# With default indexing, meshgrid returns arrays with one row per income
# state and one column per asset level, so the value function is transposed
# to match.
A_grid_mesh, Y_grid_mesh = np.meshgrid(sgm_params['a_grid'], sgm_params['y_states'])

fig = go.Figure(data=[go.Surface(z=V_star_vfi.T, x=A_grid_mesh, y=Y_grid_mesh, colorscale='Viridis')])
fig.update_layout(title='Interactive 3D Value Function V(a, y)',
                  scene = dict(
                      xaxis_title='Assets (a)',
                      yaxis_title='Income (y)',
                      zaxis_title='Value'),
                  autosize=False, width=800, height=600,
                  margin=dict(l=65, r=50, b=65, t=90))
fig.show()

# --- 2. 2D Policy Function Plot ---
# Map policy indices to asset levels; columns 0 and -1 are the lowest and
# highest income states.
policy_star = sgm_params['a_grid'][policy_indices_vfi]
fig, ax = plt.subplots(figsize=(10, 7))
# Raw strings keep the LaTeX backslash without an invalid '\s' escape.
ax.plot(sgm_params['a_grid'], policy_star[:, 0], label=r'$\sigma(a, y_L)$')
ax.plot(sgm_params['a_grid'], policy_star[:, -1], label=r'$\sigma(a, y_H)$')
ax.plot(sgm_params['a_grid'], sgm_params['a_grid'], 'k--', label="45-degree line (a'=a)")
ax.set_title("Policy Functions: Next Period's Assets (a')")
ax.set(xlabel='Current Assets (a)', ylabel="Next Period's Assets (a')")
ax.legend()
plt.show()

#### 5.2 Visualizing VFI Convergence
The plot below provides a direct visualization of the **Contraction Mapping Theorem** in action. We start with an initial guess ($V_0=0$) and repeatedly apply the Bellman operator. Each application brings the function closer to the true value function, with the distance between successive iterates shrinking, until it converges to the unique fixed point.

In [None]:
sec("Visualizing VFI Convergence")
fig, ax = plt.subplots(figsize=(10, 6))
# Entries of v_history to draw; entries beyond the stored history are skipped.
plot_indices = [0, 2, 5, 10, 20, 40, 60, 100]
# The asset grid lives in sgm_params; no A_GRID variable is defined.
asset_grid = sgm_params['a_grid']
for i in plot_indices:
    if i < len(v_history):
        # History entries are flattened, so restore the (assets, income) shape.
        V_interim = v_history[i].reshape(n_a, n_y)
        ax.plot(asset_grid, V_interim[:, -1], label=f'Iteration {i}')

ax.plot(asset_grid, V_star_vfi[:, -1], color='k', lw=2, label='Converged V')
ax.set_title('Convergence of the Value Function (High Income State)')
ax.set(xlabel='Assets (a)', ylabel='Value'); ax.legend(loc='upper left')
plt.show()

### 6. Simulating the Stationary Distribution
After solving for the optimal policy, a key economic question is what the long-run distribution of assets looks like. We can find this by simulating a large panel of households over a long time horizon, where each household follows the optimal policy rule.

In [None]:
sec("Simulating the Stationary Asset Distribution")
@njit
def _simulate_state_indices(policy_indices, P_trans, n_agents, t_steps, seed):
    """Simulate the panel and return each agent's final asset grid index."""
    if seed >= 0:
        np.random.seed(seed)

    n_y = P_trans.shape[0]
    # Cumulative transition probabilities, computed once per income state
    # instead of once per agent and period.
    cdf = np.empty_like(P_trans)
    for j in range(n_y):
        cdf[j, :] = np.cumsum(P_trans[j, :])

    # Every agent starts at asset index 0 and income index 0.
    a_indices = np.zeros(n_agents, dtype=np.int32)
    y_indices = np.zeros(n_agents, dtype=np.int32)

    for t in range(t_steps):
        for i in range(n_agents):
            # Update assets based on policy (uses the current income state).
            a_indices[i] = policy_indices[a_indices[i], y_indices[i]]
            # Update income based on Markov chain (inverse-CDF draw).
            draw = np.searchsorted(cdf[y_indices[i], :], np.random.rand())
            # If a row's mass is slightly below one, the draw can fall past
            # the last state; clamp it so the index stays valid.
            y_indices[i] = min(draw, n_y - 1)

    return a_indices


def simulate_asset_distribution(policy_indices, P_trans, n_agents=5000, t_steps=500,
                                a_grid=None, seed=None):
    """Simulate households following ``policy_indices`` and return final assets.

    Parameters
    ----------
    policy_indices : np.ndarray of int, shape (n_a, n_y)
        Next-period asset index for each (asset index, income index) pair.
    P_trans : np.ndarray, shape (n_y, n_y)
        Income transition matrix; row ``j`` is the distribution of next
        period's income state given current state ``j``.
    n_agents : int, optional
        Number of simulated households.
    t_steps : int, optional
        Number of simulated periods.
    a_grid : np.ndarray, optional
        Asset levels for the asset indices. Defaults to
        ``sgm_params['a_grid']`` from the notebook.
    seed : int, optional
        Non-negative seed for the random numbers drawn inside the compiled
        simulation loop. ``None`` leaves the generator state untouched.

    Returns
    -------
    np.ndarray, shape (n_agents,)
        Asset holdings of every household after ``t_steps`` periods.
    """
    if a_grid is None:
        a_grid = sgm_params['a_grid']
    seed_code = -1 if seed is None else int(seed)
    final_indices = _simulate_state_indices(policy_indices, P_trans, n_agents, t_steps, seed_code)
    return np.asarray(a_grid)[final_indices]

final_assets = simulate_asset_distribution(policy_indices_vfi, P_TRANS)

plt.figure(figsize=(10, 6))
plt.hist(final_assets, bins=50, density=True)
plt.title('Stationary Distribution of Assets')
plt.xlabel('Asset Holdings'); plt.ylabel('Density')
plt.show()

### 7. Chapter Summary
- **The Bellman Equation** is the cornerstone of dynamic programming, breaking down complex sequential problems into simpler recursive steps.
- **The Contraction Mapping Theorem** provides the theoretical guarantee that for a discounted dynamic programming problem, a unique value function exists and that Value Function Iteration will converge to it.
- **Solution Algorithms:** VFI is simple and robust. Policy Function Iteration (especially Howard's variant) is often much faster as it takes more intelligent steps, but can be more complex to implement.
- **Discretization:** To solve continuous-state models, we must first discretize the state space. Tauchen's method is a standard way to approximate a continuous AR(1) process with a discrete Markov chain.
- **Stationary Distribution:** By simulating the model with the optimal policy, we can analyze the long-run aggregate implications of individual behavior, such as the emergence of wealth inequality from uninsurable income risk.

### 8. Exercises

1.  **The Role of Patience:** Re-solve the stochastic growth model with a lower discount factor (`beta = 0.90`), representing a more impatient agent. Plot the new policy function and the new stationary asset distribution. How does impatience affect savings and long-run wealth? Provide an economic intuition.

2.  **The Role of Risk Aversion:** Re-solve the model with a higher coefficient of relative risk aversion (`gamma = 4.0`). How does this affect the policy function and the amount of precautionary savings? How does the stationary distribution change?

3.  **The Role of Income Persistence:** How does the stationary wealth distribution change if income shocks are less persistent? Re-solve and re-simulate the model using a lower `rho_y` (e.g., 0.8) in the Tauchen method. Is wealth inequality higher or lower? Why?

4.  **Certainty Equivalence:** Consider a version of the model with no income risk, where income is always the unconditional mean income. Solve for the optimal policy function in this deterministic case. How does it compare to the policy functions from the stochastic model? The difference in savings between the deterministic and stochastic models for an agent in the low-income state is a measure of their precautionary savings.

5.  **Alternative Utility:** Re-solve the model using an exponential utility function, $u(c) = -e^{-\gamma c}$, which exhibits constant absolute risk aversion (CARA) instead of constant relative risk aversion (CRRA). How do the resulting policy functions differ in shape from the CRRA case?