# Backward induction from terminal value function

In [297]:
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from scipy.interpolate import RectBivariateSpline
from numba import jit
import copy

In [298]:
# Define parameters
beta = 0.9 # Discount rate
r = 0.1 # Return on A
a = 1 # Productivity in production of K
delta = 0.05 # Depreciation rate of K
alpha = 0.5 # Weight on leisure
gamma = 5 # Weight on bequest
sigma = 0.5 # Unused
params = (beta, r, a, delta, alpha, gamma, sigma)

# Grid parameters
A_grid_min = 1
A_grid_max = 10
A_grid_size = 10
K_grid_min = 1
K_grid_max = 10
K_grid_size = 10
T = 10

In [299]:
# Define g(x) and g'(x) (labor market equilibrium tradeoff) function
@jit
def g(x):
    k = 5/4
    s = 1/2
    a = 1/(k**0.5 - s)
    y = k - (x/a + s)**2
    
    return y

@jit
def g_prime(x):
    k = 5/4
    s = 1/2
    a = 1/(k**0.5 - s)
    y = (-2*(x/a + s))/a
    
    return y

In [300]:
# Define terminal utility
@jit
def utility_T(values, state, params):
    beta, r, a, delta, alpha, gamma, sigma = params # Unpack parameters
    A0, K0 = state # Unpack chosen state
    c, l = values # Unpack values to evaluate at
    
    # Put constraints into utility function
    A1 = (1+r)*A0 + (1-l)*K0 - c
    
    # Evaluate utility
    u = - np.log(c) - alpha*np.log(l) - gamma*np.log(A1)
    return u

In [301]:
# Set up state grids
K_grid = np.linspace(K_grid_min, K_grid_max, K_grid_size)
A_grid = np.linspace(A_grid_min, A_grid_max, A_grid_size)
T_grid = np.linspace(0, T, T+1, dtype = int)

# Value grid
V_grid = np.ones((A_grid_size, K_grid_size, T))

# Policy grid
c_policy = np.ones((A_grid_size, K_grid_size, T))
l_policy = np.ones((A_grid_size, K_grid_size, T))
x_policy = np.ones((A_grid_size, K_grid_size, T))

# Store interpolated value function in dict
V_dict = {}

In [302]:
# Compute value function and policy at t = T
for a_ind, A in np.ndenumerate(A_grid):
    for k_ind, K in np.ndenumerate(K_grid):
        
        # Compute value function at terminal state
        state = (A, K)
        results = minimize(utility_T, [0.5, 0.5], bounds = ((0, None), (0, 1)), args = (state, params,), tol = 1e-16)
        
        # Store policy results and value function
        V_grid[a_ind, k_ind, T-1] = -results.fun
        c_policy[a_ind, k_ind, T-1] = results.x[0]
        l_policy[a_ind, k_ind, T-1] = results.x[1]

In [303]:
# Interpolate terminal value function and store in dict
V_dict[T-1] = RectBivariateSpline(A_grid, K_grid, V_grid[:, :, T-1])

In [304]:
# Wrap evaluation of interpolation in a function
def value_function(state, V_T_interpolate):
    A, K = state
    return RectBivariateSpline.__call__(V_T_interpolate, A, K)[0][0]

In [305]:
# Define objective function for t < T
def objective(values, state, V_T_interpolate, params):
    beta, r, a, delta, alpha, gamma, sigma = params # Unpack parameters
    A0, K0 = state # Unpack current state
    c, l, x = values # Unpack values to evaluate at
    
    # State evolution
    A1 = (1+r)*A0 + g(x)*(1-l)*K0 - c
    K1 = (1 + a*x*(1-l) - delta)*K0
    
    # No borrowing and strictly positive leisure and consumption
    if A1 < 0 or c <= 0 or l <= 0:
        return 1e9999
    
    # Value next period
    V1 = value_function([A1, K1], V_T_interpolate)
    
    # Evaluate objective
    u = - np.log(c) - alpha*np.log(l) - beta*V1
    return u

In [306]:
# Compute value function and policy at t
for t in range(T-2, -1, -1):
    for a_ind, A in np.ndenumerate(A_grid):
        for k_ind, K in np.ndenumerate(K_grid):

            state = (A, K)

            # Use interpolated value function next period
            V_interpolate = V_dict[t+1]

            # Compute value function at t
            results = minimize(objective, [0.5, 0.5, 0.5], bounds = ((0, None), (0, 1), (0, 1)), args = (state, V_interpolate, params,), tol = 1e-6)

            # Store results
            V_grid[a_ind, k_ind, t] = -results.fun
            c_policy[a_ind, k_ind, t] = results.x[0]
            l_policy[a_ind, k_ind, t] = results.x[1]
            x_policy[a_ind, k_ind, t] = results.x[2]

            # Interpolate value function at t and store in dict
            V_dict[t] = RectBivariateSpline(A_grid, K_grid, V_grid[:, :, t])

In [307]:
c_policy[5,:,9]

array([1.16923081, 1.32307703, 1.4769233 , 1.63076911, 1.78461547,
       1.9384613 , 2.09230751, 2.24615437, 2.39999941, 1.45168255])

In [308]:
l_policy[5,:,9]

array([0.58461537, 0.3307692 , 0.24615384, 0.20384615, 0.17846153,
       0.16153845, 0.14945054, 0.14038462, 0.13333334, 0.19291641])

In [309]:
x_policy[5,:,9]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [310]:
V_grid[5,:,9]

array([ 8.71686543,  9.17378255,  9.68605582, 10.18630565, 10.660716  ,
       11.10705138, 11.5264004 , 11.92082099, 12.29255048, 12.45830807])

In [311]:
K_grid

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [312]:
A_grid

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [313]:
results = minimize(utility_T, [0.5, 0.5], bounds = ((0, None), (0, 1)), args = ([56, 45], params,), tol = 1e-16)

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'state' of function 'utility_T'.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types
[1m
File "<ipython-input-300-08e905ac708d>", line 3:[0m
[1m@jit
[1mdef utility_T(values, state, params):
[0m[1m^[0m[0m
[0m


In [314]:
results

      fun: -23.97961340339937
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 0.00000000e+00, -1.77635684e-06])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 66
      nit: 15
   status: 0
  success: True
        x: array([16.40002927,  0.18222205])