# Problem Set 3
## Jonathan Elliott
## December 8, 2017

In [135]:
%pylab inline
import numpy as np
import pandas as pd
from scipy.optimize import minimize

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


### Part 1: Computing the HZ Model

In [136]:
# Numerically safe log-sum-exp over the last (choice) axis
def logsumexp(X): # X is a 3-d array; in this notebook len(x_grid) x len(lim_seq) x len([0, 1])
    """Return log(sum(exp(X))) taken along axis 2 of a 3-d array.

    The per-cell maximum along axis 2 is subtracted before exponentiating
    and added back afterwards, so the largest exponent is always exp(0).

    Parameters
    ----------
    X : 3-d array

    Returns
    -------
    2-d array with the shape of X's first two axes.
    """
    peak = np.amax(X, axis=2, keepdims=True) # kept 3-d so it broadcasts against X
    scaled_sum = np.sum(np.exp(X - peak), axis=2)
    return np.log(scaled_sum) + peak[:, :, 0]

Calculate (analytically) the gradient of the log-likelihood function in Rust with respect to the parameters of
the model and write down the analytic results.

### Part 2: Estimation via MLE and MPEC

Estimate the model using the NPMLE approach of Rust. You will want to use the gradient.

In [137]:
# Import data
# The file is read with explicit column names and the first row is then
# dropped (presumably a header row -- TODO confirm against rustdata1.csv).
heads = ["ID", "x_t", "d_t"]
dataset = pd.read_csv('rustdata1.csv', names=heads)
# np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float) performs
# the same conversion on every NumPy version.
data = {head.lower(): np.asarray(dataset[head][1:], dtype=float) for head in heads}

# Parameters
k = 5 # number of transition probabilities
beta = 0.95 # β isn't identified, so need to provide it

# Compute transition probabilities from data
# The increment between consecutive rows is x_t - x_{t-1} when the previous
# decision was 0, and x_t - 0 when it was 1. Rows whose previous decision is
# neither 0 nor 1 contribute nothing.
# NOTE(review): increments are taken across consecutive rows regardless of the
# ID column; if the file stacks several units, the row pairs at the boundaries
# mix two units -- confirm whether that is intended.
x_now = data['x_t'][1:]
x_prev = data['x_t'][:-1]
d_prev = data['d_t'][:-1]
valid_prev = (d_prev == 0) | (d_prev == 1)
Delta_xt = (x_now - np.where(d_prev == 1, 0.0, x_prev))[valid_prev]
n = len(Delta_xt)
lim_seq = np.linspace(0, 2, k)
# Share of increments in each half-open bin [lim_seq[j-1], lim_seq[j]). I think data is in 1000s
transition_pr = np.array([
    np.count_nonzero((Delta_xt >= lower) & (Delta_xt < upper)) / n
    for lower, upper in zip(lim_seq[:-1], lim_seq[1:])
])
# The last probability absorbs everything outside the bins so the vector sums to 1
transition_pr = np.append(transition_pr, 1 - np.sum(transition_pr))

# Cost function (the functional form is a modelling choice)
q = 1 # number of parameters the cost function below takes
def c(x, theta_1): # linear specification, which apparently Rust preferred
    """Return the cost theta_1 * x.

    Parameters
    ----------
    x : scalar or array
    theta_1 : scalar or array that broadcasts against x
    """
    cost = theta_1 * x
    return cost

# Define profit function
def pi(x, i, theta_1, RC): # x is an array, i is not
    """Return the per-period payoff of choice i at state x.

    Parameters
    ----------
    x : array
        State values; only its shape is used when i == 1.
    i : int
        0 returns -c(x, theta_1); 1 returns -(RC + c(0, theta_1)).
    theta_1 : cost-function parameter(s), passed through to c()
    RC : scalar added to c(0, theta_1) in the i == 1 payoff

    Returns
    -------
    Array with the shape of x.

    Raises
    ------
    ValueError
        If i is neither 0 nor 1 (previously this surfaced as an
        UnboundLocalError).
    """
    if i == 0:
        return -c(x, theta_1)
    if i == 1:
        # Rust's specification is -(RC + c(0)); the earlier -(RC - c(0))
        # only agreed with it because the linear cost has c(0) = 0.
        return -(RC + c(np.zeros(x.shape), theta_1))
    raise ValueError("i must be 0 or 1, got {!r}".format(i))

# Compute EV(x, θ) via the fixed point
l = 4 # how fine the discretized x grid is, l=1 is as fine as Δy from lim_seq, l=2 is twice as fine, etc.
def EV_fp(theta_1, RC):
    """Solve for EV on a discretized x grid by successive approximation.

    Uses the module-level lim_seq, transition_pr, beta and l, and the
    functions pi() and logsumexp().

    Parameters
    ----------
    theta_1 : cost-function parameter(s), passed through to pi()
    RC : scalar passed through to pi()

    Returns
    -------
    x : 1-d array
        The grid on [0, x_max].
    EV_tau : 2-d array, shape (len(x), 2)
        Column 0 is EV(x, i=0), column 1 is EV(x, i=1).

    Prints "EV didn't converge." if the sup-norm change is still above the
    tolerance when the iteration limit is reached.
    """
    x_max = 14 # maximum value of x - in data, nothing above 13.8...
    Delta_y = lim_seq[1] # Δy - the size of the increments in lim_seq
    # np.linspace needs an integer number of points; the float expression
    # used before raises a TypeError on current NumPy.
    n_grid = int(round(x_max / Delta_y * l)) + 1
    x = np.linspace(0, x_max, n_grid) # grid must contain every x + multiple of Δy
    maxx = np.amax(x) # maximum value in the discretized x grid
    n_steps = len(lim_seq)
    last_row = len(x) - 1

    # Construct the payoffs
    # Each column j holds x + lim_seq[j], capped at the top of the grid.
    xplusDeltay = np.minimum(x[:, None] + lim_seq[None, :], maxx)
    # Grid row reached from each row after increment j (l grid steps per Δy), capped likewise.
    idx = np.minimum(np.arange(len(x))[:, None] + np.arange(n_steps)[None, :] * l, last_row)
    pi0 = pi(xplusDeltay, 0, theta_1, RC)
    pi1 = pi(xplusDeltay, 1, theta_1, RC)

    # Initialize the loop
    EV_tau = np.zeros((len(x), 2))
    err = 1
    err_tol = 1e-12
    iter_num = 1
    iter_lim = 5000
    while err > err_tol and iter_num < iter_lim:
        # Choice-specific values at each reachable next state
        pdv0 = pi0 + beta * EV_tau[idx, 0]
        pdv1 = pi1 + beta * EV_tau[idx, 1]
        logsumexp_prepr = logsumexp(np.dstack((pdv0, pdv1)))
        EV_tau1_i0 = np.sum(logsumexp_prepr * transition_pr, axis=1)
        # After i=1 the state restarts from x=0 and then moves with the same
        # increment distribution, so EV(x, 1) = EV(0, 0) for every x. The
        # earlier code summed row 0 without the probability weights and left
        # every other row at zero.
        EV_tau1_i1 = np.full(len(x), EV_tau1_i0[0])
        EV_tau1 = np.column_stack((EV_tau1_i0, EV_tau1_i1))
        err = np.amax(np.abs(EV_tau1 - EV_tau))
        EV_tau = EV_tau1
        iter_num += 1
    # Test the error itself: the iteration counter also hits the limit when
    # convergence happens on the very last allowed step.
    if err > err_tol:
        print("EV didn't converge.")
    return x, EV_tau

# EV evaluated off the grid
def EV_fct(EV_fp, x_space, x, i): # x is an array, i is not
    """Linearly interpolate column i of the gridded EV at the points x.

    Parameters
    ----------
    EV_fp : 2-d array of EV values, one row per entry of x_space
    x_space : 1-d array of grid points
    x : array of points to evaluate at
    i : int, column of EV_fp to use
    """
    ev_column = EV_fp[:, i]
    return np.interp(x, x_space, ev_column)

# Choice-specific value function
def v(x, i, theta_1, RC, EV, x_space): # x is an array, i is not
    """Return pi(x, i) + beta * EV(x, i), with EV interpolated from the grid.

    Parameters
    ----------
    x : array of states
    i : int, the choice
    theta_1, RC : payoff parameters passed through to pi()
    EV : 2-d array of gridded EV values (one column per choice)
    x_space : 1-d array, the grid EV is defined on
    """
    flow_payoff = pi(x, i, theta_1, RC)
    continuation = EV_fct(EV, x_space, x, i)
    return flow_payoff + beta * continuation

# Construct CCP given EV(x, θ)
def CCP(x, i, theta_1, RC):
    """Return the logit choice probability of choice i at each state x.

    Solves the fixed point with EV_fp(theta_1, RC), evaluates both
    choice-specific values with v(), and returns
    exp(V_i) / (exp(V_0) + exp(V_1)).

    Parameters
    ----------
    x : array of states
    i : scalar or array of choices (0 or 1), broadcast against x
    theta_1, RC : payoff parameters

    Returns
    -------
    Array of probabilities with the broadcast shape of x and i.
    """
    x_space, EV = EV_fp(theta_1, RC)
    V1 = v(x, 1, theta_1, RC, EV, x_space)
    V0 = v(x, 0, theta_1, RC, EV, x_space)
    Vi = V0*(i == 0) + V1*(i == 1)
    # Normalise in log space: exp(Vi) / (exp(V0) + exp(V1)) turns into 0/0
    # (or inf/inf) once the values are large in magnitude.
    pr = np.exp(Vi - np.logaddexp(V0, V1))
    return pr

# Objective for the MLE
def loglikelihood(theta, x, i):
    """Return the negative log-likelihood of the observed choices.

    Parameters
    ----------
    theta : 1-d array; all but the last entry are passed to CCP as theta_1,
        the last entry as RC
    x : array of observed states
    i : array of observed choices

    Returns
    -------
    Minus the sum of log choice probabilities (negated because the solver
    is a minimizer).
    """
    theta_1, RC = theta[:-1], theta[-1]
    choice_pr = CCP(x, i, theta_1, RC)
    total_logl = np.sum(np.log(choice_pr))
    return -total_logl

# Construct likelihood's gradient
def loglikelihood_grad(theta, x, i): # must accept same arguments as loglikelihood()
    """Return a numerical gradient of loglikelihood() with respect to theta.

    The analytic gradient still needs to be derived by hand (question 1).
    Until then this uses central finite differences; the previous stub
    returned the constant 0, which makes a gradient-based optimizer stop at
    its starting point.

    Parameters
    ----------
    theta : 1-d array of parameters
    x, i : passed through unchanged to loglikelihood()

    Returns
    -------
    1-d float array with one entry per element of theta. Because
    loglikelihood() returns the negative log-likelihood, this is the
    gradient of that negative.
    """
    theta = np.asarray(theta, dtype=float)
    grad = np.empty_like(theta)
    for j, theta_j in enumerate(theta):
        h = 1e-5 * max(1.0, abs(theta_j)) # step scaled to the parameter's magnitude
        step = np.zeros_like(theta)
        step[j] = h
        upper = loglikelihood(theta + step, x, i)
        lower = loglikelihood(theta - step, x, i)
        grad[j] = (upper - lower) / (2 * h)
    return grad

# Solve via MLE
# Starting point: one zero per cost parameter plus one for RC
init_guess = np.zeros((q + 1,))
# Solver call, currently disabled:
# res = minimize(loglikelihood, init_guess, args=(data['x_t'], data['d_t']), method='BFGS', jac=loglikelihood_grad)



1842.55549794


Estimate the model using the MPEC method of Su and Judd.

In [138]:
# Scratch check: stacking a row of ones on top of a block of zeros
a = np.ones(shape=3)
print(a)
b = np.zeros(shape=(5, 3))
print(np.vstack([a, b]))

[ 1.  1.  1.]
[[ 1.  1.  1.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


Compare the results in a table, including the nonparametric answers below and discuss the results.

NameError: name 'iter_num' is not defined

Plot the $EV(\cdot)$ you have obtained for both estimators.

### Part 3: The Stata Estimator

This is taken from Han Hong’s problem set at Stanford. The idea is that we can use the arguments in Hotz-Miller (1993) or Pesendorfer and Schmidt-Dengler (2008) to construct an optimization-free method to recover the utility parameters in the Rust problem.

We begin by defining the choice specific value function with $\varepsilon_{it}$ i.i.d. and EV.

$$v(x,d) = u(x,d) + \beta \int \log \left( \sum_{d' \in D} \exp(v(x', d')) \right) p(x'|x, d) \mathrm{d}x'$$
$$v(x,d) = u(x,d) + \beta \int \log \left( \sum_{d' \in D} \exp(v(x', d') - v(x', 1)) \right) p(x'|x, d) \mathrm{d}x' + \beta \int v(x',1) p(x'|x,d) \mathrm{d}x'$$

1\. Estimate $p(x'|x,d)$ nonparametrically or parametrically (for example, as a multinomial with $n$ outcomes
or an exponential distribution). Call your estimate $\hat{p}(x'|x,d)$.

2\. Estimate $p(d|x)$ (the CCP) non-parametrically. You can use the binomial logit model with a basis function
(increasing number of terms) or you can use a kernel such as **ksdensity** or **ecdf**.

3\. Now use the Hotz-Miller inversion to estimate: $\hat{v}(x, d) - \hat{v}(x, 1) = \log \hat{p}(d|x) - \log \hat{p}(1|x)$.