In [72]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import minimize
from numpy.linalg import inv

np.set_printoptions(precision=3, suppress=True)
np.set_printoptions(legacy='1.13')

delta_tolerance = 0.001
multim_n_consumer = 5
DEBUG = 1

## 🧪 Practice 6: Solving for Mean Utilities via Contraction Mapping

### 🎯 Goal
Find the vector of mean utilities $\delta_j$ that rationalizes observed market shares $s_j^{\text{obs}}$,  
by matching simulated shares $s_j^{\text{pred}}(\delta)$ to observed shares.

---

### 🧠 Theory

Contraction mapping update rule:

$$
\delta_j^{\text{new}} = \delta_j^{\text{old}} + \log(s_j^{\text{obs}}) - \log(s_j^{\text{pred}})
$$

- Adjust $\delta_j$ by the log difference between observed and predicted shares.
- Repeat until $\delta$ converges.

---

### ⚙️ Steps

1. Initialize $\delta_j$ (e.g., zeros)
2. Given $\delta_j$, compute choice probabilities and aggregate to $s_j^{\text{pred}}$
3. Update $\delta_j$ using the contraction formula
4. Check if the change in $\delta_j$ is below a tolerance (e.g., $10^{-6}$)
5. Repeat until convergence

---

### 📈 Purpose

- Recovers the mean utilities that exactly match observed market shares.
- Forms the **core inner loop** of the BLP estimation algorithm.
- Prepares for GMM estimation of random coefficient parameters (Practice 7).

In [73]:
# Specification: define functions 

base_utility = pd.read_csv("simulated_utilities.csv")
delta_guess = [0,0,0,0,0]

# print(base_utility)
dataset = pd.read_csv("multi_market_data.csv")

market = dataset[dataset['market_id'] == 0].reset_index(drop=True) #extract multi market dataset

observed_share = market['share'].values #extract observed share 

def predicted_share(input_utilities, input_delta): #from Practice 5 - output predicted shares 
    adjusted_utilities = input_utilities.values + input_delta
    adjusted_utilities = np.clip(adjusted_utilities, -700, 700)
    exp_utilities = np.exp(adjusted_utilities)
    sum_exp = np.sum(exp_utilities, axis=1, keepdims=True)  # Sum over products for each consumer
    prob_choice = exp_utilities / (sum_exp + 1)
    pred_share = np.mean(prob_choice, axis=0)  # Average over consumers
    return pred_share

adjusted_share = predicted_share(base_utility, delta_guess)
# print(adjusted_share)

In [74]:
# Contraction mapping algorithm 
difference = 1
delta_guess = [0,0,0,0,0]
# we already have base utility and delta guess 

while difference > delta_tolerance:    
    adjusted_share = predicted_share(base_utility, delta_guess)
    delta_new = delta_guess + np.log(observed_share) - np.log(adjusted_share)
    difference = np.max(np.abs(delta_new - delta_guess))
    # print(delta_guess, delta_new, difference)
    delta_guess = delta_new

# a function out of it 
def delta_iteration(f_base_utility, f_observed_share):
    # print("here", f_base_utility)
    f_delta_guess = [0,0,0,0,0]
    f_difference = 1
    while f_difference > delta_tolerance:    
        f_adjusted_share = predicted_share(f_base_utility, f_delta_guess)
        safe_observed_share = np.maximum(f_observed_share, 1e-10)
        f_delta_new = f_delta_guess + np.log(safe_observed_share) - np.log(f_adjusted_share)
        f_difference = np.max(np.abs(f_delta_new - f_delta_guess))
        # print(delta_guess, delta_new, difference)
        f_delta_guess = f_delta_new
    return f_delta_new

print("returning", delta_iteration(base_utility, observed_share))
# print("our converged mean utilities are", delta_guess)

returning [ 0.276  0.032 -0.003  0.053  0.1  ]


## 🧪 Practice 7: GMM Estimation of Random Coefficients

### 🎯 Goal

Estimate the random coefficient parameter $\sigma$ (and linear parameters $\beta$, $\alpha$)  
using the BLP GMM framework with moment conditions based on unobserved product characteristics $\xi_{jt}$.

---

### 🧠 Theory

The structural demand equation is:

$$
\delta_{jt} = x_{jt} \beta - \alpha p_{jt} + \xi_{jt}
$$

- You have already recovered $\delta_{jt}$ from observed shares using contraction mapping.
- The residual $\xi_{jt}$ reflects unobserved demand shocks.
- Valid instruments $z_{jt}$ should satisfy:

$$
\mathbb{E}[z_{jt} \cdot \xi_{jt}] = 0
$$

---

### ⚙️ GMM Estimation Procedure

1. **Fix a value of** $\sigma$
2. **Simulate** random coefficients for consumers using $\sigma$
3. **Solve** for $\delta_{jt}$ using contraction mapping for each market
4. **Estimate** $(\beta, \alpha)$ from:

$$
\delta = X\theta + \xi, \quad \text{with instruments } Z
$$

5. **Compute residuals** $\xi_{jt} = \delta_{jt} - x_{jt} \beta + \alpha p_{jt}$
6. **Form moment condition vector**:

$$
\hat{g}(\sigma) = \frac{1}{N} \sum z_{jt} \cdot \xi_{jt}
$$

7. **Compute GMM loss function**:

$$
Q(\sigma) = \hat{g}(\sigma)^\top W \hat{g}(\sigma)
$$

8. **Minimize** $Q(\sigma)$ over values of $\sigma$

---

### 📌 Notes

- The outer loop optimizes $\sigma$
- The inner loop simulates demand and recovers $\delta$ using contraction mapping
- $W$ is the GMM weighting matrix (start with identity, refine iteratively)
- Instruments $z_{jt}$ can include cost shifters, characteristics of rival products, etc.

---

### 📈 Output

- Estimated heterogeneity parameter $\hat{\sigma}$
- Estimated linear parameters $\hat{\beta}$ and $\hat{\alpha}$
- Residuals $\hat{\xi}_{jt}$, used for model fit and marginal cost recovery

In [75]:
# Practice 7: Putting it together
gmm_tolerance = 0.001
sigma_guess = 1

# we didn't creat an instrument for our simulated dataset
# so now we will add in the instrument retroactively 

# dataset = pd.read_csv("multi_market_data.csv")
# dataset['z_rival_x'] = dataset.groupby('market_id')['x'].transform('sum') - dataset['x']
# dataset['z_rival_x'] = dataset['z_rival_x']/multim_n_consumer
# print(dataset[:20])

In [76]:
# step 1: calculate mean utilities for each product/market 
def simulation(input_dataset, chosen_market, sigma_guess, parameter_guess):
    market_chosen = input_dataset[input_dataset['market_id'] == chosen_market].reset_index(drop=True)
    x_j = market_chosen['x'].values  # shape (J,)
    p_j = market_chosen['p'].values  # shape (J,)
    alpha, beta = parameter_guess
    shock_beta = np.clip(np.random.normal(0,1, len(x_j)),-2,2)
    shock_alpha = np.clip(np.random.normal(0,1, len(p_j)),-2,2)
    alpha_i = alpha + sigma_guess*shock_alpha
    beta_i = beta + sigma_guess*shock_beta
    U = np.outer(beta_i, x_j) - np.outer(alpha_i, p_j)
    return U
    
def mean_utility(input_dataset, sigma_guess, parameter_guess): #appending mean utility for each product 
    # to the dataset 
    appending = []
    for i in range(50): 
        observed_share_i = input_dataset[input_dataset['market_id'] == i]['share'].values  
        utilities = pd.DataFrame(simulation(input_dataset,i, sigma_guess, parameter_guess))
        appending.extend(list(delta_iteration(utilities, observed_share_i)))
    return appending #return mean utility vector

market_data = pd.read_csv("multi_market_data.csv")

delta_array = mean_utility(market_data, 0.2,[1,2])

market_data['delta_new'] = delta_array

#construct the instrument here 
market_data['instrument'] = market_data.groupby('market_id')['x'].transform('sum') - market_data['x']

market_data['instrument'] = market_data['instrument']/5

print(market_data)

     market_id  product_id         x         p      delta         share  \
0            0           0 -1.974888  1.294999 -15.119217  3.367844e-10   
1            0           1  0.642573  1.242065   3.255943  3.217973e-02   
2            0           2  1.310736  2.542957   6.632193  9.416063e-01   
3            0           3  0.890814  3.234206   3.001492  2.495032e-02   
4            0           4  0.086252  4.580680  -3.976913  2.324841e-05   
..         ...         ...       ...       ...        ...           ...   
245         49           0 -0.772169  1.531622  -6.936805  2.561214e-06   
246         49           1 -1.171549  1.583599  -9.784442  1.485022e-07   
247         49           2  0.060950  4.689876  -4.263228  3.711637e-05   
248         49           3  1.217949  2.611265   5.914377  9.764343e-01   
249         49           4  0.551589  1.791425   2.069701  2.088920e-02   

           s0  delta_new  instrument  
0    0.001240 -10.270566    0.586075  
1    0.001240   2.695

In [77]:
# step 2: outer loop - GMM algorithm 
# input: (finalized) dataset already contained estimated mean utility 

def estimate_parameter(input_dataset): #return estimated beta + alpha 
    X = input_dataset[['x', 'p']].values
    Z = input_dataset[['instrument', 'p']].values
    y = input_dataset[['delta_new']].values
    # First stage
    Pi = inv(Z.T @ Z) @ (Z.T @ X) #return estimated parameter
    X_hat = Z @ Pi #predicted X 
    # Second stage
    theta = inv(X_hat.T @ X_hat) @ (X_hat.T @ y) #use predicted X to estimate again
    beta_hat = theta[0]
    alpha_hat = -theta[1]
    return beta_hat, alpha_hat

print(estimate_parameter(market_data))

def moment_condition(sigma_guess, input_dataset, parameter_guess):
    # output a number representing the moment condition to minimize 
    delta_array = mean_utility(input_dataset, sigma_guess, parameter_guess)
    input_dataset['delta_new'] = delta_array
    # Step 2: estimate beta and alpha
    beta_hat, alpha_hat = estimate_parameter(input_dataset)
    # Step 3: residuals
    x = input_dataset['x'].values
    p = input_dataset['p'].values
    delta = input_dataset['delta_new']
    residuals = delta - (beta_hat * x) + (alpha_hat * p)
    # Step 4: moment condition vector
    Z = input_dataset[['instrument', 'p']].values
    g = Z.T @ residuals
    return g.T @ g
print(moment_condition(0.5, market_data, [4,5]))


(array([ 10.441]), array([ 0.255]))
6.47471787456e-24


In [79]:
# Final step:
sigma_init = 5
parameter_guess = [1, 2]   # you can start with reasonable alpha, beta guesses

# Minimize
result = minimize(
    moment_condition, 
    x0=sigma_init, 
    args=(market_data, parameter_guess), 
    method='Nelder-Mead', 
    options={'disp': True}
)

sigma_hat = result.x
print("Estimated sigma:", sigma_hat)
print("Estimated parameter:", estimate_parameter(market_data))

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 15
         Function evaluations: 39
Estimated sigma: [ 4.625]
Estimated parameter: (array([-31.155]), array([-1.291]))
