In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame as df
import random
import math
import time 
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize
from scipy.stats import norm
import ray
ray.init(num_cpus= 10, ignore_reinit_error=True)
from numba import jit

2023-03-01 21:24:03,295	INFO worker.py:1538 -- Started a local Ray instance.


In [2]:
from sklearn.linear_model import LinearRegression
from linearmodels.iv import IV2SLS
import statsmodels.api as sm
import linearmodels
from statsmodels.iolib.summary2 import summary_col
from scipy.optimize import minimize


In [5]:
# Define True Parameters
alpha_0 = 3
alpha_1 = 1.5
beta_bar = -1
sigma_beta = 0.2
omega_0 = 5
omega_1 = 1
omega_2 = 2
sigma_xi = 1
sigma_xieta = 0.5
sigma_eta = 2

# Part A. Simulate Data

In [6]:
# panel frame t X j
t_list = []
j_list = []
for i in range(1,101):
    t_temp = list(np.full(10,i))
    j_temp = [k for k in range(1,11)]
    t_list = t_list + t_temp
    j_list = j_list + j_temp

In [7]:
# (1) random draw part
# fix the seed to allow replication
np.random.seed(1)

# draw x
x_list = np.random.normal(1, 0.5, 1000)

# draw z
z_list = np.random.normal(1, 0.5, 1000)

# draw xi & eta
xieta_mean = (0, 0)
xieta_cov = [[sigma_xi**2, sigma_xieta], [sigma_xieta, sigma_eta**2]]
xi_list, eta_list = np.random.multivariate_normal(xieta_mean, xieta_cov, 1000).T

# construct a dataframe
df_master = df({"t": t_list, "j": j_list, "x": x_list, "z": z_list, "xi": xi_list, "eta": eta_list})

In [8]:
# (2) calculation part
# calculate price
df_master['price'] = omega_0 + omega_1*df_master['x'].values + omega_2*df_master['z'].values + df_master['eta'].values

In [42]:
# calculate share
# fix the random draw of nu
np.random.seed(2)
nu_perseon_1000 = np.random.normal(0,1,1000)

# for each market t, calculate true share of each product j
#                  , to check the validity of delta_FXP, calculate the value of true delta too
true_share_list = []
true_delta_list = []
for t in range(1,101):
    work_data = df_master.loc[df_master['t']==t]

    exp_list = []
    exp_sum = 0
    delta_list_temp = []
    for k in range(10):
        x_k = work_data['x'].values[k]
        p_k = work_data['price'].values[k]
        xi_k = work_data['xi'].values[k]
        u_k_true = alpha_0 + alpha_1*x_k + beta_bar*p_k + xi_k + sigma_beta*p_k*nu_perseon_1000
        delta_k_true = alpha_0 + alpha_1*x_k + beta_bar*p_k + xi_k
        
        exp_list.append(np.exp(u_k_true))
        exp_sum += np.exp(u_k_true)
        delta_list_temp.append(delta_k_true)

    denominator = 1+exp_sum
    share_list_temp = [(exp_list[j]/denominator).mean() for j in range(10)]
    true_share_list = true_share_list + share_list_temp
    true_delta_list = true_delta_list + delta_list_temp

df_master['true_share'] = true_share_list
df_master['true_delta'] = true_delta_list

In [44]:
df_master['constant'] = np.ones(1000)
ideal_result = sm.OLS(df_master['true_delta'], df_master[['constant', 'x', 'price', 'xi']]).fit()
ideal_result.summary()

0,1,2,3
Dep. Variable:,true_delta,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,2.216e+32
Date:,"Wed, 01 Mar 2023",Prob (F-statistic):,0.0
Time:,21:48:47,Log-Likelihood:,32085.0
No. Observations:,1000,AIC:,-64160.0
Df Residuals:,996,BIC:,-64140.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,3.0000,3.39e-16,8.85e+15,0.000,3.000,3.000
x,1.5000,1.89e-16,7.95e+15,0.000,1.500,1.500
price,-1.0000,3.99e-17,-2.51e+16,0.000,-1.000,-1.000
xi,1.0000,9.32e-17,1.07e+16,0.000,1.000,1.000

0,1,2,3
Omnibus:,3.086,Durbin-Watson:,0.28
Prob(Omnibus):,0.214,Jarque-Bera (JB):,3.042
Skew:,0.135,Prob(JB):,0.218
Kurtosis:,3.009,Cond. No.,33.1


# Part B. GMM estimation

In [52]:
# construct IV
df_avrg_x = df(df_master.groupby('t')['x'].mean())
df_avrg_z = df(df_master.groupby('t')['z'].mean())
df_avrg_x = df_avrg_x.rename(columns= {"x": "avrg_x"})
df_avrg_z = df_avrg_z.rename(columns= {"z": "avrg_z"})
df_avrg_x = df_avrg_x.reset_index()
df_avrg_z = df_avrg_z.reset_index()

df_master = pd.merge(df_master, df_avrg_x, left_on= "t", right_on= "t")
df_master = pd.merge(df_master, df_avrg_z, left_on= "t", right_on= "t")

df_master['IV2'] = (10*df_master['avrg_x'] - df_master['x'])/9
df_master['IV3'] = (10*df_master['avrg_z'] - df_master['z'])/9

In [70]:
def obj_function(sigma_beta, df_observable):

    epsilon = 1e-14
    delta_list = []
    for t in range(1, 101):
        work_data = df_master.loc[df_master['t'] == t]

        # 1. set the initial vector of theta (name it delta_pre)
        share_outer = 1 - work_data['true_share'].sum()
        delta_pre = np.log(work_data['true_share'].values / share_outer) 
        
        # 2. update delta by searching for the fixed point
        #    calculate model predicted share with MC integration    
        # fix the seed for random nu
        np.random.seed(3)
        nu_draw = np.random.normal(0, 1, 200)
        while True:    
            # 2-1. calculate denominator
            exp_common = 0
            for j in range(10):
                delta_j = delta_pre[j]
                p_j = work_data['price'].values[j]
                exp_j = np.exp(delta_j + sigma_beta*p_j*nu_draw)     ##### sigma beta matters here #####
                exp_common += exp_j
            denominator_common = 1+ exp_common
                
            # 2-2. calculate market share for 10 products 
            share_list_temp = []
            for j in range(10):
                delta_j = delta_pre[j]
                p_j = work_data['price'].values[j]
                exp_j = np.exp(delta_j + sigma_beta*p_j*nu_draw)
                s_j = (exp_j/denominator_common).mean()
                share_list_temp.append(s_j)

            # 2-3. update delta
            share_list_array = np.array(share_list_temp)
            delta_post = delta_pre + np.log(work_data['true_share'].values / share_list_array) 

            # 3. Break Condition and Update
            if (abs(delta_post - delta_pre)).max() < epsilon:
                break
            else: 
                delta_pre = delta_post

        delta_list_temp = list(delta_pre)
        delta_list = delta_list + delta_list_temp
    
    df_master['delta_FXP'] = delta_list 
    #step 2: 2SLS
    IV_result = IV2SLS(dependent=df_master['delta_FXP'],
                    exog = df_master[['constant', 'x']],
                    endog = df_master['price'],
                    instruments=df_master[['z']]).fit()

    alpha_0_hat = IV_result.params[0]
    alpha_1_hat = IV_result.params[1]
    beta_bar_hat = IV_result.params[2]
    
    xi_array = (df_master['delta_FXP'] - alpha_0_hat*df_master['constant'] - alpha_1_hat*df_master['x'] - beta_bar_hat*df_master['price']).values

    #step 3: GMM
    g1_bar = ((xi_array * df_master['z'].values)**2).mean()
    g2_bar = ((xi_array * df_master['IV2'].values)**2).mean()
    g3_bar = ((xi_array * df_master['IV3'].values)**2).mean()

    return g1_bar + g2_bar + g3_bar

In [75]:
GMM_result = minimize(obj_function, 0.5, args = (df_master), bounds = ((0,2),), method='Nelder-Mead', options={'maxiter':200, 'disp': True})

Optimization terminated successfully.
         Current function value: 3.273593
         Iterations: 15
         Function evaluations: 30


In [76]:
GMM_result.x

array([0.17919922])

In [77]:
sigma_beta = GMM_result.x[0]
epsilon = 1e-10

delta_list = []
for t in range(1, 101):
    work_data = df_master.loc[df_master['t'] == t]

    # 1. set the initial vector of theta (name it delta_pre)
    share_outer = 1 - work_data['true_share'].sum()
    #delta_pre = np.log(work_data['true_share'].values / share_outer) 
    delta_pre = np.log(work_data['true_share'].values) 


    # 2. update delta by searching for the fixed point
    #    calculate model predicted share with MC integration    
    # fix the seed for random nu
    np.random.seed(3)
    nu_draw = np.random.normal(0, 1, 2000)
    while True:    
        # 2-1. calculate denominator
        #nu_draw = np.random.normal(0, 1, 2000)
        exp_common = 0
        for j in range(10):
            delta_j = delta_pre[j]
            p_j = work_data['price'].values[j]
            exp_j = np.exp(delta_j + sigma_beta*p_j*nu_draw)     ##### sigma beta matters here #####
            exp_common += exp_j
        denominator_common = 1+ exp_common
            
        # 2-2. calculate market share for 10 products 
        share_list_temp = []
        for j in range(10):
            delta_j = delta_pre[j]
            p_j = work_data['price'].values[j]
            exp_j = np.exp(delta_j + sigma_beta*p_j*nu_draw)
            s_j = (exp_j/denominator_common).mean()
            share_list_temp.append(s_j)

        # 2-3. update delta
        share_list_array = np.array(share_list_temp)
        delta_post = delta_pre + np.log(work_data['true_share'].values / share_list_array) 

        # 3. Break Condition and Update
        if abs(delta_post - delta_pre).max() < epsilon:
            break
        else: 
            delta_pre = delta_post
    
    
    delta_list_temp = list(delta_pre)
    delta_list = delta_list + delta_list_temp

df_master['delta_FXP'] = delta_list

#step 2: 2SLS
IV_result = IV2SLS(dependent=df_master['delta_FXP'],
                exog = df_master[['constant', 'x']],
                endog = df_master['price'],
                instruments=df_master[['z', 'IV2', 'IV3']]).fit()

alpha_0_hat = IV_result.params[0]
alpha_1_hat = IV_result.params[1]
beta_bar_hat = IV_result.params[2]


print("alpha_0_hat: ", alpha_0_hat)
print("alpha_1_hat: ", alpha_1_hat)
print("beta_bar_hat: ", beta_bar_hat)
print("sigma_beta_hat: ", sigma_beta)

alpha_0_hat:  2.7335330462987812
alpha_1_hat:  1.564694794031908
beta_bar_hat:  -0.9730532888677441
sigma_beta_hat:  0.17919921874999972
