In [350]:
import numpy as np
import pandas as pd
from pandas import DataFrame as df
import random
import math
import time 
from scipy.optimize import minimize
import ray
# Start a local Ray runtime capped at 10 CPUs; ignore_reinit_error=True lets
# this cell be re-run in a live kernel without raising on the second init.
ray.init(num_cpus= 10, ignore_reinit_error=True)

2023-02-14 21:39:39,806	INFO worker.py:1370 -- Calling ray.init() again after it has already been called.


0,1
Python version:,3.9.7
Ray version:,2.2.0


In [351]:
# True parameter values for the simulation study.
M = 1000                          # number of simulated datasets (one DGP() call each)
alpha_1 = alpha_2 = 1             # intercepts of option 1 and option 2
beta = -0.1                       # price coefficient shared by both options
price_loc, price_scale = 2, 0.5   # mean and s.d. of the normal price draws

# Part A. Simulate Data

In [352]:
def DGP(n_individuals=100, n_periods=10):
    """Simulate one panel of three-way logit choices.

    Every individual i = 1..n_individuals faces the same pair of prices
    (p_1t, p_2t) in each period t = 1..n_periods and picks the outside option
    (0), option 1 or option 2 with logit probabilities built from the
    module-level alpha_1, alpha_2 and beta. Prices are drawn from
    Normal(price_loc, price_scale).

    Parameters
    ----------
    n_individuals : int, default 100
        Number of individuals in the panel.
    n_periods : int, default 10
        Number of periods (and therefore of price pairs).

    Returns
    -------
    pandas.DataFrame
        One row per (i, t), sorted by i then t, with columns
        i, t, p_1t, p_2t, d_0t, d_1t, d_2t. The d_* columns are 0/1
        indicators and exactly one of them is 1 in each row.

    Notes
    -----
    Frames are built with ``pd.DataFrame`` rather than the module alias ``df``:
    that alias is easily rebound by a ``for df in ...`` loop elsewhere in the
    notebook, after which calling it would raise ``TypeError``.
    Random numbers come from the global ``np.random`` state in the order
    p_1 prices, p_2 prices, uniforms.
    """
    periods = np.arange(1, n_periods + 1)

    # i x t frame (n_individuals * n_periods rows, 2 columns)
    df_simulation = pd.DataFrame({
        'i': np.repeat(np.arange(1, n_individuals + 1), n_periods),
        't': np.tile(periods, n_individuals),
    })

    # t, p_1t, p_2t frame (n_periods rows, 3 columns)
    p_1_draw = np.random.normal(price_loc, price_scale, n_periods)
    p_2_draw = np.random.normal(price_loc, price_scale, n_periods)
    df_price = pd.DataFrame({'t': periods, 'p_1t': p_1_draw, 'p_2t': p_2_draw})

    # attach the period prices to every individual, then restore (i, t) order
    df_simulation = pd.merge(df_simulation, df_price, on="t")
    df_simulation = df_simulation.sort_values(by=['i', 't'], ascending=True)

    # choice probabilities under the true parameters
    p_1_array = np.array(df_simulation['p_1t'])
    p_2_array = np.array(df_simulation['p_2t'])
    exp_1_array = np.exp(alpha_1 + beta * p_1_array)
    exp_2_array = np.exp(alpha_2 + beta * p_2_array)
    denominator = 1 + exp_1_array + exp_2_array
    prob_1_array = exp_1_array / denominator
    prob_2_array = exp_2_array / denominator
    prob_0_array = 1 - prob_1_array - prob_2_array

    # inverse-CDF draw: u falls in [0, P0), [P0, P0 + P1) or [P0 + P1, 1)
    u_array = np.random.uniform(0, 1, len(df_simulation))
    upper_0 = prob_0_array
    upper_1 = prob_0_array + prob_1_array

    df_simulation['d_0t'] = (u_array < upper_0).astype(int)
    df_simulation['d_1t'] = ((u_array >= upper_0) & (u_array < upper_1)).astype(int)
    df_simulation['d_2t'] = (u_array >= upper_1).astype(int)

    return df_simulation


# Part B. (1) MLE

In [353]:
def log_likelihood(theta, data_observable):
    """Negative log-likelihood of the three-option logit model.

    Parameters
    ----------
    theta : sequence of 3 floats
        (alpha_1, alpha_2, beta): the two option intercepts and the common
        price coefficient.
    data_observable : mapping of column name -> array-like
        Must provide 'p_1t', 'p_2t' (prices) and 'd_0t', 'd_1t', 'd_2t'
        (0/1 choice indicators), e.g. a DataFrame returned by DGP().

    Returns
    -------
    float
        Minus the summed log-probability of the observed choices, suitable
        for passing to a minimiser.

    Notes
    -----
    Log-probabilities are formed as v_j - log(1 + e^{v_1} + e^{v_2}) with
    ``np.logaddexp`` instead of log(exp(v_j) / (1 + ...)). The direct form
    overflows to inf/inf = nan for large utilities and yields 0 * log(0) = nan
    when a probability underflows, both of which can happen while an optimiser
    explores extreme parameter values.
    """
    alpha_1, alpha_2, beta = theta

    p_1_array = np.asarray(data_observable['p_1t'])
    p_2_array = np.asarray(data_observable['p_2t'])
    d_0t_array = np.asarray(data_observable['d_0t'])
    d_1t_array = np.asarray(data_observable['d_1t'])
    d_2t_array = np.asarray(data_observable['d_2t'])

    # deterministic utilities; the outside option is normalised to 0
    v_1_array = alpha_1 + beta * p_1_array
    v_2_array = alpha_2 + beta * p_2_array

    # log(1 + e^{v_1} + e^{v_2}) without ever materialising the exponentials
    log_denominator = np.logaddexp(np.logaddexp(0.0, v_1_array), v_2_array)

    log_prob_0_array = -log_denominator
    log_prob_1_array = v_1_array - log_denominator
    log_prob_2_array = v_2_array - log_denominator

    log_sum = (
        d_0t_array * log_prob_0_array
        + d_1t_array * log_prob_1_array
        + d_2t_array * log_prob_2_array
    ).sum()

    return -log_sum

In [354]:
# Simulate M independent panels once; the estimators below all reuse df_list.
df_list = [DGP() for _ in range(M)]

# Serial maximum-likelihood fit of every simulated panel.
# The loop variable is deliberately NOT called `df`: at module scope that
# would overwrite the `DataFrame as df` import alias and break any later
# code that still calls the alias (e.g. re-running this cell).
MLE_result_list = []
for df_sim in df_list:
    MLE_result = minimize(
        log_likelihood,
        [0.5, 0.5, -0.1],
        args=(df_sim,),  # one-element tuple; `(x)` without the comma is just x
        method='Nelder-Mead',
        options={'maxiter': 200},
    )
    MLE_result_list.append(MLE_result.x)

In [355]:
@ray.remote
def MLE_bootstrap(df):
    """Ray task: fit the logit model to one dataset by maximum likelihood.

    Minimises log_likelihood with Nelder-Mead (at most 200 iterations) from
    the starting point (0.5, 0.5, -0.1) and returns the optimiser's final
    parameter vector (alpha_1, alpha_2, beta).
    """
    start_theta = [0.5, 0.5, -0.1]
    solver_options = {'maxiter': 200}
    fit = minimize(
        log_likelihood,
        start_theta,
        args=(df,),
        method='Nelder-Mead',
        options=solver_options,
    )
    return fit.x

# Submit one MLE task per simulated dataset, then gather all estimates into
# an array with one row per dataset and one column per parameter.
MLE_list = [MLE_bootstrap.remote(panel) for panel in df_list]
MLE_array = np.array(ray.get(MLE_list))

In [356]:
# Monte Carlo average of each MLE parameter estimate (column order: alpha_1, alpha_2, beta).
for message, column in (
    ("average of MLE estimates for alpha_1: ", 0),
    ("average of MLE estimates for alpha_2: ", 1),
    ("average of MLE estimates for beta: ", 2),
):
    print(message, MLE_array[:, column].mean())

average of MLE estimates for alpha_1:  1.002000187083169
average of MLE estimates for alpha_2:  1.0006464707030414
average of MLE estimates for beta:  -0.09974230237470721


# Part B. (2) Bayesian

In [357]:
def log_likelihood2(theta, data_observable):
    """Log-likelihood of the three-option logit model (not negated).

    Parameters
    ----------
    theta : sequence of 3 floats
        (alpha_1, alpha_2, beta): the two option intercepts and the common
        price coefficient.
    data_observable : mapping of column name -> array-like
        Must provide 'p_1t', 'p_2t' (prices) and 'd_0t', 'd_1t', 'd_2t'
        (0/1 choice indicators).

    Returns
    -------
    float
        Summed log-probability of the observed choices.

    Notes
    -----
    Log-probabilities are formed as v_j - log(1 + e^{v_1} + e^{v_2}) with
    ``np.logaddexp`` instead of log(exp(v_j) / (1 + ...)). The direct form
    overflows to inf/inf = nan for large utilities and yields 0 * log(0) = nan
    when a probability underflows; a random-walk sampler can propose such
    parameter values.
    """
    alpha_1, alpha_2, beta = theta

    p_1_array = np.asarray(data_observable['p_1t'])
    p_2_array = np.asarray(data_observable['p_2t'])
    d_0t_array = np.asarray(data_observable['d_0t'])
    d_1t_array = np.asarray(data_observable['d_1t'])
    d_2t_array = np.asarray(data_observable['d_2t'])

    # deterministic utilities; the outside option is normalised to 0
    v_1_array = alpha_1 + beta * p_1_array
    v_2_array = alpha_2 + beta * p_2_array

    # log(1 + e^{v_1} + e^{v_2}) without ever materialising the exponentials
    log_denominator = np.logaddexp(np.logaddexp(0.0, v_1_array), v_2_array)

    log_prob_0_array = -log_denominator
    log_prob_1_array = v_1_array - log_denominator
    log_prob_2_array = v_2_array - log_denominator

    log_sum = (
        d_0t_array * log_prob_0_array
        + d_1t_array * log_prob_1_array
        + d_2t_array * log_prob_2_array
    ).sum()

    return log_sum

In [358]:
@ray.remote
def bayesian_bootstrap(df, n_draws=2000, proposal_sd=0.3, burn_in=0):
    """Ray task: random-walk Metropolis estimate of (alpha_1, alpha_2, beta).

    Starting from theta = (0.5, 0.5, 0), each iteration proposes
    theta + Normal(0, proposal_sd) noise on all three components and accepts
    it when log(u) <= loglik(candidate) - loglik(current), u ~ Uniform(0, 1).
    Only the likelihood enters the acceptance ratio, i.e. no prior term is
    added. The estimate is the mean of the retained draws.

    Parameters
    ----------
    df : DataFrame-like
        Dataset passed through to log_likelihood2.
    n_draws : int, default 2000
        Chain length. Results are sensitive to this choice.
    proposal_sd : float, default 0.3
        Standard deviation of the random-walk proposal. Results are sensitive
        to this choice.
    burn_in : int, default 0
        Number of initial draws discarded before averaging. The default keeps
        every draw, including those made while the chain is still moving away
        from its starting point.

    Returns
    -------
    tuple of 3 floats
        (alpha_1_hat, alpha_2_hat, beta_hat).

    Raises
    ------
    ValueError
        If burn_in is negative or leaves no draws to average.
    """
    if not 0 <= burn_in < n_draws:
        raise ValueError("burn_in must satisfy 0 <= burn_in < n_draws")

    theta_pre = np.array([0.5, 0.5, 0])
    # The current state's log-likelihood only changes on acceptance, so it is
    # carried along instead of being recomputed every iteration.
    llh_pre = log_likelihood2(theta_pre, df)
    draw_list = []

    for _ in range(n_draws):
        theta_candidate = theta_pre + np.random.normal(0, proposal_sd, 3)
        mu = np.random.uniform(0, 1)  # scalar draw, safe to hand to math.log
        llh_candidate = log_likelihood2(theta_candidate, df)

        # uniform() samples [0, 1); treat an exact 0 as log(0) = -inf
        log_mu = math.log(mu) if mu > 0 else -math.inf

        if log_mu <= llh_candidate - llh_pre:
            theta_pre, llh_pre = theta_candidate, llh_candidate

        draw_list.append(theta_pre)

    draws = np.array(draw_list)[burn_in:]
    alpha_1_hat = draws[:, 0].mean()
    alpha_2_hat = draws[:, 1].mean()
    beta_hat = draws[:, 2].mean()

    return alpha_1_hat, alpha_2_hat, beta_hat

# Submit one sampler task per simulated dataset, then gather the posterior-mean
# estimates into an array with one row per dataset and one column per parameter.
bootstrap_list = [bayesian_bootstrap.remote(panel) for panel in df_list]
bootstrap_array = np.array(ray.get(bootstrap_list))

In [359]:
# Monte Carlo average of each Bayesian estimate (column order: alpha_1, alpha_2, beta).
for message, column in (
    ("average of Bayesian estimates for alpha_1: ", 0),
    ("average of Bayesian estimates for alpha_2: ", 1),
    ("average of Bayesian estimates for beta: ", 2),
):
    print(message, bootstrap_array[:, column].mean())

average of Bayesian estimates for alpha_1:  0.9884759132770097
average of Bayesian estimates for alpha_2:  0.9876565199373617
average of Bayesian estimates for beta:  -0.09366364118889657
