In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame as df
from scipy.stats import norm

In [50]:
# define true parameters
I = 100
T = 10
alpha_1 = 5
alpha_2 = 3
beta_bar = -1
sigma_beta = 0.5
price_bar = 1
sigma_price = 0.5

# bayesian estimation parameter
search_step = 0.02

In [51]:
def multiplyAll(s:list) -> float:
    ans = 1
    for n in s:
        ans *= n
    return ans

## 1. Simulate Data

In [52]:
# supply side data
# t X price_1 X price_2
time_frame = np.array(range(1, T+1))
price_1 = np.random.normal(price_bar, sigma_price, T)
price_2 = np.random.normal(price_bar, sigma_price, T)
df_supply_side = df({"t": time_frame, "price_1": price_1, "price_2": price_2})

In [53]:
# individual level data
# i X t X beta_i
individual_frame = []
time_frame = []
beta_frame = []

for i in range(1, I+1):
    beta_i = np.random.normal(beta_bar, sigma_beta, 1)[0]
    for t in range(1, T+1):
        individual_frame.append(i)
        time_frame.append(t)
        beta_frame.append(beta_i)

df_individual_side = df({"i": individual_frame, "t": time_frame, "beta_i": beta_frame})

In [54]:
# discrete choice data
# i X t X price_1 X price_2 X buy_product_1 X buy_product_2
# (1) pre choice decision
df_discrete_choice = pd.merge(df_individual_side, df_supply_side, left_on="t", right_on="t")

# (2) attach the choice decision
exp_product_1 = np.exp(alpha_1 + df_discrete_choice['beta_i'].values*df_discrete_choice['price_1'].values)
exp_product_2 = np.exp(alpha_2 + df_discrete_choice['beta_i'].values*df_discrete_choice['price_2'].values)

prob_product_1 = exp_product_1/(1+exp_product_1+exp_product_2)
prob_product_2 = exp_product_2/(1+exp_product_1+exp_product_2)
prob_outer = 1 - prob_product_1 - prob_product_2

### decide the product decision (let the error term choose the product)
random_u = np.random.uniform(low=0, high=1, size= I*T)

d_0 = (random_u <= prob_outer).astype(int)
d_1 = ((random_u <= prob_outer + prob_product_1) * (random_u > prob_outer)).astype(int)
d_2 = (random_u > prob_outer + prob_product_1).astype(int)

df_discrete_choice['d_0'] = d_0
df_discrete_choice['d_1'] = d_1
df_discrete_choice['d_2'] = d_2

## 2. Bayesian Estimation

In [55]:
# data structure
# i X t X price_1 X price_2 X d_0 X d_1 X d_2
df_visible_to_researcher = df_discrete_choice[['i', 't', 'price_1', 'price_2', 'd_0', 'd_1', 'd_2']]
df_visible_to_researcher

Unnamed: 0,i,t,price_1,price_2,d_0,d_1,d_2
0,1,1,1.389232,0.732722,1,0,0
1,2,1,1.389232,0.732722,0,1,0
2,3,1,1.389232,0.732722,0,0,1
3,4,1,1.389232,0.732722,0,0,1
4,5,1,1.389232,0.732722,0,0,1
...,...,...,...,...,...,...,...
995,96,10,1.106478,0.183926,0,1,0
996,97,10,1.106478,0.183926,0,1,0
997,98,10,1.106478,0.183926,0,0,1
998,99,10,1.106478,0.183926,0,1,0


In [56]:
# define beta_bar_init, var_beta_init, beta_i_init, alpha_1_init, alpha_2_init
# i.e, set the hyper parameters
beta_bar_init = -0.5
var_beta_init = 0.3
beta_i_init = np.random.normal(beta_bar_init, np.sqrt(var_beta_init), I)
alpha_1_init = 2
alpha_2_init = 2

In [57]:
counter = 0

beta_bar_history = []
var_beta_history = []
alpha_1_history = []
alpha_2_history = []

while (counter <= 3000):
    # STEP (a) 
    beta_bar_post = np.random.normal(loc=beta_i_init.mean(), scale=np.sqrt(var_beta_init/I), size=1)[0]

    # STEP (b)
    IG_first_moment = 1+I
    s_bar = ((beta_i_init - beta_bar_post)**2).mean()
    IG_second_moment = (1 + I*s_bar)/(1+I) #WRONG
    eta_array = np.random.normal(loc=0, scale=1, size=IG_first_moment)
    r = (eta_array**2/IG_second_moment).mean()
    var_beta_post = 1/r

    # STEP (c)
    # likelihood function 
    def likelihood(beta_i, alpha_1, alpha_2, df_i):
        exp_product_1 = np.exp(alpha_1 + beta_i*df_i['price_1'].values)
        exp_product_2 = np.exp(alpha_2 + beta_i*df_i['price_2'].values)

        prob_product_1 = exp_product_1/(1+exp_product_1+exp_product_2)
        prob_product_2 = exp_product_2/(1+exp_product_1+exp_product_2)
        prob_outer = 1 - prob_product_1 - prob_product_2

        likelihood_array = df_i['d_0'].values * prob_outer + df_i['d_1'].values * prob_product_1 + df_i['d_2'].values * prob_product_2
        llh_array = np.log(likelihood_array)
        llh = llh_array.sum() + np.log(norm.pdf(beta_i, loc=beta_bar_post, scale = np.sqrt(var_beta_post)))

        return llh

    beta_i_post = []
    for i in range(I):
        beta_i_init_individual = beta_i_init[i]
        beta_i_candidate = beta_i_init_individual + np.random.normal(loc=0, scale=0.1, size=1)[0]
        df_i = df_visible_to_researcher.loc[df_visible_to_researcher['i']==i]

        # likelihood of the pre beta
        llh_init = likelihood(beta_i_init_individual, alpha_1_init, alpha_2_init, df_i)

        # likelihood of the candidate beta
        llh_post = likelihood(beta_i_candidate, alpha_1_init, alpha_2_init, df_i)

        # accept/reject 
        l_acc_prob = min(0, llh_post-llh_init)
        mu = np.random.uniform(low=0, high=1, size=1)[0]
        lmu = np.log(mu)
        
        if lmu <= l_acc_prob:
            beta_i_post_individual = beta_i_candidate
        else:
            beta_i_post_individual = beta_i_init_individual

        beta_i_post.append(beta_i_post_individual)

    beta_i_post = np.array(beta_i_post)

    
    # STEP (d)
    df_beta_i = df({"i": list(range(1,I+1)), "beta_i": beta_i_post})
    df_update_alpha = pd.merge(df_visible_to_researcher, df_beta_i, left_on='i', right_on='i')

    def likelihood_alpha(alpha_1, alpha_2, df):
        exp_product_1 = np.exp(alpha_1 + df['beta_i'].values * df['price_1'].values)
        exp_product_2 = np.exp(alpha_2 + df['beta_i'].values * df['price_2'].values)

        prob_product_1 = exp_product_1/(1+exp_product_1+exp_product_2) 
        prob_product_2 = exp_product_2/(1+exp_product_1+exp_product_2) 
        prob_outer = 1 - prob_product_1 - prob_product_2

        prob_product_1 = prob_product_1 
        prob_product_2 = prob_product_2 
        prob_outer = prob_outer 

        likelihood_array = df['d_0'].values * prob_outer + df['d_1'].values * prob_product_1 + df['d_2'].values * prob_product_2
        llh_array = np.log(likelihood_array)
        llh = sum(llh_array)

        return llh

    alpha_1_candidate = alpha_1_init + np.random.normal(loc=0, scale=0.3, size=1)[0]

    # likelihood of the pre alpha
    llh_init = likelihood_alpha(alpha_1_init, alpha_2_init, df_update_alpha)

    # likelihood of the alpha_candidate
    llh_post = likelihood_alpha(alpha_1_candidate, alpha_2_init, df_update_alpha)

    # accept/reject
    l_acc_prob = min(0, llh_post-llh_init)
    mu = np.random.uniform(low=0, high=1, size=1)[0]
    lmu = np.log(mu)
    
    if lmu <= l_acc_prob:
        alpha_1_post = alpha_1_candidate
    else:
        alpha_1_post = alpha_1_init

    
    # STEP (e)
    alpha_2_candidate = alpha_2_init + np.random.normal(loc=0, scale=0.3, size=1)[0]

    # likelihood of the pre alpha
    llh_init = likelihood_alpha(alpha_1_post, alpha_2_init, df_update_alpha)

    # likelihood of the alpha_candidate
    llh_post = likelihood_alpha(alpha_1_post, alpha_2_candidate, df_update_alpha)

    # accept/reject
    l_acc_prob = min(0, llh_post-llh_init)
    mu = np.random.uniform(low=0, high=1, size=1)[0]
    lmu = np.log(mu)
    
    if lmu <= l_acc_prob:
        alpha_2_post = alpha_2_candidate
    else:
        alpha_2_post = alpha_2_init

    # Save the results
    beta_bar_history.append(beta_bar_post)
    var_beta_history.append(var_beta_post)
    alpha_1_history.append(alpha_1_post)
    alpha_2_history.append(alpha_2_post)
    
    # Update the inits
    beta_bar_init = beta_bar_post
    var_beta_init = var_beta_post
    beta_i_init = beta_i_post
    alpha_1_init = alpha_1_post
    alpha_2_init = alpha_2_post

    # add 1 to counter
    counter += 1 

In [58]:
np.array(beta_bar_history[700:]).mean()

-1.0456390620758769

In [59]:
np.array(var_beta_history[700:]).mean()

0.23392318264797388

In [60]:
np.array(alpha_1_history[700:]).mean()

5.070429970165814

In [61]:
np.array(alpha_2_history[700:]).mean()

2.9947221575341825