In [None]:
import os

import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.optimize import minimize

# Repository root, relative to the directory this notebook is started from.
root = "../../Foundation_of_Advanced_Quantitative_Marketing_Python"

# Only change directory when we are not already inside the repository root.
# The path is relative, so without this guard re-running the cell (without a
# kernel restart) would apply "../../" a second time from the new working
# directory and either fail or land in the wrong place.
if os.path.basename(os.getcwd()) != os.path.basename(root):
    os.chdir(root)

# Imported after the chdir: presumably `src` is resolved from the working
# directory, which is why the original cell changed directory first.
import src.logit_boost as lgt

# Simulation

In [None]:
# ---- Design of the simulated choice experiment ----
num_individuals = 1000  # I: number of individuals
num_tasks = 5           # T: choice tasks answered by each individual
num_alts = 4            # J: alternatives shown in each task
num_features = 1        # K: non-price attributes

# ---- Data-generating ("true") parameters ----
beta_const = 0.0           # no outside option; alternatives differ only by attributes
beta_x = np.array([1.5])   # fixed coefficient on x1
mu_alpha = -2.0            # mean of the random price coefficient
sigma_alpha = 1.0          # std dev of the random price coefficient

# Seed the global NumPy generator; the order of the random calls below
# determines the simulated data, so it must not be rearranged.
np.random.seed(42)

# One price coefficient per individual.
alpha_i = np.random.normal(mu_alpha, sigma_alpha, size=num_individuals)

# Row bookkeeping for the long format (one row per individual x task x alternative).
rows_per_individual = num_tasks * num_alts
num_rows = num_individuals * rows_per_individual

ind_ids = np.repeat(np.arange(num_individuals), rows_per_individual)
task_ids = np.tile(np.repeat(np.arange(num_tasks), num_alts), num_individuals)
alt_ids = np.tile(np.arange(num_alts), num_individuals * num_tasks)

# Attributes: x1 ~ N(0, 1), price ~ Uniform(1, 10).
x_nonprice = np.random.normal(0, 1, size=(num_rows, num_features))
price = np.random.uniform(1, 10, size=num_rows)

# Indicator that is 1 on the row of the alternative picked in each task.
chosen = np.zeros(num_rows, dtype=int)

for person, alpha in enumerate(alpha_i):
    for task in range(num_tasks):
        first_row = person * rows_per_individual + task * num_alts
        rows = slice(first_row, first_row + num_alts)

        # Utility = beta_x * x + alpha_i * price + Gumbel(0, 1) shock.
        systematic = x_nonprice[rows].dot(beta_x) + alpha * price[rows]
        shocks = np.random.gumbel(0, 1, size=num_alts)

        # The individual picks the utility-maximising alternative.
        winner = (systematic + shocks).argmax()
        chosen[first_row + winner] = 1

# Assemble the long-format table.
data = pd.DataFrame({
    'ind_id': ind_ids,
    'task_id': task_ids,
    'alt_id': alt_ids,
    'chosen': chosen,
    'price': price,
    'x1': x_nonprice[:, 0],  # the single non-price feature
})

data.head()

(   ind_id  task_id  alt_id  chosen     price        x1
 0       0        0       0       1  5.492618  1.399355
 1       0        0       1       0  6.430066  0.924634
 2       0        0       2       0  9.644963  0.059630
 3       0        0       3       0  8.966734 -0.646937
 4       0        1       0       0  1.980172  0.698223,
              ind_id       task_id        alt_id        chosen         price  \
 count  20000.000000  20000.000000  20000.000000  20000.000000  20000.000000   
 mean     499.500000      2.000000      1.500000      0.250000      5.473366   
 std      288.682207      1.414249      1.118062      0.433024      2.596346   
 min        0.000000      0.000000      0.000000      0.000000      1.000050   
 25%      249.750000      1.000000      0.750000      0.000000      3.213601   
 50%      499.500000      2.000000      1.500000      0.000000      5.468725   
 75%      749.250000      3.000000      2.250000      0.250000      7.724986   
 max      999.000000   

# Random Coefficients Model (RCM)

In [9]:
# Number of alternatives per choice task. The simulated design has four
# symmetric alternatives and no outside option, so this is just the number of
# distinct alternative ids in the data.
p = data["alt_id"].nunique()
covariates = ["price", "x1"]

# Long format -> (tasks, alternatives, covariates). This relies on `data` being
# ordered by individual, then task, then alternative, with exactly `p` rows per
# task; the task count is inferred rather than hard-coded.
X = data[covariates].values.reshape(-1, p, len(covariates))

# Position (0..p-1) of the chosen alternative within each task.
chosen_by_task = data["chosen"].values.reshape(-1, p)
assert (chosen_by_task.sum(axis=1) == 1).all(), "each task must have exactly one chosen alternative"
y = chosen_by_task.argmax(axis=1)

# One individual id per task: every row of a task shares it, so take the first.
indiv_id = data["ind_id"].values.reshape(-1, p)[:, 0]

X.shape, y.shape, indiv_id.shape

((5000, 4, 2), (5000,), (5000,))

In [10]:
# Per-covariate flag aligned with the last axis of X (column 0 = price,
# column 1 = x1). Judging by the fitted standard deviations printed below,
# a 1 appears to mark a coefficient held fixed across individuals and a 0 a
# random one, so only price is random here -- confirm against
# src.logit_boost.
homo_covariates = np.array([0, 1])

model = lgt.RandomCoefficientsModel()
model.fit(
    X,
    y,
    indiv_id=indiv_id,
    draws=500,
    niteration=1,
    homo_covariates=homo_covariates,
)

Current NLL: 2308.122890519869
Current NLL: 2308.1228904310046
Maximized LL: -2308.1228904310046
The means of the coefficients are: [-2.00460499  1.52849088]
The standard deviations of the coefficients are: [1.02925884 0.        ]
The covariance matrix of the coefficients is: [[1.05937375 0.        ]
 [0.         0.        ]]


<src.logit_boost.RandomCoefficientsModel at 0x169f2aaa0>