In [32]:
import pandas as pd
import numpy as np

In [33]:
# Load the conjoint survey responses from a path relative to the notebook.
# The preview below suggests one row per respondent (resp.id) / question (ques) /
# alternative (alt), with `choice` as a 0/1 indicator — TODO confirm against the data dictionary.
data = pd.read_csv('../data/conjoint.csv')
data.head()

Unnamed: 0,resp.id,ques,alt,carpool,seat,cargo,eng,price,choice
0,1,1,1,yes,6,2ft,gas,35,0
1,1,1,2,yes,8,3ft,hyb,30,0
2,1,1,3,yes,6,3ft,gas,30,1
3,1,2,1,yes,6,2ft,gas,30,0
4,1,2,2,yes,7,3ft,gas,35,1


In [34]:
 # Summary statistics for the numeric columns.
 data.describe()

Unnamed: 0,resp.id,ques,alt,seat,price,choice
count,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0
mean,100.5,8.0,2.0,6.995444,35.003889,0.333333
std,57.737513,4.320734,0.816542,0.817005,4.083728,0.471431
min,1.0,1.0,1.0,6.0,30.0,0.0
25%,50.75,4.0,1.0,6.0,30.0,0.0
50%,100.5,8.0,2.0,7.0,35.0,0.0
75%,150.25,12.0,3.0,8.0,40.0,1.0
max,200.0,15.0,3.0,8.0,40.0,1.0


In [35]:
# Row counts per price level, split by the choice indicator
# (presumably 1 = alternative chosen, 0 = not chosen — confirm coding).
pd.crosstab(data['price'], data['choice'])

choice,0,1
price,Unnamed: 1_level_1,Unnamed: 2_level_1
30,1512,1486
35,2041,956
40,2447,558


In [36]:
# Row counts per cargo level, split by the choice indicator
# (presumably 1 = alternative chosen, 0 = not chosen — confirm coding).
pd.crosstab(data['cargo'], data['choice'])

choice,0,1
cargo,Unnamed: 1_level_1,Unnamed: 2_level_1
2ft,3189,1312
3ft,2811,1688


In [37]:
# Store the discrete product attributes as pandas Categorical columns.
# This only changes the column dtype; no dummy/indicator columns are created here.
for attribute_column in ("seat", "cargo", "eng", "price"):
    data[attribute_column] = pd.Categorical(data[attribute_column])
data.head()

Unnamed: 0,resp.id,ques,alt,carpool,seat,cargo,eng,price,choice
0,1,1,1,yes,6,2ft,gas,35,0
1,1,1,2,yes,8,3ft,hyb,30,0
2,1,1,3,yes,6,3ft,gas,30,1
3,1,2,1,yes,6,2ft,gas,30,0
4,1,2,2,yes,7,3ft,gas,35,1


In [38]:
# Inspect the category levels that were inferred for the price column.
data['price'].cat.categories

Index([30, 35, 40], dtype='int64')

In [39]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm

# `data` is passed to statsmodels as-is, in long format: the preview above shows
# one row per respondent (resp.id) / question (ques) / alternative (alt), with
# `choice` as a 0/1 indicator.

# Model 1: no explicit intercept (`0 +`); price enters as a Categorical column.
# NOTE(review): with the intercept removed, the summary reports a coefficient for
# every seat level (seat[6], seat[7], ...), i.e. seat is not coded against a reference level.
m1_formula = 'choice ~  0 + seat + cargo + eng + price'
m1 = smf.mnlogit(formula=m1_formula, data=data).fit()
print(m1.summary())




Optimization terminated successfully.
         Current function value: 0.558645
         Iterations 6
                          MNLogit Regression Results                          
Dep. Variable:                 choice   No. Observations:                 9000
Model:                        MNLogit   Df Residuals:                     8992
Method:                           MLE   Df Model:                            7
Date:                Tue, 12 Mar 2024   Pseudo R-squ.:                  0.1223
Time:                        09:03:44   Log-Likelihood:                -5027.8
converged:                       True   LL-Null:                       -5728.6
Covariance Type:            nonrobust   LLR p-value:                1.702e-298
    choice=1       coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
seat[6]         -0.6688      0.067     -9.908      0.000      -0.801      -0.536
seat[7]         -1.1939

In [40]:
# Model 2 (m2): same terms as Model 1, but with price as a numeric regressor.
# `price` was converted to a Categorical column earlier, so build a numeric copy.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
data['price_numeric'] = pd.to_numeric(data['price'], errors='coerce')

m2_formula = 'choice ~ 0 + seat + cargo + eng + price_numeric'
m2 = smf.mnlogit(formula=m2_formula, data=data).fit()
print(m2.summary())

Optimization terminated successfully.
         Current function value: 0.558663
         Iterations 6
                          MNLogit Regression Results                          
Dep. Variable:                 choice   No. Observations:                 9000
Model:                        MNLogit   Df Residuals:                     8993
Method:                           MLE   Df Model:                            6
Date:                Tue, 12 Mar 2024   Pseudo R-squ.:                  0.1223
Time:                        09:03:44   Log-Likelihood:                -5028.0
converged:                       True   LL-Null:                       -5728.6
Covariance Type:            nonrobust   LLR p-value:                1.252e-299
     choice=1       coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
seat[6]           4.0975      0.217     18.861      0.000       3.672       4.523
seat[7]           3.

In [41]:
# Willingness to pay for 3ft cargo relative to the reference cargo level:
# the cargo coefficient divided by the negated price coefficient.
# The /1000 rescales the result — presumably price is recorded in thousands of
# dollars, making this a dollar amount; TODO confirm units.
# The result is a one-element Series because m2.params has a single column (label 0).
m2.params.loc['cargo[T.3ft]'] / (-m2.params.loc['price_numeric'] / 1000)

0    2755.802315
dtype: float64

In [42]:
# m2.params has a single column labelled 0, so to_dict() nests the fitted
# coefficients as {0: {term: value}}.
coeff = m2.params.to_dict()

# Model terms grouped by the product attribute they belong to.
attrarray = dict(
    seat=['seat[6]', 'seat[7]', 'seat[8]'],
    cargo=['cargo[T.3ft]'],
    eng=['eng[T.gas]', 'eng[T.hyb]'],
    price=['price_numeric'],
)

# Flatten the grouping into one {term: coefficient} mapping, keeping the
# attribute order and, within each attribute, the level order given above.
part_worth_utility = {
    term: coeff[0][term]
    for terms in attrarray.values()
    for term in terms
}

part_worth_utility

{'seat[6]': 4.097493799457214,
 'seat[7]': 3.572742098004901,
 'seat[8]': 3.804408972667818,
 'cargo[T.3ft]': 0.4385383126937028,
 'eng[T.gas]': 1.4346800159644666,
 'eng[T.hyb]': 0.6741910088171773,
 'price_numeric': -0.15913271800301496}

In [43]:
# Divisor shared by every term: the negated price coefficient, scaled by 1000
# (presumably to convert from thousands of dollars to dollars — TODO confirm units).
price_scale = -part_worth_utility['price_numeric'] / 1000

# Willingness to pay for every non-price term.
# NOTE(review): the seat[...] terms are absolute levels (the model has no intercept
# and no seat reference level), so only differences between seat values look
# interpretable as WTP — confirm before reporting them.
willingness_to_pay = {
    term: utility / price_scale
    for term, utility in part_worth_utility.items()
    if term != 'price_numeric'
}

willingness_to_pay

{'seat[6]': 25748.908526652467,
 'seat[7]': 22451.33585877174,
 'seat[8]': 23907.145057346024,
 'cargo[T.3ft]': 2755.802315180679,
 'eng[T.gas]': 9015.619377137044,
 'eng[T.hyb]': 4236.658666286363}

In [44]:
import pymc as pm
import arviz as az
import pytensor.tensor as pt

from matplotlib import pyplot as plt
from matplotlib.lines import Line2D

In [45]:
# Notebook-wide display and reproducibility settings.
%config InlineBackend.figure_format = 'retina'  # high resolution figures
# Apply the ArviZ dark-grid plotting style to subsequent figures.
az.style.use("arviz-darkgrid")
# Seeded NumPy generator so random draws made through `rng` are reproducible.
rng = np.random.default_rng(42)

In [46]:
# Display the frame again; it now also carries the price_numeric column.
data

Unnamed: 0,resp.id,ques,alt,carpool,seat,cargo,eng,price,choice,price_numeric
0,1,1,1,yes,6,2ft,gas,35,0,35
1,1,1,2,yes,8,3ft,hyb,30,0,30
2,1,1,3,yes,6,3ft,gas,30,1,30
3,1,2,1,yes,6,2ft,gas,30,0,30
4,1,2,2,yes,7,3ft,gas,35,1,35
...,...,...,...,...,...,...,...,...,...,...
8995,200,14,2,no,7,3ft,gas,35,1,35
8996,200,14,3,no,7,3ft,hyb,35,0,35
8997,200,15,1,no,7,2ft,gas,35,0,35
8998,200,15,2,no,8,3ft,elec,40,0,40


In [17]:
# Number of rows in the long-format frame.
N = len(data)
# Integer codes for the observed choice indicator.
observed = pd.Categorical(data['choice']).codes
# Zero-based integer index per respondent (in order of first appearance),
# plus the distinct resp.id values those codes refer to.
person_index, uniques = data['resp.id'].factorize()

In [66]:
# Integer category codes for the seat attribute (one code per row).
pd.Categorical(data['seat']).codes

array([0, 2, 0, ..., 1, 2, 0], dtype=int8)

In [None]:
# Named coordinates for the model dimensions.
# Each label must be its own string: the previous value 'seat, cargo, eng' was a
# single string, which produced a dimension of length 1 instead of 3.
# NOTE(review): confirm that attribute names (rather than the alternatives
# themselves) are the intended labels for "alt_intercepts".
coords = {
    "alt_intercepts": ['seat', 'cargo', 'eng']
}

In [71]:
# Attributes to expand into indicator columns; the first level of each is dropped
# (drop_first=True) and acts as the reference level.
dummy_source_columns = ['carpool', 'seat', 'cargo', 'eng']

# Build the design frame from `data` in one chain. The categorical `price` column
# is dropped here; `price_numeric` stays in the frame.
df = pd.get_dummies(data, columns=dummy_source_columns, drop_first=True).drop(columns='price')

# Indicator columns are named "<attribute>_<level>", so select them by prefix
# and cast them to integer dtype.
predictor_columns = [name for name in df.columns if name.startswith(tuple(dummy_source_columns))]
df = df.astype({name: 'int' for name in predictor_columns})
df.head()

Unnamed: 0,resp.id,ques,alt,choice,price_numeric,carpool_yes,seat_7,seat_8,cargo_3ft,eng_gas,eng_hyb
0,1,1,1,0,35,1,0,0,0,1,0
1,1,1,2,0,30,1,0,1,1,0,1
2,1,1,3,1,30,1,0,0,1,1,0
3,1,2,1,0,30,1,0,0,0,1,0
4,1,2,2,1,35,1,1,0,1,1,0


In [72]:
# Confirm the column dtypes of the encoded frame. The cell previously read `df.`,
# a dangling attribute access that is a SyntaxError; the recorded output is the
# dtype listing.
df.dtypes

resp.id          int64
ques             int64
alt              int64
choice           int64
price_numeric    int64
carpool_yes      int64
seat_7           int64
seat_8           int64
cargo_3ft        int64
eng_gas          int64
eng_hyb          int64
dtype: object