In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
print(pd.__version__)
print(np.__version__)
print(sm.__version__)

2.0.2
1.24.4
0.14.0


In [3]:
data = pd.read_csv('data/conjoint.csv')
data.head()

Unnamed: 0,resp.id,ques,alt,carpool,seat,cargo,eng,price,choice
0,1,1,1,yes,6,2ft,gas,35,0
1,1,1,2,yes,8,3ft,hyb,30,0
2,1,1,3,yes,6,3ft,gas,30,1
3,1,2,1,yes,6,2ft,gas,30,0
4,1,2,2,yes,7,3ft,gas,35,1


In [4]:
data.describe()

Unnamed: 0,resp.id,ques,alt,seat,price,choice
count,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0
mean,100.5,8.0,2.0,6.995444,35.003889,0.333333
std,57.737513,4.320734,0.816542,0.817005,4.083728,0.471431
min,1.0,1.0,1.0,6.0,30.0,0.0
25%,50.75,4.0,1.0,6.0,30.0,0.0
50%,100.5,8.0,2.0,7.0,35.0,0.0
75%,150.25,12.0,3.0,8.0,40.0,1.0
max,200.0,15.0,3.0,8.0,40.0,1.0


In [5]:
pd.crosstab(data['price'], data['choice'])

choice,0,1
price,Unnamed: 1_level_1,Unnamed: 2_level_1
30,1512,1486
35,2041,956
40,2447,558


In [6]:
pd.crosstab(data['cargo'], data['choice'])

choice,0,1
cargo,Unnamed: 1_level_1,Unnamed: 2_level_1
2ft,3189,1312
3ft,2811,1688


In [7]:
# Convert categorical variables to dummy variables
data["seat"] = pd.Categorical(data["seat"])
data["cargo"] = pd.Categorical(data["cargo"])
data["eng"] = pd.Categorical(data["eng"])
data["price"] = pd.Categorical(data["price"])
data.head()

Unnamed: 0,resp.id,ques,alt,carpool,seat,cargo,eng,price,choice
0,1,1,1,yes,6,2ft,gas,35,0
1,1,1,2,yes,8,3ft,hyb,30,0
2,1,1,3,yes,6,3ft,gas,30,1
3,1,2,1,yes,6,2ft,gas,30,0
4,1,2,2,yes,7,3ft,gas,35,1


In [8]:
data['price'].cat.categories

Index([30, 35, 40], dtype='int64')

In [9]:
# Model 1: Without intercept
m1_formula = 'choice ~  0 + seat + cargo + eng + price'
m1 = smf.mnlogit(formula=m1_formula, data=data).fit()
print(m1.summary())




Optimization terminated successfully.
         Current function value: 0.558645
         Iterations 6
                          MNLogit Regression Results                          
Dep. Variable:                 choice   No. Observations:                 9000
Model:                        MNLogit   Df Residuals:                     8992
Method:                           MLE   Df Model:                            7
Date:                Fri, 22 Mar 2024   Pseudo R-squ.:                  0.1223
Time:                        09:22:02   Log-Likelihood:                -5027.8
converged:                       True   LL-Null:                       -5728.6
Covariance Type:            nonrobust   LLR p-value:                1.702e-298
    choice=1       coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
seat[6]         -0.6688      0.067     -9.908      0.000      -0.801      -0.536
seat[7]         -1.1939

In [10]:
# Model 2: With numeric conversion of price
# First, ensure price is numeric in the DataFrame, this might require conversion
# if 'price' is not already an appropriate numeric type.
data['price_numeric'] = pd.to_numeric(data['price'], errors='coerce')

m2_formula = 'choice ~ 0 + seat + cargo + eng + price_numeric'
m2 = smf.mnlogit(formula=m2_formula, data=data).fit()
print(m2.summary())

Optimization terminated successfully.
         Current function value: 0.558663
         Iterations 6
                          MNLogit Regression Results                          
Dep. Variable:                 choice   No. Observations:                 9000
Model:                        MNLogit   Df Residuals:                     8993
Method:                           MLE   Df Model:                            6
Date:                Fri, 22 Mar 2024   Pseudo R-squ.:                  0.1223
Time:                        09:22:02   Log-Likelihood:                -5028.0
converged:                       True   LL-Null:                       -5728.6
Covariance Type:            nonrobust   LLR p-value:                1.252e-299
     choice=1       coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
seat[6]           4.0975      0.217     18.861      0.000       3.672       4.523
seat[7]           3.

In [11]:
m2.params.loc['cargo[T.3ft]'] / (-m2.params.loc['price_numeric'] / 1000)

0    2755.802315
dtype: float64

In [12]:
coeff = m2.params.to_dict()
attrarray = {
    'seat': ['seat[6]', 'seat[7]', 'seat[8]'],
    'cargo': ['cargo[T.3ft]'],
    'eng': ['eng[T.gas]', 'eng[T.hyb]'],
    'price': ['price_numeric']
}

part_worth_utility = {}

for attribute in attrarray:
    for level in attrarray[attribute]:
        part_worth_utility[level] = coeff[0][level]

part_worth_utility

{'seat[6]': 4.097493799457214,
 'seat[7]': 3.572742098004901,
 'seat[8]': 3.804408972667818,
 'cargo[T.3ft]': 0.4385383126937028,
 'eng[T.gas]': 1.4346800159644666,
 'eng[T.hyb]': 0.6741910088171773,
 'price_numeric': -0.15913271800301496}

In [13]:
willingness_to_pay = {}

for attribute in part_worth_utility:
    if attribute != 'price_numeric':
        willingness_to_pay[attribute] = part_worth_utility[attribute] / (-part_worth_utility['price_numeric'] / 1000)

willingness_to_pay

{'seat[6]': 25748.908526652467,
 'seat[7]': 22451.33585877174,
 'seat[8]': 23907.145057346024,
 'cargo[T.3ft]': 2755.802315180679,
 'eng[T.gas]': 9015.619377137044,
 'eng[T.hyb]': 4236.658666286363}

In [14]:
part_worth_utility.keys()

dict_keys(['seat[6]', 'seat[7]', 'seat[8]', 'cargo[T.3ft]', 'eng[T.gas]', 'eng[T.hyb]', 'price_numeric'])

In [15]:
def sensitivity_mnl(model, attrib, base_data, competitor_data):
    # Function for creating data for a share-sensitivity chart
    # model: Logit object returned by Logit() function
    # attrib: list of vectors with attribute levels to be used in sensitivity
    # base_data: DataFrame containing baseline design of target product
    # competitor_data: DataFrame containing design of competitive set

    data = pd.concat([base_data, competitor_data])
    base_share = model.predict(data.iloc[0, :])[0]
    share = []
    for a in range(len(attrib)):
        for i in attrib[a]:
            data.iloc[0, :] = base_data.iloc[0, :]
            data.iloc[0, a] = i
            share.append(model.predict(data.iloc[0, :])[0])
    return pd.DataFrame({'level': np.concatenate(attrib), 'share': share, 'increase': np.array(share) - base_share})

In [16]:
import itertools

# Assuming 'data' is your DataFrame
columns = ['seat', 'cargo', 'eng', 'price_numeric']

# Get all combinations of the values in the columns
combinations = list(itertools.product(*[data[col].unique() for col in columns]))

# Print each combination
combinations_df = pd.DataFrame(combinations, columns=columns)
combinations_df.head()

Unnamed: 0,seat,cargo,eng,price_numeric
0,6,2ft,gas,35
1,6,2ft,gas,30
2,6,2ft,gas,40
3,6,2ft,hyb,35
4,6,2ft,hyb,30


In [18]:
base_data = combinations_df.iloc[0, :]
competitor_data = combinations_df.iloc[10:20, :]

base_data_df = pd.DataFrame([base_data], columns=data.columns)
competitor_data_df = pd.DataFrame(competitor_data, columns=data.columns)

In [47]:
# Use the DataFrame for prediction
base_share = m2.predict(base_data_df)[0]
competitor_shares = m2.predict(competitor_data_df)[0]

share = []
for i in competitor_shares.index:
    share.append(competitor_shares[i] - base_share[0])

share

[-0.27730939545990185,
 0.08802201108042829,
 0.07953377886111435,
 -0.1167298439363526,
 0.2510718633714466,
 0.2282490221864092,
 0.0498415235995916,
 0.3523087949885766,
 0.07252795576189297,
 -0.12359183601475343]

In [48]:
prediction = m2.predict(base_data_df)