### Multinomial Model


In [11]:
import statsmodels.api as sm
import pandas as pd
import pylogit as pl
from collections import OrderedDict
import numpy as np

In [2]:
sport_car_df = pd.read_csv('sportcar.csv').iloc[:,1:]
sport_car_df.head()

Unnamed: 0,resp_id,ques,alt,segment,seat,trans,convert,price,choice
0,1,1,1,basic,2,manual,yes,35,0
1,1,1,2,basic,5,auto,no,40,0
2,1,1,3,basic,5,auto,no,30,1
3,1,2,1,basic,5,manual,no,35,0
4,1,2,2,basic,2,manual,no,30,1


In [3]:
condition = (sport_car_df['trans'] == 'auto' ) & (sport_car_df['choice'] == 1)
auto_trans_chosen = sport_car_df[condition]
auto_trans_chosen = auto_trans_chosen.shape[0]

condition = (sport_car_df['trans'] == 'manual' ) & (sport_car_df['choice'] == 1)
manual_trans_chosen = sport_car_df[condition]
manual_trans_chosen = manual_trans_chosen.shape[0]

print(f'auto count: {auto_trans_chosen}\nmanual count:{manual_trans_chosen}')

auto count: 1328
manual count:672


In [19]:
sport_car_df.head()

Unnamed: 0,resp_id,ques,alt,segment,seat,trans,convert,price,choice
0,1,1,1,basic,0,0,1,1,0
1,1,1,2,basic,2,1,0,2,0
2,1,1,3,basic,2,1,0,0,1
3,1,2,1,basic,2,0,0,1,0
4,1,2,2,basic,0,0,0,0,1


In [5]:
sport_car_df['convert'] = pd.Series([0 if x == "no" else 1 for x in sport_car_df['convert'] ]) 
sport_car_df['trans'] = pd.Series([0 if x == "manual" else 1 for x in sport_car_df['trans'] ]) 

In [6]:
for row in range(sport_car_df.shape[0]):
    if sport_car_df.iloc[row, 4] == 2:
        sport_car_df.iloc[row,4] = 0
    elif sport_car_df.iloc[row, 4] == 4:
        sport_car_df.iloc[row, 4] = 1
    else:
        sport_car_df.iloc[row, 4] = 2

In [8]:
for row in range(sport_car_df.shape[0]):
    if sport_car_df.iloc[row, 7] == 30:
        sport_car_df.iloc[row,7] = 0
    elif sport_car_df.iloc[row, 7] == 35:
        sport_car_df.iloc[row, 7] = 1
    else:
        sport_car_df.iloc[row, 7] = 2

In [15]:
# Specify the columns that contain the alternative-specific variables
ind_vars = OrderedDict([
    ('seat', [1,2]),
    ('trans', [1]),
    ('convert', [1]),
    ('price', [1,2])
])

ind_names = OrderedDict([
    ('seat', ["seat4", "seat5"]),
    ("trans", ["auto"]),
    ("convert", ["convertYes"]),
    ("price", ["price35k", "price40k"])
])

# ind_vars = OrderedDict()

# Create the Multinomial Logit model
model = pl.create_choice_model(data=sport_car_df,
                               alt_id_col='alt',
                               obs_id_col='resp_id',
                               choice_col='choice',
                               specification=ind_vars,
                               model_type="MNL",
                               names=ind_names)

model.fit_mle(init_vals=np.zeros(6))
model.get_statsmodels_summary()


Log-likelihood at zero: -6,802.3948
Initial Log-likelihood: -6,802.3948
Estimation Time for Point Estimation: 0.14 seconds.
Final log-likelihood: -6,802.3948


  warn('Method %s does not use Hessian information (hess).' % method,


0,1,2,3
Dep. Variable:,choice,No. Observations:,200.0
Model:,Multinomial Logit Model,Df Residuals:,194.0
Method:,MLE,Df Model:,6.0
Date:,"Sun, 20 Oct 2024",Pseudo R-squ.:,0.0
Time:,05:18:45,Pseudo R-bar-squ.:,-0.001
AIC:,13616.790,Log-Likelihood:,-6802.395
BIC:,13636.579,LL-Null:,-6802.395

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
seat4,0,0.134,0,1.000,-0.263,0.263
seat5,0,0.126,0,1.000,-0.246,0.246
auto,0,0.230,0,1.000,-0.450,0.450
convertYes,0,0.232,0,1.000,-0.454,0.454
price35k,0,0.136,0,1.000,-0.267,0.267
price40k,0,0.125,0,1.000,-0.246,0.246


In [20]:
model.print_summaries()



Number of Parameters                                                      6
Number of Observations                                                  200
Null Log-Likelihood                                            -6802.394763
Fitted Log-Likelihood                                          -6802.394763
Rho-Squared                                                             0.0
Rho-Bar-Squared                                                   -0.000882
Estimation Message        Desired error not necessarily achieved due to ...
dtype: object
            parameters   std_err  t_stats  p_values  robust_std_err  \
seat4              0.0  0.134238      0.0       1.0        0.586011   
seat5              0.0  0.125514      0.0       1.0        0.819738   
auto               0.0  0.229685      0.0       1.0        1.162494   
convertYes         0.0  0.231666      0.0       1.0        0.816577   
price35k           0.0  0.136019      0.0       1.0        0.352563   
price40k           0.0  0.