# Discrete Choice Based Conjoint analysis

1.Read the data

In [2]:
import pandas as pd
df=pd.read_csv('sportscar_choice_long.csv')
df.head(10)

Unnamed: 0,resp_id,ques,alt,segment,seat,trans,convert,price,choice
0,1,1,1,basic,2,manual,yes,35,0
1,1,1,2,basic,5,auto,no,40,0
2,1,1,3,basic,5,auto,no,30,1
3,1,2,1,basic,5,manual,no,35,0
4,1,2,2,basic,2,manual,no,30,1
5,1,2,3,basic,4,auto,no,35,0
6,1,3,1,basic,5,auto,yes,35,1
7,1,3,2,basic,4,auto,yes,30,0
8,1,3,3,basic,4,manual,no,40,0
9,1,4,1,basic,2,manual,yes,30,0


In [3]:
df.columns

Index(['resp_id', 'ques', 'alt', 'segment', 'seat', 'trans', 'convert',
       'price', 'choice'],
      dtype='object')

# Full factorial design


In [4]:
import itertools

# Define attribute levels
attribute1_levels = ['basic', 'fun', 'racer']
attribute2_levels = [2, 3, 4]
attribute3_levels = ['manual','autom']
attribute4_levels=['yes','no']
attribute5_levels=[30,35,40]

# Create a list of attribute levels
attribute_levels = [attribute1_levels, attribute2_levels, attribute3_levels,attribute4_levels,attribute5_levels]

# Generate full factorial design
full_factorial_design = list(itertools.product(*attribute_levels))

# Print the generated full factorial design
for combination in full_factorial_design:
    print(combination)

('basic', 2, 'manual', 'yes', 30)
('basic', 2, 'manual', 'yes', 35)
('basic', 2, 'manual', 'yes', 40)
('basic', 2, 'manual', 'no', 30)
('basic', 2, 'manual', 'no', 35)
('basic', 2, 'manual', 'no', 40)
('basic', 2, 'autom', 'yes', 30)
('basic', 2, 'autom', 'yes', 35)
('basic', 2, 'autom', 'yes', 40)
('basic', 2, 'autom', 'no', 30)
('basic', 2, 'autom', 'no', 35)
('basic', 2, 'autom', 'no', 40)
('basic', 3, 'manual', 'yes', 30)
('basic', 3, 'manual', 'yes', 35)
('basic', 3, 'manual', 'yes', 40)
('basic', 3, 'manual', 'no', 30)
('basic', 3, 'manual', 'no', 35)
('basic', 3, 'manual', 'no', 40)
('basic', 3, 'autom', 'yes', 30)
('basic', 3, 'autom', 'yes', 35)
('basic', 3, 'autom', 'yes', 40)
('basic', 3, 'autom', 'no', 30)
('basic', 3, 'autom', 'no', 35)
('basic', 3, 'autom', 'no', 40)
('basic', 4, 'manual', 'yes', 30)
('basic', 4, 'manual', 'yes', 35)
('basic', 4, 'manual', 'yes', 40)
('basic', 4, 'manual', 'no', 30)
('basic', 4, 'manual', 'no', 35)
('basic', 4, 'manual', 'no', 40)
('basic

# Creation of choice set using Simple Randomisation

In [5]:
import random

attribute1_levels = ['basic', 'fun', 'racer']
attribute2_levels = [2, 3, 4]
attribute3_levels = ['manual','autom']
attribute4_levels=['yes','no']
attribute5_levels=[30,35,40]

# Define the number of choice sets to generate
num_choice_sets = 5

# Generate random choice sets
choice_sets = []
for _ in range(num_choice_sets):
    choice_set = {
        'attribute1': random.choice(attribute1_levels),
        'attribute2': random.choice(attribute2_levels),
        'attribute3': random.choice(attribute3_levels),
        'attribute4': random.choice(attribute4_levels),
        'attribute5': random.choice(attribute5_levels)
    }
    choice_sets.append(choice_set)

# Print the generated choice sets
for idx, choice_set in enumerate(choice_sets):
    print(f"Choice Set {idx+1}: {choice_set}")

Choice Set 1: {'attribute1': 'racer', 'attribute2': 2, 'attribute3': 'autom', 'attribute4': 'no', 'attribute5': 35}
Choice Set 2: {'attribute1': 'racer', 'attribute2': 2, 'attribute3': 'autom', 'attribute4': 'yes', 'attribute5': 30}
Choice Set 3: {'attribute1': 'racer', 'attribute2': 4, 'attribute3': 'manual', 'attribute4': 'yes', 'attribute5': 40}
Choice Set 4: {'attribute1': 'racer', 'attribute2': 2, 'attribute3': 'autom', 'attribute4': 'no', 'attribute5': 40}
Choice Set 5: {'attribute1': 'fun', 'attribute2': 3, 'attribute3': 'manual', 'attribute4': 'no', 'attribute5': 40}


# Model Building using Generalised Linear Model to find the coeffients associated with each of the feature

In [6]:

import pandas as pd
import statsmodels.api as sm
from statsmodels.genmod.families import Binomial
from statsmodels.genmod.families.links import logit

In [7]:
 (df.select_dtypes(exclude=['int64', 'float64'])).columns

Index(['segment', 'trans', 'convert'], dtype='object')

In [9]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.genmod.families import Binomial
from statsmodels.genmod.families.links import logit

#one hot encoding done on the dataset 
df = pd.get_dummies(df, columns=['segment','trans','convert'])

X = df.drop({'choice'},axis=1)
y = df['choice']

model = sm.GLM(y, X, family=Binomial(link=logit()))


results = model.fit()
print(results.summary())

coefficients = results.params
print(coefficients)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 choice   No. Observations:                 6000
Model:                            GLM   Df Residuals:                     5990
Model Family:                Binomial   Df Model:                            9
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -3354.7
Date:                Fri, 23 Jun 2023   Deviance:                       6709.5
Time:                        01:44:52   Pearson chi2:                 6.01e+03
No. Iterations:                     4   Pseudo R-squ. (CS):             0.1434
Covariance Type:            nonrobust                                         
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
resp_id           0.0001      0.001      0.196

---------------------------------------------------------------------------------------------------------------