In [18]:
import numpy as np
import pandas as pd

import statsmodels.api as sm

import warnings

warnings.simplefilter("ignore")

In [19]:
# данные
caInputeDF = pd.read_csv("ConjointInput.csv", sep = ";")

Rank - это 3 вида параметров:

- 1 - это TV Size, TV Type, TV Color с параметрами 32",37",42"
- 2 (LCD, Plasma)
- 3 (Black, Silver, Anthrazit)

Назовем это варианты стимулирования к покупкам

In [20]:
caInputeDF.head(5)

Unnamed: 0,Stimulus,Rank
0,A1B1C1,2
1,A1B1C2,3
2,A1B1C3,1
3,A1B2C1,5
4,A1B2C2,6


### Преобразуем в DataFrame с описанием комбинаций

In [21]:
ConjointDummyDF = pd.DataFrame(np.zeros((18,9)), 
                               columns=["Rank","A1", "A2", "A3",
                                        "B1","B2","C1", "C2","C3"])

In [22]:
# сделаем Dummy кодирование 
ConjointDummyDF.Rank = caInputeDF.Rank

for index, row in caInputeDF.iterrows(): 
    stimuli1, stimuli2, stimuli3 = caInputeDF["Stimulus"].iloc[index][:2], \
                                   caInputeDF["Stimulus"].iloc[index][2:4], \
                                   caInputeDF["Stimulus"].iloc[index][4:6]
    
    
    ConjointDummyDF.loc[index, [stimuli1,stimuli2,stimuli3]] = 1

In [23]:
ConjointDummyDF.head(5)

Unnamed: 0,Rank,A1,A2,A3,B1,B2,C1,C2,C3
0,2,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,3,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,5,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,6,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


### Заменим имена "стимулов к покупке"

In [24]:
fullNames = {"Rank":"Rank",
             "A1": "32\" (81cm)",
             "A2": "37\" (94cm)",
             "A3": "42\" (107cm)",
             "B1": "Plasma", 
             "B2":"LCD", 
             "C1":"Silver", 
             "C2":"Black", 
             "C3": "Anthrazit"}

ConjointDummyDF.rename(columns=fullNames, inplace=True)

In [25]:
ConjointDummyDF.head(5)

Unnamed: 0,Rank,"32"" (81cm)","37"" (94cm)","42"" (107cm)",Plasma,LCD,Silver,Black,Anthrazit
0,2,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,3,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,5,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,6,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


### Оценка эффекта (Линейная регрессия


Мы можем оценить каждую переменную и посмотреть на ей вклад в рейтинг

In [26]:
ConjointDummyDF.columns

Index(['Rank', '32" (81cm)', '37" (94cm)', '42" (107cm)', 'Plasma', 'LCD',
       'Silver', 'Black', 'Anthrazit'],
      dtype='object')

In [27]:
X = ConjointDummyDF[ConjointDummyDF.columns[1:]]
X = sm.add_constant(X)
Y = ConjointDummyDF.Rank
linearRegression = sm.OLS(Y, X). fit()
linearRegression.summary()

0,1,2,3
Dep. Variable:,Rank,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,2.535e+30
Date:,"Thu, 30 Sep 2021",Prob (F-statistic):,1.0500000000000001e-179
Time:,19:47:53,Log-Likelihood:,567.02
No. Observations:,18,AIC:,-1122.0
Df Residuals:,12,BIC:,-1117.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,4.3846,6.73e-16,6.52e+15,0.000,4.385,4.385
"32"" (81cm)",-4.5385,2.07e-15,-2.19e+15,0.000,-4.538,-4.538
"37"" (94cm)",1.4615,2.07e-15,7.05e+14,0.000,1.462,1.462
"42"" (107cm)",7.4615,2.07e-15,3.6e+15,0.000,7.462,7.462
Plasma,0.6923,1.5e-15,4.63e+14,0.000,0.692,0.692
LCD,3.6923,1.5e-15,2.47e+15,0.000,3.692,3.692
Silver,1.4615,2.07e-15,7.05e+14,0.000,1.462,1.462
Black,2.4615,2.07e-15,1.19e+15,0.000,2.462,2.462
Anthrazit,0.4615,2.07e-15,2.23e+14,0.000,0.462,0.462

0,1,2,3
Omnibus:,0.444,Durbin-Watson:,0.439
Prob(Omnibus):,0.801,Jarque-Bera (JB):,0.543
Skew:,-0.284,Prob(JB):,0.762
Kurtosis:,2.366,Cond. No.,1.89e+16


### Важность стимулов


`Важность = Max(beta) - Min(beta)`


`Относительная важность = Важность  / Sum(Важность каждого признака)`

In [28]:
importance = []
relative_importance = []

rangePerFeature = []

begin = "A"
tempRange = []
for stimuli in fullNames.keys():
    if stimuli[0] == begin:
        tempRange.append(linearRegression.params[fullNames[stimuli]])
    elif stimuli == "Rank":
        rangePerFeature.append(tempRange)
    else:
        rangePerFeature.append(tempRange)
        begin = stimuli[0]
        tempRange = [linearRegression.params[fullNames[stimuli]]]
        
importance, relative_importance, rangePerFeature

([],
 [],
 [[-4.538461538461542, 1.4615384615384608, 7.461538461538465],
  [-4.538461538461542, 1.4615384615384608, 7.461538461538465],
  [0.6923076923076914, 3.692307692307696]])

In [29]:
# расчет важности
for item in rangePerFeature:
    importance.append(max(item) - min(item))

# относительная важность
for item in importance:
    relative_importance.append(100* round(item/sum(importance),3))

In [30]:
# оценка наиболее значимых
partworths = []

item_levels = [1,3,5,8]

for i in range(1,4):
    part_worth_range = linearRegression.params[item_levels[i-1]:item_levels[i]]
    print(part_worth_range)
    print('\n')

32" (81cm)   -4.538462
37" (94cm)    1.461538
dtype: float64


42" (107cm)    7.461538
Plasma         0.692308
dtype: float64


LCD       3.692308
Silver    1.461538
Black     2.461538
dtype: float64




In [31]:

meanRank = []
for i in ConjointDummyDF.columns[1:]:
    newmeanRank = ConjointDummyDF["Rank"].loc[ConjointDummyDF[i] == 1].mean()
    meanRank.append(newmeanRank)
print(meanRank)
    
# расчет "базовой полезности" (utility) | в примере "Максимальная цена"
totalMeanRank = sum(meanRank) / len(meanRank)
print('Utility', totalMeanRank)

# составные части цены
partWorths = {}
for i in range(len(meanRank)):
    name = fullNames[sorted(fullNames.keys())[i]]
    partWorths[name] = meanRank[i] - totalMeanRank
partWorths

[3.5, 9.5, 15.5, 8.0, 11.0, 9.5, 10.5, 8.5]
Utility 9.5


{'32" (81cm)': -6.0,
 '37" (94cm)': 0.0,
 '42" (107cm)': 6.0,
 'Plasma': -1.5,
 'LCD': 1.5,
 'Silver': 0.0,
 'Black': 1.0,
 'Anthrazit': -1.0}

### Результат

In [32]:
print("Важные параметры для принятия решения:\n\nРазмер:",relative_importance[0], "%",\
      "\nТип монитора:", relative_importance[1], "%", \
      "\nЦвет:", relative_importance[2], "%\n\n")

Важные параметры для принятия решения:

Размер: 44.4 % 
Тип монитора: 44.4 % 
Цвет: 11.1 %




Какой будет оптимальная цена для продукта с параметрами:

- 42", LCD, Black

In [34]:
#const	32" (81cm)	37" (94cm)	42" (107cm)	Plasma	LCD	Silver	Black	Anthrazit

const = 1
d32 = 0
d37 = 0
d42 = 1
plasma =0
lcd = 1
silver =0
black =1
anthrazit =0

optBundle = [const, d32, d37, d42, plasma, lcd, silver, black, anthrazit]
print("Данная комбинация стимулов получит максимальну оценку:", linearRegression.predict(optBundle)[0])

Данная комбинация стимулов получит максимальну оценку: 18.0


Расчет допольнительные средств, которые принесет данная цена

In [35]:
#расчет на том же наборе

optimalWorth = partWorths["42\" (107cm)"] + partWorths["LCD"] + partWorths["Black"]

print("На данной комбинации мы получим ", optimalWorth, "дополнительных 'средств' к товару")

На данной комбинации мы получим  8.5 дополнительных 'средств' к товару
