In [1]:
# coding=utf-8

### 基本参数设置

设置正交试验设计中每个样本的变量情况

In [2]:
# Attribute codes, one letter per attribute. The notebook calls these "levels";
# each letter is the first character of the two-character codes in FACTORY_LIST.
LEVEL_LIST = ["A", "B", "C", "D", "E", "F"]
# Display name of each attribute, aligned by position with LEVEL_LIST.
LEVEL_NAME = ["颜值", "成绩", "爱好", "年龄", "身高", "家庭"]
# Every attribute/option code: the attribute letter followed by the option number.
# The notebook calls these "factors".
FACTORY_LIST = ['A1', 'A2', 'A3', 'B1', 'B2', 'B3',
                'C1', 'C2', 'C3', 'D1', 'D2', 'D3',
                'E1', 'E2', 'E3', 'F1', 'F2', 'F3']
# Display name of each code, aligned by position with FACTORY_LIST.
# The height (E) options read 更高 / 相似 / 更低, parallel to the age (D) options
# 更大 / 同级 / 更小; E1 must be an option label, not the attribute name itself.
FACTORY_NAME = ["高", "一般", "低", "优秀", "一般", "较差",
                "相似", "互补", "毫不相关", "更大", "同级", "更小",
                "更高", "相似", "更低", "更好", "一般", "不那么好"]
# Attribute display name for each code, aligned by position with FACTORY_LIST.
FACTORY_LEVEL_NAME = ["颜值", "颜值", "颜值", "成绩", "成绩", "成绩",
                      "爱好", "爱好", "爱好", "年龄", "年龄", "年龄",
                      "身高", "身高", "身高", "家庭", "家庭", "家庭"]
# Orthogonal design: one profile per string, written as six concatenated
# two-character codes (one option per attribute, in LEVEL_LIST order).
CASE_STRUCTURE = ['A2B2C1D2E2F3', 'A1B3C3D2E2F2', 'A3B1C1D2E1F3', 'A3B1C3D2E3F2', 'A2B1C2D3E1F2', 'A1B2C1D3E1F2',
                  'A1B1C2D1E2F3', 'A2B1C3D3E2F1', 'A1B2C3D3E3F3', 'A3B3C2D3E3F3', 'A2B3C1D1E3F2', 'A2B2C2D2E3F1',
                  'A3B2C3D1E1F1', 'A3B3C1D3E2F1', 'A1B1C1D1E3F1', 'A3B2C2D1E2F2', 'A1B3C2D2E1F1', 'A2B3C3D1E1F3']

给定正交试验样本的得分情况

In [3]:
# Score given to each profile, aligned by position with CASE_STRUCTURE
# (laid out six per row to mirror that list).
MARK = [
    3, 4, 8, 2, 1, 2,
    4, 7, 6, 4, 7, 6,
    2, 7, 6, 10, 3, 9,
]

### 联合分析

1.导入包

In [4]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

2.生成初始化个案表

In [5]:
# Column layout: the response column "Mark" first, then one dummy column per code.
columns_list = ["Mark"] + FACTORY_LIST
# One zero-filled row per profile; later cells fill in the dummies and the score.
ConjointDummyDF = pd.DataFrame(
    np.zeros((len(CASE_STRUCTURE), len(columns_list))),
    columns=columns_list,
)

3.生成个案得分对应表

In [6]:
# Pair every profile string ("Level") with the score it received ("Mark").
markDF = pd.DataFrame(
    {
        "Level": CASE_STRUCTURE,
        "Mark": MARK,
    }
)

4.将正交设计分布写入个案表

In [7]:
# Each profile string is a run of two-character codes (e.g. "A2B2C1D2E2F3"),
# one per attribute. Split it into its codes and flag the matching dummy
# columns of that profile's row with 1.
for index, profile in markDF["Level"].items():
    factory = [profile[start:start + 2] for start in range(0, 2 * len(LEVEL_LIST), 2)]
    ConjointDummyDF.loc[index, factory] = 1

5.将得分写入个案表

In [8]:
# Replace the zero-filled "Mark" column with the observed scores; both frames
# use the default row index, so the values line up profile by profile.
ConjointDummyDF["Mark"] = markDF["Mark"]

6.回归计算

In [9]:
# Regressors: an intercept column plus the dummy column of every code.
# NOTE(review): each attribute contributes all of its dummies alongside the
# intercept, so the columns are linearly dependent; only the differences
# between coefficients of the same attribute are used downstream — confirm
# this is the intended parameterisation.
dummy_columns = ConjointDummyDF[FACTORY_LIST]
X = sm.add_constant(dummy_columns)
# Response: the score of each profile.
Y = ConjointDummyDF["Mark"]
# Fit ordinary least squares of the scores on the dummies.
ols_model = sm.OLS(Y, X)
linearRegression = ols_model.fit()

7.计算效用估计表

In [10]:
# Group the fitted coefficients by attribute: one inner list per entry of
# LEVEL_LIST, holding the coefficients of the codes that start with that letter
# (in FACTORY_LIST order).
level_utilities = [
    [linearRegression.params[code] for code in FACTORY_LIST if code[0] == attribute]
    for attribute in LEVEL_LIST
]

In [11]:
# Importance of an attribute = range (largest minus smallest) of its coefficients.
importance = [max(coefficients) - min(coefficients) for coefficients in level_utilities]

In [12]:
# Express each attribute's importance as a percentage of the total importance,
# to one decimal place.
# The total is computed once rather than on every iteration, and rounding is
# applied to the percentage itself: scaling an already-rounded fraction by 100
# can reintroduce binary floating-point noise (e.g. 100 * 0.07 gives
# 7.000000000000001).
importance_total = sum(importance)
relative_importance = [round(100 * value / importance_total, 1) for value in importance]

In [13]:
# Mean score per code: for every dummy column (all columns after "Mark"),
# average the scores of the profiles in which that dummy equals 1.
meanMark = [
    ConjointDummyDF.loc[ConjointDummyDF[column] == 1, "Mark"].mean()
    for column in ConjointDummyDF.columns[1:]
]

In [14]:
# Baseline score: the unweighted average of the per-code mean scores in meanMark.
totalMeanMark = sum(meanMark) / len(meanMark)

In [15]:
# Utility of each code: how far its mean score sits above or below the
# baseline totalMeanMark. Values stay in meanMark order (i.e. the order of the
# dummy columns), which is what the utilities table relies on.
utility = [code_mean - totalMeanMark for code_mean in meanMark]

In [16]:
# Utilities table: one row per code, with its attribute name, its option name
# and its utility value. The three lists are aligned by position.
utilities_columns = {
    "水平": FACTORY_LEVEL_NAME,
    "因素": FACTORY_NAME,
    "效用值": utility,
}
tableUtilities = pd.DataFrame(utilities_columns)
tableUtilities

Unnamed: 0,水平,因素,效用值
0,颜值,高,-0.888889
1,颜值,一般,0.444444
2,颜值,低,0.444444
3,成绩,优秀,-0.388889
4,成绩,一般,-0.222222
5,成绩,较差,0.611111
6,爱好,相似,0.444444
7,爱好,互补,-0.388889
8,爱好,毫不相关,-0.055556
9,年龄,更大,1.277778


8.生成水平重要性表

In [17]:
# Importance table: one row per attribute with its relative importance in percent.
importance_columns = {
    "水平": LEVEL_NAME,
    "重要性": relative_importance,
}
tableImportance = pd.DataFrame(importance_columns)
tableImportance

Unnamed: 0,水平,重要性
0,颜值,16.3
1,成绩,12.2
2,爱好,10.2
3,年龄,24.5
4,身高,20.4
5,家庭,16.3
