In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
from mpl_toolkits.mplot3d import Axes3D
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from scipy.stats import levene
from scipy.stats import kstest
from statsmodels.stats.anova import anova_lm#方差分析
from statsmodels.stats.multicomp import pairwise_tukeyhsd#两两比较
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pingouin as pg
import re
from scipy.optimize import curve_fit
import palettable
from palettable.cartocolors.qualitative import Bold_9

# matplotlib 图形正常显示中文及负号
plt.rcParams['font.sans-serif'] = ['Times New Roman']
plt.rcParams['axes.unicode_minus'] = False    # 用来正常显示负号
parameters = {'axes.labelsize': 15,
          'axes.titlesize': 20,'xtick.labelsize':13,'ytick.labelsize':13}
plt.rcParams.update(parameters)
#3d绘图内嵌可旋转
#%matplotlib notebook
#3d绘图单独窗口
%matplotlib qt5
%config InlineBackend.figure_format = 'svg'#矢量图设置，设定显示图片的分辨率

pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',500)
pd.set_option('display.width',1000)

In [4]:
def review_count_iter(data):
    for index in data.index:
        if index % 1000 == 0:
            print(index)

        # 处理'评论1'列
        data.at[index, '评论1'] = [float(x) for x in data.at[index, '评论1'].strip('[]').split()]

        # 处理'评论2'列
        data.at[index, '评论2'] = [float(x) for x in data.at[index, '评论2'].strip('[]').split()]

    return data

def review_compute1(row):
    if row['评论1_lag1']:
        return list(np.array(row['评论1'])-np.array(row['评论1_lag1']))
    else:
        return row['评论1']
               
def review_compute2(row):
    if row['评论2_lag1']:
        return list(np.array(row['评论2'])-np.array(row['评论2_lag1']))
    else:
        return row['评论2']

def create_lag_column(group):
    print(group[group['step']==1][['提供方1类型', '提供方2类型','回应偏好1', '回应偏好2']].iloc[0])
    group['评论1_lag1'] = group['评论1'].shift(1)
    group['评论2_lag1'] = group['评论2'].shift(1)
    group.loc[1:,['评论1_当期']]=group.loc[1:,['评论1','评论1_lag1']].apply(review_compute1,axis=1)
    group.loc[1:,['评论2_当期']]=group.loc[1:,['评论2','评论2_lag1']].apply(review_compute2,axis=1)
    return group

def review_perception_iter(data):
    b_weight = 10
    b = b_weight * np.array([0.11, -0.15, 0.17, 0, 0, -0.23])

    # Iterate over each row in the DataFrame
    for index in data.index:
        if index % 1000 == 0:
            print(index)
        if data.at[index, '评论1_lag1']:  # Check if '评论1_lag1' is not empty
            perception_value1 = np.dot(np.array(data.at[index, '评论1_lag1']) / sum(np.array(data.at[index, '评论1_lag1'])), b.T)
        else:
            perception_value1 = 0
            
        if data.at[index, '评论2_lag1']:
            perception_value2 = np.dot(np.array(data.at[index, '评论2_lag1'])/(sum(np.array(data.at[index, '评论2_lag1']))), b.T)
        else:
            perception_value2 = 0

        # Update the '评论感知1' column for the current row
        data.at[index, '评论感知1'] = perception_value1
        data.at[index, '评论感知2'] = perception_value2
    return data

def review_perception_iter_(data):
    b_weight = 10
    b = b_weight * np.array([0.11, -0.15, 0.17, 0, 0, -0.23])

    # Iterate over each row in the DataFrame
    for index in data.index:
        if index % 1000 == 0:
            print(index)
        if data.at[index, '评论1']:  # Check if '评论1_lag1' is not empty
            perception_value1 = np.dot(np.array(data.at[index, '评论1']) / sum(np.array(data.at[index, '评论1'])), b.T)
        else:
            perception_value1 = 0
            
        if data.at[index, '评论2']:
            perception_value2 = np.dot(np.array(data.at[index, '评论2'])/(sum(np.array(data.at[index, '评论2']))), b.T)
        else:
            perception_value2 = 0

        # Update the '评论感知1' column for the current row
        data.at[index, '评论感知1_'] = perception_value1
        data.at[index, '评论感知2_'] = perception_value2
    return data

In [5]:
data=pd.read_csv(r'C:\Users\11054\.spyder-py3\my_project\在线评论\在线评论-2024-9-14\数据-2024-9-14\data_all_扩充.csv')
data['平均好评激励金额']=(data['好评激励1']+data['好评激励2'])/2
data['平均夸大宣传程度']=(data['夸大宣传程度1']+data['夸大宣传程度2'])/2
data['平均回应偏好']=(data['回应偏好1']+data['回应偏好2'])/2
data['平均价格']=(data['价格1']+data['价格2'])/2
data['平均服务质量']=(data['服务质量1']+data['服务质量2'])/2
data['平均毛利率']=data['平均价格']-data['平均服务质量']
data['平均净利率']=(1-0.15)*data['平均价格']-data['平均服务质量']
data['毛利率1']=data['价格1']-data['服务质量1']
data['毛利率2']=data['价格2']-data['服务质量2']
data['净利率1']=(1-0.15)*data['价格1']-data['服务质量1']
data['净利率2']=(1-0.15)*data['价格2']-data['服务质量2']
data['消费者平均收益1']=data['消费者收益1']/data['服务人数1']
data['消费者平均收益2']=data['消费者收益2']/data['服务人数2']
data = review_count_iter(data)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
113000
114000
115000
116000
117000
118000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
132000
133000
134000
135000
136000
137000
138000
139000
140000
141000
142000
143000
144000
145000
146000
147000
148000
149000
150000
151000
152000
153000
154000
155000
156000
157000
158000


1165000
1166000
1167000
1168000
1169000
1170000
1171000
1172000
1173000
1174000
1175000
1176000
1177000
1178000
1179000
1180000
1181000
1182000
1183000
1184000
1185000
1186000
1187000
1188000
1189000
1190000
1191000
1192000
1193000
1194000
1195000
1196000
1197000
1198000
1199000
1200000
1201000
1202000
1203000
1204000
1205000
1206000
1207000
1208000
1209000
1210000
1211000
1212000
1213000
1214000
1215000
1216000
1217000
1218000
1219000
1220000
1221000
1222000
1223000
1224000
1225000
1226000
1227000
1228000
1229000
1230000
1231000
1232000
1233000
1234000
1235000
1236000
1237000
1238000
1239000
1240000
1241000
1242000
1243000
1244000
1245000
1246000
1247000
1248000
1249000
1250000
1251000
1252000
1253000
1254000
1255000
1256000
1257000
1258000
1259000
1260000
1261000
1262000
1263000
1264000
1265000
1266000
1267000
1268000
1269000
1270000
1271000
1272000
1273000
1274000
1275000
1276000
1277000
1278000
1279000
1280000
1281000
1282000
1283000
1284000
1285000
1286000
1287000
1288000
1289000


2192000
2193000
2194000
2195000
2196000
2197000
2198000
2199000
2200000
2201000
2202000
2203000
2204000
2205000
2206000
2207000
2208000
2209000
2210000
2211000
2212000
2213000
2214000
2215000
2216000
2217000
2218000
2219000
2220000
2221000
2222000
2223000
2224000
2225000
2226000
2227000
2228000
2229000
2230000
2231000
2232000
2233000
2234000
2235000
2236000
2237000
2238000
2239000
2240000
2241000
2242000
2243000
2244000
2245000
2246000
2247000
2248000
2249000
2250000
2251000
2252000
2253000
2254000
2255000
2256000
2257000
2258000
2259000
2260000
2261000
2262000
2263000
2264000
2265000
2266000
2267000
2268000
2269000
2270000
2271000
2272000
2273000
2274000
2275000
2276000
2277000
2278000
2279000
2280000
2281000
2282000
2283000
2284000
2285000
2286000
2287000
2288000
2289000
2290000
2291000
2292000
2293000
2294000
2295000
2296000
2297000
2298000
2299000
2300000
2301000
2302000
2303000
2304000
2305000
2306000
2307000
2308000
2309000
2310000
2311000
2312000
2313000
2314000
2315000
2316000


In [6]:
data['评论1_当期']=data['评论1']
data['评论2_当期']=data['评论2']
data=data.groupby(['提供方1类型', '提供方2类型','回应偏好1', '回应偏好2']).apply(create_lag_column)
data.reset_index(drop=True, inplace=True)

提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.1
回应偏好2     0.1
Name: 0, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.1
回应偏好2     0.5
Name: 100000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.1
回应偏好2     0.9
Name: 200000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.5
回应偏好2     0.1
Name: 300000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.5
回应偏好2     0.5
Name: 400000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.5
回应偏好2     0.9
Name: 500000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.9
回应偏好2     0.1
Name: 600000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.9
回应偏好2     0.5
Name: 700000, dtype: float64
提供方1类型    0.0
提供方2类型    0.0
回应偏好1     0.9
回应偏好2     0.9
Name: 800000, dtype: float64
提供方1类型    0.0
提供方2类型    1.0
回应偏好1     0.1
回应偏好2     0.1
Name: 900000, dtype: float64
提供方1类型    0.0
提供方2类型    1.0
回应偏好1     0.1
回应偏好2     0.5
Name: 1000000, dtype: float64
提供方1类型    0.0
提供方2类型    1.0
回应偏好1     0.1
回应偏好2     0.9
Name: 1100000

In [7]:
data['总评论数量1']=data['评论1_当期'].apply(sum)
data['总评论数量2']=data['评论2_当期'].apply(sum)
data['总评论数量']=data['总评论数量1']+data['总评论数量2']
data['好评数量1']=data['评论1_当期'].apply(lambda x:x[0]+x[2]+x[4])
data['好评数量2']=data['评论2_当期'].apply(lambda x:x[0]+x[2]+x[4])
data['好评总数']=data['好评数量1']+data['好评数量2']
data['当期好评占比1']=data['好评数量1']/data['总评论数量1']
data['当期好评占比2']=data['好评数量2']/data['总评论数量2']
data['被激励好评占好评比例1']=data['被激励好评数量1']/data['好评数量1']
data['被激励好评占好评比例2']=data['被激励好评数量2']/data['好评数量2']
data['被激励好评占好评比例']=(data['被激励好评占好评比例1']+data['被激励好评占好评比例2'])/2
data['获得激励评论数量占评论比例1']=data['获得激励数量1']/data['总评论数量1']
data['获得激励评论数量占评论比例2']=data['获得激励数量2']/data['总评论数量2']
data['获得激励评论数量占评论比例']=(data['获得激励评论数量占评论比例1']+data['获得激励评论数量占评论比例2'])/2

In [8]:
data = review_perception_iter(data)
data = review_perception_iter_(data)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
113000
114000
115000
116000
117000
118000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
132000
133000
134000
135000
136000
137000
138000
139000
140000
141000
142000
143000
144000
145000
146000
147000
148000
149000
150000
151000
152000
153000
154000
155000
156000
157000
158000


1165000
1166000
1167000
1168000
1169000
1170000
1171000
1172000
1173000
1174000
1175000
1176000
1177000
1178000
1179000
1180000
1181000
1182000
1183000
1184000
1185000
1186000
1187000
1188000
1189000
1190000
1191000
1192000
1193000
1194000
1195000
1196000
1197000
1198000
1199000
1200000
1201000
1202000
1203000
1204000
1205000
1206000
1207000
1208000
1209000
1210000
1211000
1212000
1213000
1214000
1215000
1216000
1217000
1218000
1219000
1220000
1221000
1222000
1223000
1224000
1225000
1226000
1227000
1228000
1229000
1230000
1231000
1232000
1233000
1234000
1235000
1236000
1237000
1238000
1239000
1240000
1241000
1242000
1243000
1244000
1245000
1246000
1247000
1248000
1249000
1250000
1251000
1252000
1253000
1254000
1255000
1256000
1257000
1258000
1259000
1260000
1261000
1262000
1263000
1264000
1265000
1266000
1267000
1268000
1269000
1270000
1271000
1272000
1273000
1274000
1275000
1276000
1277000
1278000
1279000
1280000
1281000
1282000
1283000
1284000
1285000
1286000
1287000
1288000
1289000


2193000
2194000
2195000
2196000
2197000
2198000
2199000
2200000
2201000
2202000
2203000
2204000
2205000
2206000
2207000
2208000
2209000
2210000
2211000
2212000
2213000
2214000
2215000
2216000
2217000
2218000
2219000
2220000
2221000
2222000
2223000
2224000
2225000
2226000
2227000
2228000
2229000
2230000
2231000
2232000
2233000
2234000
2235000
2236000
2237000
2238000
2239000
2240000
2241000
2242000
2243000
2244000
2245000
2246000
2247000
2248000
2249000
2250000
2251000
2252000
2253000
2254000
2255000
2256000
2257000
2258000
2259000
2260000
2261000
2262000
2263000
2264000
2265000
2266000
2267000
2268000
2269000
2270000
2271000
2272000
2273000
2274000
2275000
2276000
2277000
2278000
2279000
2280000
2281000
2282000
2283000
2284000
2285000
2286000
2287000
2288000
2289000
2290000
2291000
2292000
2293000
2294000
2295000
2296000
2297000
2298000
2299000
2300000
2301000
2302000
2303000
2304000
2305000
2306000
2307000
2308000
2309000
2310000
2311000
2312000
2313000
2314000
2315000
2316000
2317000


619000
620000
621000
622000
623000
624000
625000
626000
627000
628000
629000
630000
631000
632000
633000
634000
635000
636000
637000
638000
639000
640000
641000
642000
643000
644000
645000
646000
647000
648000
649000
650000
651000
652000
653000
654000
655000
656000
657000
658000
659000
660000
661000
662000
663000
664000
665000
666000
667000
668000
669000
670000
671000
672000
673000
674000
675000
676000
677000
678000
679000
680000
681000
682000
683000
684000
685000
686000
687000
688000
689000
690000
691000
692000
693000
694000
695000
696000
697000
698000
699000
700000
701000
702000
703000
704000
705000
706000
707000
708000
709000
710000
711000
712000
713000
714000
715000
716000
717000
718000
719000
720000
721000
722000
723000
724000
725000
726000
727000
728000
729000
730000
731000
732000
733000
734000
735000
736000
737000
738000
739000
740000
741000
742000
743000
744000
745000
746000
747000
748000
749000
750000
751000
752000
753000
754000
755000
756000
757000
758000
759000
760000
761000

1694000
1695000
1696000
1697000
1698000
1699000
1700000
1701000
1702000
1703000
1704000
1705000
1706000
1707000
1708000
1709000
1710000
1711000
1712000
1713000
1714000
1715000
1716000
1717000
1718000
1719000
1720000
1721000
1722000
1723000
1724000
1725000
1726000
1727000
1728000
1729000
1730000
1731000
1732000
1733000
1734000
1735000
1736000
1737000
1738000
1739000
1740000
1741000
1742000
1743000
1744000
1745000
1746000
1747000
1748000
1749000
1750000
1751000
1752000
1753000
1754000
1755000
1756000
1757000
1758000
1759000
1760000
1761000
1762000
1763000
1764000
1765000
1766000
1767000
1768000
1769000
1770000
1771000
1772000
1773000
1774000
1775000
1776000
1777000
1778000
1779000
1780000
1781000
1782000
1783000
1784000
1785000
1786000
1787000
1788000
1789000
1790000
1791000
1792000
1793000
1794000
1795000
1796000
1797000
1798000
1799000
1800000
1801000
1802000
1803000
1804000
1805000
1806000
1807000
1808000
1809000
1810000
1811000
1812000
1813000
1814000
1815000
1816000
1817000
1818000


In [9]:
data['val1']= (0.3 + data['服务质量1'] - data['价格1'])
data['expect1']=(0.35+0.65*data['评论感知1'])*(0.3 + (1 + data['夸大宣传程度1']) * (data['服务质量1']) - data['价格1'])
data['val2']= (0.3 + data['服务质量2'] - data['价格2'])
data['expect2']=(0.35+0.65*data['评论感知2'])*(0.3 + (1 + data['夸大宣传程度2']) * (data['服务质量2']) - data['价格2'])
data=data[data['step']>1]
data.columns

Index(['step', '提供方1类型', '提供方2类型', '提供方1利他程度', '提供方2利他程度', '夸大宣传程度1', '夸大宣传程度2', '好评激励1', '好评激励2', '价格1', '价格2', '服务质量1', '服务质量2', '服务人数1', '服务人数2', '总服务人数', '评论1', '评论2', '社会总福利', '平台收益', '提供方收益1', '提供方收益2', '提供方总收益', '消费者收益1', '消费者收益2', '消费者总收益', '消费者满意度1', '消费者满意度2', '消费者总满意度', '消费者剩余1', '消费者剩余2', '消费者总剩余', '实际到达人数', '办理入住数1', '办理入住数2', '办理入住总数', '被激励好评数量1', '被激励好评数量2', '获得激励数量1', '获得激励数量2', '好评占比1', '好评占比2', '总好评数量占比', '回应偏好1', '回应偏好2', 'random_seed', '平均好评激励金额', '平均夸大宣传程度', '平均回应偏好', '平均价格', '平均服务质量', '平均毛利率', '平均净利率', '毛利率1', '毛利率2', '净利率1', '净利率2', '消费者平均收益1', '消费者平均收益2', '评论1_当期', '评论2_当期', '评论1_lag1', '评论2_lag1', '总评论数量1', '总评论数量2', '总评论数量', '好评数量1', '好评数量2', '好评总数', '当期好评占比1', '当期好评占比2', '被激励好评占好评比例1', '被激励好评占好评比例2', '被激励好评占好评比例', '获得激励评论数量占评论比例1', '获得激励评论数量占评论比例2', '获得激励评论数量占评论比例', '评论感知1', '评论感知2', '评论感知1_', '评论感知2_', 'val1', 'expect1', 'val2', 'expect2'], dtype='object')

In [10]:
data.loc[(data['提供方1类型']==0) &(data['提供方2类型']==0),['market_type']]='pure-egoism'
data.loc[(data['提供方1类型']==0) &(data['提供方2类型']==1),['market_type']]='hybrid'
data.loc[(data['提供方1类型']==1) &(data['提供方2类型']==1),['market_type']]='pure-altruism'

In [11]:
data=data.rename(columns={'夸大宣传程度1':'OP1','夸大宣传程度2':'OP2','好评激励1':'PCIP1','好评激励2':'PCIP2',
                '回应偏好1':'PERP1','回应偏好2':'PERP2','价格1':'ARPP1','价格2':'ARPP2','服务质量1':'ARSP1','服务质量2':'ARSP2',
                '平均夸大宣传程度':'OP','平均好评激励金额':'PCIP','平均回应偏好':'PERP','平均价格':'ARPP','平均服务质量':'ARSP',
                '评论感知1':'RP1','评论感知2':'RP2','评论感知1_':'RP1_','评论感知2_':'RP2_'})

In [12]:
data.to_csv(r'C:\Users\11054\.spyder-py3\my_project\在线评论\在线评论-2024-9-14\数据-2024-9-14\data_all_processing_扩充.csv', index=False, encoding='utf-8-sig')

In [18]:
data=pd.read_csv(r'C:\Users\11054\.spyder-py3\my_project\在线评论\在线评论-2024-9-14\数据-2024-9-14\data_all_processing_扩充.csv')

In [13]:
data.columns=['step', '提供方2类型', '提供方1类型', '提供方2利他程度', '提供方1利他程度', 'OP2', 'OP1', 'PCIP2', 'PCIP1', 'ARPP2', 'ARPP1',
 'ARSP2', 'ARSP1', '服务人数2', '服务人数1', '总服务人数', '评论2', '评论1', '社会总福利', '平台收益', '提供方收益2', '提供方收益1',
 '提供方总收益', '消费者收益2', '消费者收益1', '消费者总收益','消费者满意度2', '消费者满意度1', '消费者总满意度',
 '消费者剩余2', '消费者剩余1', '消费者总剩余', '实际到达人数', '办理入住数2', '办理入住数1', '办理入住总数',
 '被激励好评数量2', '被激励好评数量1', '获得激励数量2', '获得激励数量1', '好评占比2', '好评占比1', '总好评数量占比',
 'PERP2', 'PERP1', 'random_seed','PCIP', 'OP', 'PERP', 'ARPP', 'ARSP', '平均毛利率', '平均净利率', '毛利率2', '毛利率1', '净利率2', '净利率1',
 '消费者平均收益2', '消费者平均收益1', '评论2_当期', '评论1_当期', '评论2_lag1', '评论1_lag1', '总评论数量2', '总评论数量1', '总评论数量',
 '好评数量2', '好评数量1', '好评总数', '当期好评占比2', '当期好评占比1', '被激励好评占好评比例2', '被激励好评占好评比例1',
 '被激励好评占好评比例', '获得激励评论数量占评论比例2', '获得激励评论数量占评论比例1', '获得激励评论数量占评论比例', 'RP2', 'RP1','RP2_', 'RP1_',
 'val2', 'expect2', 'val1', 'expect1', 'market_type']
data=data[data['market_type']!='hybrid']
data2=pd.read_csv(r'C:\Users\11054\.spyder-py3\my_project\在线评论\在线评论-2024-9-14\数据-2024-9-14\data_all_processing_扩充.csv')
data=pd.concat([data2,data])
data.to_csv(r'C:\Users\11054\.spyder-py3\my_project\在线评论\在线评论-2024-9-14\数据-2024-9-14\data_all_processing_modify_扩充.csv', index=False, encoding='utf-8-sig')

In [14]:
1

1

# 开始

## 策略间相关性

In [4]:
data=pd.read_csv(r'C:\Users\11054\.spyder-py3\my_project\在线评论\在线评论-2024-9-14\数据-2024-9-14\data_all_processing_modify.csv')

In [5]:
data

Unnamed: 0,step,提供方1类型,提供方2类型,提供方1利他程度,提供方2利他程度,OP1,OP2,PCIP1,PCIP2,ARPP1,ARPP2,ARSP1,ARSP2,服务人数1,服务人数2,总服务人数,评论1,评论2,社会总福利,平台收益,提供方收益1,提供方收益2,提供方总收益,消费者收益1,消费者收益2,消费者总收益,消费者满意度1,消费者满意度2,消费者总满意度,消费者剩余1,消费者剩余2,消费者总剩余,实际到达人数,办理入住数1,办理入住数2,办理入住总数,被激励好评数量1,被激励好评数量2,获得激励数量1,获得激励数量2,好评占比1,好评占比2,总好评数量占比,PERP1,PERP2,random_seed,PCIP,OP,PERP,ARPP,ARSP,平均毛利率,平均净利率,毛利率1,毛利率2,净利率1,净利率2,消费者平均收益1,消费者平均收益2,评论1_当期,评论2_当期,评论1_lag1,评论2_lag1,总评论数量1,总评论数量2,总评论数量,好评数量1,好评数量2,好评总数,当期好评占比1,当期好评占比2,被激励好评占好评比例1,被激励好评占好评比例2,被激励好评占好评比例,获得激励评论数量占评论比例1,获得激励评论数量占评论比例2,获得激励评论数量占评论比例,RP1,RP2,RP1_,RP2_,val1,expect1,val2,expect2,market_type
0,2,0,0,0.0,0.0,0.051062,0.282784,0.095311,0.156630,0.712080,0.328511,0.402185,0.064030,109,162,271,"[134.0, 3.0, 29.0, 0.0, 1.0, 0.0]","[139.0, 3.0, 33.0, 0.0, 5.0, 0.0]",255.528086,64.985227,46.602851,65.691926,112.294777,23.669992,54.578090,78.248082,-570.264625,-161.946990,-732.211615,18.601996,41.017781,59.619777,286,109,162,271,92,102,100,116,0.982036,0.983333,0.982709,0.1,0.1,1,0.125971,0.166923,0.1,0.520295,0.233107,0.287188,0.209144,0.309894,0.264481,0.203082,0.215205,0.217156,0.336902,"[80.0, 0.0, 19.0, 0.0, 1.0, 0.0]","[89.0, 0.0, 23.0, 0.0, 4.0, 0.0]","[54.0, 3.0, 10.0, 0.0, 0.0, 0.0]","[50.0, 3.0, 10.0, 0.0, 1.0, 0.0]",100.0,116.0,216.0,100.0,116.0,216.0,1.000000,1.000000,0.920000,0.879310,0.899655,1.000000,1.000000,1.000000,1.073134,1.054688,1.150898,1.136111,-0.009894,0.011148,0.035519,0.055531,pure-egoism
1,3,0,0,0.0,0.0,0.106447,0.044103,0.103443,0.041099,0.987426,0.295550,0.508210,0.006287,17,161,178,"[148.0, 3.0, 32.0, 0.0, 1.0, 0.0]","[223.0, 3.0, 51.0, 0.0, 6.0, 0.0]",167.487747,28.924501,10.020248,116.380809,126.401057,0.419451,11.742737,12.162189,-107.657632,-80.089010,-187.746642,-0.573875,7.556595,6.982720,281,0,161,161,17,25,17,103,0.983696,0.989399,0.987152,0.1,0.1,1,0.072271,0.075275,0.1,0.641488,0.257248,0.384240,0.288017,0.479216,0.289263,0.331102,0.244931,0.024674,0.072936,"[14.0, 0.0, 3.0, 0.0, 0.0, 0.0]","[84.0, 0.0, 18.0, 0.0, 1.0, 0.0]","[134.0, 3.0, 29.0, 0.0, 1.0, 0.0]","[139.0, 3.0, 33.0, 0.0, 5.0, 0.0]",17.0,103.0,120.0,17.0,103.0,120.0,1.000000,1.000000,1.000000,0.242718,0.621359,1.000000,1.000000,1.000000,1.150898,1.136111,1.155978,1.157244,-0.179216,-0.137391,0.010737,0.011988,pure-egoism
2,4,0,0,0.0,0.0,0.257582,0.150256,0.064396,0.023883,0.330831,0.546847,0.056278,0.262650,109,152,261,"[213.0, 3.0, 41.0, 0.0, 2.0, 0.0]","[266.0, 52.0, 62.0, 1.0, 6.0, 18.0]",239.130063,54.418998,68.946805,102.757424,171.704230,10.381248,2.625587,13.006835,-139.311478,-301.656578,-440.968055,8.346652,7.510779,15.857431,276,126,148,274,74,49,75,54,0.988417,0.824691,0.888554,0.1,0.1,1,0.044139,0.203919,0.1,0.438839,0.159464,0.279375,0.213549,0.274553,0.284197,0.224928,0.202170,0.095241,0.017274,"[65.0, 0.0, 9.0, 0.0, 1.0, 0.0]","[43.0, 49.0, 11.0, 1.0, 0.0, 18.0]","[148.0, 3.0, 32.0, 0.0, 1.0, 0.0]","[223.0, 3.0, 51.0, 0.0, 6.0, 0.0]",75.0,122.0,197.0,75.0,54.0,129.0,1.000000,0.442623,0.986667,0.907407,0.947037,1.000000,0.442623,0.721311,1.155978,1.157244,1.156371,0.687901,0.025447,0.043993,0.015803,0.060917,pure-egoism
3,5,0,0,0.0,0.0,0.210330,0.190476,0.009670,0.037143,0.433071,0.387292,0.153940,0.034914,160,13,173,"[243.0, 58.0, 46.0, 4.0, 2.0, 29.0]","[273.0, 52.0, 64.0, 1.0, 8.0, 18.0]",160.568817,36.595651,109.534806,11.317285,120.852091,2.494373,0.626702,3.121075,-260.003832,-37.868916,-297.872749,10.988653,0.916572,11.905225,258,160,0,160,35,11,35,11,0.761780,0.829327,0.796992,0.1,0.1,1,0.023407,0.200403,0.1,0.410181,0.094427,0.315754,0.254227,0.279131,0.352377,0.214170,0.294284,0.015590,0.048208,"[30.0, 55.0, 5.0, 4.0, 0.0, 29.0]","[7.0, 0.0, 2.0, 0.0, 2.0, 0.0]","[213.0, 3.0, 41.0, 0.0, 2.0, 0.0]","[266.0, 52.0, 62.0, 1.0, 6.0, 18.0]",123.0,11.0,134.0,35.0,11.0,46.0,0.284553,1.000000,1.000000,1.000000,1.000000,0.284553,1.000000,0.642276,1.156371,0.687901,0.502094,0.696394,0.020869,0.058659,-0.052377,-0.036451,pure-egoism
4,6,0,0,0.0,0.0,0.120879,0.217289,0.003077,0.106740,0.757431,0.468962,0.255387,0.161814,17,143,160,"[245.0, 64.0, 46.0, 4.0, 2.0, 35.0]","[385.0, 52.0, 94.0, 1.0, 9.0, 18.0]",152.272967,36.034197,13.272397,92.481735,105.754132,-0.140022,10.624661,10.484639,-25.878615,-592.777354,-618.655969,1.293890,-3.252182,-1.958292,289,0,160,160,1,143,2,143,0.739899,0.872987,0.817801,0.1,0.1,1,0.054908,0.169084,0.1,0.613197,0.208600,0.404596,0.312617,0.502045,0.307148,0.388430,0.236803,-0.008237,0.074298,"[2.0, 6.0, 0.0, 0.0, 0.0, 6.0]","[112.0, 0.0, 30.0, 0.0, 1.0, 0.0]","[243.0, 58.0, 46.0, 4.0, 2.0, 29.0]","[273.0, 52.0, 64.0, 1.0, 8.0, 18.0]",14.0,143.0,157.0,2.0,143.0,145.0,0.142857,1.000000,0.500000,1.000000,0.750000,0.142857,1.000000,0.571429,0.502094,0.696394,0.432323,0.829875,-0.202045,-0.115775,-0.007148,0.022485,pure-egoism
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445495,96,1,1,0.5,0.5,0.224029,0.130989,0.270403,0.015385,0.334493,0.115363,0.153757,0.067936,90,154,244,"[5080.0, 677.0, 1103.0, 358.0, 76.0, 44.0]","[5498.0, 390.0, 1291.0, 227.0, 87.0, 27.0]",243.437781,39.414045,25.333005,12.028299,37.361304,43.631114,123.031318,166.662432,-90.100746,-21.676400,-111.777146,33.178856,126.060081,159.238937,253,84,151,235,27,5,60,82,0.852957,0.914362,0.884036,0.9,0.9,100,0.142894,0.177509,0.9,0.224928,0.110847,0.114082,0.080342,0.180736,0.047427,0.130562,0.030123,0.484790,0.798905,"[52.0, 0.0, 8.0, 0.0, 0.0, 0.0]","[70.0, 1.0, 11.0, 0.0, 1.0, 0.0]","[5028.0, 677.0, 1095.0, 358.0, 76.0, 44.0]","[5428.0, 389.0, 1280.0, 227.0, 86.0, 27.0]",60.0,83.0,143.0,60.0,82.0,142.0,1.000000,0.987952,0.450000,0.060976,0.255488,1.000000,0.987952,0.993976,0.862270,1.008633,0.864868,1.010027,0.119264,0.139949,0.252573,0.262939,pure-altruism
445496,97,1,1,0.5,0.5,0.280659,0.079780,0.230769,0.246520,0.057682,0.205091,0.002686,0.069706,144,127,271,"[5145.0, 677.0, 1118.0, 358.0, 77.0, 44.0]","[5554.0, 390.0, 1301.0, 227.0, 87.0, 27.0]",270.506336,16.628502,7.747157,17.971049,25.718206,145.929772,82.229857,228.159629,35.511441,-14.757287,20.754154,116.366624,67.624031,183.990655,295,152,126,278,2,2,81,66,0.854563,0.915107,0.885172,0.9,0.9,100,0.238645,0.180220,0.9,0.131386,0.036196,0.095190,0.075482,0.054996,0.135384,0.046344,0.104621,1.013401,0.647479,"[65.0, 0.0, 15.0, 0.0, 1.0, 0.0]","[56.0, 0.0, 10.0, 0.0, 0.0, 0.0]","[5080.0, 677.0, 1103.0, 358.0, 76.0, 44.0]","[5498.0, 390.0, 1291.0, 227.0, 87.0, 27.0]",81.0,66.0,147.0,81.0,66.0,147.0,1.000000,1.000000,0.024691,0.030303,0.027497,1.000000,1.000000,1.000000,0.864868,1.010027,0.868500,1.011600,0.245004,0.224171,0.164616,0.171286,pure-altruism
445497,98,1,1,0.5,0.5,0.264982,0.069377,0.043370,0.235531,0.039004,0.444485,0.004883,0.270280,144,129,273,"[5209.0, 677.0, 1136.0, 358.0, 78.0, 44.0]","[5604.0, 390.0, 1319.0, 227.0, 87.0, 27.0]",289.407675,29.419832,12.469731,28.998917,41.468648,152.564715,65.954480,218.519196,45.077692,-52.439817,-7.362125,135.481926,54.813770,190.295697,314,144,130,274,1,14,83,68,0.856172,0.915861,0.886316,0.9,0.9,100,0.139451,0.167179,0.9,0.241744,0.137582,0.104163,0.067901,0.034121,0.174205,0.028270,0.107532,1.059477,0.511275,"[64.0, 0.0, 18.0, 0.0, 1.0, 0.0]","[50.0, 0.0, 18.0, 0.0, 0.0, 0.0]","[5145.0, 677.0, 1118.0, 358.0, 77.0, 44.0]","[5554.0, 390.0, 1301.0, 227.0, 87.0, 27.0]",83.0,68.0,151.0,83.0,68.0,151.0,1.000000,1.000000,0.012048,0.205882,0.108965,1.000000,1.000000,1.000000,0.868500,1.011600,0.872354,1.013797,0.265879,0.244337,0.125795,0.145636,pure-altruism
445498,99,1,1,0.5,0.5,0.289304,0.277802,0.071502,0.259560,0.062931,0.464262,0.038454,0.179515,148,112,260,"[5264.0, 677.0, 1144.0, 358.0, 80.0, 44.0]","[5673.0, 390.0, 1344.0, 227.0, 87.0, 27.0]",278.129937,33.364228,3.807404,57.663041,61.470446,146.821456,36.473807,183.295263,26.285683,-273.977220,-247.691537,134.500818,14.918208,149.419026,294,148,109,257,0,89,65,94,0.857407,0.916882,0.887496,0.9,0.9,100,0.165531,0.283553,0.9,0.263596,0.108985,0.154611,0.115072,0.024477,0.284746,0.015037,0.215107,0.992037,0.325659,"[55.0, 0.0, 8.0, 0.0, 2.0, 0.0]","[69.0, 0.0, 25.0, 0.0, 0.0, 0.0]","[5209.0, 677.0, 1136.0, 358.0, 78.0, 44.0]","[5604.0, 390.0, 1319.0, 227.0, 87.0, 27.0]",65.0,94.0,159.0,65.0,94.0,159.0,1.000000,1.000000,0.000000,0.946809,0.473404,1.000000,1.000000,1.000000,0.872354,1.013797,0.874653,1.016779,0.275523,0.262865,0.015254,0.065707,pure-altruism


In [17]:
plt.rcParams['font.sans-serif'] = ['SimHei']
fig, ax = plt.subplots(figsize=(10, 6))
dt=data[(data['market_type']=='pure-egoism')&(data['PERP1']==0.5)&(data['PERP2']==0.5)]
#dt=dt[['step','OP1']].groupby(['step']).mean()
#dt=gb.get_group((0.9, 0.9))
x='ARPP1'
y='ARSP1'
sns.scatterplot(x=x, y=y, data=dt, ax=ax)
#sns.regplot(x=x, y=y, data=dt, order=1, scatter=False, color='red', ax=ax)
# 手动设置y轴的显示范围
#ax.set_ylim(0.1, 0.2)  # 根据你的数据和需要调整这些值
plt.xlabel(x)
plt.ylabel(y)
plt.title('Scatter Plot of Step vs Value with Non-linear Fit')
plt.show()

In [25]:
#两个市场
def calculate_vif(df, columns):
    vif_data = pd.DataFrame()
    vif_data["feature"] = columns
    vif_data["VIF"] = [variance_inflation_factor(df[columns].values, i) for i in range(len(columns))]
    return vif_data

def standardization(data):
    # 计算每一列的均值和标准差
    mean = data.mean()
    std = data.std()
    # 标准化数据：(数据 - 均值) / 标准差
    standardized_data = (data - mean) / std
    return standardized_data

columns=[ 'PERP1','OP1','PCIP1','ARPP1', 'ARSP1',
          'PERP2','OP2','PCIP2','ARPP2', 'ARSP2','RP1','RP2']

coef_matrixs=[]
for i,market_type in zip([0,1],['pure-egoism','hybrid']):
    # 初始化一个空的数据框来存储回归系数
    coef_matrix = pd.DataFrame(index=columns, columns=columns)
    dt=data[data['market_type']==market_type]
    df=dt[columns]
    df=standardization(df)
    # 计算每个变量与其他变量的回归系数
    for target in columns:
        c=columns.copy()
        c.remove(target)
        predictors = c
        if (target!='RP1') and (target!='RP2'):
            predictors.remove('RP1')
            predictors.remove('RP2')
        X = df[predictors]
        y = df[target]
        X = sm.add_constant(X)  # 添加截距项
        model = sm.OLS(y, X).fit()
        #print(calculate_vif(df, predictors))
        # 根据p值判断是否保留回归系数
        for predictor in predictors:
            if model.pvalues[predictor] > 0.05:
                coef_matrix.loc[target, predictor] = 0
            else:
                coef_matrix.loc[target, predictor] = model.params[predictor]
    for target in columns:
        c=columns.copy()
        c.remove(target)
        predictors = c
        X = df[predictors]
        y = df[target]
        X = sm.add_constant(X)  # 添加截距项
        model = sm.OLS(y, X).fit()
        #print(calculate_vif(df, predictors))
        # 根据p值判断是否保留回归系数
        for predictor in ['RP1','RP2']:
            try:
                if model.pvalues[predictor] > 0.05:
                    coef_matrix.loc[target, predictor] = 0
                else:
                    coef_matrix.loc[target, predictor] = model.params[predictor]
            except:
                pass
    # 填充对角线为 1，因为每个变量与自身的回归系数为 1
    np.fill_diagonal(coef_matrix.values, 1)
    coef_matrix = coef_matrix.astype(float)
    coef_matrix=coef_matrix.T
    coef_matrix=coef_matrix.drop(['PERP1', 'PERP2','RP1','RP2'], axis=1)
    coef_matrixs.append(coef_matrix)

fig, ax =plt.subplots(1,2, figsize=(13, 8))
fig.subplots_adjust(bottom=0.15, top=0.8,left=0.05,right=0.95,wspace=0.15)#子图间距
titles=['(a) spillover effects in pure-egoism market','(b) spillover effects in hybrid market','(c) spillover effects in pure-altruism market']
for i,coef_matrix in zip([0,1],coef_matrixs):
    if i ==1:
        axes=sns.heatmap(coef_matrix, annot=True,annot_kws={"fontsize":13}, vmin=-1,vmax=1,linewidths=0.5, linecolor='black',cbar_kws={'aspect':50,'fraction':0.017},cmap='bwr',ax=ax[i])#rainbow\bwr\RdBu
        cbar = axes.collections[0].colorbar
        cbar.ax.tick_params(labelsize=15)
    else:
        axes=sns.heatmap(coef_matrix, annot=True,annot_kws={"fontsize":13}, vmin=-1,vmax=1,linewidths=0.5, linecolor='black',cbar=False,cmap='bwr',ax=ax[i])
    axes.xaxis.tick_top() 
    axes.set_xticklabels(axes.get_xticklabels(),rotation = 0,fontsize=11)
    axes.yaxis.tick_left()
    axes.set_yticklabels(axes.get_yticklabels(),rotation = 0,fontsize=11)
    axes.set_title(titles[i],fontsize=20,y=-0.07)
    
    # 添加下边和右边的实线###设置坐标轴的粗细
    for spine in ['bottom', 'right']:#设置下边和右边坐标轴的粗细
        axes.spines[spine].set_visible(True)
        axes.spines[spine].set_color('black')
        axes.spines[spine].set_linewidth(0.7)

In [41]:
#两个市场--使用逐步回归确定纳入回归的变量
import statsmodels.api as sm

def calculate_vif(df, columns):
    # Initialize DataFrame to store VIF scores
    vif_data = pd.DataFrame()
    vif_data["feature"] = columns

    # Calculate VIF for each feature
    try:
        # Collects each column's VIF score
        vif_data["VIF"] = [variance_inflation_factor(df[columns].values, i) for i in range(len(columns))]
    except Exception as e:
        print(f"An error occurred: {e}")
        print(columns)
    return vif_data

def standardization(data):
    # 计算每一列的均值和标准差
    mean = data.mean()
    std = data.std()
    # 标准化数据：(数据 - 均值) / 标准差
    standardized_data = (data - mean) / std
    return standardized_data

def stepwise_regression(data, predictors, target):
    """
    执行逐步回归以确定重要的自变量，使用p值来决定是否加入变量。

    :param data: 数据框，包含所需的所有变量。
    :param predictors: 初始自变量的列表。
    :param target: 因变量的名称。
    :return: 返回一个包含选定自变量和它们回归系数的字典。
    """
    print(target)
    included = []
    while True:
        changed = False
        
        # 向前选择
        excluded = list(set(predictors) - set(included))
        new_pval = pd.Series(index=excluded)
        for new_column in excluded:
            model = sm.OLS(data[target], sm.add_constant(data[included + [new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        best_pval = new_pval.min()
        if best_pval < 0.01:
            best_feature = new_pval.idxmin()
            included.append(best_feature)
            changed = True
        
        # 向后剔除
        model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
        # 使用最大 p 值而不是任何变量的 p 值
        pvalues = model.pvalues.iloc[1:]
        worst_pval = pvalues.max()  # 仅考虑添加的变量
        if worst_pval > 0.01:
            changed = True
            worst_feature = pvalues.idxmax()
            included.remove(worst_feature)

        if not changed:
            break

    model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
    #print(calculate_vif(data, included))
    print(included)
    return model.params

def stepwise_regression_with_f_test(data, predictors, target):
    """
    执行逐步回归以确定重要的自变量，使用F值的显著性来决定是否加入变量。

    :param data: 数据框，包含所需的所有变量。
    :param predictors: 自变量的列表。
    :param target: 因变量的名称。
    :return: 返回一个包含选定自变量和它们回归系数的字典。
    """
    included = []
    while True:
        changed = False
        
        # 向前选择
        excluded = list(set(predictors) - set(included))
        f_values = pd.Series(index=excluded)
        for new_column in excluded:
            X_with = data[included + [new_column]]
            X_with = sm.add_constant(X_with)
            model_with = sm.OLS(data[target], X_with).fit()
            
            if len(included) == 0:  # 如果模型中没有其他变量，则无法计算F值
                f_values[new_column] = model_with.f_pvalue
            else:    
                X_without = data[included]
                X_without = sm.add_constant(X_without)
                model_without = sm.OLS(data[target], X_without).fit()
                
                # 计算F-test的p值
                p_value = model_with.compare_f_test(model_without)[1]
                f_values[new_column] = p_value

        best_f_pvalue = f_values.min()
        if best_f_pvalue < 0.05:  # 如果F-test的p值显著，则包括这个变量
            best_feature = f_values.idxmin()
            included.append(best_feature)
            changed = True

        # 向后剔除
        if changed:
            X_with = data[included]
            X_with = sm.add_constant(X_with)
            model_with = sm.OLS(data[target], X_with).fit()

            f_values = pd.Series(index=included)
            for feature in included:
                temp_included = [f for f in included if f != feature]
                X_temp = data[temp_included]
                X_temp = sm.add_constant(X_temp)
                model_temp = sm.OLS(data[target], X_temp).fit()

                # 计算F-test的p值
                p_value = model_with.compare_f_test(model_temp)[1]
                f_values[feature] = p_value

            worst_f_pvalue = f_values.max()
            if worst_f_pvalue > 0.05:  # 如果F-test的p值较高，则移除这个变量
                worst_feature = f_values.idxmax()
                included.remove(worst_feature)
                changed = True

        if not changed:
            break

    final_model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
    #print(calculate_vif(data, included))
    print(included)
    return final_model.params

def stepwise_regression_with_f_value1(data, predictors, target):#使用最终迭代终止的模型
    """
    执行逐步回归，以F统计量的变化来决定是否加入或剔除变量。

    :param data: 数据框，包含所需的所有变量。
    :param predictors: 初始自变量的列表。
    :param target: 因变量的名称。
    :return: 返回一个包含选定自变量和它们回归系数的字典。
    """
    included = []
    best_f = None
    
    while True:
        changed = False
        
        # 向前选择
        excluded = list(set(predictors) - set(included))
        f_values = pd.Series(index=excluded)
        for new_column in excluded:
            model_with = sm.OLS(data[target], sm.add_constant(data[included + [new_column]])).fit()
            f_values[new_column] = model_with.fvalue  # 获取加入变量后的F值
            print(included + [new_column],model_with.fvalue)
        
        # 选择使F值最大的变量
        if not f_values.empty:
            best_f = f_values.max()

            best_feature = f_values.idxmax()
            if best_f > (sm.OLS(data[target], sm.add_constant(data[included])).fit().fvalue if included else -float('inf')):
                included.append(best_feature)
                changed = True

        # 向后剔除
        if included:
            model_with = sm.OLS(data[target], sm.add_constant(data[included])).fit()
            f_values = pd.Series(index=included)
            for feature in included:
                reduced_included = [f for f in included if f != feature]
                model_without = sm.OLS(data[target], sm.add_constant(data[reduced_included])).fit()
                f_values[feature] = model_with.fvalue  # 获取剔除变量后的F值
            
            # 选择使F值最大的变量
            worst_f = f_values.min()
            if worst_f > model_with.fvalue:
                worst_feature = f_values.idxmin()
                included.remove(worst_feature)
                changed = True
        
        if not changed:
            break
    
    final_model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
    print(calculate_vif(data, included))
    return final_model.params

def analyze_market_effects(data, columns):
    data=data[['market_type']+columns].dropna()
    coef_matrixs = []
    for i, market_type in zip([0,1], ['pure-egoism', 'hybrid']):
        dt = data[data['market_type'] == market_type]
        df = standardization(dt[columns])
        coef_matrix = pd.DataFrame(index=columns, columns=columns)
        
        for target in columns:
            predictors = [col for col in columns if col != target and col not in ['RP1', 'RP2']]#剔除评论的影响（以为他会遮蔽评论干预策略的作用）
            coefficients = stepwise_regression(df, predictors, target)
            for predictor, coef in coefficients.items():
                coef_matrix.loc[target, predictor] = coef
                
        #研究评论的影响
        for target in columns:
            predictors = [col for col in columns if col != target]
            coefficients = stepwise_regression(df, predictors, target)
            for predictor, coef in coefficients.items():
                if predictor in ['RP1','RP2']:
                    coef_matrix.loc[target, predictor] = coef
        
        coef_matrix=coef_matrix.drop(['const'], axis=1)
        np.fill_diagonal(coef_matrix.values, 1)  # 对角线设为 1
        coef_matrix = coef_matrix.astype(float)
        coef_matrix = coef_matrix.T.drop(['PERP1', 'PERP2', 'RP1', 'RP2'], axis=1)
        coef_matrixs.append(coef_matrix)
    
    return coef_matrixs

def plot_coef(coef_matrixs):
    fig, ax =plt.subplots(1,2, figsize=(13, 8))
    fig.subplots_adjust(bottom=0.15, top=0.8,left=0.05,right=0.95,wspace=0.15)#子图间距
    titles=['(a) spillover effects in pure-egoism market','(b) spillover effects in hybrid market','(c) spillover effects in pure-altruism market']
    for i,coef_matrix in zip([0,1],coef_matrixs):
        if i ==1:
            axes=sns.heatmap(coef_matrix, annot=True,annot_kws={"fontsize":13}, vmin=-1,vmax=1,linewidths=0.5, linecolor='black',cbar_kws={'aspect':50,'fraction':0.017},cmap='bwr',ax=ax[i])#rainbow\bwr\RdBu
            cbar = axes.collections[0].colorbar
            cbar.ax.tick_params(labelsize=15)
        else:
            axes=sns.heatmap(coef_matrix, annot=True,annot_kws={"fontsize":13}, vmin=-1,vmax=1,linewidths=0.5, linecolor='black',cbar=False,cmap='bwr',ax=ax[i])
        axes.xaxis.tick_top() 
        axes.set_xticklabels(axes.get_xticklabels(),rotation = 0,fontsize=11)
        axes.yaxis.tick_left()
        axes.set_yticklabels(axes.get_yticklabels(),rotation = 0,fontsize=11)
        axes.set_title(titles[i],fontsize=20,y=-0.07)
        
        # 添加下边和右边的实线###设置坐标轴的粗细
        for spine in ['bottom', 'right']:#设置下边和右边坐标轴的粗细
            axes.spines[spine].set_visible(True)
            axes.spines[spine].set_color('black')
            axes.spines[spine].set_linewidth(0.7)
            
            
columns = [ 'PERP1','OP1','PCIP1','ARPP1', 'ARSP1',
            'PERP2','OP2','PCIP2','ARPP2', 'ARSP2','RP1','RP2']
coef_matrixs = analyze_market_effects(data, columns)
plot_coef(coef_matrixs)


PERP1
['ARSP1', 'PCIP1', 'OP1', 'PCIP2', 'ARPP1', 'ARSP2', 'ARPP2', 'PERP2', 'OP2']
OP1
['PCIP2', 'ARSP1', 'PCIP1', 'ARPP1', 'PERP1', 'ARSP2', 'OP2', 'ARPP2', 'PERP2']
PCIP1
['PCIP2', 'ARSP1', 'ARPP2', 'PERP1', 'OP2', 'OP1', 'ARSP2', 'ARPP1', 'PERP2']
ARPP1
['PCIP2', 'ARSP1', 'PCIP1', 'ARPP2', 'OP1', 'ARSP2', 'PERP2', 'PERP1', 'OP2']
ARSP1
['PCIP1', 'PERP1', 'OP1', 'ARPP1', 'PCIP2', 'ARPP2', 'ARSP2', 'OP2', 'PERP2']
PERP2
['PCIP2', 'ARPP2', 'OP2', 'ARSP2', 'PCIP1', 'ARSP1', 'ARPP1', 'PERP1', 'OP1']
OP2
['PCIP2', 'PCIP1', 'PERP2', 'ARSP2', 'ARPP2', 'ARSP1', 'OP1', 'ARPP1', 'PERP1']
PCIP2
['PCIP1', 'ARPP2', 'OP2', 'OP1', 'PERP2', 'ARPP1', 'ARSP1', 'ARSP2', 'PERP1']
ARPP2
['PCIP2', 'PCIP1', 'ARPP1', 'ARSP1', 'ARSP2', 'OP2', 'PERP1', 'PERP2', 'OP1']
ARSP2
['PCIP2', 'ARPP2', 'ARSP1', 'PCIP1', 'OP2', 'PERP2', 'OP1', 'ARPP1', 'PERP1']
RP1
['ARSP1', 'PCIP1', 'PERP1', 'PERP2', 'OP2', 'OP1', 'PCIP2', 'ARPP1', 'ARSP2', 'ARPP2']
RP2
['PCIP2', 'OP2', 'PERP2', 'PERP1', 'OP1', 'PCIP1', 'ARSP1', 'ARPP

In [6]:
#三个市场--使用逐步回归确定纳入回归的变量
import statsmodels.api as sm


def calculate_vif(df, columns):
    # Initialize DataFrame to store VIF scores
    vif_data = pd.DataFrame()
    vif_data["feature"] = columns

    # Calculate VIF for each feature
    try:
        # Collects each column's VIF score
        vif_data["VIF"] = [variance_inflation_factor(df[columns].values, i) for i in range(len(columns))]
    except Exception as e:
        print(f"An error occurred: {e}")
        print(columns)
    return vif_data

def standardization(data):
    # 计算每一列的均值和标准差
    mean = data.mean()
    std = data.std()
    # 标准化数据：(数据 - 均值) / 标准差
    standardized_data = (data - mean) / std
    return standardized_data

def stepwise_regression(data, predictors, target):
    """
    执行逐步回归以确定重要的自变量，使用p值来决定是否加入变量。

    :param data: 数据框，包含所需的所有变量。
    :param predictors: 初始自变量的列表。
    :param target: 因变量的名称。
    :return: 返回一个包含选定自变量和它们回归系数的字典。
    """
    included = []
    while True:
        changed = False
        
        # 向前选择
        excluded = list(set(predictors) - set(included))
        new_pval = pd.Series(index=excluded)
        for new_column in excluded:
            model = sm.OLS(data[target], sm.add_constant(data[included + [new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        best_pval = new_pval.min()
        if best_pval < 0.01:
            best_feature = new_pval.idxmin()
            included.append(best_feature)
            changed = True
        
        # 向后剔除
        model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
        # 使用最大 p 值而不是任何变量的 p 值
        pvalues = model.pvalues.iloc[1:]
        worst_pval = pvalues.max()  # 仅考虑添加的变量
        if worst_pval > 0.01:
            changed = True
            worst_feature = pvalues.idxmax()
            included.remove(worst_feature)

        if not changed:
            break

    model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
    #print(calculate_vif(data, included))
    print(included)
    return model.params

def stepwise_regression_with_f_test(data, predictors, target):
    """
    执行逐步回归以确定重要的自变量，使用F值的显著性来决定是否加入变量。

    :param data: 数据框，包含所需的所有变量。
    :param predictors: 自变量的列表。
    :param target: 因变量的名称。
    :return: 返回一个包含选定自变量和它们回归系数的字典。
    """
    included = []
    while True:
        changed = False
        
        # 向前选择
        excluded = list(set(predictors) - set(included))
        f_values = pd.Series(index=excluded)
        for new_column in excluded:
            X_with = data[included + [new_column]]
            X_with = sm.add_constant(X_with)
            model_with = sm.OLS(data[target], X_with).fit()
            
            if len(included) == 0:  # 如果模型中没有其他变量，则无法计算F值
                f_values[new_column] = model_with.f_pvalue
            else:    
                X_without = data[included]
                X_without = sm.add_constant(X_without)
                model_without = sm.OLS(data[target], X_without).fit()
                
                # 计算F-test的p值
                p_value = model_with.compare_f_test(model_without)[1]
                f_values[new_column] = p_value

        best_f_pvalue = f_values.min()
        if best_f_pvalue < 0.05:  # 如果F-test的p值显著，则包括这个变量
            best_feature = f_values.idxmin()
            included.append(best_feature)
            changed = True

        # 向后剔除
        if changed:
            X_with = data[included]
            X_with = sm.add_constant(X_with)
            model_with = sm.OLS(data[target], X_with).fit()

            f_values = pd.Series(index=included)
            for feature in included:
                temp_included = [f for f in included if f != feature]
                X_temp = data[temp_included]
                X_temp = sm.add_constant(X_temp)
                model_temp = sm.OLS(data[target], X_temp).fit()

                # 计算F-test的p值
                p_value = model_with.compare_f_test(model_temp)[1]
                f_values[feature] = p_value

            worst_f_pvalue = f_values.max()
            if worst_f_pvalue > 0.05:  # 如果F-test的p值较高，则移除这个变量
                worst_feature = f_values.idxmax()
                included.remove(worst_feature)
                changed = True

        if not changed:
            break

    final_model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
    #print(calculate_vif(data, included))
    print(included)
    return final_model.params

def stepwise_regression_with_f_value1(data, predictors, target):#使用最终迭代终止的模型
    """
    执行逐步回归，以F统计量的变化来决定是否加入或剔除变量。

    :param data: 数据框，包含所需的所有变量。
    :param predictors: 初始自变量的列表。
    :param target: 因变量的名称。
    :return: 返回一个包含选定自变量和它们回归系数的字典。
    """
    included = []
    best_f = None
    
    while True:
        changed = False
        
        # 向前选择
        excluded = list(set(predictors) - set(included))
        f_values = pd.Series(index=excluded)
        for new_column in excluded:
            model_with = sm.OLS(data[target], sm.add_constant(data[included + [new_column]])).fit()
            f_values[new_column] = model_with.fvalue  # 获取加入变量后的F值
            print(included + [new_column],model_with.fvalue)
        
        # 选择使F值最大的变量
        if not f_values.empty:
            best_f = f_values.max()

            best_feature = f_values.idxmax()
            if best_f > (sm.OLS(data[target], sm.add_constant(data[included])).fit().fvalue if included else -float('inf')):
                included.append(best_feature)
                changed = True

        # 向后剔除
        if included:
            model_with = sm.OLS(data[target], sm.add_constant(data[included])).fit()
            f_values = pd.Series(index=included)
            for feature in included:
                reduced_included = [f for f in included if f != feature]
                model_without = sm.OLS(data[target], sm.add_constant(data[reduced_included])).fit()
                f_values[feature] = model_with.fvalue  # 获取剔除变量后的F值
            
            # 选择使F值最大的变量
            worst_f = f_values.min()
            if worst_f > model_with.fvalue:
                worst_feature = f_values.idxmin()
                included.remove(worst_feature)
                changed = True
        
        if not changed:
            break
    
    final_model = sm.OLS(data[target], sm.add_constant(data[included])).fit()
    print(calculate_vif(data, included))
    return final_model.params

def analyze_market_effects(data, columns):
    coef_matrixs = []
    for i, market_type in zip([0,1,2], ['pure-egoism', 'hybrid','pure-altruism']):
        dt = data[data['market_type'] == market_type]
        df = standardization(dt[columns])
        
        coef_matrix = pd.DataFrame(index=columns, columns=columns)
        
        for target in columns:
            predictors = [col for col in columns if col != target and col not in ['RP1', 'RP2']]#剔除评论的影响（以为他会遮蔽评论干预策略的作用）
            coefficients = stepwise_regression(df, predictors, target)
            for predictor, coef in coefficients.items():
                coef_matrix.loc[target, predictor] = coef
                
        #研究评论的影响
        for target in columns:
            predictors = [col for col in columns if col != target]
            coefficients = stepwise_regression(df, predictors, target)
            for predictor, coef in coefficients.items():
                if predictor in ['RP1','RP2']:
                    coef_matrix.loc[target, predictor] = coef
        
        coef_matrix=coef_matrix.drop(['const'], axis=1)
        np.fill_diagonal(coef_matrix.values, 1)  # 对角线设为 1
        coef_matrix = coef_matrix.astype(float)
        coef_matrix = coef_matrix.T.drop(['PERP1', 'PERP2', 'RP1', 'RP2'], axis=1)
        coef_matrixs.append(coef_matrix)
    
    return coef_matrixs

def plot_coef(coef_matrixs):
    fig, ax =plt.subplots(1,3, figsize=(20, 8))
    fig.subplots_adjust(bottom=0.15, top=0.8,left=0.05,right=0.95,wspace=0.15)#子图间距
    titles=['(a) spillover effects in pure-egoism market','(b) spillover effects in hybrid market','(c) spillover effects in pure-altruism market']
    for i,coef_matrix in zip([0,1,2],coef_matrixs):
        if i ==2:
            axes=sns.heatmap(coef_matrix, annot=True,annot_kws={"fontsize":13}, vmin=-1,vmax=1,linewidths=0.5, linecolor='black',cbar_kws={'aspect':50,'fraction':0.017},cmap='bwr',ax=ax[i])#rainbow\bwr\RdBu
            cbar = axes.collections[0].colorbar
            cbar.ax.tick_params(labelsize=15)
        else:
            axes=sns.heatmap(coef_matrix, annot=True,annot_kws={"fontsize":13}, vmin=-1,vmax=1,linewidths=0.5, linecolor='black',cbar=False,cmap='bwr',ax=ax[i])
        axes.xaxis.tick_top() 
        axes.set_xticklabels(axes.get_xticklabels(),rotation = 0,fontsize=11)
        axes.yaxis.tick_left()
        axes.set_yticklabels(axes.get_yticklabels(),rotation = 0,fontsize=11)
        axes.set_title(titles[i],fontsize=20,y=-0.07)
        
        # 添加下边和右边的实线###设置坐标轴的粗细
        for spine in ['bottom', 'right']:#设置下边和右边坐标轴的粗细
            axes.spines[spine].set_visible(True)
            axes.spines[spine].set_color('black')
            axes.spines[spine].set_linewidth(0.7)
            
            
columns = [ 'PERP1','OP1','PCIP1','ARPP1', 'ARSP1',
            'PERP2','OP2','PCIP2','ARPP2', 'ARSP2','RP1','RP2']
coef_matrixs = analyze_market_effects(data, columns)
plot_coef(coef_matrixs)


['PCIP1', 'OP1', 'ARSP1', 'PCIP2']
['ARSP1', 'ARPP1', 'ARSP2', 'PERP1', 'PCIP1', 'PCIP2', 'OP2', 'ARPP2']
['ARPP1', 'ARSP1', 'PCIP2', 'PERP1', 'ARPP2', 'ARSP2', 'OP2', 'OP1', 'PERP2']
['ARSP1', 'PCIP2', 'PCIP1', 'OP1', 'ARPP2', 'OP2', 'PERP2']
['ARPP1', 'OP1', 'ARSP2', 'PCIP2', 'PCIP1', 'OP2', 'PERP1']
['PCIP2', 'OP2', 'ARSP2', 'PCIP1']
['ARSP2', 'ARPP2', 'ARSP1', 'PERP2', 'PCIP2', 'PCIP1', 'OP1', 'ARPP1']
['ARPP2', 'ARSP2', 'PCIP1', 'PERP2', 'ARPP1', 'ARSP1', 'OP1', 'OP2', 'PERP1']
['ARPP1', 'ARSP2', 'PCIP2', 'PCIP1', 'OP2', 'OP1', 'PERP1']
['ARPP2', 'OP2', 'ARSP1', 'PCIP1', 'PCIP2', 'OP1', 'PERP2']
['PERP1', 'ARSP1', 'PERP2', 'OP1', 'PCIP1', 'ARPP2', 'ARSP2', 'OP2']
['PERP2', 'ARSP2', 'PERP1', 'OP2', 'PCIP2', 'ARPP1', 'ARSP1', 'OP1']
['RP1', 'PCIP1', 'RP2', 'OP1', 'ARSP1', 'PERP2', 'PCIP2']
['ARSP1', 'ARPP1', 'ARSP2', 'PERP1', 'PCIP1', 'PCIP2', 'OP2', 'ARPP2', 'RP1']
['ARPP1', 'ARSP1', 'PCIP2', 'PERP1', 'ARPP2', 'ARSP2', 'OP2', 'OP1', 'RP1', 'PERP2']
['ARSP1', 'PCIP2', 'ARPP2', 'PCIP

In [8]:
calculate_vif(data[data['market_type'] == 'pure-egoism'], [ 'PERP1','OP1','PCIP1','ARPP1','ARSP1',
            'PERP2','OP2','PCIP2','ARPP2','ARSP2'])

Unnamed: 0,feature,VIF
0,PERP1,3.143513
1,OP1,3.777587
2,PCIP1,2.202588
3,ARPP1,18.628809
4,ARSP1,7.807643
5,PERP2,3.143513
6,OP2,3.777587
7,PCIP2,2.202588
8,ARPP2,18.628809
9,ARSP2,7.807643


In [20]:
data[data['market_type'] == 'hybrid'].describe()

Unnamed: 0,step,提供方1类型,提供方2类型,提供方1利他程度,提供方2利他程度,OP1,OP2,PCIP1,PCIP2,ARPP1,ARPP2,ARSP1,ARSP2,服务人数1,服务人数2,总服务人数,社会总福利,平台收益,提供方收益1,提供方收益2,提供方总收益,消费者收益1,消费者收益2,消费者总收益,实际到达人数,办理入住数1,办理入住数2,办理入住总数,被激励好评数量1,被激励好评数量2,获得激励数量1,获得激励数量2,好评占比1,好评占比2,总好评数量占比,PERP1,PERP2,PCIP,OP,PERP,ARPP,ARSP,平均毛利率,平均净利率,毛利率1,毛利率2,净利率1,净利率2,消费者平均收益1,消费者平均收益2,总评论数量1,总评论数量2,总评论数量,好评数量1,好评数量2,好评总数,当期好评占比1,当期好评占比2,被激励好评占好评比例1,被激励好评占好评比例2,被激励好评占好评比例,获得激励评论数量占评论比例1,获得激励评论数量占评论比例2,获得激励评论数量占评论比例,RP1,RP2,val1,expect1,val2,expect2
count,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88653.0,87574.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0,88649.0,87563.0,84575.0,87559.0,83522.0,88649.0,87563.0,87503.0,88709.0,88709.0,88709.0,88709.0,88709.0,88709.0
mean,50.855911,0.0,1.0,0.0,0.5,0.158192,0.0,0.067251,0.0,0.550561,0.444652,0.255724,0.222114,124.472139,119.863768,244.335907,261.182494,54.73074,77.155727,35.836996,112.992723,25.841784,67.617248,93.459032,281.100801,124.463189,119.851345,244.314534,56.9659,0.0,79.612046,58.874714,0.782403,0.946418,0.843234,0.500203,0.499847,0.033626,0.079096,0.500025,0.497606,0.238919,0.258688,0.184047,0.294837,0.222538,0.212253,0.155841,0.213898,0.562357,103.25755,62.220801,165.478351,79.612046,58.874714,138.48676,0.803315,0.946066,0.642518,0.0,0.321018,0.803315,0.946066,0.875272,0.685072,1.062375,0.005163,0.034268,0.077462,0.080595
std,28.547446,0.0,0.0,0.0,0.0,0.084701,0.0,0.063989,0.0,0.217463,0.264447,0.20114,0.182806,29.510628,51.306018,41.405297,44.350966,19.467009,24.86975,28.064629,38.079525,19.920524,46.430332,51.006213,16.773136,32.768523,57.987951,46.478938,52.587292,0.0,43.53338,25.775455,0.157417,0.005227,0.100392,0.326472,0.326602,0.031995,0.042351,0.230897,0.16182,0.132451,0.098224,0.086566,0.069212,0.181109,0.064116,0.154844,0.164525,0.295737,33.260631,27.170495,31.052563,43.53338,25.775455,46.048476,0.337793,0.042214,0.410028,0.0,0.205038,0.337793,0.042214,0.169705,0.341311,0.011355,0.069212,0.062445,0.181109,0.188441
min,2.0,0.0,1.0,0.0,0.5,0.0,0.0,0.0,0.0,0.010743,0.000244,0.0,0.0,0.0,0.0,128.0,-450.985312,2.342767,-1.026672,-0.974178,2.480669,-637.737722,-3.619814,-635.972311,208.0,0.0,0.0,124.0,0.0,0.0,0.0,0.0,0.255864,0.899083,0.468336,0.1,0.1,0.0,0.0,0.1,0.025972,0.000397,-0.007782,-0.029219,-0.010071,-0.042727,-0.059131,-0.054505,-4.555269,-0.288381,0.0,0.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.239474,-0.462222,0.889744,-0.684496,-0.716784,-0.696399,-0.721699
25%,26.0,0.0,1.0,0.0,0.5,0.089011,0.0,0.018681,0.0,0.362754,0.235244,0.072209,0.068791,110.0,109.0,244.0,248.286414,40.073704,62.495644,10.085845,86.948489,13.09799,21.828923,49.135507,270.0,108.0,109.0,248.0,5.0,0.0,56.0,53.0,0.620237,0.942722,0.742665,0.1,0.1,0.009341,0.044505,0.3,0.379509,0.134896,0.186443,0.123012,0.271745,0.087041,0.187328,0.040289,0.113777,0.301971,81.0,56.0,148.0,56.0,53.0,111.0,0.813187,0.927536,0.175439,0.0,0.0875,0.813187,0.927536,0.874763,0.354367,1.056023,-0.012458,0.013518,0.010615,0.011043
50%,51.0,0.0,1.0,0.0,0.5,0.165055,0.0,0.045568,0.0,0.51938,0.408167,0.225233,0.172069,123.0,144.0,260.0,273.623685,53.306235,78.122077,30.218924,110.731254,23.62671,67.302711,91.658101,281.0,122.0,149.0,264.0,43.0,0.0,84.0,68.0,0.793404,0.946649,0.852507,0.5,0.5,0.022784,0.082527,0.5,0.494751,0.232985,0.242874,0.166529,0.29555,0.187695,0.213062,0.119981,0.194375,0.573348,103.0,73.0,166.0,84.0,68.0,147.0,1.0,0.95,0.914894,0.0,0.457251,1.0,0.95,0.959459,0.675795,1.06225,0.00445,0.030289,0.112305,0.116856
75%,76.0,0.0,1.0,0.0,0.5,0.232015,0.0,0.0937,0.0,0.722822,0.643899,0.397424,0.342855,149.0,154.0,271.0,289.973999,67.635708,95.338741,59.520472,138.64149,35.215805,110.056935,135.000798,292.0,152.0,158.0,273.0,104.0,0.0,110.0,76.0,0.949059,0.950147,0.947958,0.9,0.9,0.04685,0.116007,0.7,0.608069,0.326253,0.297717,0.219165,0.312458,0.289385,0.235161,0.212806,0.288001,0.791669,126.0,80.0,189.0,110.0,76.0,172.0,1.0,0.972222,1.0,0.0,0.5,1.0,0.972222,0.977612,1.051578,1.068475,0.028255,0.057311,0.212959,0.221604
max,100.0,0.0,1.0,0.0,0.5,0.3,0.0,0.3,0.0,1.0,0.999939,0.861503,0.871269,188.0,186.0,310.0,839.474195,127.861152,133.522473,124.377509,230.722324,626.654376,168.582179,745.517896,351.0,188.0,190.0,315.0,180.0,0.0,180.0,107.0,1.0,0.982609,0.987578,0.9,0.9,0.15,0.15,0.9,0.998199,0.811542,0.664042,0.541029,0.984496,0.996399,0.83532,0.846802,6.52765,1.884228,183.0,114.0,248.0,180.0,107.0,244.0,1.0,1.0,1.0,0.0,0.5,1.0,1.0,1.0,1.231395,1.231481,0.310071,0.346659,0.342727,0.360693


## 提供方决策对个体收益的影响

In [5]:
data.columns

Index(['step', '提供方1类型', '提供方2类型', '提供方1利他程度', '提供方2利他程度', 'OP1', 'OP2', 'PCIP1', 'PCIP2', 'ARPP1', 'ARPP2', 'ARSP1', 'ARSP2', '服务人数1', '服务人数2', '总服务人数', '评论1', '评论2', '社会总福利', '平台收益', '提供方收益1', '提供方收益2', '提供方总收益', '消费者收益1', '消费者收益2', '消费者总收益', '实际到达人数', '办理入住数1', '办理入住数2', '办理入住总数', '被激励好评数量1', '被激励好评数量2', '获得激励数量1', '获得激励数量2', '好评占比1', '好评占比2', '总好评数量占比', 'PERP1', 'PERP2', 'PCIP', 'OP', 'PERP', 'ARPP', 'ARSP', '平均毛利率', '平均净利率', '毛利率1', '毛利率2', '净利率1', '净利率2', '消费者平均收益1', '消费者平均收益2', '评论1_当期', '评论2_当期', '评论1_lag1', '评论2_lag1', '总评论数量1', '总评论数量2', '总评论数量', '好评数量1', '好评数量2', '好评总数', '当期好评占比1', '当期好评占比2', '被激励好评占好评比例1', '被激励好评占好评比例2', '被激励好评占好评比例', '获得激励评论数量占评论比例1', '获得激励评论数量占评论比例2', '获得激励评论数量占评论比例', 'RP1', 'RP2', 'val1', 'expect1', 'val2', 'expect2', 'market_type'], dtype='object')

In [23]:
#'pure-egoism','hybrid','pure-altruism'
Y=['提供方收益1','提供方收益2','消费者平均收益1','消费者平均收益2']
#pd.set_option('precision',4)#设置显示精度
pd.set_option('display.float_format',lambda x : '%.4f' % x)#不显示科学计数法
results=pd.DataFrame([],columns=['params','std err','t','p-values'])
i=0
for mt in ['pure-egoism']:
    dt=data[data['market_type']==mt]
    dt['PCIP12']=dt['PCIP1']*dt['PCIP1']
    for y in Y:
        #correlation, p_value = stats.pearsonr(dt[x],dt[y])#相关性分析
        fomula='~OP1+PCIP1+PERP1+服务人数1+PCIP12'
        model=ols(y+fomula,data=dt).fit()
        result = pd.DataFrame({"params": model.params,   # 回归系数
                        "std err": model.bse,     # 回归系数标准差
                        "t": round(model.tvalues,4),       # 回归系数T值
                        "p-values": round(model.pvalues,4) # 回归系数P值
                         })
        print('y:',y,'调整的R方',model.rsquared_adj,"AIC",model.aic,'BIC',model.bic)
        print(result)
        print('\n'*3)
        results=pd.concat([results,result])
        i+=1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt['PCIP12']=dt['PCIP1']*dt['PCIP1']


y: 提供方收益1 调整的R方 0.8188555713632646 AIC 14652770.572312858 BIC 14652844.93179418
             params  std err         t  p-values
Intercept   11.6287   0.0434  267.7267    0.0000
OP1         21.2896   0.1275  166.9773    0.0000
PCIP1     -111.1264   0.5160 -215.3423    0.0000
PERP1        1.5621   0.0349   44.7560    0.0000
服务人数1        0.5433   0.0002 2645.9742    0.0000
PCIP12      61.8083   2.0924   29.5401    0.0000




y: 提供方收益2 调整的R方 0.454023133533873 AIC 16618818.997223023 BIC 16618893.356704345
             params  std err          t  p-values
Intercept  118.3204   0.0754  1569.0765    0.0000
OP1        -22.3144   0.2214  -100.8093    0.0000
PCIP1       93.8729   0.8959   104.7798    0.0000
PERP1        4.5035   0.0606    74.3211    0.0000
服务人数1       -0.4107   0.0004 -1152.2196    0.0000
PCIP12    -158.4019   3.6325   -43.6065    0.0000




y: 消费者平均收益1 调整的R方 0.0022844046843208954 AIC 4724692.53212223 BIC 4724766.834687991
           params  std err        t  p-values
Intercept 

In [10]:
dt.describe()

Unnamed: 0,step,提供方1类型,提供方2类型,提供方1利他程度,提供方2利他程度,OP1,OP2,PCIP1,PCIP2,ARPP1,ARPP2,ARSP1,ARSP2,服务人数1,服务人数2,总服务人数,社会总福利,平台收益,提供方收益1,提供方收益2,提供方总收益,消费者收益1,消费者收益2,消费者总收益,实际到达人数,办理入住数1,办理入住数2,办理入住总数,被激励好评数量1,被激励好评数量2,获得激励数量1,获得激励数量2,好评占比1,好评占比2,总好评数量占比,PERP1,PERP2,PCIP,OP,PERP,ARPP,ARSP,平均毛利率,平均净利率,毛利率1,毛利率2,净利率1,净利率2,消费者平均收益1,消费者平均收益2,总评论数量1,总评论数量2,总评论数量,好评数量1,好评数量2,好评总数,当期好评占比1,当期好评占比2,被激励好评占好评比例1,被激励好评占好评比例2,被激励好评占好评比例,获得激励评论数量占评论比例1,获得激励评论数量占评论比例2,获得激励评论数量占评论比例,RP1,RP2,val1,expect1,val2,expect2
count,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1765176.0,1765176.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1764882.0,1764882.0,1677303.0,1677303.0,1579418.0,1764882.0,1764882.0,1747764.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0,1782000.0
mean,51.0,0.0,0.0,0.0,0.0,0.1532,0.1532,0.0797,0.0797,0.5736,0.5736,0.2496,0.2496,120.9487,120.9487,241.8973,262.5257,64.355,73.2057,73.2057,146.4113,25.8797,25.8797,51.7594,281.249,120.934,120.934,241.8681,60.4639,60.4639,79.7129,79.7129,0.8057,0.8057,0.803,0.5,0.5,0.0797,0.1532,0.5,0.5736,0.2496,0.324,0.238,0.324,0.324,0.238,0.238,0.219,0.219,99.9442,99.9442,199.8884,79.7129,79.7129,159.4257,0.8363,0.8363,0.6919,0.6919,0.69,0.8363,0.8363,0.837,0.74,0.74,-0.024,0.0096,-0.024,0.0096
std,28.5774,0.0,0.0,0.0,0.0,0.0875,0.0875,0.0688,0.0688,0.2155,0.2155,0.1872,0.1872,54.5321,54.5321,44.56,51.1264,21.5203,34.6958,34.6958,29.0631,23.3489,23.3489,31.3492,16.7924,61.1891,61.1891,49.512,58.0781,58.0781,52.7762,52.7762,0.1444,0.1444,0.1059,0.3266,0.3266,0.0467,0.0625,0.2309,0.1469,0.1338,0.0805,0.0737,0.1162,0.1162,0.104,0.104,0.9236,0.9236,49.7506,49.7506,47.2364,52.7762,52.7762,64.9163,0.3204,0.3204,0.3959,0.3959,0.2838,0.3204,0.3204,0.222,0.3129,0.3129,0.1162,0.1055,0.1162,0.1055
min,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015,0.015,0.0,0.0,0.0,0.0,13.0,-3395.8706,3.3571,-6.0922,-6.0922,10.7142,-3580.5558,-3580.5558,-3552.8402,206.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2383,0.2383,0.3296,0.1,0.1,0.0,0.0006,0.1,0.1284,0.0003,0.0677,-0.0138,-0.0119,-0.0119,-0.0489,-0.0489,-1104.2858,-1104.2858,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.6735,-0.6735,-0.6994,-0.7732,-0.6994,-0.7732
25%,26.0,0.0,0.0,0.0,0.0,0.0774,0.0774,0.0245,0.0245,0.3927,0.3927,0.0886,0.0886,101.0,101.0,225.0,233.7447,47.8262,53.4878,53.4878,123.561,9.0489,9.0489,30.2201,270.0,100.0,100.0,220.0,6.0,6.0,23.0,23.0,0.6651,0.6651,0.7268,0.1,0.1,0.0431,0.1088,0.3,0.4648,0.1464,0.2797,0.1943,0.2783,0.2783,0.1912,0.1912,0.1216,0.1216,73.0,73.0,164.0,23.0,23.0,116.0,0.8681,0.8681,0.2593,0.2593,0.5,0.8681,0.8681,0.6462,0.4516,0.4516,-0.0213,0.0086,-0.0213,0.0086
50%,51.0,0.0,0.0,0.0,0.0,0.1511,0.1511,0.0607,0.0607,0.5621,0.5621,0.2331,0.2331,134.0,134.0,259.0,274.0996,63.2803,77.892,77.892,148.095,22.14,22.14,47.666,281.0,137.0,137.0,263.0,41.0,41.0,85.0,85.0,0.823,0.823,0.8038,0.5,0.5,0.0728,0.1532,0.5,0.579,0.2423,0.3021,0.2175,0.2999,0.2999,0.2191,0.2191,0.2029,0.2029,104.0,104.0,204.0,85.0,85.0,162.0,1.0,1.0,0.9535,0.9535,0.6862,1.0,1.0,0.9568,0.75,0.75,0.0001,0.0261,0.0001,0.0261
75%,76.0,0.0,0.0,0.0,0.0,0.2318,0.2318,0.1172,0.1172,0.7345,0.7345,0.3776,0.3776,154.0,154.0,272.0,297.1245,80.1022,97.5108,97.5108,168.2227,36.5048,36.5048,68.8027,293.0,158.0,158.0,275.0,110.0,110.0,117.0,117.0,0.9516,0.9516,0.8888,0.9,0.9,0.1114,0.197,0.7,0.6751,0.3409,0.3332,0.2492,0.3213,0.3213,0.2408,0.2408,0.2973,0.2973,137.0,137.0,238.0,117.0,117.0,209.0,1.0,1.0,1.0,1.0,0.9767,1.0,1.0,1.0,1.0593,1.0593,0.0217,0.0525,0.0217,0.0525
max,100.0,0.0,0.0,0.0,0.0,0.3,0.3,0.3,0.3,1.0,1.0,0.8652,0.8652,254.0,254.0,352.0,2049.3978,160.8051,188.7849,188.7849,258.7981,1815.6977,1815.6977,1841.9121,370.0,262.0,262.0,354.0,247.0,247.0,247.0,247.0,1.0,1.0,0.9984,0.9,0.9,0.2915,0.2996,0.9,0.9994,0.7742,0.8701,0.7309,0.9994,0.9994,0.8494,0.8494,16.9691,16.9691,247.0,247.0,344.0,247.0,247.0,338.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.2384,1.2384,0.3119,0.4512,0.3119,0.4512


In [45]:
#'pure-egoism','hybrid','pure-altruism'
Y=['提供方收益1','提供方收益2','消费者平均收益1','消费者平均收益2']
#pd.set_option('precision',4)#设置显示精度
pd.set_option('display.float_format',lambda x : '%.4f' % x)#不显示科学计数法
results=pd.DataFrame([],columns=['params','std err','t','p-values'])
i=0
for mt in ['hybrid']:
    dt=data[data['market_type']==mt]
    for y in Y:
        #correlation, p_value = stats.pearsonr(dt[x],dt[y])#相关性分析
        fomula='~OP2+PCIP2+PERP2+服务人数2'
        model=ols(y+fomula,data=dt).fit()
        result = pd.DataFrame({"params": model.params,   # 回归系数
                        "std err": model.bse,     # 回归系数标准差
                        "t": round(model.tvalues,4),       # 回归系数T值
                        "p-values": round(model.pvalues,4) # 回归系数P值
                         })
        print('y:',y,'调整的R方',model.rsquared_adj,"AIC",model.aic,'BIC',model.bic)
        print(result)
        print('\n'*3)
        results=pd.concat([results,result])
        i+=1

y: 提供方收益1 调整的R方 0.37187483064498117 AIC 780656.8974872949 BIC 780685.0768371811
            params  std err         t  p-values
Intercept 112.7921   0.1966  573.7062    0.0000
OP2         0.0000   0.0000  573.7911    0.0000
PCIP2      -0.0000   0.0000 -315.8326    0.0000
PERP2      -0.4091   0.2026   -2.0190    0.0435
服务人数2      -0.2956   0.0013 -229.1706    0.0000




y: 提供方收益2 调整的R方 0.11073346167810294 AIC 832939.5999630077 BIC 832967.779312894
           params  std err        t  p-values
Intercept 13.7092   0.2640  51.9329    0.0000
OP2        0.0000   0.0000  51.9356    0.0000
PCIP2     -0.0000   0.0000 -27.1082    0.0000
PERP2      0.6224   0.2721   2.2877    0.0222
服务人数2      0.1820   0.0017 105.0926    0.0000




y: 消费者平均收益1 调整的R方 0.03183752721349564 AIC -71261.75658437237 BIC -71233.57912891674
           params  std err        t  p-values
Intercept  0.1439   0.0016  89.1152    0.0000
OP2        0.0000   0.0000  89.0388    0.0000
PCIP2     -0.0000   0.0000 -47.8171    0.0000
P

In [46]:
calculate_vif(dt, [ 'PERP1','OP1','PCIP1','ARPP1','ARSP1',
            'PERP2','OP2','PCIP2','ARPP2','ARSP2','服务人数1','服务人数2'])

  return 1 - self.ssr/self.uncentered_tss
  return 1 - self.ssr/self.uncentered_tss


Unnamed: 0,feature,VIF
0,PERP1,3.5936
1,OP1,4.782
2,PCIP1,2.4144
3,ARPP1,63.1381
4,ARSP1,24.2311
5,PERP2,3.2941
6,OP2,
7,PCIP2,
8,ARPP2,28.5756
9,ARSP2,12.1936


In [48]:
calculate_vif(dt, ['PERP1','OP1','PCIP1','ARPP1','ARSP1','服务人数1'])

  return 1 - self.ssr/self.uncentered_tss
  return 1 - self.ssr/self.uncentered_tss


Unnamed: 0,feature,VIF
0,PERP2,3.1837
1,OP2,
2,PCIP2,
3,ARPP2,6.631
4,ARSP2,6.2851
5,服务人数2,3.1257
