In [1]:
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import arviz as az
from scipy.special import softmax
import warnings
warnings.filterwarnings('ignore')

ArviZ is undergoing a major refactor to improve flexibility and extensibility while maintaining a user-friendly interface.
Some upcoming changes may be backward incompatible.
For details and migration guidance, visit: https://python.arviz.org/en/latest/user_guide/migration_guide.html
  warn(


In [2]:
def compute_significance(trace, var_names=None):
    """
    计算贝叶斯显著性指标（双侧p值形式）
    
    Parameters
    ----------
    trace : arviz.InferenceData
        MCMC采样结果
    var_names : list
        要分析的变量名列表
    
    Returns
    -------
    pd.DataFrame : 包含各种显著性指标的表格
    """
    
    if var_names is None:
        var_names = ['alpha_init', 'beta_init', 'gamma_trans', 
                     'emit_base_t1', 'emit_base_t2', 'emit_diff_t1', 'emit_diff_t2']
    
    results = []
    
    for var_name in var_names:
        if var_name not in trace.posterior:
            continue
            
        # 获取后验样本
        samples = trace.posterior[var_name].values
        # 合并chains和draws: (chains, draws, ...) -> (n_samples, ...)
        samples = samples.reshape(-1, *samples.shape[2:])
        
        # 处理不同维度的参数
        if samples.ndim == 1:
            # 标量参数
            results.append(analyze_single_param(var_name, samples))
        elif samples.ndim == 2:
            # 1D参数
            for i in range(samples.shape[1]):
                results.append(analyze_single_param(f"{var_name}[{i}]", samples[:, i]))
        elif samples.ndim == 3:
            # 2D参数
            for i in range(samples.shape[1]):
                for j in range(samples.shape[2]):
                    results.append(analyze_single_param(f"{var_name}[{i},{j}]", samples[:, i, j]))
    
    df = pd.DataFrame(results)
    return df


def analyze_single_param(name, samples):
    """
    分析单个参数的显著性（双侧p值形式）
    
    双侧p值计算方法：
    p_value = 2 * min(P(β > 0), P(β < 0))
    
    显著性判断标准：
    *** : p < 0.01
    **  : p < 0.05
    *   : p < 0.10
    """
    mean = np.mean(samples)
    std = np.std(samples)
    
    # 95% 可信区间
    ci_lower = np.percentile(samples, 2.5)
    ci_upper = np.percentile(samples, 97.5)
    
    # 后验概率
    prob_positive = np.mean(samples > 0)
    prob_negative = np.mean(samples < 0)
    
    # 双侧p值: 2 * min(P(β>0), P(β<0))
    p_value = 2 * min(prob_positive, prob_negative)
    
    # 基于p值的显著性标记
    if p_value < 0.01:
        significance = "***"
    elif p_value < 0.05:
        significance = "**"
    elif p_value < 0.10:
        significance = "*"
    else:
        significance = ""
    
    return {
        'parameter': name,
        'mean': mean,
        'std': std,
        'ci_2.5%': ci_lower,
        'ci_97.5%': ci_upper,
        'p_value': p_value,
        'significance': significance,
    }



# 模型结果分析

In [11]:
# 读取结果
trace = az.from_netcdf('maas_hmm_results/trace_state_2.nc')

var_names = ['alpha_init', 'beta_init', 'gamma_trans', 'ASC_t1','beta_firstcar_t1','beta_firsttaxi_t1','beta_firstpt_t1','beta_distance5_t1','beta_triptime_t1','beta_normal_t1',
 'ASC_t2','beta_taxi12_t2','beta_priceratio_t2','beta_price_t2','beta_weekbus_t2','beta_ebike_t2','beta_occupy_t2','beta_sex_t2','beta_income1_t2','beta_age4_t2',
 'beta_traveldistancework_t2','beta_weekmetro_t2','beta_c7_t2','beta_traveldistanceweekend_t2','beta_weektaxi_t2','beta_age3_t2','beta_income2_t2','beta_c6_t2',
 'beta_cost_t2','beta_license_t2','beta_havecar_t2','beta_education_t2']
sig_df = compute_significance(trace, var_names=var_names)

In [12]:
sig_df

Unnamed: 0,parameter,mean,std,ci_2.5%,ci_97.5%,p_value,significance
0,alpha_init[0],2.456431,4.267277,-5.265458,5.399211,0.500000,
1,"beta_init[0,0]",0.027663,0.086643,-0.147444,0.190112,0.732375,
2,"beta_init[1,0]",-0.494857,0.861810,-1.143324,1.091732,0.500000,
3,"beta_init[2,0]",0.074318,0.160456,-0.271003,0.327399,0.558375,
4,"beta_init[3,0]",0.163507,0.321438,-0.522594,0.613882,0.513125,
...,...,...,...,...,...,...,...
88,beta_license_t2[1],0.242557,0.766975,-1.371835,1.431137,0.625625,
89,beta_havecar_t2[0],-1.032389,1.313104,-2.437086,1.683543,0.498000,
90,beta_havecar_t2[1],0.418049,1.315288,-2.218441,1.907182,0.503875,
91,beta_education_t2[0],-0.357028,0.650279,-1.413862,1.067774,0.526250,


In [5]:
# 读取结果
trace = az.from_netcdf('maas_hmm_results/trace_state_2.nc')

var_names = ['alpha_init', 'beta_init', 'gamma_trans', 'ASC_t1','beta_no_stage1',
             'beta_M1_stage1', 'beta_M2_stage1', 'beta_M3_stage1', 'beta_M4_stage1',
             'ASC_t2', 'beta_bus_stage2', 'beta_metro_stage2', 'beta_taxi_stage2',
             'beta_ultra_stage2', 'beta_payg_stage2']

# 查看摘要统计
summary = az.summary(trace, var_names=var_names)


KeyError: 'var names: "[\'beta_no_stage1\' \'beta_M1_stage1\' \'beta_M2_stage1\' \'beta_M3_stage1\'\\n \'beta_M4_stage1\' \'beta_bus_stage2\' \'beta_metro_stage2\' \'beta_taxi_stage2\'\\n \'beta_ultra_stage2\' \'beta_payg_stage2\'] are not present" in dataset'

In [6]:
summary = az.summary(trace)


In [7]:
summary[['mean', 'sd', 'r_hat', 'ess_bulk']]

Unnamed: 0,mean,sd,r_hat,ess_bulk
alpha_init[0],2.456,4.267,1.53,7.0
"beta_init[0, 0]",0.028,0.087,1.21,13.0
"beta_init[1, 0]",-0.495,0.862,1.53,7.0
"beta_init[2, 0]",0.074,0.160,1.49,7.0
"beta_init[3, 0]",0.164,0.321,1.53,7.0
...,...,...,...,...
log_lik[27357],-0.074,0.004,1.00,17981.0
log_lik[27358],-0.074,0.004,1.00,17794.0
log_lik[27359],-0.074,0.004,1.00,17782.0
pi_init_mean[0],0.388,0.195,1.53,7.0


In [57]:
trace.posterior['pi_init_mean'].mean(dim=['chain', 'draw']).values

array([0.014305  , 0.26385826, 0.72183674])

In [56]:
trace.posterior['alpha_init'].mean(dim=['chain', 'draw']).values

array([0.01540609, 0.39148783])

In [55]:
trace.posterior['beta_init'].mean(dim=['chain', 'draw']).values

array([[ 0.00516961,  0.14607423],
       [ 0.00643546,  0.14522669],
       [ 0.00391317,  0.17091024],
       [ 0.0052839 ,  0.07818058],
       [ 0.01533854,  0.23957588],
       [-0.00082084,  0.1474522 ],
       [ 0.01236828,  0.2061604 ],
       [ 0.01985748,  0.44289897],
       [ 0.0162472 ,  0.4828415 ],
       [ 0.02561059,  0.45634356],
       [ 0.00653802,  0.02583201],
       [ 0.00344561,  0.02004636]])

In [72]:
trace.posterior['gamma_trans'].mean(dim=['chain', 'draw']).values

array([-0.13315981,  0.10911515,  0.0066032 , -0.06695646, -0.12346804,
       -0.07885294, -0.06684398, -0.50257989,  0.00161778, -0.31574123])

In [73]:
trace.posterior['trans_logits_raw'].mean(dim=['chain', 'draw']).values

array([[ 0.03884317, -0.04896119],
       [-0.09885965,  0.42681707],
       [ 0.48615182, -1.1185531 ]])

In [58]:
trace.posterior['ASC_t1'].mean(dim=['chain', 'draw']).values

array([[-0.0286641 , -0.03129296, -0.01936249, -0.01772836],
       [-0.12144101, -0.19753801, -0.37032504, -0.3658919 ],
       [-0.318953  , -0.54275091, -1.02181688, -0.99765641]])

In [59]:
trace.posterior['beta_no_stage1'].mean(dim=['chain', 'draw']).values

array([[0.00933457, 0.00443792, 0.01320499],
       [0.1543152 , 0.13755903, 0.14292155],
       [0.42224958, 0.3754815 , 0.38689851]])

In [60]:
trace.posterior['beta_M1_stage1'].mean(dim=['chain', 'draw']).values

array([[-0.08680354, -0.08997829,  0.00197009,  0.00154234],
       [-0.15650682, -0.15215569,  0.10107066,  0.03373077],
       [-0.36560969, -0.35622717,  0.28507594,  0.09120013]])

In [61]:
trace.posterior['beta_M2_stage1'].mean(dim=['chain', 'draw']).values

array([[-0.0859467 , -0.09134965,  0.00298753, -0.00195491],
       [-0.10769081, -0.17858924,  0.11065935, -0.05830737],
       [-0.23274367, -0.43009621,  0.30088224, -0.16208902]])

In [62]:
trace.posterior['beta_M3_stage1'].mean(dim=['chain', 'draw']).values

array([[-4.89424331e-02, -4.60741130e-02,  3.03390330e-03,
        -1.61657983e-04],
       [-1.19564455e-01, -1.19985492e-01,  1.37897343e-02,
        -3.38963404e-02],
       [-3.02867752e-01, -2.99406017e-01,  3.39521676e-02,
        -9.73695948e-02]])

In [63]:
trace.posterior['beta_M4_stage1'].mean(dim=['chain', 'draw']).values

array([[ 0.04830658,  0.00126057,  0.00711763],
       [-0.04457717, -0.00426188, -0.00855419],
       [-0.13252853, -0.01354788, -0.0256809 ]])

In [64]:
trace.posterior['ASC_t2'].mean(dim=['chain', 'draw']).values

array([[ 0.13395602,  0.03838943, -0.00203218, -0.07732381],
       [ 0.05992404,  0.01059202, -0.04286414, -0.07566353],
       [-0.00532083, -0.04525241,  0.16324411, -0.13160788]])

In [65]:
trace.posterior['beta_bus_stage2'].mean(dim=['chain', 'draw']).values

array([[-0.04138736,  0.14488979, -0.03311562,  0.42157209,  0.12479509,
         0.12429067, -0.04620831,  0.09068942,  0.01801949],
       [-0.07588425,  0.07620589, -0.15693327,  0.23462671,  0.146951  ,
         0.0174068 , -0.17769428,  0.02966835,  0.08071529],
       [-0.01592459,  0.01976337, -0.09312216,  0.00566384, -0.01772161,
         0.0452106 , -0.0516998 ,  0.02373315, -0.02790051]])

In [66]:
trace.posterior['beta_metro_stage2'].mean(dim=['chain', 'draw']).values

array([[-0.07487639, -0.00627182, -0.04645838, -0.08721349,  0.54245887,
         0.17784645, -0.12411518, -0.02997256,  0.04318819, -0.10678209,
         0.10606011],
       [-0.11019795, -0.02793367, -0.14987185, -0.28799313,  0.49605438,
         0.16404762, -0.21229065, -0.03143132,  0.14554248, -0.12360665,
         0.1350467 ],
       [-0.00218533, -0.07779273, -0.11926194, -0.18859228,  0.08711115,
         0.01504497, -0.05895368,  0.00624137, -0.03614167, -0.03605658,
         0.04053674]])

In [67]:
trace.posterior['beta_taxi_stage2'].mean(dim=['chain', 'draw']).values

array([[ 0.00191913, -0.15095593, -0.08540278,  0.23250536,  0.05353328,
        -0.00273727],
       [-0.03898818, -0.44759039, -0.13467231,  0.03983235,  0.02136736,
        -0.0245792 ],
       [ 0.17421214, -0.02886796, -0.16972134,  0.1817203 ,  0.13585133,
         0.19694921]])

In [68]:
trace.posterior['beta_ultra_stage2'].mean(dim=['chain', 'draw']).values

array([[-0.05733731, -0.32264163,  0.08765251,  0.08006475,  0.07848877,
        -0.06275182],
       [-0.0522569 , -0.33482463,  0.05077573,  0.06717351,  0.04971332,
        -0.12669701],
       [-0.13660437, -0.07797495,  0.16897136,  0.20921985, -0.00283289,
        -0.13259122]])

In [69]:
trace.posterior['beta_payg_stage2'].mean(dim=['chain', 'draw']).values

array([[ 0.08381255, -0.03854909, -0.08861305, -0.0061566 ],
       [ 0.24037947,  0.0559521 ,  0.23804208,  0.32238692],
       [ 0.0197956 ,  0.01369371,  0.02976691,  0.02015327]])

In [4]:
def load_data(filepath):
    """
    加载合并后的问卷数据
    
    Parameters
    ----------
    filepath : str
        数据文件路径
    """
    # 根据文件类型选择读取方式
    if filepath.endswith('.csv'):
        df = pd.read_csv(filepath, encoding='gb2312')
    elif filepath.endswith('.xlsx'):
        df = pd.read_excel(filepath)
    else:
        raise ValueError("不支持的文件格式")
    
    return df

In [5]:
# 使用模拟数据测试 (实际使用时替换为真实数据)
data = load_data('data/最终模型数据.csv')

In [6]:
data['maas'].value_counts()

maas
1    15620
4     4090
5     3545
6     2325
7     1780
Name: count, dtype: int64

# 态度转移因素

## 第一阶段态度

In [7]:
data['choose_options'] = data['maas'].apply(lambda x: 1 if x >= 4 else 0)

In [8]:
data['time_savings'] = 0

In [9]:
data.loc[data['maas'] == 4, 'time_savings'] = data.loc[data['maas'] == 4]['M1dif_triptime']
data.loc[data['maas'] == 5, 'time_savings'] = data.loc[data['maas'] == 5]['M2dif_triptime']
data.loc[data['maas'] == 6, 'time_savings'] = data.loc[data['maas'] == 6]['M3dif_triptime']
data.loc[data['maas'] == 7 ,'time_savings'] = data.loc[data['maas'] == 7]['M4dif_triptime']

In [10]:
data['cost_savings'] = 0

In [11]:
data.loc[data['maas'] == 4, 'cost_savings'] = data.loc[data['maas'] == 4]['M1dif_price']
data.loc[data['maas'] == 5, 'cost_savings'] = data.loc[data['maas'] == 5]['M2dif_price']
data.loc[data['maas'] == 6, 'cost_savings'] = data.loc[data['maas'] == 6]['M3dif_price']
data.loc[data['maas'] == 7 ,'cost_savings'] = data.loc[data['maas'] == 7]['M4dif_price']

## 套餐匹配因素

In [12]:
data['week_ebike']

0        2
1        2
2        2
3        2
4        2
        ..
27355    1
27356    1
27357    1
27358    1
27359    1
Name: week_ebike, Length: 27360, dtype: int64

In [13]:
data['match_bus'] = data['week_bus'] > 3
data['match_metro'] = data['week_metro'] > 3
data['match_bike'] = data['week_bike'] > 3

In [14]:
data.loc[data['results'] == 4]['week_ebike'] * 4 + 1.5 - data.loc[data['results'] == 4]['ebike_4'] * 10

3       -22.5
8       -22.5
13      -22.5
61      -18.5
66      -18.5
         ... 
27195   -10.5
27196   -14.5
27197   -18.5
27198   -22.5
27199   -22.5
Length: 2258, dtype: float64

In [15]:
data['match_e_bike'] = 0
data.loc[data['results'] == 1, 'match_e_bike'] = data.loc[data['results'] == 1]['week_ebike'] * 4 + 1.5 - data.loc[data['results'] == 1]['ebike_12'] * 10
data.loc[data['results'] == 2, 'match_e_bike'] = data.loc[data['results'] == 2]['week_ebike'] * 4 + 1.5 - data.loc[data['results'] == 2]['ebike_12'] * 10
data.loc[data['results'] == 3, 'match_e_bike'] = data.loc[data['results'] == 3]['week_ebike'] * 4 + 1.5 - data.loc[data['results'] == 3]['ebike_3'] * 10
data.loc[data['results'] == 4 ,'match_e_bike'] = data.loc[data['results'] == 4]['week_ebike'] * 4 + 1.5 - data.loc[data['results'] == 4]['ebike_4'] * 10

In [16]:
## 居民出行调查是13.48公里

In [17]:
data['match_taxi'] = 0
data.loc[data['results'] == 1, 'match_taxi'] = ((data.loc[data['results'] == 1]['week_taxi'] * 4 + 1.5)*13.48 - data.loc[data['results'] == 1]['taxi_12'] * 100)/100
data.loc[data['results'] == 2, 'match_taxi'] = ((data.loc[data['results'] == 2]['week_taxi'] * 4 + 1.5)*13.48 - data.loc[data['results'] == 2]['taxi_12'] * 100)/100
data.loc[data['results'] == 3, 'match_taxi'] = ((data.loc[data['results'] == 3]['week_taxi'] * 4 + 1.5)*13.48 - data.loc[data['results'] == 3]['taxi_3'] * 100)/100
data.loc[data['results'] == 4 ,'match_taxi'] = ((data.loc[data['results'] == 4]['week_taxi'] * 4 + 1.5)*13.48 - data.loc[data['results'] == 4]['taxi_4'] * 100)/100

In [18]:
data

Unnamed: 0,maas,peopleID,purpose,distance,B1wtimeTaxi,A1ttimeCar,A1priceCar,B1ttimeTaxi,B1wtimeTaxi.1,B1triptime,...,age3,age4,choose_options,time_savings,cost_savings,match_bus,match_metro,match_bike,match_e_bike,match_taxi
0,1,9,0,9.1,10,1000,1000,18,10,28,...,False,False,0,0.0,0.0,True,True,False,9.5,1.8198
1,1,9,0,9.1,10,1000,1000,18,10,28,...,False,False,0,0.0,0.0,True,True,False,5.5,1.1198
2,1,9,0,9.1,10,1000,1000,18,10,28,...,False,False,0,0.0,0.0,True,True,False,-6.5,-2.3802
3,1,9,0,9.1,10,1000,1000,18,10,28,...,False,False,0,0.0,0.0,True,True,False,-22.5,-5.1302
4,1,9,0,9.1,10,1000,1000,18,10,28,...,False,False,0,0.0,0.0,True,True,False,1.5,0.4198
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27355,1,2742,0,26.0,3,1000,1000,33,3,36,...,False,False,0,0.0,0.0,False,False,False,0.0,0.0000
27356,1,2742,0,26.0,3,1000,1000,33,3,36,...,False,False,0,0.0,0.0,False,False,False,0.0,0.0000
27357,1,2742,0,26.0,3,1000,1000,33,3,36,...,False,False,0,0.0,0.0,False,False,False,0.0,0.0000
27358,1,2742,0,26.0,3,1000,1000,33,3,36,...,False,False,0,0.0,0.0,False,False,False,0.0,0.0000


## 价格感知因素

In [19]:
cost_map = {
    1: 25,
    2: 100,
    3: 275,
    4: 600,
    5: 1150,
    6: 1500
}

In [20]:
data['cost']

0        3
1        3
2        3
3        3
4        3
        ..
27355    1
27356    1
27357    1
27358    1
27359    1
Name: cost, Length: 27360, dtype: int64

In [21]:
data['match_price'] = 0
data.loc[data['results'] == 1, 'match_price'] = (data.loc[data['results'] == 1]['cost'].map(cost_map))*0.01 - data.loc[data['results'] == 1]['price1']
data.loc[data['results'] == 2, 'match_price'] = (data.loc[data['results'] == 2]['cost'].map(cost_map))*0.01 - data.loc[data['results'] == 2]['price2']
data.loc[data['results'] == 3, 'match_price'] = (data.loc[data['results'] == 3]['cost'].map(cost_map))*0.01 - data.loc[data['results'] == 3]['price3']
data.loc[data['results'] == 4 ,'match_price'] = (data.loc[data['results'] == 4]['cost'].map(cost_map))*0.01 - data.loc[data['results'] == 4]['price4']

In [22]:
data['match_price'].describe()

count    27360.000000
mean        -1.896997
std          5.487067
min        -25.449200
25%         -3.386800
50%          0.000000
75%          0.000000
max         14.360000
Name: match_price, dtype: float64

In [31]:
data['price_ratio'] = 0
data.loc[data['results'] == 1, 'price_ratio'] = data.loc[data['results'] == 1]['price_12']
data.loc[data['results'] == 2, 'price_ratio'] = data.loc[data['results'] == 2]['price_12']
data.loc[data['results'] == 3, 'price_ratio'] = data.loc[data['results'] == 3]['price_3']
data.loc[data['results'] == 4 ,'price_ratio'] = data.loc[data['results'] == 4]['price_4']

In [44]:
for i in ['distance','B1wtimeTaxi', 'B1ttimeTaxi', 'B1wtimeTaxi.1', 'B1triptime',
       'B1priceTaxi',  'M4ttime', 'M4price', 'first_car',
       'first_taxi', 'first_pt', 'morning', 'evening', 'normal', 'late',
       'M4dif_triptime', 'M4dif_price','time_savings',
       'cost_savings']:
    data[i] = data[i] / 10

for i in ['A1ttimeCar',
       'A1priceCar','C1busrail', 'C1ttimePT', 'C1wtimePT',
       'C1distancePT_walk', 'C1triptimePT', 'C1pricePT', 'M1ttimerail',
       'M1ttime_bus', 'M1wtime', 'M1distance_walk', 'M1triptime', 'M1price',
       'M2ttime_rail', 'M2ttime_bus', 'M2wtime', 'M2distance_bike',
       'M2triptime', 'M2price', 'M3ttime_rail', 'M3ttime_taxi', 'M3wtime',
       'M3triptime', 'M3price','M1dif_triptime', 'M1dif_price', 'M2dif_triptime', 'M2dif_price',
       'M3dif_triptime', 'M3dif_price']:
    data[i] = data[i] / 100

In [45]:
data.to_csv('data/最终模型数据.csv', index=False)

In [None]:
['distance','B1wtimeTaxi', 'B1ttimeTaxi', 'B1wtimeTaxi.1', 'B1triptime',
       'B1priceTaxi',  'M4ttime', 'M4price', 'first_car',
       'first_taxi', 'first_pt', 'morning', 'evening', 'normal', 'late',
       'M4dif_triptime', 'M4dif_price','time_savings',
       'cost_savings']

In [None]:
['A1ttimeCar',
       'A1priceCar','C1busrail', 'C1ttimePT', 'C1wtimePT',
       'C1distancePT_walk', 'C1triptimePT', 'C1pricePT', 'M1ttimerail',
       'M1ttime_bus', 'M1wtime', 'M1distance_walk', 'M1triptime', 'M1price',
       'M2ttime_rail', 'M2ttime_bus', 'M2wtime', 'M2distance_bike',
       'M2triptime', 'M2price', 'M3ttime_rail', 'M3ttime_taxi', 'M3wtime',
       'M3triptime', 'M3price','M1dif_triptime', 'M1dif_price', 'M2dif_triptime', 'M2dif_price',
       'M3dif_triptime', 'M3dif_price']

In [42]:
data.columns[50:]

Index(['M4dif_price', 'M1metroratio', 'M2metroratio', 'M3metroratio',
       'distance1', 'distance2', 'distance3', 'distance4', 'distance5',
       'MaasFamiliar', 'results', 'ebike_12', 'ebike_3', 'ebike_4', 'taxi_12',
       'taxi_3', 'taxi_4', 'price_12', 'price_3', 'price_4', 'price1',
       'price2', 'price3', 'price4', 'travel_num', 'travel_distance_work',
       'travel_distance_weekend', 'travel_aim', 'travel_people', 'a6', 'b6',
       'c6', 'd6', 'e6', 'f6', 'g6', 'a7', 'b7', 'c7', 'd7', 'cost', 'sex',
       'education', 'income1', 'income2', 'income3', 'have_car', 'license',
       'e_bike', 'occupy', 'week_bus', 'week_metro', 'week_bike', 'week_ebike',
       'week_taxi', 'at1', 'at2', 'at3', 'at4', 'at5', 'at6', 'at7', 'at8',
       'at9', 'at10', 'at11', 'at12', 'at13', 'at14', 'at15', 'at16', 'at17',
       'at18', 'at19', 'at20', 'at21', 'at22', 'at23', 'at24', 'at25', 'age1',
       'age2', 'age3', 'age4', 'choose_options', 'time_savings',
       'cost_savings', 'ma

In [43]:
data.describe()

Unnamed: 0,maas,peopleID,purpose,distance,B1wtimeTaxi,A1ttimeCar,A1priceCar,B1ttimeTaxi,B1wtimeTaxi.1,B1triptime,...,at23,at24,at25,choose_options,time_savings,cost_savings,match_e_bike,match_taxi,match_price,price_ratio
count,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,...,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0,27360.0
mean,2.781981,1300.221491,0.376279,20.491795,6.062317,647.573648,638.029971,39.107639,6.062317,45.169956,...,3.911732,3.979715,3.875731,0.429094,-6.386637,-2.964855,-1.089291,-0.447562,-1.896997,0.676466
std,2.1702,795.323355,0.48446,11.457055,3.075133,463.017793,475.471532,17.592339,3.075133,18.033042,...,1.020851,1.006099,1.06802,0.494956,16.093519,19.045099,8.369447,1.796544,5.487067,0.473662
min,1.0,9.0,0.0,9.1,3.0,18.0,0.0,18.0,3.0,23.0,...,1.0,1.0,1.0,0.0,-93.0,-187.2,-30.5,-6.9086,-25.4492,0.0
25%,1.0,625.0,0.0,14.0,3.0,47.0,25.0,25.0,3.0,30.0,...,3.0,3.0,3.0,0.0,-4.0,0.0,-2.5,-0.6586,-3.3868,0.0
50%,1.0,1246.0,0.0,16.0,5.0,1000.0,1000.0,35.0,5.0,40.0,...,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9
75%,5.0,2008.0,1.0,26.0,10.0,1000.0,1000.0,50.0,10.0,55.0,...,5.0,5.0,5.0,1.0,0.0,1.5,1.5,0.5806,0.0,1.0
max,7.0,2742.0,1.0,44.0,10.0,1000.0,1000.0,80.0,10.0,90.0,...,5.0,5.0,5.0,1.0,23.888889,132.36,21.5,2.8982,14.36,1.2


In [34]:
data.isna().sum()

maas            0
peopleID        0
purpose         0
distance        0
B1wtimeTaxi     0
               ..
match_bike      0
match_e_bike    0
match_taxi      0
match_price     0
price_ratio     0
Length: 144, dtype: int64