In [58]:
# Import libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from biogeme.version import getText # optional; deprecated alias kept for backward compatibility
from biogeme.version import get_text # replacement for getText, used for the version check
from biogeme.biogeme import BIOGEME
from biogeme.database import Database
from biogeme.expressions import Beta, Variable, log, exp
from biogeme import models
from biogeme.models import loglogit
from biogeme.results_processing import get_pandas_estimated_parameters

In [59]:
# Print the installed Biogeme version banner.
# `getText` is deprecated (Biogeme itself reports "use get_text instead"),
# so the replacement `get_text` is called to avoid the deprecation notice.
print(get_text())

getText is deprecated; use get_text instead.


biogeme 3.3.1 [2025-12-12]
Home page: http://biogeme.epfl.ch
Submit questions to https://groups.google.com/d/forum/biogeme
Michel Bierlaire, Transport and Mobility Laboratory, Ecole Polytechnique Fédérale de Lausanne (EPFL)



In [60]:
# Load the master data set used for estimation.
MASTER_CSV = "/home/shibumtk/B4research/estimate/data/01master_data/Logit_master5.csv"

# Every missing value is replaced by 0 right after reading; per the original
# author's note, the required handling was already done when the file was built.
master_df = pd.read_csv(MASTER_CSV, encoding="utf-8-sig").fillna(0)

# Sanity check: number of rows, then the available column names.
print(len(master_df))
master_df.columns

49135


Index(['Personal_ID', 'HouseholdIncome', 'sex', 'age', 'JobType',
       'ComuTime[m]', 'MainlineTime[m]', 'AccessTime_used[m]', 'HouseholdType',
       'HouseholdMembers(all)', 'WorkTime[m]', 'first_transportation',
       'NearestStation', 'NearestStation_code', 'near_area_syou',
       'near_area_kinrin', 'near_area_sum', 'near_area_syou500',
       'near_area_kinrin500', 'near_area_sum500', 'near_area_syou300',
       'near_area_kinrin300', 'near_area_sum300', 'near_o_area_syou',
       'near_o_area_kinrin', 'near_o_area_sum', 'near_o_area_syou500',
       'near_o_area_kinrin500', 'near_o_area_sum500', 'near_o_area_syou300',
       'near_o_area_kinrin300', 'near_o_area_sum300', 'WorkplaceStation',
       'WorkplaceStation_code', 'WP_area_syou', 'WP_area_kinrin',
       'WP_area_sum', 'WP_area_syou500', 'WP_area_kinrin500', 'WP_area_sum500',
       'WP_area_syou300', 'WP_area_kinrin300', 'WP_area_sum300',
       'WP_o_area_syou', 'WP_o_area_kinrin', 'WP_o_area_sum',
       'WP_o_are

In [61]:
# Exclude the string-valued columns (station names) from the frame.
# NOTE(review): presumably because the Biogeme database built later expects
# numeric columns only; the *_code columns are kept.
drop_cols = ['NearestStation', 'WorkplaceStation']

# errors="ignore" keeps this cell idempotent: master_df is reassigned from
# itself, so re-running the cell would otherwise raise a KeyError because the
# columns have already been removed.
master_df = master_df.drop(columns=drop_cols, errors="ignore")
master_df.columns

Index(['Personal_ID', 'HouseholdIncome', 'sex', 'age', 'JobType',
       'ComuTime[m]', 'MainlineTime[m]', 'AccessTime_used[m]', 'HouseholdType',
       'HouseholdMembers(all)', 'WorkTime[m]', 'first_transportation',
       'NearestStation_code', 'near_area_syou', 'near_area_kinrin',
       'near_area_sum', 'near_area_syou500', 'near_area_kinrin500',
       'near_area_sum500', 'near_area_syou300', 'near_area_kinrin300',
       'near_area_sum300', 'near_o_area_syou', 'near_o_area_kinrin',
       'near_o_area_sum', 'near_o_area_syou500', 'near_o_area_kinrin500',
       'near_o_area_sum500', 'near_o_area_syou300', 'near_o_area_kinrin300',
       'near_o_area_sum300', 'WorkplaceStation_code', 'WP_area_syou',
       'WP_area_kinrin', 'WP_area_sum', 'WP_area_syou500', 'WP_area_kinrin500',
       'WP_area_sum500', 'WP_area_syou300', 'WP_area_kinrin300',
       'WP_area_sum300', 'WP_o_area_syou', 'WP_o_area_kinrin', 'WP_o_area_sum',
       'WP_o_area_syou500', 'WP_o_area_kinrin500', 'WP_o_area

In [62]:
# Summary statistics for a handful of columns of interest.
summary_cols = [
    'MainlineTime[m]',
    'AccessTime_used[m]',
    'HouseholdMembers(all)',
    'WorkTime[m]',
    'near_area_syou300',
]
master_df[summary_cols].describe()

Unnamed: 0,MainlineTime[m],AccessTime_used[m],HouseholdMembers(all),WorkTime[m],near_area_syou300
count,49135.0,49135.0,49135.0,49135.0,49135.0
mean,56.068516,10.554626,2.598779,604.004315,439093.4
std,25.548637,6.716427,1.23193,108.680044,454260.1
min,0.0,0.0,1.0,240.0,0.0
25%,38.086344,5.943449,2.0,550.0,0.0
50%,52.895433,9.206132,3.0,598.0,321321.1
75%,71.187521,13.690621,4.0,660.0,687882.9
max,336.65219,202.816762,9.0,1200.0,2981425.0


In [63]:
# Wrap the prepared DataFrame in a biogeme.database.Database named "PTdata";
# the model variables below are resolved against this object.
master_db = Database("PTdata", master_df)

In [64]:
# ---------------------------------------------------------------------------
# Binary logit specification: utility of alternative 1 ("yes") relative to
# alternative 0 ("no"), whose utility is fixed at 0.
# ---------------------------------------------------------------------------

# Columns read directly from the main database.
PRITRIP = Variable("PriTrip")                  # dependent variable (0/1)
MACM_min = Variable("MainlineTime[m]")         # main-line time (minutes)
WORK_min = Variable("WorkTime[m]")             # working time (minutes)
HHM = Variable("HouseholdMembers(all)")        # household size
LU = Variable("near_area_syou")                # station land-use indicator
AT_min = Variable("AccessTime_used[m]")        # actual access time that day (minutes)
MODE = Variable("first_transportation")        # 1: walk, 2: bicycle, 4: bus, 5: car

# Rescaled / centred regressors, registered as new database columns.
MINUTES_PER_HOUR = 60
LAND_USE_UNIT = 1000000                        # express land use per 1,000,000 m^2
mean_access_min = np.mean(master_df["AccessTime_used[m]"])

MACM_h = master_db.define_variable("MACM_h", MACM_min / MINUTES_PER_HOUR)   # hours
WORK_h = master_db.define_variable("WORK_h", WORK_min / MINUTES_PER_HOUR)   # hours
LU_s = master_db.define_variable("LU_s", LU / LAND_USE_UNIT)
AT_c = master_db.define_variable("AT_c", AT_min - mean_access_min)          # mean-centred minutes

# Mode dummies. Walking is the reference category for the intercept shift, so
# its dummy is created only to build the access-time interaction below.
mode_codes = {"D_BIC": 2, "D_BUS": 4, "D_CAR": 5, "D_WAL": 1}
D_BIC, D_BUS, D_CAR, D_WAL = (
    master_db.define_variable(column, (MODE == code))
    for column, code in mode_codes.items()
)

# Interaction terms: centred access time x mode (four columns).
interaction_specs = (
    ("AT_WAL", D_WAL),
    ("AT_BIC", D_BIC),
    ("AT_BUS", D_BUS),
    ("AT_CAR", D_CAR),
)
AT_WAL, AT_BIC, AT_BUS, AT_CAR = (
    master_db.define_variable(column, AT_c * dummy)
    for column, dummy in interaction_specs
)


def _free_beta(name, start):
    """Return a Beta with the given start value, no bounds and status flag 0."""
    return Beta(name, start, None, None, 0)


# Coefficients.
ASC_YES = _free_beta("ASC_YES", 0.0)
B_MACM = _free_beta("B_MACMTIME", -0.01)       # per hour
B_WORK = _free_beta("B_WORKTIME", -0.30)       # per hour
B_HH = _free_beta("B_HHM_all", 0.0)
B_LU = _free_beta("B_N_EKI_SCORE", 0.0)        # only needed when land use is included

# Mode dummies (walking is the reference) -> shifts of the constant.
B_D_BIC = _free_beta("B_D_BICYCLE", 0.0)
B_D_BUS = _free_beta("B_D_BUS", 0.0)
B_D_CAR = _free_beta("B_D_CAR", 0.0)

# Mode-specific access-time slopes (per minute).
B_AT_WAL = _free_beta("B_AT_WALK", -0.01)
B_AT_BIC = _free_beta("B_AT_BICYCLE", -0.01)
B_AT_BUS = _free_beta("B_AT_BUS", -0.01)
B_AT_CAR = _free_beta("B_AT_CAR", -0.01)

# Utility of "yes": the constant plus each coefficient x regressor, accumulated
# left to right in the listed order.
utility_terms = (
    (B_MACM, MACM_h),
    (B_WORK, WORK_h),
    (B_HH, HHM),
    (B_LU, LU_s),            # land-use term (optional)
    (B_D_BIC, D_BIC),
    (B_D_BUS, D_BUS),
    (B_D_CAR, D_CAR),
    (B_AT_WAL, AT_WAL),
    (B_AT_BIC, AT_BIC),
    (B_AT_BUS, AT_BUS),
    (B_AT_CAR, AT_CAR),
)
V_YES = ASC_YES
for coefficient, regressor in utility_terms:
    V_YES = V_YES + coefficient * regressor

# Alternative 0 ("no") is the reference with utility 0.
V = {0: 0, 1: V_YES}


In [65]:
# Estimate the model and show the results.

# Log-likelihood expression: log of the logit probability of the observed
# choice PRITRIP given utilities V (no availability conditions passed).
logprob = loglogit(V, None, PRITRIP)

# Output-file switches: do not write the HTML / YAML result files.
output_flags = {"generate_html": False, "generate_yaml": False}
bio = BIOGEME(master_db, logprob, **output_flags)
bio.model_name = "binlogit_mode_dummies_time_interactions"

# Null log likelihood with both alternatives flagged as available.
both_available = {0: 1, 1: 1}
bio.calculate_null_loglikelihood(avail=both_available)

res = bio.estimate()
print(res.short_summary())

# Collect the estimated parameters in a pandas DataFrame and print it.
pandas_results = get_pandas_estimated_parameters(estimation_results=res)
print(pandas_results)

Results for model binlogit_mode_dummies_time_interactions
Nbr of parameters:		12
Sample size:			49135
Excluded data:			0
Null log likelihood:		-34057.79
Final log likelihood:		-19925.65
Likelihood ratio test (null):		28264.27
Rho square (null):			0.415
Rho bar square (null):			0.415
Akaike Information Criterion:	39875.31
Bayesian Information Criterion:	39980.93

             Name     Value  Robust std err.  Robust t-stat.  Robust p-value
0         ASC_YES  4.348753         0.086230       50.431802    0.000000e+00
1      B_MACMTIME -0.507496         0.033660      -15.077231    0.000000e+00
2      B_WORKTIME -0.502412         0.008007      -62.747986    0.000000e+00
3       B_HHM_all -0.241960         0.011634      -20.797407    0.000000e+00
4   B_N_EKI_SCORE  0.018580         0.004916        3.779549    1.571129e-04
5     B_D_BICYCLE -0.122269         0.075507       -1.619311    1.053805e-01
6         B_D_BUS -0.078405         0.077305       -1.014221    3.104772e-01
7         B_D_CAR -