In [38]:
# ライブラリの導入
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from biogeme.version import getText # なくても良い（バージョン確認のため導入）
from biogeme.biogeme import BIOGEME
from biogeme.database import Database
from biogeme.expressions import Beta, Variable, log, exp
from biogeme.models import loglogit
from biogeme.results_processing import get_pandas_estimated_parameters

In [39]:
# biogemeのバージョン確認
print(getText())

getText is deprecated; use get_text instead.


biogeme 3.3.1 [2025-11-12]
Home page: http://biogeme.epfl.ch
Submit questions to https://groups.google.com/d/forum/biogeme
Michel Bierlaire, Transport and Mobility Laboratory, Ecole Polytechnique Fédérale de Lausanne (EPFL)



In [40]:
# マスターデータの読み込み
master_df = pd.read_csv("/home/shibumtk/B4research/estimate/data/01master_data/Logit_master2.csv", encoding="utf-8-sig")

# 欠損している部分をすべて0で補完（必要な処理は作成時にやってるから大丈夫）
master_df = master_df.fillna(0)

# 確認
print(len(master_df))
master_df.columns

47160


Index(['Personal_ID', 'HouseholdIncome', 'sex', 'age', 'JobType',
       'ComuTime[m]', 'HouseholdType', 'HouseholdMembers(all)', 'WorkTime[m]',
       'PriTrip', 'first_transportation', 'FacailtyType', 'TripPurpose',
       'StayTime', 'walk_av', 'walk_time[m]', 'bicycle_av', 'bicycle_time[m]',
       'bicycle_parking_fee', 'bus_av', 'bus_time[m]', 'bus_cost',
       'dist_nearest_BusStop[km]', 'car_av', 'car_time[m]', 'car_cost'],
      dtype='object')

In [41]:
# 通勤時間と端末時間の切り分け
# 1) 実際に使った端末手段の時間（分）を拾う
conds = [
    master_df["first_transportation"] == 1,  # walk
    master_df["first_transportation"] == 2,  # bicycle
    master_df["first_transportation"] == 4,  # bus
    master_df["first_transportation"] == 5,  # car
]
choices = [
    master_df["walk_time[m]"],
    master_df["bicycle_time[m]"],
    master_df["bus_time[m]"],
    master_df["car_time[m]"],
]
master_df["AccessTime_used_m"] = np.select(conds, choices, default=np.nan)

# 2) 不整合・欠損の補完：av=1なのに欠損/0/負などは「利用可能な中の最短」で補う（本来は必要ない）
walk_cand    = np.where(master_df["walk_av"]   == 1, master_df["walk_time[m]"],    np.inf)
bicycle_cand = np.where(master_df["bicycle_av"]== 1, master_df["bicycle_time[m]"], np.inf)
bus_cand     = np.where(master_df["bus_av"]    == 1, master_df["bus_time[m]"],     np.inf)
car_cand     = np.where(master_df["car_av"]    == 1, master_df["car_time[m]"],     np.inf)
min_avail_m  = np.minimum.reduce([walk_cand, bicycle_cand, bus_cand, car_cand])

need_fallback = (
    master_df["AccessTime_used_m"].isna()
    | (master_df["AccessTime_used_m"] <= 0)
    | ((master_df["first_transportation"] == 1) & (master_df["walk_av"]    != 1))
    | ((master_df["first_transportation"] == 2) & (master_df["bicycle_av"] != 1))
    | ((master_df["first_transportation"] == 4) & (master_df["bus_av"]     != 1))
    | ((master_df["first_transportation"] == 5) & (master_df["car_av"]     != 1))
)
master_df.loc[need_fallback, "AccessTime_used_m"] = min_avail_m[need_fallback]

# 3) そもそも全モード不可の観測（min_avail=∞）は除外
df = master_df[np.isfinite(master_df["AccessTime_used_m"])].copy()

# 4) 時間に統一し、本線を作る
df["MainlineTime[m]"] = (df["ComuTime[m]"] - df["AccessTime_used_m"]).clip(lower=0)

df[["ComuTime[m]", "MainlineTime[m]", "AccessTime_used_m"]].head()

Unnamed: 0,ComuTime[m],MainlineTime[m],AccessTime_used_m
0,75.0,68.614062,6.385938
1,35.0,29.807682,5.192318
2,80.0,61.311768,18.688232
3,80.0,72.888165,7.111835
4,60.0,49.082593,10.917407


In [42]:
# biogeme.databaseに格納
master_db = Database("PTdata", df)

In [43]:
# 下位モデルの変数設定（logsum）
# output
CHOICE_ACCESS = Variable("first_transportation")

# input(TT=Travel/Transit Time)
WALK_AV = Variable("walk_av")
WALK_TT = Variable("walk_time[m]")

BICYCLE_AV = Variable("bicycle_av")
BICYCLE_TT = Variable("bicycle_time[m]")
BICYCLE_CO = Variable("bicycle_parking_fee")

BUS_AV   = Variable("bus_av")
BUS_TT   = Variable("bus_time[m]")
BUS_CO   = Variable("bus_cost")
BUS_DIST = Variable("dist_nearest_BusStop[km]")

CAR_AV = Variable("car_av")
CAR_TT = Variable("car_time[m]")
CAR_CO = Variable("car_cost")

# scaled
WALK_TT_SCALED = master_db.DefineVariable("WALK_TT_SCALED", WALK_TT / 60)

BICYCLE_TT_SCALED = master_db.DefineVariable("BICYCLE_TT_SCALED", BICYCLE_TT / 60)
BICYCLE_CO_SCALED = master_db.DefineVariable("BICYCLE_CO_SCALED", BICYCLE_CO / 100)

BUS_TT_SCALED   = master_db.DefineVariable("BUS_TT_SCALED", BUS_TT / 60)
BUS_CO_SCALED   = master_db.DefineVariable("BUS_CO_SCALED", BUS_CO / 100)
BUS_DIST_SCALED = master_db.DefineVariable("BUS_DIST_SCALED", BUS_DIST * 10)

CAR_TT_SCALED = master_db.DefineVariable("CAR_TT_SCALED", CAR_TT / 60)
CAR_CO_SCALED = master_db.DefineVariable("CAR_CO_SCALED", CAR_CO / 100)

# パラメータ設定
# ASC（定数項）
ASC_WALK = Beta("ASC_WALK", 0.0, None, None, 1)
ASC_BICYCLE = Beta("ASC_BICYCLE", 0.0, None, None, 0)
ASC_BUS = Beta("ASC_BUS", 0.0, None, None, 0)
ASC_CAR = Beta("ASC_CAR", 0.0, None, None, 0)

# 変数
B_TIME_LS = Beta("B_TIME_LS", -0.10, None, None, 0)
B_COST_LS = Beta("B_COST_LS", -0.10, None, None, 0)
B_DIST_LS = Beta("B_DIST_LS", -0.10, None, None, 0)

# 効用関数の特定化
# Logsum
V_WALK    = (ASC_WALK 
             + B_TIME_LS * WALK_TT_SCALED)
V_BICYCLE = (ASC_BICYCLE 
             + B_TIME_LS * BICYCLE_TT_SCALED 
             + B_COST_LS * BICYCLE_CO_SCALED)
V_BUS     = (ASC_BUS 
             + B_TIME_LS * BUS_TT_SCALED 
             + B_COST_LS * BUS_CO_SCALED 
             + B_DIST_LS * BUS_DIST_SCALED)
V_CAR     = (ASC_CAR 
             + B_TIME_LS * CAR_TT_SCALED 
             + B_COST_LS * CAR_CO_SCALED)

# 効用関数と選択肢の番号を関連付ける
V_access = {1:V_WALK, 2:V_BICYCLE, 4:V_BUS, 5:V_CAR}
# 選択可能性と選択肢の番号を関連づける
AV_access = {1: WALK_AV, 2: BICYCLE_AV, 4: BUS_AV, 5: CAR_AV}

DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.


In [44]:
# モデルの推定と結果の表示
# 尤度関数の設定
logprob_access = loglogit(V_access, AV_access, CHOICE_ACCESS)

# BIOGEMEのオブ軸とを生成（結果の名前）(rebio=result biogeme)
rebio_access = BIOGEME(master_db, logprob_access,
                generate_html=False, generate_yaml=False) # 推定結果保存可否
rebio_access.model_name = "access_02logit"

# 初期尤度の計算
rebio_access.calculate_null_loglikelihood(AV_access) # 全員が2つの選択肢を選択可能

# パラメータ推定の実行と結果概要の表示
results = rebio_access.estimate()
print(results.short_summary())

# パラメータ推定値をpandasに格納して表示
pandas_results = get_pandas_estimated_parameters(estimation_results=results)
print(pandas_results)


Results for model access_02logit
Nbr of parameters:		6
Sample size:			47160
Excluded data:			0
Null log likelihood:		-51558.45
Final log likelihood:		-23318.83
Likelihood ratio test (null):		56479.24
Rho square (null):			0.548
Rho bar square (null):			0.548
Akaike Information Criterion:	46649.65
Bayesian Information Criterion:	46702.22

          Name      Value  Robust std err.  Robust t-stat.  Robust p-value
0    B_TIME_LS -15.965573         0.269786      -59.178553    0.000000e+00
1  ASC_BICYCLE  -4.636168         0.089966      -51.532691    0.000000e+00
2    B_COST_LS   0.479448         0.067601        7.092301    1.318945e-12
3      ASC_BUS  -3.258226         0.127039      -25.647424    0.000000e+00
4    B_DIST_LS   0.160743         0.012219       13.154667    0.000000e+00
5      ASC_CAR  -6.662559         0.068991      -96.570721    0.000000e+00


In [45]:
# 変数の定義
# main
PRITRIP  = Variable("PriTrip") # 0/1をとる目的変数
HHM_all  = Variable("HouseholdMembers(all)") # 世帯人数
FACAILTY = Variable("FacailtyType") # 目的地施設種類
PURPOSE  = Variable("TripPurpose") # トリップ目的
COMUTIME = Variable("ComuTime[m]") # 通勤時間
MACMTIME = Variable("MainlineTime[m]") # 通勤時間から端末時間を引いたもの，MACM=MAin CoMuTIME
WORKTIME = Variable("WorkTime[m]") # 就業時間

# Logsum(TT=Travel/Transit Time)
WALK_AV = Variable("walk_av")
WALK_TT = Variable("walk_time[m]")

BICYCLE_AV = Variable("bicycle_av")
BICYCLE_TT = Variable("bicycle_time[m]")
BICYCLE_CO = Variable("bicycle_parking_fee")

BUS_AV   = Variable("bus_av")
BUS_TT   = Variable("bus_time[m]")
BUS_CO   = Variable("bus_cost")
BUS_DIST = Variable("dist_nearest_BusStop[km]")

CAR_AV = Variable("car_av")
CAR_TT = Variable("car_time[m]")
CAR_CO = Variable("car_cost")

# スケーリング（係数を0.1~10にするように）
COMUTIME_SCALED = master_db.DefineVariable("COMUTIME_SCALED", COMUTIME / 60) # 1時間当たりの変化
MACMTIME_SCALED = master_db.DefineVariable("MACMTIME_SCALED", MACMTIME / 60)
WORKTIME_SCALED = master_db.DefineVariable("WORKTIME_SCALED", WORKTIME / 60)

WALK_TT_SCALED = master_db.DefineVariable("WALK_TT_SCALED", WALK_TT / 60)

BICYCLE_TT_SCALED = master_db.DefineVariable("BICYCLE_TT_SCALED", BICYCLE_TT / 60)
BICYCLE_CO_SCALED = master_db.DefineVariable("BICYCLE_CO_SCALED", BICYCLE_CO / 100)

BUS_TT_SCALED   = master_db.DefineVariable("BUS_TT_SCALED", BUS_TT / 60)
BUS_CO_SCALED   = master_db.DefineVariable("BUS_CO_SCALED", BUS_CO / 100)
BUS_DIST_SCALED = master_db.DefineVariable("BUS_DIST_SCALED", BUS_DIST * 10)

CAR_TT_SCALED = master_db.DefineVariable("CAR_TT_SCALED", CAR_TT / 60)
CAR_CO_SCALED = master_db.DefineVariable("CAR_CO_SCALED", CAR_CO / 100)

DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.
DefineVariable is deprecated; use define_variable instead.


ValueError: Variable WALK_TT_SCALED already exists

In [None]:
# パラメータ設定
# main
# ACS（定数項）
ASC_NO  = Beta("ASC_NO" , 0.0, None, None, 1) # 私事トリップしない方を基準
ASC_YES = Beta("ASC_YES", 0.0, None, None, 0)

# Beta（変数）
B_CT = Beta("B_COMUTIME", 0.0, None, None, 1) # 使用しない
B_MACT = Beta("B_MACMTIME", 0.0 ,None, None, 0)
B_WT = Beta("B_WORKTIME", 0.0, None, None, 0)
B_HM = Beta("B_HHM_all" , 0.0, None, None, 0)
B_LOGSUME = Beta("B_LOGSUM", 1.0, None, None, 0)

# Logsum
# ACS（定数項）
ASC_WALK = Beta("ASC_WALK", 0.0, None, None, 1)
ASC_BICYCLE = Beta("ASC_BICYCLE", 0.0, None, None, 0)
ASC_BUS = Beta("ASC_BUS", 0.0, None, None, 0)
ASC_CAR = Beta("ASC_CAR", 0.0, None, None, 0)

# Beta（変数）
B_TIME_LS = Beta("B_TIME_LS", -0.01, None, None, 0)
B_COST_LS = Beta("B_COST_LS", -0.10, None, None, 0)
B_DIST_LS = Beta("B_DIST_LS", -0.10, None, None, 0)

In [None]:
# 効用関数の特定化
# Logsum
V_WALK    = (ASC_WALK 
             + B_TIME_LS * WALK_TT_SCALED)
V_BICYCLE = (ASC_BICYCLE 
             + B_TIME_LS * BICYCLE_TT_SCALED 
             + B_COST_LS * BICYCLE_CO_SCALED)
V_BUS     = (ASC_BUS 
             + B_TIME_LS * BUS_TT_SCALED 
             + B_COST_LS * BUS_CO_SCALED 
             + B_DIST_LS * BUS_DIST_SCALED)
V_CAR     = (ASC_CAR 
             + B_TIME_LS * CAR_TT_SCALED 
             + B_COST_LS * CAR_CO_SCALED)

LOGSUM = log(
    WALK_AV * exp(V_WALK)
    + BICYCLE_AV * exp(V_BICYCLE)
    + BUS_AV * exp(V_BUS)
    + CAR_AV * exp(V_CAR)
    + 1e-300
)

# main
V_NO  = (ASC_NO) 
         #+ B_CT * COMUTIME_SCALED 
         #+ B_WT * WORKTIME_SCALED
         #+ B_HM * HHM_all
         #+ B_LOGSUME * LOGSUM)

V_YES = (ASC_YES 
         + B_MACT * MACMTIME_SCALED 
         + B_WT * WORKTIME_SCALED
         + B_HM * HHM_all
         + B_LOGSUME * LOGSUM)



# 効用関数と選択肢の番号を関連付ける
V = {0:V_NO, 1:V_YES}

In [None]:
# モデルと尤度関数の定義（logitモデル）
logprob = loglogit(V, None, PRITRIP)

# BIOGEMEのオブ軸とを生成（結果の名前）(rebio=result biogeme)
# outdir = "/home/shibumtk/B4research/estimate/estimate_results/b01logit.iter"
rebio = BIOGEME(master_db, logprob,
                generate_html=False, generate_yaml=False) # 推定結果保存可否
rebio.model_name = "b01logit"

# 初期尤度の計算
rebio.calculate_null_loglikelihood(avail={0:1, 1:1}) # 全員が2つの選択肢を選択可能

# パラメータ推定の実行と結果概要の表示
results = rebio.estimate()
print(results.short_summary())

# パラメータ推定値をpandasに格納して表示
pandas_results = get_pandas_estimated_parameters(estimation_results=results)
print(pandas_results)