# 2段階のNLモデルの構築
上位：私事トリップ選択モデル（私事トリップなし / 義務的 / 裁量的）  
下位：端末交通手段選択モデル（徒歩 / 自転車 / バス / 自動車）

In [None]:
# ライブラリの導入
import pandas as pd
import numpy as np
import biogeme.biogeme_logging as blog

from biogeme.database import Database
from biogeme.expressions import Variable, Beta
from biogeme.biogeme import BIOGEME
from biogeme.models import lognested
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
from biogeme.results_processing import get_pandas_estimated_parameters

In [8]:
# Load the input data.
# String-valued columns are excluded from the table.
drop_cols = ['NearestStation', 'WorkplaceStation']

# Read the master CSV, fill missing values with 0 as a precaution,
# then drop the string columns listed above.
master_df = (
    pd.read_csv(
        "/home/shibumtk/B4research/estimate/data/NL_data/c_NL_Logit_master2.csv",
        encoding="utf-8-sig",
    )
    .fillna(0)
    .drop(columns=drop_cols)
)

# Check: number of rows, then the remaining column names
print(len(master_df))
master_df.columns

46738


Index(['Personal_ID', 'HouseholdIncome', 'sex', 'age', 'JobType',
       'ComuTime[m]', 'MainlineTime[m]', 'AccessTime_used[m]', 'HouseholdType',
       'HouseholdMembers(all)', 'YoungestMember_No', 'WorkTime[m]',
       'AfterTime_work[m]', 'AfterTime_home[m]', 'first_transportation',
       'NearestStation_code', 'near_chuko_area300', 'near_chuko_area1000',
       'near_300index1', 'near_1000index1', 'near_300index2',
       'near_1000index2', 'WorkplaceStation_code', 'WP_chuko_area300',
       'WP_chuko_area1000', 'WP_300index1', 'WP_1000index1', 'WP_300index2',
       'WP_1000index2', 'PriTrip', 'timing', 'FacilityType', 'TripPurpose',
       'PriTrip_class', 'StayTime', 'walk_av', 'walk_time[m]', 'bicycle_av',
       'bicycle_time[m]', 'bicycle_parking_fee', 'bus_av', 'bus_time[m]',
       'bus_cost', 'dist_nearest_BusStop[km]', 'car_av', 'car_time[m]',
       'car_cost'],
      dtype='object')

In [9]:
# Build the CHOICE column used by the nested logit.
# Collapse the access-mode codes onto 0..3 (1 -> 0, 2 -> 1, 4 -> 2, 5 -> 3).
mode_map = {1: 0, 2: 1, 4: 2, 5: 3}
# Store the recoded mode in a new column; codes absent from mode_map become NaN.
master_df["t_first_transportation"] = master_df["first_transportation"].map(mode_map)

df = master_df.copy()

# Alternative ID per row:
#   PriTrip_class == 0 -> 1
#   PriTrip_class == 1 -> 2 + recoded mode (2..5)
#   PriTrip_class == 2 -> 6 + recoded mode (6..9)
# Rows matching none of the conditions get NaN so they can be detected below
# instead of silently receiving np.select's default of 0.
choice_raw = np.select(
    [df['PriTrip_class'] == 0, df['PriTrip_class'] == 1, df['PriTrip_class'] == 2],
    [1, 2 + df['t_first_transportation'], 6 + df['t_first_transportation']],
    default=np.nan,
)

# Validate before casting: a NaN (unmapped mode or unknown class) would
# otherwise be turned into an arbitrary integer by astype("int64") and reach
# the model as a non-existent alternative.
invalid_rows = ~np.isin(choice_raw, np.arange(1, 10))
if invalid_rows.any():
    raise ValueError(
        f"CHOICE9 could not be built for {int(invalid_rows.sum())} row(s): "
        "PriTrip_class must be 0, 1 or 2, and first_transportation must be one of "
        f"{sorted(mode_map)} for rows with PriTrip_class 1 or 2"
    )
df['CHOICE9'] = choice_raw.astype("int64")

t_master_df = df.copy()

# Check: total row count, then the number of observations per alternative
print(t_master_df['CHOICE9'].value_counts().sort_index().sum())
t_master_df['CHOICE9'].value_counts().sort_index()

46738


CHOICE9
1    38142
2     2954
3      438
4      299
5       64
6     3912
7      420
8      417
9       92
Name: count, dtype: int64

In [10]:
# Wrap the prepared DataFrame in a Biogeme Database, passing "PTdata" as its name
master_db = Database("PTdata", t_master_df)

In [11]:
# ==============
# Alternative IDs (9 alternatives), compared against the CHOICE9 column
#   1     : no private trip
#   2 - 5 : "Duty" nest        x walk / bicycle / bus / car
#   6 - 9 : "Discretion" nest  x walk / bicycle / bus / car
# ==============
NO = 1
DU_WA, DU_BI, DU_BU, DU_CA = 2, 3, 4, 5
DI_WA, DI_BI, DI_BU, DI_CA = 6, 7, 8, 9

CHOICE = Variable("CHOICE9")


# ==============
# Parameters (alternative-specific constants only)
# ==============
def _free_asc(name):
    """Return a Beta called *name*: initial value 0, no bounds, status 0."""
    return Beta(name, 0, None, None, 0)


# Mode-side ASCs (walk is the reference)
ASC_BI = _free_asc("ASC_BI")
ASC_BU = _free_asc("ASC_BU")
ASC_CA = _free_asc("ASC_CA")

# Trip-type (Duty / Discretion) ASCs
ASC_DU = _free_asc("ASC_DU")
ASC_DI = _free_asc("ASC_DI")

# Nest parameters. NoTrip holds a single alternative, so its parameter is
# fixed at 1 (status 1); the other two start at 1.2 with bounds [1, 50].
# NOTE(review): the estimation output recorded in this notebook reports
# MU_DI = 50.0, i.e. exactly this upper bound -- check whether the bound is
# binding before interpreting the nest parameters.
MU_NO = Beta("MU_NO", 1.0, 1.0, 1.0, 1)
MU_DU = Beta("MU_DU", 1.2, 1.0, 50.0, 0)
MU_DI = Beta("MU_DI", 1.2, 1.0, 50.0, 0)

# Availability and travel-time variables per mode.
# The *_TT variables are defined here but do not enter the utilities below.
WA_AV, WA_TT = Variable("walk_av"), Variable("walk_time[m]")
BI_AV, BI_TT = Variable("bicycle_av"), Variable("bicycle_time[m]")
BU_AV, BU_TT = Variable("bus_av"), Variable("bus_time[m]")
CA_AV, CA_TT = Variable("car_av"), Variable("car_time[m]")

# ==============
# Utilities (ASCs only)
# NoTrip is the reference alternative: 0
# ==============
V_NO = 0

# Trip-type component (no covariates)
V_DU_PRI, V_DI_PRI = ASC_DU, ASC_DI

# Mode component (no travel time; walk is the reference with 0)
V_WA_MODE, V_BI_MODE, V_BU_MODE, V_CA_MODE = 0, ASC_BI, ASC_BU, ASC_CA

# Alternatives of each trip-type nest, listed in the same mode order
# (walk, bicycle, bus, car) as the per-mode tuples that follow.
_duty_alts = [DU_WA, DU_BI, DU_BU, DU_CA]
_disc_alts = [DI_WA, DI_BI, DI_BU, DI_CA]
_mode_utilities = (V_WA_MODE, V_BI_MODE, V_BU_MODE, V_CA_MODE)
_mode_availability = (WA_AV, BI_AV, BU_AV, CA_AV)

# Utility of each alternative = trip-type component + mode component
V = {NO: V_NO}
V.update({alt: V_DU_PRI + v_mode for alt, v_mode in zip(_duty_alts, _mode_utilities)})
V.update({alt: V_DI_PRI + v_mode for alt, v_mode in zip(_disc_alts, _mode_utilities)})

# ==============
# Availability (same definition as in the earlier version of this code):
# NoTrip is always available; every other alternative follows its mode's flag.
# ==============
av = {NO: 1}
av.update(zip(_duty_alts, _mode_availability))
av.update(zip(_disc_alts, _mode_availability))

# ==============
# Nests (upper level = {NoTrip, Duty, Discretion})
# ==============
nest_no = OneNestForNestedLogit(MU_NO, [NO], name="NoTrip")
nest_du = OneNestForNestedLogit(MU_DU, list(_duty_alts), name="Duty")
nest_di = OneNestForNestedLogit(MU_DI, list(_disc_alts), name="Discretion")

nests = NestsForNestedLogit(
    choice_set=list(V),
    tuple_of_nests=(nest_no, nest_du, nest_di),
)

# ==============
# Log-likelihood contribution of the chosen alternative
# ==============
logprob = lognested(V, av, nests, CHOICE)

In [None]:
# ==============
# estimate
# ==============
import logging

logger = blog.get_screen_logger(level=blog.INFO)
# Every log line in the output captured below this cell is printed twice,
# which is what happens when more than one console handler is attached to the
# same logger. NOTE(review): presumably get_screen_logger adds a new handler
# on each call, so re-running this cell stacks them -- confirm against the
# Biogeme logging documentation. Keep only the newest plain console handler;
# file handlers are left alone.
_console_handlers = [
    handler
    for handler in logger.handlers
    if isinstance(handler, logging.StreamHandler)
    and not isinstance(handler, logging.FileHandler)
]
for _stale_handler in _console_handlers[:-1]:
    logger.removeHandler(_stale_handler)

# Estimation object for the ASC-only nested logit; HTML and YAML reports are
# switched off and the bounded BFGS optimizer is requested explicitly.
biogeme_asc = BIOGEME(
    master_db,
    logprob,
    generate_html=False,
    generate_yaml=False,
    optimization_algorithm="simple_bounds_BFGS",
)
# NOTE(review): the recorded log shows the starting values being restored
# from "__NL_ASC_only_TripTypePlusMode.iter" (named after model_name) rather
# than taken from the Beta definitions; remove that file if a run from the
# coded initial values is wanted.
biogeme_asc.model_name = "NL_ASC_only_TripTypePlusMode"

# Null log-likelihood computed from the availability dictionary (reported in
# the summary), followed by the estimation itself.
biogeme_asc.calculate_null_loglikelihood(av)
results_asc = biogeme_asc.estimate()

print(results_asc.short_summary())

# Estimated parameters as a pandas table, printed in full
pandas_results_asc = get_pandas_estimated_parameters(estimation_results=results_asc)
print(pandas_results_asc.to_string())

Biogeme parameters read from biogeme.toml. 
Biogeme parameters read from biogeme.toml. 


*** Initial values of the parameters are obtained from the file __NL_ASC_only_TripTypePlusMode.iter 
*** Initial values of the parameters are obtained from the file __NL_ASC_only_TripTypePlusMode.iter 
Parameter values restored from __NL_ASC_only_TripTypePlusMode.iter 
Parameter values restored from __NL_ASC_only_TripTypePlusMode.iter 
Starting values for the algorithm: {'ASC_DU': -2.334055728870544, 'MU_DU': 18.50798553707901, 'ASC_BI': -0.082813900961603, 'ASC_BU': -0.03632928701633138, 'ASC_CA': -0.14975231135153083, 'ASC_DI': -2.0776793998637615, 'MU_DI': 20.0} 
Starting values for the algorithm: {'ASC_DU': -2.334055728870544, 'MU_DU': 18.50798553707901, 'ASC_BI': -0.082813900961603, 'ASC_BU': -0.03632928701633138, 'ASC_CA': -0.14975231135153083, 'ASC_DI': -2.0776793998637615, 'MU_DI': 20.0} 
Optimization algorithm: BFGS with simple bounds [simple_bounds_BFGS]. 
Optimization algorithm: BFGS with simple bounds [simple_bounds_BFGS]. 
Optimization algorithm: hybrid Newton/BFGS with si

Results for model NL_ASC_only_TripTypePlusMode
Nbr of parameters:		7
Sample size:			46738
Excluded data:			0
Null log likelihood:		-82868.15
Final log likelihood:		-32602.28
Likelihood ratio test (null):		100531.7
Rho square (null):			0.607
Rho bar square (null):			0.606
Akaike Information Criterion:	65218.55
Bayesian Information Criterion:	65279.82

     Name      Value  Robust std err.  Robust t-stat.  Robust p-value  Active bound
0  ASC_DU  -2.324660         0.017126     -135.739356             0.0         False
1   MU_DU  45.963700         1.804108       25.477242             0.0         False
2  ASC_BI  -0.033168         0.001109      -29.901465             0.0         False
3  ASC_BU  -0.014301         0.001130      -12.657853             0.0         False
4  ASC_CA  -0.059885         0.002446      -24.483598             0.0         False
5  ASC_DI  -2.069600         0.015276     -135.476423             0.0         False
6   MU_DI  50.000000         1.403921       35.614545      

: 