In [1]:
import pandas as pd
import numpy as np
import biogeme.biogeme_logging as blog
from biogeme.database import Database
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Variable, Beta
from biogeme.models import logcnl
from biogeme.nests import NestsForCrossNestedLogit, OneNestForCrossNestedLogit
from biogeme.results_processing import get_pandas_estimated_parameters

  from tqdm.autonotebook import tqdm


In [2]:
# Load the master table; the "utf-8-sig" codec drops a leading BOM if the CSV has one.
master_df = pd.read_csv(
    "/home/shibumtk/B4research/estimate/data/NL_data/c_NL_Logit_master2.csv",
    encoding="utf-8-sig",
)

# Fill every missing value with 0 as a precaution, then remove the two
# station-name columns because they hold strings.
drop_cols = ['NearestStation', 'WorkplaceStation']
master_df = master_df.fillna(0).drop(columns=drop_cols)

# Sanity check: row count first, then the remaining column names.
print(len(master_df))
master_df.columns

46738


Index(['Personal_ID', 'HouseholdIncome', 'sex', 'age', 'JobType',
       'ComuTime[m]', 'MainlineTime[m]', 'AccessTime_used[m]', 'HouseholdType',
       'HouseholdMembers(all)', 'YoungestMember_No', 'WorkTime[m]',
       'AfterTime_work[m]', 'AfterTime_home[m]', 'first_transportation',
       'NearestStation_code', 'near_chuko_area300', 'near_chuko_area1000',
       'near_300index1', 'near_1000index1', 'near_300index2',
       'near_1000index2', 'WorkplaceStation_code', 'WP_chuko_area300',
       'WP_chuko_area1000', 'WP_300index1', 'WP_1000index1', 'WP_300index2',
       'WP_1000index2', 'PriTrip', 'timing', 'FacilityType', 'TripPurpose',
       'PriTrip_class', 'StayTime', 'walk_av', 'walk_time[m]', 'bicycle_av',
       'bicycle_time[m]', 'bicycle_parking_fee', 'bus_av', 'bus_time[m]',
       'bus_cost', 'dist_nearest_BusStop[km]', 'car_av', 'car_time[m]',
       'car_cost'],
      dtype='object')

In [3]:
# Build the CHOICE9 column (chosen alternative, IDs 1-9) for the 9-alternative model.
#
# Access-mode codes 1, 2, 4, 5 are recoded to consecutive indices 0-3.
# Series.map leaves every code that is not a key of mode_map as NaN.
mode_map = {1: 0, 2: 1, 4: 2, 5: 3}
master_df["t_first_transportation"] = master_df["first_transportation"].map(mode_map)

df = master_df.copy()

# PriTrip_class decides the block of alternative IDs:
#   0 -> alternative 1, 1 -> alternatives 2-5, 2 -> alternatives 6-9.
class_is_0 = df["PriTrip_class"] == 0
class_is_1 = df["PriTrip_class"] == 1
class_is_2 = df["PriTrip_class"] == 2

# Guard 1: np.select would silently assign its default (0) to any other class,
# which is not a valid alternative ID.
unknown_class = ~(class_is_0 | class_is_1 | class_is_2)
if unknown_class.any():
    raise ValueError(
        "PriTrip_class must be 0, 1 or 2; unexpected values: "
        f"{df.loc[unknown_class, 'PriTrip_class'].unique().tolist()}"
    )

# Guard 2: for rows whose ID depends on the access mode, an unmapped mode is NaN
# and the int64 cast below would turn it into a meaningless integer.
unmapped_mode = (class_is_1 | class_is_2) & df["t_first_transportation"].isna()
if unmapped_mode.any():
    raise ValueError(
        "first_transportation has codes missing from mode_map on trip rows: "
        f"{df.loc[unmapped_mode, 'first_transportation'].unique().tolist()}"
    )

df["CHOICE9"] = np.select(
    [class_is_0, class_is_1, class_is_2],
    [1, 2 + df["t_first_transportation"], 6 + df["t_first_transportation"]],
).astype("int64")

t_master_df = df.copy()

# Sanity check: the counts must add up to the number of rows,
# and only the IDs 1-9 should appear.
print(t_master_df['CHOICE9'].value_counts().sort_index().sum())
t_master_df['CHOICE9'].value_counts().sort_index()

46738


CHOICE9
1    38142
2     2954
3      438
4      299
5       64
6     3912
7      420
8      417
9       92
Name: count, dtype: int64

In [None]:
# Wrap the prepared DataFrame (including CHOICE9) in a Biogeme Database named "PTdata".
master_db = Database("PTdata", t_master_df)

: 

In [None]:
# ------------------------------------------------------------
# Variable definitions: bind database columns to Biogeme expressions
# ------------------------------------------------------------
CHOICE = Variable("CHOICE9")  # chosen alternative ID

# Availability flag of each access mode
WA_AV = Variable("walk_av")
BI_AV = Variable("bicycle_av")
BU_AV = Variable("bus_av")
CA_AV = Variable("car_av")

# Travel time of each access mode (columns are suffixed "[m]")
WA_TT = Variable("walk_time[m]")
BI_TT = Variable("bicycle_time[m]")
BU_TT = Variable("bus_time[m]")
CA_TT = Variable("car_time[m]")

# Trip-level covariates
WORKTIME = Variable("WorkTime[m]")
AF_WORKTIME = Variable("AfterTime_work[m]")  # AF = after
MACMTIME = Variable("MainlineTime[m]")
HHM_all = Variable("HouseholdMembers(all)")
NEKI_INDEX = Variable("near_1000index2")
WEKI_INDEX = Variable("WP_1000index2")  # land-use index around the workplace station

# Rescaled copies, registered as new columns of master_db.
# NOTE(review): the "[m]" columns are presumably minutes, so /60 would give hours - confirm.
_TIME_DIVISOR = 60
_INDEX_DIVISOR = 1_000_000

WORKTIME_SCALED = master_db.define_variable(
    "WORKTIME_SCALED", WORKTIME / _TIME_DIVISOR
)
AF_WORKTIME_SCALED = master_db.define_variable(
    "AF_WORKTIME_SCALED", AF_WORKTIME / _TIME_DIVISOR
)
MACMTIME_SCALED = master_db.define_variable(
    "MACMTIME_SCALED", MACMTIME / _TIME_DIVISOR
)
NEKI_SCALED = master_db.define_variable(
    "NEKI_SCALED", NEKI_INDEX / _INDEX_DIVISOR
)
WEKI_SCALED = master_db.define_variable(
    "WEKI_SCALED", WEKI_INDEX / _INDEX_DIVISOR
)

# ------------------------------------------------------------
# Parameters
# ------------------------------------------------------------
def _free_beta(name):
    """Return a Beta called `name` with start value 0, no bounds and status 0."""
    return Beta(name, 0, None, None, 0)


# Mode-specific constants (walk is the reference mode)
ASC_BI = _free_beta("ASC_BI")
ASC_BU = _free_beta("ASC_BU")
ASC_CA = _free_beta("ASC_CA")

# Access-time coefficient, shared by all modes
b_at = _free_beta("b_at")

# Trip-type constants (Duty / Discretionary)
ASC_DU = _free_beta("ASC_DU")
ASC_DI = _free_beta("ASC_DI")

# Duty-side covariate coefficients
b_work_du = _free_beta("b_work_du")
b_aftime_work_du = _free_beta("b_aftime_work_du")
b_macmtime_du = _free_beta("b_macmtime_du")
b_n_eki_du = _free_beta("b_n_eki_du")
b_w_eki_du = _free_beta("b_w_eki_du")
b_hhm_du = _free_beta("b_hhm_du")

# Discretionary-side covariate coefficients
b_work_di = _free_beta("b_work_di")
b_aftime_work_di = _free_beta("b_aftime_work_di")
b_macmtime_di = _free_beta("b_macmtime_di")
b_n_eki_di = _free_beta("b_n_eki_di")
b_w_eki_di = _free_beta("b_w_eki_di")
b_hhm_di = _free_beta("b_hhm_di")

# ------------------------------------------------------------
# Cross-nested logit (CNL) parameters
# ------------------------------------------------------------
# Nest parameters; constraining mu >= 1 is the usual practice.
MU_NO = Beta("MU_NO", 1.0, 1.0, 1.0, 1)   # NoTrip nest: fixed at 1
MU_TR = Beta("MU_TR", 1.2, 1.0, 20.0, 0)  # Trip nest
MU_DU = Beta("MU_DU", 1.2, 1.0, 20.0, 0)  # Duty nest
MU_DI = Beta("MU_DI", 1.2, 1.0, 20.0, 0)  # Discretionary nest

# Membership of the Trip nest: a single parameter shared by every trip alternative.
alpha_trip = Beta("alpha_trip", 0.30, 0.0, 1.0, 0)
# Membership of the Duty / Discretionary nest, so that the two memberships sum to 1.
alpha_purpose = 1 - alpha_trip

# ------------------------------------------------------------
# Alternative IDs (9 alternatives)
# ------------------------------------------------------------
NO = 1
DU_WA, DU_BI, DU_BU, DU_CA = 2, 3, 4, 5
DI_WA, DI_BI, DI_BU, DI_CA = 6, 7, 8, 9

# ------------------------------------------------------------
# Utilities (NoTrip is the reference alternative with utility 0)
# ------------------------------------------------------------
V_NO = 0

# Trip-type component (Duty / Discretionary).
# NOTE(review): b_work_du / b_work_di, b_w_eki_du / b_w_eki_di and the variables
# WORKTIME_SCALED / WEKI_SCALED are defined in this notebook but do not enter
# either utility below - confirm that leaving them out is intentional.
V_DU_PRI = (
    ASC_DU
    + b_aftime_work_du * AF_WORKTIME_SCALED
    + b_macmtime_du * MACMTIME_SCALED
    + b_n_eki_du * NEKI_SCALED
    + b_hhm_du * HHM_all
)

V_DI_PRI = (
    ASC_DI
    + b_aftime_work_di * AF_WORKTIME_SCALED
    + b_macmtime_di * MACMTIME_SCALED
    + b_n_eki_di * NEKI_SCALED
    + b_hhm_di * HHM_all
)

# Access-mode component (walk is the reference, so it has no constant)
V_WA_MODE = b_at * WA_TT
V_BI_MODE = ASC_BI + b_at * BI_TT
V_BU_MODE = ASC_BU + b_at * BU_TT
V_CA_MODE = ASC_CA + b_at * CA_TT

# Parallel tuples in walk / bicycle / bus / car order
_duty_ids = (DU_WA, DU_BI, DU_BU, DU_CA)
_disc_ids = (DI_WA, DI_BI, DI_BU, DI_CA)
_mode_utils = (V_WA_MODE, V_BI_MODE, V_BU_MODE, V_CA_MODE)
_mode_avail = (WA_AV, BI_AV, BU_AV, CA_AV)

# Utility of each of the 9 alternatives: trip-type component + access-mode component
V = {NO: V_NO}
V.update({alt: V_DU_PRI + mode_v for alt, mode_v in zip(_duty_ids, _mode_utils)})
V.update({alt: V_DI_PRI + mode_v for alt, mode_v in zip(_disc_ids, _mode_utils)})

# Availability: NoTrip is always available (=1); a trip alternative inherits
# the availability flag of its access mode.
av = {NO: 1}
av.update(zip(_duty_ids, _mode_avail))
av.update(zip(_disc_ids, _mode_avail))

# ------------------------------------------------------------
# Membership (alpha) dictionaries, one per nest, keyed by alternative ID
#   - Duty alternatives:          alpha_trip in Trip, (1 - alpha_trip) in Duty
#   - Discretionary alternatives: alpha_trip in Trip, (1 - alpha_trip) in Discretion
#   - NoTrip:                     1 in the NoTrip nest, 0 everywhere else
# ------------------------------------------------------------
duty_alts = [DU_WA, DU_BI, DU_BU, DU_CA]
disc_alts = [DI_WA, DI_BI, DI_BU, DI_CA]
trip_alts = duty_alts + disc_alts
choice_set = [NO] + trip_alts

# Every dictionary lists all 9 alternatives; non-members get 0.0.
alpha_no = {alt: (1.0 if alt == NO else 0.0) for alt in choice_set}
alpha_tr = {alt: (alpha_trip if alt in trip_alts else 0.0) for alt in choice_set}
alpha_du = {alt: (alpha_purpose if alt in duty_alts else 0.0) for alt in choice_set}
alpha_di = {alt: (alpha_purpose if alt in disc_alts else 0.0) for alt in choice_set}

# ------------------------------------------------------------
# Nest definitions for the cross-nested logit
# ------------------------------------------------------------
nest_no = OneNestForCrossNestedLogit(nest_param=MU_NO, dict_of_alpha=alpha_no, name="NoTrip")
nest_trip = OneNestForCrossNestedLogit(nest_param=MU_TR, dict_of_alpha=alpha_tr, name="Trip")
nest_duty = OneNestForCrossNestedLogit(nest_param=MU_DU, dict_of_alpha=alpha_du, name="Duty")
nest_disc = OneNestForCrossNestedLogit(nest_param=MU_DI, dict_of_alpha=alpha_di, name="Discretion")

_all_nests = (nest_no, nest_trip, nest_duty, nest_disc)
nests = NestsForCrossNestedLogit(choice_set=choice_set, tuple_of_nests=_all_nests)

# Optional specification check: report the flag, and the message only when non-empty.
ok, msg = nests.check_validity()
print("CNL nests valid?", ok)
if msg:
    print("message:", msg)

# ============================================================
# Likelihood (lognested → logcnl)
# Log-probability of the chosen alternative under the CNL model, built from
# the utilities V, the availabilities av, the nest structure and CHOICE.
# ============================================================
logprob = logcnl(V, av, nests, CHOICE)

# ============================================================
# Estimation
# ============================================================
# Screen logger at INFO level; the returned handle is not used again in this cell.
logger = blog.get_screen_logger(level=blog.INFO)

# NOTE(review): generate_html / generate_yaml = False presumably suppress the
# HTML and YAML report files - confirm against the installed Biogeme version.
biogeme = BIOGEME(
    master_db,
    logprob,
    generate_html=False,
    generate_yaml=False,
    optimization_algorithm="simple_bounds_BFGS",
)
# The model name is set before estimating; the log shows it is used for the
# "__CNL_TripDutyDisc_alpha.iter" file name.
biogeme.model_name = "CNL_TripDutyDisc_alpha"

# Null log-likelihood is computed from the availability dictionary before estimating.
biogeme.calculate_null_loglikelihood(av)
results = biogeme.estimate()

# Print the short summary, then the full table of estimated parameters as plain text.
print(results.short_summary())
pandas_results = get_pandas_estimated_parameters(estimation_results=results)
print(pandas_results.to_string())


Biogeme parameters read from biogeme.toml. 


CNL nests valid? True
message:  Alternative in exactly one nest, and parameter alpha is defined by an expression, and may not be constant: [1]


*** Initial values of the parameters are obtained from the file __CNL_TripDutyDisc_alpha.iter 
Cannot read file __CNL_TripDutyDisc_alpha.iter. Statement is ignored. 
Starting values for the algorithm: {} 
Optimization algorithm: BFGS with simple bounds [simple_bounds_BFGS]. 
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds] 
** Optimization: BFGS with trust region for simple bounds 
