In [None]:
import os

# Input artefacts. The relative paths are resolved against the notebook's working directory.
FEATURES_PATH   = 'features_full.parquet'
# The choice-set CSV defaults to the original author's local file. Set the
# CHOICE_SET_PATH environment variable to point the notebook at another copy
# without editing this cell.
CHOICE_SET_PATH = os.environ.get(
    'CHOICE_SET_PATH',
    r'C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt-1\Data\Final\choice_set_Final_1_15_Sep23.csv',
)
MODEL_PATH      = 'tabnet_full_model.zip'
IMPUTER_PATH    = 'imputer.pkl'

# The 12 pilot OD pairs, each written as "<id>-<id>"
# (presumably origin id, then destination id -- confirm against the data dictionary).
PILOT_ODS = [
    "8603305-8603331","8603313-8603308","8600741-8600642",
    "8600636-8603308","8600681-8603308","8600677-8603308",
    "8600657-8603308","8600695-8603308","8600678-8603308",
    "8603317-8603339","8603334-8603331","8603336-8603331"
]

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.impute import SimpleImputer

# Seed NumPy's global random number generator.
np.random.seed(42)

# Load the choice-set rows (CSV) and the pre-computed feature table (parquet).
df_choice = pd.read_csv(CHOICE_SET_PATH)
df_feat   = pd.read_parquet(FEATURES_PATH)

# The probabilities predicted from df_feat are written onto df_choice row by row below,
# so both tables must contain the same rows. Fail before the (expensive) inference step
# with a clear message instead of at the final column assignment.
# NOTE(review): equal length does not prove equal row ORDER -- confirm that
# features_full.parquet was built from this exact choice-set file without re-sorting.
if len(df_feat) != len(df_choice):
    raise ValueError(
        f"Row count mismatch: features table has {len(df_feat)} rows, "
        f"choice set has {len(df_choice)} rows; they must be row-aligned."
    )

# Restore the trained TabNet classifier from its saved archive.
clf = TabNetClassifier()
clf.load_model(MODEL_PATH)

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load an imputer file that you created yourself or otherwise trust.
with open(IMPUTER_PATH, 'rb') as f:
    imputer = pickle.load(f)

# Every column of the feature table except these three is fed to the model.
feature_cols = [c for c in df_feat.columns if c not in ['choice','OD','Obs_ID']]

# Apply the loaded imputer to the raw feature matrix, then score every row.
X_all = imputer.transform(df_feat[feature_cols].values)
# Column 1 of predict_proba is kept (presumably the probability of choice == 1 -- confirm
# against clf.classes_).
df_choice['proba_orig'] = clf.predict_proba(X_all)[:, 1]

# Keep only the alternatives that belong to one of the pilot OD pairs.
# A copy is taken so that df_pilot is independent of df_choice.
in_pilot = df_choice['OD'].isin(PILOT_ODS)
df_pilot = df_choice.loc[in_pilot].copy()

# For every Obs_ID, the row label of the alternative with the highest predicted probability.
orig_idx = df_pilot.groupby('Obs_ID')['proba_orig'].idxmax()

# One row per Obs_ID: the predicted alternative, with columns relabelled for the baseline scenario.
baseline_cols = ['Obs_ID', 'RuteIdSeq', 'ModalKomb', 'choice', 'proba_orig']
baseline_names = {
    'ModalKomb': 'OriginalRoute',
    'choice': 'ActualChosen',
    'proba_orig': 'OrigProb',
}
orig_preds = df_pilot.loc[orig_idx, baseline_cols].rename(columns=baseline_names)

# Relative frequency of each predicted route label across Obs_IDs.
predicted_routes = orig_preds['OriginalRoute']
orig_share = predicted_routes.value_counts(normalize=True).rename('OriginalShare')

# Fraction of Obs_IDs whose predicted alternative is flagged with choice == 1.
orig_acc = orig_preds['ActualChosen'].eq(1).mean()


# Scenario "No M3 & M4": drop every alternative whose RuteIdSeq contains the token M3 or M4.
# na=False treats a missing RuteIdSeq as "does not contain M3/M4"; without it a single NaN
# turns the str.contains result into an object Series and the `~` negation no longer
# yields a valid boolean mask.
mask_noM34 = ~df_pilot['RuteIdSeq'].str.contains(r'\bM3\b|\bM4\b', na=False)
df_noM34   = df_pilot[mask_noM34].copy()

# The model is not re-run: the original probabilities are reused and renormalised so that
# the remaining alternatives of each Obs_ID sum to 1.
df_noM34['proba_noM34_raw'] = df_noM34['proba_orig']
df_noM34['proba_noM34'] = (
    df_noM34['proba_noM34_raw']
    / df_noM34.groupby('Obs_ID')['proba_noM34_raw'].transform('sum')
)

# Highest renormalised probability per Obs_ID.
# An Obs_ID whose alternatives ALL contained M3/M4 has no rows left in df_noM34, so it is
# absent from new_idx / new_preds; new_share and new_acc are computed over the remaining
# Obs_IDs only.
new_idx   = df_noM34.groupby('Obs_ID')['proba_noM34'].idxmax()
new_preds = (
    df_noM34.loc[new_idx, ['Obs_ID','RuteIdSeq','ModalKomb','choice','proba_noM34']]
    .rename(columns={'ModalKomb':'NewRoute'})
)
new_share = new_preds['NewRoute'].value_counts(normalize=True).rename('NoM34Share')
new_acc   = (new_preds['choice']==1).mean()

# Side-by-side share table: one row per route label, one column per scenario.
# A route that is never predicted in one scenario gets a share of 0 there.
summary = (
    pd.concat([orig_share, new_share], axis=1)
      .fillna(0)
      .reset_index()
      .set_axis(['Route', 'OriginalShare', 'NoM34Share'], axis=1)
)

# Accuracy of the top-probability prediction, one row per scenario.
accuracy_df = pd.DataFrame(
    [('Original', orig_acc), ('No M3 & M4', new_acc)],
    columns=['Scenario', 'Group_Accuracy'],
)

# Plain-text report of both tables.
print("=== Modal Share Comparison ===")
print(summary.to_string(index=False))
print("\n=== Choice-set Accuracy ===")
print(accuracy_df.to_string(index=False))

# Figure 1: grouped bars comparing the predicted share of each route in both scenarios.
x = np.arange(len(summary))
fig, ax = plt.subplots(figsize=(6,4))
# Two bars of width 0.4 per route, centred on the tick at x.
ax.bar(x-0.2, summary['OriginalShare'], width=0.4, label='Original')
ax.bar(x+0.2, summary['NoM34Share'],  width=0.4, label='No M3 & M4')
ax.set_xticks(x)
ax.set_xticklabels(summary['Route'], rotation=30, ha='right')
ax.set_ylabel('Predicted Share')
ax.set_title('Original vs. No M3 & M4')
ax.legend()
fig.tight_layout()
plt.show()

# Figure 2: accuracy per scenario on a fixed 0-1 axis.
fig, ax = plt.subplots(figsize=(4,4))
ax.bar(
    accuracy_df['Scenario'],
    accuracy_df['Group_Accuracy'],
    color=['#1f77b4','#ff7f0e'],
)
ax.set_ylim(0,1)
ax.set_ylabel('Accuracy')
ax.set_title('Choice-set Accuracy')
fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Compare the predicted alternative of each scenario on total travel time and transfers.
#
# orig_idx / new_idx come from groupby('Obs_ID').idxmax(): they are keyed by Obs_ID and hold
# row labels. A trip whose alternatives were all removed in the no-M3/M4 scenario is missing
# from new_idx, so the two scenarios must be matched on Obs_ID. Matching by position would
# pair every later trip with the wrong counterpart.
# NOTE(review): looking these labels up in df_feat assumes df_feat has the same row index
# as df_choice -- confirm.
def _per_trip_feature(row_labels, column):
    """Return df_feat[column] of the selected rows, one entry per trip in orig_idx order.

    row_labels: Series keyed by Obs_ID whose values are row labels of df_feat.
    column:     name of the df_feat column to extract.

    The result has a fresh 0..n-1 index that follows orig_idx; trips absent from
    row_labels get NaN.
    """
    by_trip = pd.Series(
        df_feat.loc[row_labels.values, column].values,
        index=row_labels.index,
        name=column,
    )
    return by_trip.reindex(orig_idx.index).reset_index(drop=True)


# Extract original and new chosen trip features
orig_tt        = _per_trip_feature(orig_idx, 'TT_total')
new_tt         = _per_trip_feature(new_idx,  'TT_total')
orig_transfers = _per_trip_feature(orig_idx, 'transfers')
new_transfers  = _per_trip_feature(new_idx,  'transfers')

# Trips without any remaining alternative have NaN deltas and are skipped by mean/median.
trips_without_alternative = orig_idx.index.difference(new_idx.index)
if len(trips_without_alternative):
    print(
        "Trips with no alternative left after dropping M3/M4 "
        f"(excluded from Δ statistics): {len(trips_without_alternative)}"
    )

# Compute deltas (new scenario minus original scenario)
delta_tt  = new_tt  - orig_tt
delta_trf = new_transfers - orig_transfers

print(f"Average ΔTT (min): {delta_tt.mean():.2f}, Median ΔTT: {delta_tt.median():.2f}")
print(f"Average Δtransfers: {delta_trf.mean():.2f}, Median Δtransfers: {delta_trf.median():.2f}")

# Merge back to per-trip DataFrame for per-OD analysis.
# All four columns share the same 0..n-1 index in orig_idx order.
trip_ids = df_pilot.loc[orig_idx, 'Obs_ID'].reset_index(drop=True)
od_series = df_pilot.loc[orig_idx, 'OD'].reset_index(drop=True)
df_impacts = pd.DataFrame({
    'Obs_ID':         trip_ids,
    'OD':             od_series,
    'ΔTT':            delta_tt,
    'ΔTransfers':     delta_trf
})

# Mean impact per OD, sorted so the largest average travel-time increase comes first
od_impacts = (
    df_impacts
      .groupby('OD')
      .agg({'ΔTT':'mean','ΔTransfers':'mean'})
      .sort_values('ΔTT', ascending=False)
)
print("\nTop 5 ODs by Average ΔTT:")
print(od_impacts.head(5).to_string())

# Plot average ΔTT by OD (top 5)
top5 = od_impacts.head(5)
plt.figure(figsize=(6,4))
plt.bar(top5.index, top5['ΔTT'])
plt.ylabel('Avg ΔTravel Time (min)')
plt.title('Top 5 ODs by Increase in Travel Time')
plt.xticks(rotation=30, ha='right')
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd

# Build the per-trip comparison table, one row per trip in orig_preds order.
#
# new_preds has no row for a trip whose alternatives were all removed in the no-M3/M4
# scenario, so its routes are looked up by Obs_ID. Pairing the two tables by position
# would raise a length-mismatch error or attach routes to the wrong trip.
# Such trips get NaN as NewRoute.
new_route_by_trip = new_preds.set_index('Obs_ID')['RuteIdSeq']

# delta_tt / delta_trf are consumed positionally: they must hold one entry per trip,
# ordered like orig_preds.
df_compare = pd.DataFrame({
    'Obs_ID':        orig_preds['Obs_ID'].values,
    'OD':            df_pilot.loc[orig_idx, 'OD'].values,
    'OriginalRoute': orig_preds['RuteIdSeq'].values,
    'NewRoute':      orig_preds['Obs_ID'].map(new_route_by_trip).values,
    'ΔTT':           delta_tt.values,
    'ΔTransfers':    delta_trf.values
})

# Preview a few rows
print("Sample of predicted routes & impacts:")
print(df_compare.head(10).to_string(index=False))

# For the top 5 most-impacted ODs (by avg ΔTT) show all their trips
top5_ods = od_impacts.head(5).index.tolist()
print("\nDetails for Top 5 ODs by avg ΔTT:\n")
for od in top5_ods:
    sub = df_compare[df_compare['OD'] == od]
    print(f"OD = {od}")
    print(sub[['Obs_ID','OriginalRoute','NewRoute','ΔTT','ΔTransfers']].to_string(index=False))
    print()
