In [None]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta

In [None]:
scenario = "2025-07-08_4"
# TODO Where is data from rSIPP and @MAEDeR?
# NOTE: "__file__" is a string literal here, not the module attribute, so the
# expression below resolves relative to the current working directory.
filename = os.path.join(os.path.dirname(os.path.abspath("__file__")), "results", "case_study_eurostar", scenario, "eurostar.csv")
df = pd.read_csv(filename)
print(df.columns)
trains = df['trainNumber'].unique()

# Map each id to the train number found on the first row carrying that id.
# A single pass over the de-duplicated frame replaces the per-id re-filtering
# and avoids rebinding the builtin `id` at notebook scope.
first_row_per_id = df.drop_duplicates(subset='id')
id_to_train_num = {
    int(row_id): int(train_number)
    for row_id, train_number in zip(first_row_per_id['id'], first_row_per_id['trainNumber'])
}

# sort=False keeps the trains in order of first appearance, i.e. the same
# order as `trains` above.
for t, df_train in df.groupby('trainNumber', sort=False):
    # These values are constant for the whole train, so build them once.
    train_header = (
        f"Train {t} from {df_train['origin'].unique()[0]} at {df_train['start_time'].unique()[0]}"
        f" to {df_train['destination'].unique()[0]} at {df_train['endTime'].unique()[0]}"
        f" has {len(df_train)} entries"
    )
    # `i` is the row's index label in `df`, not its position within the train.
    for i, ut in df_train.iterrows():
        # TODO TrainNumber should wait for Eurostar at this point correct?
        print(f"{train_header}, {i}th entry: delay at {ut['delay_location']} for {ut['delay_amount']} with ATF{ut['zeta'], ut['alpha'], ut['beta'], ut['delta']}")
        print("Path of Eurostar", ut['path'])
df

In [None]:
# Delay allowances, in seconds, used for the "Tipping Point (<n>s)" columns.
allowed_delays = [180, 300, 720]

def td_str(td):
    """Format a ``datetime.timedelta`` as ``MM:SS``.

    Minutes are the *total* minutes of the duration, so durations of one hour
    or more keep their magnitude (3700 s -> "61:40") instead of having the
    hour/day part dropped. Fractional seconds are truncated. Negative
    durations are rendered with a leading "-".

    Args:
        td: The duration to format.

    Returns:
        The duration as a zero-padded "MM:SS" string.
    """
    whole_seconds = abs(td) // timedelta(seconds=1)
    # Only show a sign when something non-zero remains after truncation.
    sign = "-" if td < timedelta(0) and whole_seconds else ""
    minutes, seconds = divmod(whole_seconds, 60)
    return f"{sign}{minutes:02d}:{seconds:02d}"

def extract_tipping_point(df):
    """Summarise the tipping point per train and delay location.

    For every ``Train`` the rows are grouped by ``Delay Location`` (only the
    part before the first "|" is used) and the maximum ``beta`` and maximum
    ``Delay Amount`` are taken. The maximum ``beta`` is reported as the
    tipping point, both in seconds and formatted via ``td_str``. For each
    entry of the module-level ``allowed_delays`` an adjusted tipping point is
    added: ``beta`` reduced by the part of the delay amount that exceeds the
    allowance, or "-" when nothing positive remains.

    The input frame is not modified.

    Args:
        df: DataFrame with at least the columns ``Train``, ``Delay Location``,
            ``Delay Amount`` and ``beta``. The latter two are passed to
            ``timedelta(seconds=...)``, i.e. treated as seconds.

    Returns:
        DataFrame produced by ``groupby('Train').apply(...)`` with one row per
        train and delay location, sorted within each train by the tipping
        point in seconds.
    """
    def format_seconds(seconds):
        # Render a number of seconds with the notebook's td_str format.
        return td_str(timedelta(seconds=seconds))

    def adjusted_tipping_point(beta, delay_amount, allowed_delay):
        # The allowance is passed explicitly rather than read from a loop
        # variable through a closure.
        new_tp = beta - max(0, delay_amount - allowed_delay)
        if new_tp > 0:
            return format_seconds(new_tp)
        return "-"

    def summarise_train(train_df):
        result = train_df.groupby("Delay Location").agg({
            "beta": "max",
            "Delay Amount": "max",
        })
        # Optional filter kept from the original for reference:
        # result = result.loc[result['beta'] < 900]
        result["Tipping Point (sec)"] = result["beta"]
        result["Tipping Point"] = result["beta"].apply(format_seconds)

        for allowed_delay in allowed_delays:
            result[f"Tipping Point ({allowed_delay}s)"] = [
                adjusted_tipping_point(beta, delay_amount, allowed_delay)
                for beta, delay_amount in zip(result["beta"], result["Delay Amount"])
            ]
        result["Delay Amount (sec)"] = result["Delay Amount"]
        result["Delay Amount"] = result["Delay Amount"].apply(format_seconds)
        # Sort on the numeric column: the formatted string only sorts
        # correctly while every value has the same zero-padded width.
        return result.sort_values("Tipping Point (sec)", ascending=True).drop(columns=["beta"])

    # assign() returns a new frame, so the caller's data is left untouched.
    df = df.assign(**{"Delay Location": df["Delay Location"].str.split("|").str[0]})
    return df.groupby(by='Train').apply(summarise_train, include_groups=False)

# Build the tipping-point table: rename the raw columns to display names,
# keep the scenario name up to the first "." and the delay-location part
# after the first "-", then summarise each scenario with
# extract_tipping_point.
tp_df = (
    df.rename(columns={
        "delay_amount": "Delay Amount",
        "delay_location": "Delay Location",
        "trainNumber": "Train",
        "scenario": "Scenario",
        "label": "Label",
    })
    .assign(**{
        "Scenario": lambda frame: frame["Scenario"].map(lambda name: name.split(".")[0]),
        "Delay Location": lambda frame: frame["Delay Location"].map(lambda location: location.split("-")[1]),
    })
    .groupby(["Scenario"])
    .apply(extract_tipping_point, include_groups=False)
    .rename(columns={"Tipping Point": "Tipping Point (safe=beta)"})
)
tp_df

In [None]:
print("Number of tipping points:", len(tp_df))

# The first two index levels of tp_df are read as (scenario, train).
scenario_level = tp_df.index.get_level_values(0)
train_level = tp_df.index.get_level_values(1)

# Collect the distinct trains seen for every scenario.
scenarios = {}
for scenario_name, train_number in zip(scenario_level, train_level):
    scenarios.setdefault(scenario_name, set()).add(train_number)

# One "&"-separated row per scenario. The statistics are computed on that
# scenario's rows only, so rows differ when several scenarios are loaded.
for s in scenarios:
    scenario_rows = tp_df[scenario_level == s]
    print(f"{s.split('_')[1]} & {len(scenarios[s])} & {len(scenario_rows)} & {scenario_rows['Delay Amount (sec)'].mean()} & {scenario_rows['Tipping Point (sec)'].mean()} | {scenario_rows['Delay Amount (sec)'].median()}")

