## Calculating 4 corridors headway
Uses the corridor concept to check trains running from Oslo S to Asker via Nationaltheatret.
To run the code, download https://storage.googleapis.com/marduk-production/outbound/gtfs/rb_norway-aggregated-gtfs.zip, extract the txt files, and place them in Modeling\Vera\src\

In [None]:
# Install the packages listed in requirements.txt using the Jupyter `%pip`
# magic (run this cell once before the cells below).
%pip install -r requirements.txt

In [8]:
# Stations that define the corridor, listed in travel order. The strings are
# matched against `stop_name` in stops.txt by the cells below, and consecutive
# entries form the segments for which headways are reported.
CORRIDOR = [
    "Oslo S",
    "Nationaltheatret stasjon",
    "Skøyen stasjon",
    "Lysaker stasjon",
    "Sandvika stasjon",
    "Asker stasjon",
]


In [9]:
import pandas as pd
import numpy as np

# Load the GTFS tables. ID columns are read as strings so that the isin()
# look-ups below compare like with like, whatever the IDs look like.
stops = pd.read_csv("src/stops.txt", dtype={"stop_id": str})
stop_times = pd.read_csv(
    "src/stop_times.txt",
    dtype={"trip_id": str, "stop_id": str},
    # Infer column dtypes from the whole file rather than chunk by chunk, so
    # pandas does not produce mixed-type columns (and a DtypeWarning).
    low_memory=False,
)
trips = pd.read_csv("src/trips.txt", dtype={"trip_id": str, "service_id": str})
calendar_dates = pd.read_csv("src/calendar_dates.txt", dtype={"service_id": str})

# pick a Monday (YYYYMMDD, as stored in calendar_dates.txt)
SERVICE_DATE = 20251201

# In GTFS calendar_dates.txt, exception_type 1 means "service added on this
# date" and 2 means "service removed on this date". Only type 1 rows describe
# services that actually run, so removed services must not be selected.
# NOTE(review): services defined only through calendar.txt (weekly patterns)
# are not considered here - confirm whether this feed relies on calendar.txt.
runs_on_date = (calendar_dates["date"] == SERVICE_DATE) & (
    calendar_dates["exception_type"] == 1
)
service_ids = calendar_dates.loc[runs_on_date, "service_id"].unique()

monday_trips = trips.loc[trips["service_id"].isin(service_ids), "trip_id"].unique()

# .copy() gives an independent frame, so later cells can add columns to
# stop_times without pandas' SettingWithCopyWarning.
stop_times = stop_times[stop_times["trip_id"].isin(monday_trips)].copy()


  stop_times = pd.read_csv("src/stop_times.txt")


In [10]:
# Map each corridor station name to the list of stop_ids whose stop_name
# contains that name (case-insensitive substring match).
#   * regex=False: the station name is matched literally, not as a pattern.
#   * na=False: rows with a missing stop_name count as "no match" instead of
#     putting NaN into the boolean mask, which .loc would reject.
# NOTE(review): substring matching can also pick up unrelated stops whose name
# merely contains the text (e.g. anything containing "Oslo S") - verify the
# matched IDs if the results look off.
corridor_ids = {
    name: list(
        stops.loc[
            stops["stop_name"].str.contains(name, case=False, na=False, regex=False),
            "stop_id",
        ].unique()
    )
    for name in CORRIDOR
}


In [11]:
# Departure time as a Timedelta so it can be sorted and differenced later.
# assign() returns a new frame, which avoids writing into a filtered slice.
stop_times = stop_times.assign(dep_td=pd.to_timedelta(stop_times["departure_time"]))


def visits_stages_in_order(stop_ids, stage_id_sets):
    """Return True if `stop_ids` hits every stage, in the given order.

    stop_ids      -- the stop_ids of one trip, ordered by stop_sequence.
    stage_id_sets -- one set of acceptable stop_ids per corridor stage.

    Stops that belong to no stage (or to a stage other than the next expected
    one) are skipped, so trips may call at additional stops in between.
    """
    if not stage_id_sets:
        return False
    next_stage = 0
    for sid in stop_ids:
        if sid in stage_id_sets[next_stage]:
            next_stage += 1
            if next_stage == len(stage_id_sets):
                return True
    return False


# Sets give constant-time membership tests inside the per-trip scan.
stage_id_sets = [set(corridor_ids[name]) for name in CORRIDOR]
all_corridor_ids = set().union(*stage_id_sets)

# Only rows at corridor stops can advance the in-order check, so every other
# row is dropped before grouping; this leaves the set of valid trips unchanged
# while avoiding a sort of every trip of the day. Sorting once up front is
# enough because groupby keeps the row order within each group.
corridor_calls = stop_times[stop_times["stop_id"].isin(all_corridor_ids)]
corridor_calls = corridor_calls.sort_values("stop_sequence")

valid_trips = [
    trip_id
    for trip_id, calls in corridor_calls.groupby("trip_id")
    if visits_stages_in_order(calls["stop_id"].tolist(), stage_id_sets)
]

# All stop_times rows (corridor stops and others) of the trips that traverse
# the whole corridor in order.
corridor_stop_times = stop_times[stop_times["trip_id"].isin(valid_trips)]


In [12]:
# Headway statistics per corridor segment, keyed "A → B".
# The headway of a segment is measured at its origin station A: it is the gap
# between consecutive departures (dep_td) of corridor trips from any stop_id
# that belongs to A. "train_count" is the number of such stop_times rows.
segment_headways = {}

# Walk consecutive station pairs (A, B) along the corridor.
for A_name, B_name in zip(CORRIDOR, CORRIDOR[1:]):
    A_ids = corridor_ids[A_name]

    seg = corridor_stop_times[corridor_stop_times["stop_id"].isin(A_ids)]
    seg = seg.sort_values("dep_td")

    # diff() leaves NaT in the first row (and wherever dep_td is missing);
    # those rows are not headways and are dropped.
    diffs = seg["dep_td"].diff().dropna()

    if diffs.empty:
        # Fewer than two usable departures: no headway can be computed.
        # NOTE(review): a zero Timedelta is reported in that case; callers
        # should check train_count before trusting the value.
        min_hw, avg_hw = pd.Timedelta(0), pd.Timedelta(0)
    else:
        min_hw = diffs.min()
        avg_hw = diffs.mean()

    segment_headways[f"{A_name} → {B_name}"] = {
        "min_headway": min_hw,
        "avg_headway": avg_hw,
        "train_count": len(seg),
    }

segment_headways


{'Oslo S → Nationaltheatret stasjon': {'min_headway': Timedelta('0 days 00:01:00'),
  'avg_headway': Timedelta('0 days 00:05:57.822580645'),
  'train_count': 249},
 'Nationaltheatret stasjon → Skøyen stasjon': {'min_headway': Timedelta('0 days 00:02:00'),
  'avg_headway': Timedelta('0 days 00:05:57.338709677'),
  'train_count': 249},
 'Skøyen stasjon → Lysaker stasjon': {'min_headway': Timedelta('0 days 00:02:00'),
  'avg_headway': Timedelta('0 days 00:05:57.338709677'),
  'train_count': 249},
 'Lysaker stasjon → Sandvika stasjon': {'min_headway': Timedelta('0 days 00:02:00'),
  'avg_headway': Timedelta('0 days 00:05:57.338709677'),
  'train_count': 249},
 'Sandvika stasjon → Asker stasjon': {'min_headway': Timedelta('0 days 00:02:00'),
  'avg_headway': Timedelta('0 days 00:05:58.548387096'),
  'train_count': 249}}