# Off-Peak Simulator Lite

Use the controls below to explore how shifting riders from peak to off-peak periods changes demand, load factors, and revenue.

In [None]:
%matplotlib inline
import math
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import widgets
from IPython.display import display, clear_output


In [None]:
# Use the seaborn look only when this matplotlib build ships it; otherwise the
# default style stays in effect. Written as a plain `if` because the call is
# made for its side effect, not for a value.
if 'seaborn-v0_8' in plt.style.available:
    plt.style.use('seaborn-v0_8')

# Raw input lives in ./data; files written by the notebook go to ./data/processed.
DATA_PATH = Path('data')
PROCESSED_PATH = DATA_PATH / 'processed'
RIDERSHIP_CSV = DATA_PATH / 'ridership.csv'
# parents=True also creates DATA_PATH itself when it does not exist yet.
PROCESSED_PATH.mkdir(parents=True, exist_ok=True)

# Default number of trailing weeks averaged by build_forecast.
BASELINE_WEEKS = 4


In [None]:
def fabricate_synthetic_data(path: Path, weeks: int = 12, freq_minutes: int = 15) -> pd.DataFrame:
    """Generate a reproducible synthetic ridership series and save it as CSV.

    The series starts at 2025-01-06 00:00 and covers ``weeks`` full weeks at a
    fixed ``freq_minutes`` cadence. Each sample is a smooth daily profile
    (constant base, morning and evening Gaussian peaks, a cosine midday bump)
    scaled down on Saturdays and Sundays, plus random noise, floored at 20.

    Args:
        path: Destination CSV file; missing parent directories are created.
        weeks: Number of weeks to generate.
        freq_minutes: Sampling interval in minutes.

    Returns:
        DataFrame with ``timestamp`` and ``riders`` columns (also written to
        ``path``).
    """
    start = pd.Timestamp('2025-01-06 00:00:00')
    periods = weeks * 7 * (24 * 60 // freq_minutes)
    # 'min' is the supported minute alias; the legacy 'T' spelling is
    # deprecated in current pandas releases.
    index = pd.date_range(start=start, periods=periods, freq=f'{freq_minutes}min')

    # Fixed seed so that repeated runs produce the same data set.
    rng = np.random.default_rng(42)
    # dayofweek 5 = Saturday, 6 = Sunday; other days keep a factor of 1.0.
    weekend_scale = {5: 0.7, 6: 0.6}

    def daily_pattern(ts: pd.Timestamp) -> float:
        """Return the noise-free demand level for the time of day of ``ts``."""
        hour_decimal = ts.hour + ts.minute / 60
        base = 80
        morning_peak = 200 * math.exp(-0.5 * ((hour_decimal - 8) / 1.5) ** 2)
        evening_peak = 180 * math.exp(-0.5 * ((hour_decimal - 18) / 1.8) ** 2)
        offpeak = 40 * math.cos((hour_decimal - 12) / 12 * math.pi) + 40
        return base + morning_peak + evening_peak + offpeak

    riders = []
    # Kept as an explicit loop: the order of the random draws per timestamp
    # (noise, weather gate, optional weather shock) defines the output values.
    for ts in index:
        base_demand = daily_pattern(ts)
        factor = weekend_scale.get(ts.dayofweek, 1.0)
        noise = rng.normal(0, 8)
        # Roughly 30% of samples receive an additional random shock.
        weather = rng.normal(0, 5) if rng.random() < 0.3 else 0
        riders.append(max(20, int(base_demand * factor + noise + weather)))

    df = pd.DataFrame({'timestamp': index, 'riders': riders})
    # Make the function usable on a fresh checkout where the folder is absent.
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(path, index=False)
    return df


def load_or_generate(path: Path) -> pd.DataFrame:
    """Return ridership data ordered by time, creating it when absent.

    An existing CSV at ``path`` is read with its ``timestamp`` column parsed
    as datetimes; otherwise a synthetic data set is generated (and written to
    ``path``). The result always has a fresh 0..n-1 index.
    """
    if not path.exists():
        frame = fabricate_synthetic_data(path)
    else:
        frame = pd.read_csv(path, parse_dates=['timestamp'])
    return frame.sort_values('timestamp').reset_index(drop=True)


def infer_frequency_minutes(df: pd.DataFrame) -> int:
    """Estimate the sampling interval of ``df['timestamp']`` in whole minutes.

    The most common gap between consecutive rows is used. Falls back to 60
    when there are fewer than two rows or when that gap is shorter than one
    minute.
    """
    gaps = df['timestamp'].diff().dropna()
    if gaps.empty:
        return 60
    most_common_gap = gaps.mode().iloc[0]
    whole_minutes = int(most_common_gap.total_seconds() // 60)
    # A zero result (sub-minute cadence) is replaced by the hourly default.
    return whole_minutes if whole_minutes else 60


# Load (or synthesise) the ridership history and detect its sampling interval.
data = load_or_generate(RIDERSHIP_CSV)
freq_minutes = infer_frequency_minutes(data)

# Calendar features derived from the timestamp; build_forecast groups on
# day_of_week and time_of_day.
_stamp = data['timestamp'].dt
data['hour'] = _stamp.hour + _stamp.minute / 60
data['day_of_week'] = _stamp.dayofweek
data['is_weekend'] = data['day_of_week'].isin([5, 6])
data['time_of_day'] = _stamp.strftime('%H:%M')
print(f'Loaded {len(data):,} rows at a {freq_minutes}-minute cadence.')


In [None]:
def build_forecast(df: pd.DataFrame, freq_minutes: int, history_weeks: int = BASELINE_WEEKS):
    """Project the next seven days from a (weekday, time-of-day) average.

    The mean of ``riders`` is taken per ``day_of_week`` / ``time_of_day`` pair
    over the last ``history_weeks`` weeks of ``df`` (all of ``df`` if that
    window is empty). Each future slot receives the mean of its pair, or the
    overall window mean when the pair was never observed. The forecast is also
    written to ``forecast.csv`` under ``PROCESSED_PATH``.

    Args:
        df: History with ``timestamp``, ``riders``, ``day_of_week`` and
            ``time_of_day`` columns.
        freq_minutes: Spacing of the forecast slots in minutes.
        history_weeks: Length of the averaging window in weeks.

    Returns:
        Tuple ``(forecast_df, grouped, default_mean)``: the forecast frame,
        the per-pair mean Series, and the fallback mean.
    """
    ordered = df.sort_values('timestamp').copy()
    step = pd.Timedelta(minutes=freq_minutes)
    latest = ordered['timestamp'].iloc[-1]
    n_steps = int(pd.Timedelta(days=7) / step)
    future_index = pd.date_range(start=latest + step, periods=n_steps, freq=step)

    # Adding one step makes the window span exactly `history_weeks` weeks of
    # slots, ending on (and including) the latest observation.
    window_start = latest - pd.Timedelta(weeks=history_weeks) + step
    history = ordered[ordered['timestamp'] >= window_start]
    if history.empty:
        history = ordered.copy()

    grouped = history.groupby(['day_of_week', 'time_of_day'])['riders'].mean()
    default_mean = history['riders'].mean()

    def _slot_record(ts):
        """Build one forecast row for the future timestamp ``ts``."""
        weekday = ts.dayofweek
        clock = ts.strftime('%H:%M')
        return {
            'timestamp': ts,
            'yhat': grouped.get((weekday, clock), default_mean),
            'day_of_week': weekday,
            'time_of_day': clock,
            'is_weekend': weekday in (5, 6),
            'hour': ts.hour + ts.minute / 60,
            'minute_of_day': ts.hour * 60 + ts.minute,
        }

    forecast_df = pd.DataFrame([_slot_record(ts) for ts in future_index])
    forecast_df.to_csv(PROCESSED_PATH / 'forecast.csv', index=False)
    return forecast_df, grouped, default_mean


# Build the seven-day baseline forecast once at load time. `forecast_df` is the
# frame render_simulation feeds into the simulator; the per-slot lookup and the
# fallback mean are kept as module-level names alongside it.
forecast_df, baseline_lookup, baseline_mean = build_forecast(data, freq_minutes)
# Show the first and last forecast timestamps as a quick sanity check.
print(f'Forecast horizon: {forecast_df["timestamp"].min()} to {forecast_df["timestamp"].max()}')


In [None]:
def time_to_minutes(value: str) -> int:
    """Convert an ``'HH:MM'`` string into minutes elapsed since midnight."""
    hour_text, minute_text = value.split(':')
    return int(hour_text) * 60 + int(minute_text)


def mark_offpeak(minutes: pd.Series, start_min: int, end_min: int) -> pd.Series:
    """Flag the minute-of-day values that fall in ``[start_min, end_min)``.

    When ``start_min`` is greater than ``end_min`` the window is treated as
    wrapping past midnight. Equal bounds yield an all-False mask.
    """
    at_or_after_start = minutes >= start_min
    before_end = minutes < end_min
    if start_min > end_min:
        # Wrapping window: late-evening slots or early-morning slots.
        return at_or_after_start | before_end
    return at_or_after_start & before_end


def simulate_demand(forecast: pd.DataFrame, offpeak_start: str, offpeak_end: str, discount_pct: float, elasticity: float):
    """Simulate moving riders into a discounted off-peak window.

    Off-peak slots are scaled by the demand response to the discount, the same
    number of riders is then removed from the other (peak) slots in proportion
    to their baseline, and a final proportional correction keeps the overall
    rider total equal to the baseline. The per-slot result is written to
    ``simulation.csv`` under ``PROCESSED_PATH``.

    Args:
        forecast: Frame with ``yhat`` and ``minute_of_day`` columns.
        offpeak_start: Start of the off-peak window as ``'HH:MM'`` (inclusive).
        offpeak_end: End of the off-peak window as ``'HH:MM'`` (exclusive); a
            value earlier than the start wraps past midnight.
        discount_pct: Off-peak fare discount as a fraction (0.10 = 10%).
        elasticity: Price elasticity of demand, expected to be zero or
            negative (-0.3 means a 10% price cut lifts demand by 3%).

    Returns:
        Tuple ``(df, summary)``. ``df`` is a copy of ``forecast`` with
        ``baseline``, ``is_offpeak``, ``is_peak`` and ``after_incentive``
        columns. ``summary`` holds ``totals_match``, ``peak_reduction_pct``,
        ``offpeak_increase_pct`` and ``revenue_change_pct``.
    """
    df = forecast.copy()
    df['baseline'] = df['yhat']
    start_min = time_to_minutes(offpeak_start)
    end_min = time_to_minutes(offpeak_end)
    df['is_offpeak'] = mark_offpeak(df['minute_of_day'], start_min, end_min)
    df['is_peak'] = ~df['is_offpeak']

    # A discount is a price change of -discount_pct, so demand changes by
    # elasticity * (-discount_pct). With a negative elasticity this is an
    # increase; adding the product instead would shrink off-peak demand.
    offpeak_factor = 1 - elasticity * discount_pct
    # Float copy so that scaled values can be assigned into integer forecasts.
    df['after_incentive'] = df['baseline'].astype(float)
    df.loc[df['is_offpeak'], 'after_incentive'] = df.loc[df['is_offpeak'], 'baseline'] * offpeak_factor

    baseline_offpeak = df.loc[df['is_offpeak'], 'baseline'].sum()
    baseline_peak = df.loc[df['is_peak'], 'baseline'].sum()
    scaled_offpeak = df.loc[df['is_offpeak'], 'after_incentive'].sum()
    delta = scaled_offpeak - baseline_offpeak

    # Step 1: take the riders gained off-peak out of the peak slots, spread in
    # proportion to each slot's baseline. Without any peak demand the off-peak
    # slots are the target instead.
    if abs(delta) > 1e-6 and (baseline_peak > 0 or baseline_offpeak > 0):
        target_series = df.loc[df['is_peak'], 'baseline'] if baseline_peak > 0 else df.loc[df['is_offpeak'], 'baseline']
        target_sum = target_series.sum()
        if target_sum > 0:
            adjustment = delta * (target_series / target_sum)
            adjusted_values = target_series - adjustment
            # Demand cannot go negative; any shortfall is handled in step 2.
            adjusted_values = adjusted_values.clip(lower=0)
            df.loc[target_series.index, 'after_incentive'] = adjusted_values

    # Step 2: remove whatever imbalance is left (clipping, rounding) so the
    # total stays at the baseline. Peak slots that still carry riders are
    # preferred; otherwise the off-peak slots absorb it.
    total_baseline = df['baseline'].sum()
    total_after = df['after_incentive'].sum()
    diff = total_after - total_baseline
    if abs(diff) > 1e-6:
        peak_mask = df['is_peak'] & (df['after_incentive'] > 0)
        target_mask = peak_mask if peak_mask.any() else df['is_offpeak']
        target_sum = df.loc[target_mask, 'after_incentive'].sum()
        if target_sum > 0:
            correction = diff * (df.loc[target_mask, 'after_incentive'] / target_sum)
            df.loc[target_mask, 'after_incentive'] -= correction

    df.to_csv(PROCESSED_PATH / 'simulation.csv', index=False)

    totals_match = math.isclose(df['after_incentive'].sum(), df['baseline'].sum(), rel_tol=1e-6)
    peak_reduction_pct = 0.0
    if baseline_peak > 0:
        after_peak = df.loc[df['is_peak'], 'after_incentive'].sum()
        peak_reduction_pct = 100 * (1 - after_peak / baseline_peak)
    # Measured after both rebalancing steps, which may have altered the
    # off-peak slots; the pre-rebalancing sum would overstate the increase.
    after_offpeak = df.loc[df['is_offpeak'], 'after_incentive'].sum()
    offpeak_increase_pct = 0.0
    if baseline_offpeak > 0:
        offpeak_increase_pct = 100 * (after_offpeak / baseline_offpeak - 1)

    # Revenue in rider-fare units: a baseline or peak rider counts 1, an
    # off-peak rider counts (1 - discount_pct).
    baseline_revenue = df['baseline'].sum()
    after_revenue = df.loc[df['is_peak'], 'after_incentive'].sum()
    after_revenue += after_offpeak * (1 - discount_pct)
    revenue_change_pct = 100 * (after_revenue / baseline_revenue - 1) if baseline_revenue else 0.0

    summary = {
        'totals_match': totals_match,
        'peak_reduction_pct': peak_reduction_pct,
        'offpeak_increase_pct': offpeak_increase_pct,
        'revenue_change_pct': revenue_change_pct,
    }
    return df, summary


def render_simulation(offpeak_start: str, offpeak_end: str, discount_pct: float, elasticity: float):
    """Run the simulator on ``forecast_df``, print the summary and plot it.

    Two charts are drawn: baseline versus post-incentive demand over the first
    seven days of the forecast, and the average of both series per time of
    day. Returns the simulated frame.
    """
    sim_df, summary = simulate_demand(forecast_df, offpeak_start, offpeak_end, discount_pct, elasticity)

    print('Summary:')
    print(f" - Total riders unchanged: {'Yes' if summary['totals_match'] else 'No'}")
    print(f" - Peak hour reduction: {summary['peak_reduction_pct']:.2f}%")
    print(f" - Off-peak increase: {summary['offpeak_increase_pct']:.2f}%")
    print(f" - Revenue change: {summary['revenue_change_pct']:.2f}%")

    # Restrict the line chart to seven days counted from the first timestamp.
    stamps = sim_df['timestamp']
    first_stamp = stamps.min()
    window_end = first_stamp + pd.Timedelta(days=7)
    week_df = sim_df.loc[(stamps >= first_stamp) & (stamps < window_end)]

    fig, (ax_week, ax_profile) = plt.subplots(2, 1, figsize=(12, 10))
    for column, label in (('baseline', 'Baseline'), ('after_incentive', 'After incentive')):
        ax_week.plot(week_df['timestamp'], week_df[column], label=label)
    ax_week.set_title('Baseline vs. After incentive (first week of forecast)')
    ax_week.set_ylabel('Riders')
    ax_week.legend()

    # Mean of both series for every HH:MM slot across the whole simulation.
    hourly = sim_df.copy()
    hourly['hour_of_day'] = hourly['timestamp'].dt.strftime('%H:%M')
    profile = hourly.groupby('hour_of_day')[['baseline', 'after_incentive']].mean()
    profile.plot(kind='bar', ax=ax_profile)
    ax_profile.set_title('Average riders by time of day')
    ax_profile.set_ylabel('Riders')
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

    return sim_df


In [None]:
# Selectable clock times: every quarter hour from 00:00 to 23:45.
time_options = [
    f"{hour:02d}:{minute:02d}"
    for hour in range(24)
    for minute in (0, 15, 30, 45)
]

# Off-peak window bounds.
offpeak_start_widget = widgets.Dropdown(
    options=time_options,
    value='10:00',
    description='Off-peak start',
)
offpeak_end_widget = widgets.Dropdown(
    options=time_options,
    value='16:00',
    description='Off-peak end',
)

# Discount as a fraction (shown as a percentage); elasticity limited to [-1, 0].
discount_widget = widgets.FloatSlider(
    min=0.0,
    max=0.5,
    step=0.01,
    value=0.10,
    description='Discount',
    readout_format='.0%',
)
elasticity_widget = widgets.FloatSlider(
    min=-1.0,
    max=0.0,
    step=0.01,
    value=-0.30,
    description='Elasticity',
)

run_button = widgets.Button(description='Run', button_style='primary')
# Printed summary and charts are captured in this output area.
output = widgets.Output()


def on_run(_):
    """Re-run the simulation with the current widget values.

    The single argument (the clicked button, or ``None`` for a manual call) is
    ignored. Previous output is replaced rather than appended.
    """
    with output:
        # wait=True defers clearing until new output is ready to be shown.
        clear_output(wait=True)
        render_simulation(
            offpeak_start=offpeak_start_widget.value,
            offpeak_end=offpeak_end_widget.value,
            discount_pct=discount_widget.value,
            elasticity=elasticity_widget.value,
        )


# Re-run the simulation whenever the button is pressed.
run_button.on_click(on_run)

# Layout: both window dropdowns share the first row, followed by the sliders
# and the run button stacked vertically.
_window_row = widgets.HBox([offpeak_start_widget, offpeak_end_widget])
controls = widgets.VBox([_window_row, discount_widget, elasticity_widget, run_button])
display(controls, output)

# Render once with the default settings so the output is populated on load.
on_run(None)
