In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

from simulate import TaxiSimulator
from constants import (
    TIME_BLOCK_BOUNDARY,
    TAXI_INIT, 
    RIDER_ARRIVAL,
    RIDE_START, 
    RIDER_LOST,
    RIDE_COMPLETION, 
    RELOCATION_START, 
    RELOCATION_COMPLETION
)
from relocation_policies import (
    relocation_policy_blind_sampling,
    relocation_policy_jlcr_eta,
    relocation_policy_shortest_wait,
)

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [None]:
# Simulation parameters
Delta = 20 # in minutes; NOTE(review): presumably the simulator's time-block length — confirm
max_time = 72  # Simulate 72 hours
N = 8000  # NOTE(review): meaning is not visible in this notebook section (presumably a fleet/population size) — confirm
start_time = pd.Timestamp('2025-01-02 00:00:00')  # NOTE(review): presumably the timestamp of simulation time zero — confirm

In [None]:
def get_rider_arrival_timeseries(df_log, region_id, bin_minutes=20):
    """Count rider-arrival events per time bin for a single region.

    Args:
        df_log: Event-log DataFrame with 'event_type', 'data' and 'datetime'
            columns. For RIDER_ARRIVAL rows each 'data' entry is indexed with
            the key 'region'; 'datetime' must support the ``.dt`` accessor.
        region_id: Only arrivals whose ``data['region']`` equals this value
            are counted.
        bin_minutes: Bin width in minutes; timestamps are floored to it.

    Returns:
        DataFrame with columns 'time_bin' and 'num_arrivals', one row per bin
        containing at least one matching arrival (empty bins are absent).
    """
    events = df_log.loc[df_log['event_type'] == RIDER_ARRIVAL]
    in_region = events['data'].map(lambda payload: payload['region']) == region_id

    # Floor every matching timestamp to the start of its bin, then count per bin.
    binned = (
        events.loc[in_region, 'datetime']
        .dt.floor(f'{bin_minutes}min')
        .to_frame(name='time_bin')
    )
    return binned.groupby('time_bin').size().reset_index(name='num_arrivals')

def get_ridestarts_timeseries(df_log, region_id, bin_minutes=20):
    """Count ride-start events per time bin for rides originating in a region.

    Args:
        df_log: Event-log DataFrame with 'event_type', 'data' and 'datetime'
            columns. For RIDE_START rows each 'data' entry is indexed with
            the key 'origin'; 'datetime' must support the ``.dt`` accessor.
        region_id: Only ride starts whose ``data['origin']`` equals this value
            are counted.
        bin_minutes: Bin width in minutes; timestamps are floored to it.

    Returns:
        DataFrame with columns 'time_bin' and 'num_ridestarts', one row per
        bin containing at least one matching ride start (empty bins are absent).
    """
    events = df_log.loc[df_log['event_type'] == RIDE_START]
    from_region = events['data'].map(lambda payload: payload['origin']) == region_id

    # Floor every matching timestamp to the start of its bin, then count per bin.
    binned = (
        events.loc[from_region, 'datetime']
        .dt.floor(f'{bin_minutes}min')
        .to_frame(name='time_bin')
    )
    return binned.groupby('time_bin').size().reset_index(name='num_ridestarts')

def get_rider_lost_timeseries(df_log, region_id, bin_minutes=20):
    """Count lost-rider events per time bin for a single region.

    Args:
        df_log: Event-log DataFrame with 'event_type', 'data' and 'datetime'
            columns. For RIDER_LOST rows each 'data' entry is indexed with
            the key 'region'; 'datetime' must support the ``.dt`` accessor.
        region_id: Only lost-rider events whose ``data['region']`` equals this
            value are counted.
        bin_minutes: Bin width in minutes; timestamps are floored to it.

    Returns:
        DataFrame with columns 'time_bin' and 'num_lost_rides', one row per
        bin containing at least one matching event (empty bins are absent).
    """
    events = df_log.loc[df_log['event_type'] == RIDER_LOST]
    in_region = events['data'].map(lambda payload: payload['region']) == region_id

    # Floor every matching timestamp to the start of its bin, then count per bin.
    binned = (
        events.loc[in_region, 'datetime']
        .dt.floor(f'{bin_minutes}min')
        .to_frame(name='time_bin')
    )
    return binned.groupby('time_bin').size().reset_index(name='num_lost_rides')

def fill_missing_time_bins(df_timeseries, start_time, end_time, bin_minutes=20, count_col='num_lost_rides'):
    """Expand a sparse per-bin count table onto a regular time grid.

    Args:
        df_timeseries: DataFrame with a 'time_bin' column and a `count_col`
            column; bins with no events may be missing.
        start_time: First bin of the output grid (inclusive).
        end_time: Upper bound of the output grid (inclusive when it falls
            exactly on a grid step).
        bin_minutes: Spacing of the grid, in minutes.
        count_col: Name of the count column to zero-fill and cast to int.

    Returns:
        DataFrame with one row per grid bin between `start_time` and
        `end_time`. Bins absent from `df_timeseries` get a count of 0, and
        input rows whose 'time_bin' is not on the grid are dropped.
    """
    grid = pd.DataFrame(
        {'time_bin': pd.date_range(start=start_time, end=end_time, freq=f'{bin_minutes}min')}
    )
    # A left join keeps every grid bin; bins without a match come back as NaN.
    on_grid = grid.merge(df_timeseries, how='left', on='time_bin')
    return on_grid.fillna({count_col: 0}).astype({count_col: int})

def plot_arrival_versus_ridestarts(df_log, region_id, bin_minutes, warmup_days=2):
    """Plot binned rider arrivals, ride starts and lost riders for one region.

    The three series are built from `df_log`, zero-filled onto a common time
    grid, and drawn on a single figure that is shown with ``plt.show()``.

    Args:
        df_log: Event-log DataFrame accepted by the ``get_*_timeseries`` helpers.
        region_id: Region whose events are plotted.
        bin_minutes: Bin width in minutes.
        warmup_days: Days to skip after the first arrival bin, so the system
            can approach its stationary distribution before plotting. The
            default of 2 matches the previously hard-coded value.

    Raises:
        ValueError: If the log has no rider arrivals for `region_id`, because
            the plotted time window is derived from the arrival bins.

    Note:
        If the arrivals span less than `warmup_days`, the time grid is empty
        and the figure is drawn without data.
    """
    arrival_ts = get_rider_arrival_timeseries(df_log, region_id=region_id, bin_minutes=bin_minutes)
    if arrival_ts.empty:
        # Without this guard min()/max() are NaT and pd.date_range fails with
        # an opaque "Neither `start` nor `end` can be NaT" error.
        raise ValueError(f"No rider arrivals logged for region {region_id!r}; nothing to plot")

    # The plotted window is defined by the arrival bins, minus the warm-up period.
    start_time = arrival_ts['time_bin'].min() + timedelta(days=warmup_days)
    end_time = arrival_ts['time_bin'].max()

    # (sparse series, count column, legend label, line colour)
    series_specs = [
        (arrival_ts, 'num_arrivals', 'Rider Arrivals', 'blue'),
        (
            get_ridestarts_timeseries(df_log, region_id=region_id, bin_minutes=bin_minutes),
            'num_ridestarts', 'Ride Starts', 'orange',
        ),
        (
            get_rider_lost_timeseries(df_log, region_id=region_id, bin_minutes=bin_minutes),
            'num_lost_rides', 'Riders Lost', 'purple',
        ),
    ]

    sns.set(style="whitegrid")
    plt.figure(figsize=(12, 6))
    for sparse_ts, count_col, label, color in series_specs:
        # Put every series on the same grid so bins with no events plot as 0.
        filled_ts = fill_missing_time_bins(
            sparse_ts,
            start_time=start_time,
            end_time=end_time,
            bin_minutes=bin_minutes,
            count_col=count_col,
        )
        plt.plot(filled_ts['time_bin'], filled_ts[count_col], label=label, color=color)

    plt.title('Rider Arrivals/Starts/Lost Rides Over Time')
    plt.xlabel('Time')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Start of the third simulated day; only events strictly after this instant are counted.
THIRD_DAY_START = '2025-01-04 00:00:00'

# Event logs to compare, keyed by the label used in the printed report.
policy_logs_by_label = {
    'Lookahead T=2': df_2_log,
    'Lookahead T=4': df_4_log,
    'Lookahead T=8': df_8_log,
    'Base policy (no relocation)': df_base_log,
}


def count_events_after(df_log, event_type, cutoff=THIRD_DAY_START):
    """Return how many `event_type` rows of `df_log` have a datetime strictly after `cutoff`."""
    after_cutoff = (df_log.event_type == event_type) & (df_log.datetime > cutoff)
    return int(after_cutoff.sum())


# Completed relocations per policy on the third day.
for policy_label, policy_log in policy_logs_by_label.items():
    n_relocations = count_events_after(policy_log, RELOCATION_COMPLETION)
    print(f"{policy_label} relocates {n_relocations} times on the third day")

# Lost riders per policy on the third day.
for policy_label, policy_log in policy_logs_by_label.items():
    n_lost = count_events_after(policy_log, RIDER_LOST)
    print(f"{policy_label} loses {n_lost} times on the third day")