# In [23]:
import pandas as pd
import json
import random

# === Zone adjacency: parsed contents of zone_neighbors.json ===
with open("zone_neighbors.json", "r") as f:
    ZONE_NEIGHBORS = json.load(f)

# === Real dropoff zones: read only the DOLocationID column, drop rows where
# it is missing, and store the ids as strings ===
df = (
    pd.read_parquet('./data_test/yellow_tripdata_2024-01.parquet', columns=["DOLocationID"])
    .dropna(subset=["DOLocationID"])
    .assign(DOLocationID=lambda frame: frame["DOLocationID"].astype(str))
)

# Draw 10 dropoff zones to test on; the fixed seed keeps the draw reproducible
dropoff_ids = df["DOLocationID"]
sample_dropoffs = dropoff_ids.sample(10, random_state=42).tolist()


# === Define the Stay Strategy ===
def stay_strategy(state):
    """
    Choose the current zone as the next zone (the driver does not move).

    The zone is returned in its string form, whatever type was passed in.
    """
    same_zone = str(state)
    return same_zone


# === Define the Random Strategy ===
def random_strategy(state):
    """
    Pick one of the current zone's neighbors at random.

    Neighbors are looked up in ZONE_NEIGHBORS under the string form of
    `state`. When the zone has no entry, or its neighbor list is empty,
    the string form of `state` is returned (the driver stays put).
    """
    zone_key = str(state)
    adjacent = ZONE_NEIGHBORS.get(zone_key, [])
    return random.choice(adjacent) if adjacent else zone_key


# === Run each strategy once per sampled dropoff zone, in sample order ===
stay_results = list(map(stay_strategy, sample_dropoffs))
random_results = list(map(random_strategy, sample_dropoffs))

# === Show the inputs next to each strategy's decisions ===
print("Original dropoff zones:", sample_dropoffs)
print("Stay strategy decisions: ", stay_results)
print("Random strategy decisions:", random_results)

# === Rollout Function ===
def _normalize_zone_id(zone):
    """Return `zone` as a canonical zone-id string (e.g. 233, 233.0, "233.0" -> "233")."""
    try:
        as_float = float(zone)
    except (TypeError, ValueError):
        # Not numeric at all: keep whatever string form it has.
        return str(zone)
    if as_float.is_integer():
        return str(int(as_float))
    # NaN, infinities and fractional values have no integer form.
    return str(zone)


def rollout(strategy_fn, trip_df, start_time, start_zone, max_steps=10, wait_window='15min'):
    """
    Simulates a driver using a strategy to move between zones and find new passengers.

    At every step the strategy picks a zone, and the driver waits there for up
    to `wait_window`. If `trip_df` holds a trip picked up in that zone inside
    the window, the driver takes the earliest one and continues from that
    trip's dropoff zone and dropoff time. Otherwise the driver ends up in the
    chosen zone with the clock advanced by `wait_window`.

    Parameters:
        strategy_fn: callable taking the current zone id (str) and returning
            the zone id to move to.
        trip_df: DataFrame with the columns "PULocationID",
            "DOLocationID", "tpep_pickup_datetime" and "tpep_dropoff_datetime".
            "PULocationID" is compared against an integer zone id.
        start_time: anything accepted by pd.to_datetime.
        start_zone: starting zone id; integral floats such as 233.0 are
            treated as "233".
        max_steps: number of strategy decisions to simulate.
        wait_window: anything accepted by pd.Timedelta.

    Returns:
        (pickups, visited_zones): the number of rides taken, and the list of
        zone ids (str) starting with the start zone followed by the zone the
        driver is in after each step.

    Raises:
        ValueError: if a zone id chosen by the strategy cannot be converted
            to an integer.
    """
    current_time = pd.to_datetime(start_time)
    current_zone = _normalize_zone_id(start_zone)
    # The window length never changes, so build it once instead of per step.
    wait = pd.Timedelta(wait_window)
    pickups = 0
    visited_zones = [current_zone]

    for _ in range(max_steps):
        # Choose next zone using strategy. Normalizing here keeps the int()
        # below working when zone ids arrive as floats or float-like strings.
        next_zone = _normalize_zone_id(strategy_fn(current_zone))

        # Simulate arriving at the zone and waiting
        earliest_time = current_time
        latest_time = current_time + wait

        # Find pickups from that zone within the wait window (both ends inclusive)
        pickup_times = trip_df["tpep_pickup_datetime"]
        possible_pickups = trip_df[
            (trip_df["PULocationID"] == int(next_zone)) &
            (pickup_times >= earliest_time) &
            (pickup_times <= latest_time)
        ]

        if not possible_pickups.empty:
            # Assume driver gets first available ride
            first_pickup = possible_pickups.sort_values("tpep_pickup_datetime").iloc[0]
            # A float-typed DOLocationID column would otherwise yield "233.0",
            # which breaks int() on the following step.
            current_zone = _normalize_zone_id(first_pickup["DOLocationID"])
            current_time = first_pickup["tpep_dropoff_datetime"]
            pickups += 1
        else:
            # No pickup found - the driver waited out the whole window idle
            current_time = latest_time
            # ...and is now located in the zone the strategy chose
            current_zone = next_zone

        visited_zones.append(current_zone)

    return pickups, visited_zones


# Output:
# Original dropoff zones: ['233', '75', '20', '263', '238', '163', '141', '239', '141', '61']
# Stay strategy decisions:  ['233', '75', '20', '263', '238', '163', '141', '239', '141', '61']
# Random strategy decisions: ['161', '263', '60', '262', '24', '230', '236', '143', '140', '225']
