In [1]:
import os
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

from multiprocessing import Pool
from utils import (
    prepare_arrival_events_from_real_data,
    run_all_simulations_for_seed,
)
from constants import taxi_type

# ⚙️ Simulation Configuration

Define key parameters for the simulation:
- `Delta`: length of time blocks in minutes
- `max_time`: total simulation duration (hours)
- `N`: number of taxis in the fleet
- `eta`: hyperparameter for JLCR policy
- `lookahead_options`: defines which Q-matrix files (for lookahead) to use in some modes

In [3]:
# --- Fleet and simulation horizon ---
N = 8_000  # number of taxis in the fleet
max_time = 72  # total simulation duration, in hours
# NOTE(review): presumably the timestamp the simulated clock starts from — confirm.
start_time = pd.Timestamp("2025-01-02 00:00:00")

# --- Time discretisation ---
Delta = 20  # length of one time block, in minutes

# --- Relocation policy ---
eta = 0.5  # hyperparameter for the JLCR policy
lookahead_options = [2, 4, 8]  # which lookahead Q-matrix files to use in some modes

# 📊 Load Demand and Travel Parameters

This block loads:

- `trip_counts`: counts of trips for computing λ and P
- `mu_`: average travel rates

It computes:

- `lambda_`: arrival rate by region and time
- `P`: destination distribution by region and time

It also initializes `Q_base`, which encodes no relocation (one identity matrix per time block).

In [4]:
# Historical trip counts plus the number of dates they were collected over.
with np.load("../nyc_trip/trip_counts.npz") as counts_archive:
    trip_counts = counts_archive["trip_counts"]
    num_dates = counts_archive["num_dates"]

# Travel-rate array.
with np.load("../nyc_trip/mu_cp.npz") as mu_archive:
    mu_ = mu_archive["mu"]

# Bump every empty cell to 1 so that no sum over the last axis is zero, which
# keeps the normalisation for P well defined. The bumped counts also feed
# lambda_ below.
empty_cells = trip_counts == 0
trip_counts[empty_cells] = 1

# Totals over the last axis, kept 3-D so they broadcast against trip_counts.
totals_last_axis = trip_counts.sum(axis=2, keepdims=True)

# Arrival rate: total count divided by the observed time, where Delta / 60
# converts the block length from minutes to hours.
observed_hours = Delta / 60 * num_dates
lambda_ = totals_last_axis.squeeze(axis=2) / observed_hours

# Normalise the counts along the last axis to obtain the distribution P.
P = trip_counts / totals_last_axis

# One R x R identity matrix per leading index of P, i.e. "no relocation".
T, R, _ = P.shape
Q_base = np.array([np.eye(R)] * T)

# 🚖 Load Observed Demand Data

This block loads the real NYC trip data:

- It finds one file based on `taxi_type`
- It extracts 3 consecutive weekdays of trips using `prepare_arrival_events_from_real_data()`

These trips are later used for the real-demand simulation mode.

Set `data_dir` to the directory where you saved the parquet files. File names should look like `fhv_tripdata_2023-01.parquet`. If you don't have one, download one [here](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page).

In [5]:
# Directory holding the downloaded trip-record parquet files. This is the only
# path that needs editing if the data lives elsewhere.
data_dir = "../nyc_trip/data"

# Keep only files whose name starts with the configured taxi type. Sorting
# makes the pick deterministic, because os.listdir returns entries in
# arbitrary order.
filenames = sorted(fn for fn in os.listdir(data_dir) if fn.startswith(taxi_type))
if not filenames:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(
        f"No file starting with {taxi_type!r} found in {data_dir!r}"
    )
file = filenames[0]

# Build the path from data_dir (not a second hard-coded copy of it) so that
# changing data_dir above is sufficient.
df = pd.read_parquet(os.path.join(data_dir, file))
arrival_events = prepare_arrival_events_from_real_data(df)

# 🚀 Run Simulations in Parallel

This block launches simulations across 20 seeds using multiprocessing.
Each call to `run_all_simulations_for_seed(...)` will:

- Run 12 simulation modes (2 demand × 6 relocation)
- Save each resulting log under `sim_outputs/{seed}/...`

In total, 240 runs (20×12) are processed in parallel by a pool of 6 worker processes.


In [None]:
# Launch multiprocessing pool
num_repeats = 20

# One positional-argument tuple per seed; every seed gets the same inputs.
args = [
    (
        seed,
        lambda_, mu_, P, Q_base, arrival_events,
        T, R, N, max_time
    )
    for seed in range(num_repeats)
]

with Pool(processes=6) as pool:
    # Submit every seed up front so the workers stay busy. pool.starmap would
    # block until *all* seeds finish, so a progress bar wrapped around it
    # shows no progress while the simulations are running.
    pending = [
        pool.apply_async(run_all_simulations_for_seed, seed_args)
        for seed_args in args
    ]
    # .get() waits for that seed and re-raises any exception from the worker,
    # so the bar advances as seeds complete (in submission order).
    for result in tqdm(pending, total=len(pending)):
        result.get()
