In [1]:
import numpy as np
import pandas as pd
from simulate import TaxiSimulator

# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [2]:
# Simulation parameters
Delta = 20  # length of one time block, in minutes

# Number of time blocks in one 24-hour day.
# Computed over the whole day (24 * 60 minutes) rather than per hour, so that
# block lengths which do not divide 60 (e.g. 40 or 120 minutes) are still
# counted correctly; `60 // Delta` would silently truncate those.
if Delta <= 0 or (24 * 60) % Delta != 0:
    raise ValueError(
        f"Delta must be a positive number of minutes that divides 1440, got {Delta}"
    )
T = (24 * 60) // Delta

Use synthetic data (currently disabled: the cell below is commented out)

In [3]:
# np.random.seed(42)
# R = 3     # 3 regions
# N = 20    # 20 vehicles

# # Rider arrival rates: 15 to 30 riders per hour during busy times, 2 to 8 otherwise
# lambda_ = np.zeros((T, R))
# for t in range(T):
#     if 16 <= t % T <= 36:  # "Daytime"
#         lambda_[t] = np.random.uniform(15, 30, R)   # Busy hours
#     else:
#         lambda_[t] = np.random.uniform(2, 8, R)     # Nighttime, low demand

# # Travel time rates: expected travel time between 5 and 15 minutes (1/12 to 1/4 hours)
# mu_ = np.random.uniform(4, 12, size=(T, R, R))  # 1/μ gives ~5-15 mins expected travel

# # Random valid P and Q matrices
# P = np.array([[np.random.dirichlet(np.ones(R)) for _ in range(R)] for _ in range(T)])
# Q = np.array([[np.random.dirichlet(np.ones(R)) for _ in range(R)] for _ in range(T)])

Use actual parameters (loaded from the NYC trip data files)

In [4]:
# Load the observed trip counts and the number of dates they were collected over.
with np.load('../nyc_trip/trip_counts.npz') as data:
    trip_counts = data['trip_counts']
    num_dates = data['num_dates']

# Load the travel-time rates.
with np.load('../nyc_trip/mu_cp.npz') as data:
    mu_ = data['mu']

# Compute the arrival rate (trips per hour) from the RAW counts.
# Delta / 60 is the block length in hours, so Delta / 60 * num_dates is the
# total observed time per block. This must happen before the zero-masking
# below: otherwise every empty entry along the last axis would add one phantom
# trip and inflate the arrival rate.
# NOTE(review): an origin with no observed trips in a block now gets a rate of
# exactly 0 -- confirm TaxiSimulator accepts zero rates.
lambda_ = trip_counts.sum(axis=2) / (Delta / 60 * num_dates)

# Replace zero counts by 1 so that no slice along the last axis sums to zero
# (avoids 0/0 in the normalization). Done out of place so the loaded array is
# not mutated; `trip_counts` still ends the cell holding the masked counts.
trip_counts = np.where(trip_counts == 0, 1, trip_counts)

# Normalize the masked counts along the last axis so each slice sums to 1.
P = trip_counts / trip_counts.sum(axis=2, keepdims=True)

# Load the time-dependent relocation matrix.
with np.load('../nyc_trip/Qs.npz') as data:
    Q = data['Qs']

# Take the number of time blocks and regions from the data itself.
# NOTE(review): this overrides the T derived from Delta earlier in the
# notebook; presumably both agree -- verify against the data files.
T, R, _ = P.shape

In [7]:
# Run configuration: horizon in hours and fleet size (number of vehicles)
max_time = 72
N = 5000

# Build the simulator from the loaded parameters and run it up to the horizon
sim = TaxiSimulator(
    T=T,
    R=R,
    N=N,
    lambda_=lambda_,
    mu_=mu_,
    P=P,
    Q=Q,
)
sim.run(max_time=max_time)

# Tabulate the simulator's event log, one row per logged event
df_log = pd.DataFrame(sim.logger)

# Sanity check: how many events of each type were logged
event_counts = df_log.loc[:, 'event_type'].value_counts()

# Where the fleet ended up: number of vehicles per location, ordered by location
vehicle_locations = [vehicle.location for vehicle in sim.vehicles]
vehicle_distribution = pd.Series(vehicle_locations).value_counts().sort_index()

In [8]:
# Show the event log; pandas truncates the display of a frame this long.
df_log

Unnamed: 0,time,event_type,data
0,0.000075,rider_arrival,"{'region': 141, 'origin_time_block': 0}"
1,0.000075,ride_start,"{'vehicle_id': 52, 'origin': 141, 'destination..."
2,0.000085,rider_arrival,"{'region': 166, 'origin_time_block': 0}"
3,0.000085,ride_start,"{'vehicle_id': 360, 'origin': 166, 'destinatio..."
4,0.000089,rider_arrival,"{'region': 231, 'origin_time_block': 0}"
...,...,...,...
3448234,71.999874,rider_arrival,"{'region': 226, 'origin_time_block': 71}"
3448235,71.999901,rider_lost,{'region': 61}
3448236,71.999901,rider_arrival,"{'region': 61, 'origin_time_block': 71}"
3448237,72.000050,rider_lost,{'region': 189}
