In [16]:
import pandas as pd
import numpy as np
import math
from datetime import datetime
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    roc_auc_score,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
)

from imblearn.ensemble import BalancedRandomForestClassifier
import joblib

from geopy.distance import geodesic

---

# Load the Data

In [17]:
# Load the four raw CSV tables used throughout the notebook.
# engine="pyarrow" selects pandas' PyArrow-based CSV parser.
schedule_data = pd.read_csv("./data/driver_schedule.csv", engine="pyarrow")
driver_data = pd.read_csv("./data/driver_update2.csv", engine="pyarrow")
order_data = pd.read_csv("./data/order.csv", engine="pyarrow")
# NOTE(review): the training cell further down re-reads order_driver.csv without
# the pyarrow engine — confirm both reads are meant to yield the same dtypes.
order_driver_data = pd.read_csv("./data/order_driver.csv", engine="pyarrow")

In [18]:
schedule_data

Unnamed: 0,driver_id,date,hour
0,14939,2025-04-07,0
1,10617,2025-04-07,0
2,5547,2025-04-07,0
3,19631,2025-04-07,0
4,13352,2025-04-07,0
...,...,...,...
115225,17442,2025-04-21,22
115226,20241,2025-04-21,23
115227,13710,2025-04-21,22
115228,10810,2025-04-21,22


---

## Train Driver-Order Accept Model

In [19]:
# Folder that holds the trained-model artifact. It is spelled exactly like the
# directory component of `model_path` (same case), so the directory created
# here is the one the model is later loaded from / saved to, even on a
# case-sensitive filesystem.
model_output_folder = "./Out"
os.makedirs(model_output_folder, exist_ok=True)
model_path = os.path.join(model_output_folder, "acceptance_model.pkl")

# If a saved model exists, set this to True to force re-training anyway.
retrain_model: bool = False

In [20]:
# Load the cached acceptance model if one exists (and re-training was not
# requested); otherwise train a new one and save it to `model_path`.
if os.path.exists(model_path) and not retrain_model:
    print(f"Model found at {model_path}. Loading model...")
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files you produced yourself.
    model = joblib.load(model_path)
    print("Model loaded successfully.")
else:
    # Use a dedicated name for the training table so the notebook-level
    # `order_driver_data` (used later for assignment lookups) is not replaced
    # by a filtered copy depending on which branch ran.
    training_data = pd.read_csv("./data/order_driver.csv")

    # NOTE(review): rows are restricted to status == 5 and outside == 0; the
    # meaning of those codes is not visible here — confirm against the data dictionary.
    training_data = training_data.loc[
        (training_data["status"] == 5) & (training_data["outside"] == 0)
    ]
    print(training_data.shape)
    print(training_data["accept"].describe())

    # Features and target. The column names and their order must match the
    # frame built in Driver.calculate_accept_prob.
    order_features = training_data[
        ["commission", "driver_distance", "hour", "weather_code", "work_time_minutes"]
    ]
    print(order_features.head())
    acceptance_status = training_data["accept"]

    # Train-test split
    features_train, features_test, target_train, target_test = train_test_split(
        order_features, acceptance_status, test_size=0.2, random_state=42
    )

    # Train model (BalancedRandomForest handles imbalance natively)
    model = BalancedRandomForestClassifier(random_state=42)
    model.fit(features_train, target_train)

    # Make predictions
    y_pred = model.predict(features_test)
    y_probs = model.predict_proba(features_test)[:, 1]

    # Evaluate model performance
    print("Precision:", precision_score(target_test, y_pred))
    print("Recall:", recall_score(target_test, y_pred))
    print("F1 Score:", f1_score(target_test, y_pred))
    print("Classification Report:\n", classification_report(target_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(target_test, y_pred))
    print("ROC AUC Score:", roc_auc_score(target_test, y_probs))

    # Save the trained classifier to the same path the load branch reads from,
    # creating the parent directory if it does not exist yet.
    os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
    joblib.dump(model, model_path)

Model found at ./Out/acceptance_model.pkl. Loading model...
Model loaded successfully.


---

## Get Weather Code

In [21]:
class WeatherService:
    """Hour-resolution lookup of weather codes loaded from a CSV file."""

    def __init__(self, weather_csv_path: str):
        """
        Read the weather table and index it by hour.

        Args:
            weather_csv_path (str): Path to a CSV with 'datetime' and
                'weather_code' columns.
        """
        df = pd.read_csv(weather_csv_path)
        # Parse 'datetime' and truncate each value to the start of its hour.
        # The lowercase "h" alias is used because uppercase "H" is deprecated
        # in recent pandas releases and emits a FutureWarning.
        df["datetime"] = pd.to_datetime(df["datetime"]).dt.floor("h")
        # Map hour-start timestamp -> weather code. If several rows fall in the
        # same hour, the last one wins.
        self.weather_data = df.set_index("datetime")["weather_code"].to_dict()

    def get_weather_code(self, dt) -> int:
        """
        Get the weather code for the hour containing datetime dt.

        Args:
            dt: A datetime-like object supporting ``replace(minute=, second=, microsecond=)``.

        Returns:
            The stored weather code for that hour, or 1 (sunny) when the hour
            is not present in the table.
        """
        # Microseconds must be cleared as well, otherwise a timestamp with
        # fractional seconds never matches an hour-start key.
        hour_key = dt.replace(minute=0, second=0, microsecond=0)
        # Default: 1 (sunny)
        return self.weather_data.get(hour_key, 1)
    
# Notebook-level service instance; Order.__init__ reads this global by name to
# look up the weather code for each order.
weather_service = WeatherService(weather_csv_path="./data/weather.csv")

  df["datetime"] = pd.to_datetime(df["datetime"]).dt.floor("H")


---

## Define the Order

In [None]:
class Order:
    """
    Represents a single customer order with details about pickup, dropoff, pricing,
    and calculated commission/revenue.
    """

    def __init__(
        self,
        order_id: int,
        datetime_str: str,
        pickup_area: int,
        dropoff_area: int,
        pickup_lat: float,
        pickup_lon: float,
        dropoff_lat: float,
        dropoff_lon: float,
        customer_price: float,
        commissionPercent: float,
    ):
        """
        Initializes an Order object.

        Args:
            order_id (int): Unique identifier for the order.
            datetime_str (str | pandas.Timestamp | datetime): Date and time of the
                order creation. Strings are parsed with ``pandas.Timestamp``;
                Timestamp and datetime objects are accepted as-is.
            pickup_area (int): Identifier for the pickup geographical area.
            dropoff_area (int): Identifier for the dropoff geographical area.
            pickup_lat (float): Latitude coordinate of the pickup location.
            pickup_lon (float): Longitude coordinate of the pickup location.
            dropoff_lat (float): Latitude coordinate of the dropoff location.
            dropoff_lon (float): Longitude coordinate of the dropoff location.
            customer_price (float): The total price paid by the customer for the order.
            commissionPercent (float): The fraction of the customer price taken as
                platform commission (e.g., 0.20 for 20%).
        """
        self.order_id = order_id
        # Normalize every accepted input (string, pandas Timestamp, datetime)
        # to a plain datetime.datetime object.
        self.datetime: datetime = pd.Timestamp(datetime_str).to_pydatetime()

        self.pickup_area = pickup_area
        self.dropoff_area = dropoff_area
        self.pickup_lat = pickup_lat
        self.pickup_lon = pickup_lon
        self.dropoff_lat = dropoff_lat
        self.dropoff_lon = dropoff_lon
        self.customer_price = customer_price
        self.commissionPercent = commissionPercent

        # commissionPercent is the platform's cut, so the driver keeps the
        # remainder. This matches how DeliveryEnv.step recomputes
        # driver_commission after choosing a commission rate.
        self.driver_commission = self.customer_price * (1 - self.commissionPercent)
        # The platform's revenue is its commission share of the price.
        self.platform_revenue = self.customer_price * self.commissionPercent
        # Extract the hour of the day when the order was placed (0-23)
        self.hour_of_day = self.datetime.hour
        # Uses the notebook-level `weather_service` instance.
        self.weather_code = weather_service.get_weather_code(self.datetime)

    def __repr__(self):
        """
        Returns a string representation of the Order object for easy debugging and display.
        """
        return (
            f"Order(\n"
            f"    order_id={self.order_id},\n"
            f"    datetime={self.datetime},\n"
            f"    pickup_area={self.pickup_area},\n"
            f"    dropoff_area={self.dropoff_area},\n"
            f"    pickup_lat={self.pickup_lat},\n"
            f"    pickup_lon={self.pickup_lon},\n"
            f"    dropoff_lat={self.dropoff_lat},\n"
            f"    dropoff_lon={self.dropoff_lon},\n"
            f"    customer_price={self.customer_price:.2f},\n"
            f"    commissionPercent={self.commissionPercent:.2f},\n"
            f"    driver_commission={self.driver_commission:.2f},\n"
            f"    platform_revenue={self.platform_revenue:.2f},\n"
            f"    hour_of_day={self.hour_of_day},\n"
            f"    weather_code={self.weather_code}\n"
            f")"
        )

---

## Define the Driver

In [23]:
class Driver:
    """
    A delivery driver: identity, current position, working state, and the
    classifier used to simulate whether the driver accepts an offered order.
    """

    def __init__(
        self,
        driver_id: int,
        current_lat: float,
        current_lon: float,
        current_area: int,
        work_time_minutes: float,
        available: bool = True,
        accepted_order: bool = False,
    ):
        """
        Initializes a Driver object.

        Args:
            driver_id (int): Unique identifier for the driver.
            current_lat (float): Current latitude coordinate of the driver's location.
            current_lon (float): Current longitude coordinate of the driver's location.
            current_area (int): Identifier for the driver's current geographical area.
            work_time_minutes (float): Total minutes the driver has worked.
            available (bool, optional): True if the driver is available for new orders, False otherwise. Defaults to True.
            accepted_order (bool, optional): True if the driver has accepted an order and is en route, False otherwise. Defaults to False.
        """
        self.driver_id = driver_id
        self.current_lat = current_lat
        self.current_lon = current_lon
        self.current_area = current_area
        self.work_time_minutes = work_time_minutes
        self.available = available
        self.accepted_order = accepted_order
        self.model = None  # Acceptance classifier; must be assigned by the caller

        print(
            f"Driver {self.driver_id} is initialized with location ({self.current_lat}, {self.current_lon})"
        )

    def distance_to(self, order: "Order") -> float:
        """
        Return the geodesic distance in meters from the driver's current
        location to the pickup location of the given order.
        """
        point_current = (self.current_lat, self.current_lon)
        point_pickup = (order.pickup_lat, order.pickup_lon)
        distance = geodesic(point_current, point_pickup).m

        print(f"The distance calculated by geodesic is {distance}")

        return distance

    def calculate_accept_prob(
        self,
        order: "Order",
    ) -> float:
        """
        Predict the probability that this driver accepts `order`.

        Builds a one-row feature frame (commission, driver_distance, hour,
        weather_code, work_time_minutes) and returns column 1 of
        ``self.model.predict_proba``.

        Raises:
            ValueError: If no model has been assigned to ``self.model``.
        """
        if self.model is None:
            raise ValueError(
                "Driver model not initialized! Must be set in DeliverySimulator."
            )

        # Key names and order mirror the columns the classifier was trained on.
        features = {
            "commission": [order.driver_commission],
            "driver_distance": [self.distance_to(order)],
            "hour": [order.hour_of_day],
            "weather_code": [order.weather_code],
            "work_time_minutes": [self.work_time_minutes],
        }
        print("Features input to the model for prediction:")
        print(features)
        return self.model.predict_proba(pd.DataFrame(features))[0][1]

    def _is_scheduled(self, order_date, order_hour, schedule_data: pd.DataFrame) -> bool:
        """Return True if schedule_data has a row for this driver on order_date at order_hour."""
        # Narrow by driver and hour first so the date column is only parsed for
        # the handful of remaining rows instead of the whole schedule table.
        candidate_rows = schedule_data[
            (schedule_data["driver_id"] == self.driver_id)
            & (schedule_data["hour"] == order_hour)
        ]
        if candidate_rows.empty:
            return False
        return bool(
            (pd.to_datetime(candidate_rows["date"]).dt.date == order_date).any()
        )

    def decide_acceptance(
        self,
        order: "Order",
        schedule_data: pd.DataFrame,
        threshold: "float | None" = None,
    ) -> bool:
        """
        Decide whether the driver accepts `order`.

        The driver declines when flagged unavailable or when `schedule_data`
        has no row for this driver at the order's date and hour. Otherwise the
        order is accepted when `threshold` is below the predicted acceptance
        probability.

        Args:
            order (Order): The order being offered.
            schedule_data (pd.DataFrame): Schedule table with 'driver_id',
                'date' and 'hour' columns.
            threshold (float, optional): Value compared against the predicted
                probability. When omitted, a fresh uniform random number in
                [0, 1) is drawn on every call.

        Returns:
            bool: True if the driver accepts the order.
        """
        # A default of np.random.random() in the signature would be evaluated
        # once at definition time and reused for every call; draw it here instead.
        if threshold is None:
            threshold = np.random.random()

        order_data_ymd = order.datetime.date()
        order_hour = order.hour_of_day

        if not self.available:
            print(f"Driver {self.driver_id} is not available.")
            return False

        # Being off-schedule for this hour must not flip `self.available`
        # permanently: the driver may be scheduled again for a later order.
        if not self._is_scheduled(order_data_ymd, order_hour, schedule_data):
            print(f"Driver {self.driver_id} is not scheduled to work at {order_data_ymd} {order_hour:02d}:00.")
            return False

        prob = self.calculate_accept_prob(order)

        accepted = bool(threshold < prob)
        if accepted:
            print(
                f"Driver {self.driver_id} accept the order with probability of {prob} and threshold {threshold}"
            )
            self.accepted_order = True
        else:
            print(
                f"Driver {self.driver_id} did not accept the order with probability of {prob} and threshold {threshold}"
            )
        return accepted

    def update_location(self, lat: float, lon: float, area: int) -> None:
        """
        Move the driver to a new position.

        The (lat, lon, area) signature matches the call made in
        DeliveryEnv.step after an order is accepted.
        """
        self.current_lat = lat
        self.current_lon = lon
        self.current_area = area
        print(
            f"Driver {self.driver_id} location moves to ({self.current_lat}, {self.current_lon})"
        )

---

## Test Order and Driver

In [24]:
# test_driver = Driver(
#     driver_id=19852,
#     current_lat=34.0,
#     current_lon=-118.0,
#     current_area=100,
#     work_time_minutes=300,
#     available=True,
# )

# test_order = Order(
#     order_id=1,
#     datetime_str="2025-04-07 08:07:35",
#     pickup_area=101,
#     dropoff_area=202,
#     pickup_lat=34.05,
#     pickup_lon=-118.05,
#     dropoff_lat=34.1,
#     dropoff_lon=-118.1,
#     customer_price=100.0,
#     commissionPercent=0.20,
# )

# test_weather_code = 1
# test_driver.model = model

In [25]:
# # manully set threshold
# decide_pred = test_driver.decide_acceptance(
#     test_order, test_weather_code, threshold=0.5
# )


# # random threshold
# decide_pred = test_driver.decide_acceptance(test_order, test_weather_code)

In [26]:
# location_pred = test_driver.update_location(test_order)

---

In [27]:
# Preview the first orders: build an Order for every row (index label 0..100)
# that has all required fields, and print it. After the loop `order` is the
# last Order that was successfully built; later cells use it.
PREVIEW_LAST_INDEX = 100

for index, order_row in order_data.iterrows():
    # Check the limit before anything else so that a skipped row can never
    # bypass it and let the loop run over the whole table.
    if index > PREVIEW_LAST_INDEX:
        break

    # Order constructor argument -> value taken from the row.
    order_fields = {
        "order_id": order_row["order_id"],
        "datetime_str": order_row["datetime"],
        "pickup_area": order_row["pickup_area2"],
        "dropoff_area": order_row["dropoff_area2"],
        "pickup_lat": order_row["pickup_lat"],
        "pickup_lon": order_row["pickup_lon"],
        "dropoff_lat": order_row["dropoff_lat"],
        "dropoff_lon": order_row["dropoff_lon"],
        "customer_price": order_row["customer_price"],
    }

    # Skip to the next row if any required value is missing.
    if any(pd.isna(value) for value in order_fields.values()):
        print()
        print(f"--- Skipping Order (Index: {index}) due to missing values ---")
        continue

    # The row is kept in `order_row`, so `order` always refers to an Order
    # object and never to a raw pandas row.
    order = Order(**order_fields, commissionPercent=0.20)
    # The Order already looked the weather code up; reuse it.
    weather_code = order.weather_code

    print()
    print(f"--- Order Details (Index: {index}) ---")
    print(order)
    print("---------------------------------")


--- Skipping Order (Index: 0) due to missing values ---

--- Order Details (Index: 1) ---
Order(
    order_id=4863452,
    datetime=2025-04-07 08:08:52,
    pickup_area=598.0,
    dropoff_area=328.0,
    pickup_lat=32.6959297,
    pickup_lon=51.7367204,
    dropoff_lat=32.6326779,
    dropoff_lon=51.6529232,
    customer_price=96000.00,
    commissionPercent=0.20,
    driver_commission=76800.00,
    platform_revenue=19200.00,
    hour_of_day=8
    weather_code=0.0
)
---------------------------------

--- Order Details (Index: 2) ---
Order(
    order_id=4863453,
    datetime=2025-04-07 08:10:17,
    pickup_area=396.0,
    dropoff_area=595.0,
    pickup_lat=32.651796,
    pickup_lon=51.6078153,
    dropoff_lat=32.69834,
    dropoff_lon=51.7077533,
    customer_price=90000.00,
    commissionPercent=0.20,
    driver_commission=72000.00,
    platform_revenue=18000.00,
    hour_of_day=8
    weather_code=0.0
)
---------------------------------

--- Order Details (Index: 3) ---
Order(
    ord

In [15]:
# Smoke test: build one driver by hand and ask whether it accepts `order`,
# i.e. whatever object the name `order` is currently bound to in the notebook.
# NOTE(review): the driver is placed at (34.0, -118.0) while the orders printed
# above are around (32.7, 51.7); the recorded output reports a distance of
# roughly 12,544 km, so this only checks that the call path works.
test_driver = Driver(
    driver_id=19852,
    current_lat=34.0,
    current_lon=-118.0,
    current_area=100,
    work_time_minutes=300,
    available=True,
)
# The classifier has to be attached manually; Driver.__init__ leaves model as None.
test_driver.model = model
# A fixed threshold of 0.5 makes the decision deterministic for a given model.
decide_pred = test_driver.decide_acceptance(
    order=order, schedule_data=schedule_data, threshold=0.5

)

print(decide_pred)

Driver 19852 is initialized with location (34.0, -118.0)
The distance calculated by geodesic is 12544044.049690763
Features input to the model for prediction:
{'commission': [76800.0], 'driver_distance': [12544044.049690763], 'hour': [8], 'weather_code': [0.0], 'work_time_minutes': [300]}
Driver 19852 accept the order with probability of 0.64 and threshold 0.5
True


### Factors of Rider Accepting an Order
1. "commission": [order.driver_commission]
2. "driver_distance": [self.distance_to(order)]
3. "hour": [order.hour_of_day]
4. "weather_code": [weather_code]
5. *"work_time_minutes": [self.work_time_minutes]*

* `original_driver_set` for a specific order (identified by `order_id`): look up the system's recorded assignments for this order and collect the unique driver ids.
* Use those ids (`original_driver_assign_ids`) to fetch each driver's info from `driver_data`: `driver_id`, `driver_lat` & `driver_lon`, `driver_area`.
* `update_driver_set` contains `driver_id`, `driver_lat` & `driver_lon`, `driver_area`.
    * Create this DataFrame with the columns `driver_id`, `driver_lat`, `driver_lon`, `driver_area`; it should be empty when the environment is initialized.
    * Check whether it contains any `driver_id` from `original_driver_assign_ids`.

In [None]:
# Start with an empty frame that only defines the columns; no rows are added here.
update_driver_set = pd.DataFrame(
    columns=["driver_id", "driver_lat", "driver_lon", "driver_area"]
)

update_driver_set

In [None]:
order.pickup_area

In [None]:
# 4863457
# order.order_id = 4863457

In [None]:
# Rows of order_driver_data recorded for the current `order`, and the distinct
# driver ids that appear in them.
original_driver_assign_set = order_driver_data[order_driver_data["order_id"] == order.order_id]
original_driver_assign_ids = original_driver_assign_set['driver_id'].unique().tolist()
original_driver_assign_ids
# original_driver_set

In [None]:
# driver_data rows for the current order, restricted to the drivers listed in
# original_driver_assign_ids.
original_driver_set = driver_data[
    (driver_data['driver_id'].isin(original_driver_assign_ids)) &
    (driver_data['order_id'] == order.order_id)
]
# Because in the driver data set, if a rider accepts an order, the platform will continue to record the update of his location
# keep the first record to get the rider's original position for the order
# NOTE(review): keep='first' relies on the row order of driver_data — confirm rows are in chronological order.
original_driver_set = original_driver_set.drop_duplicates(subset=['driver_id'], keep='first')
original_driver_set

In [None]:
original_driver_set

update_driver_set

* Driver Schedule

In [None]:
# from collections import defaultdict

# schedule = defaultdict(set)
# for _, row in schedule_data.iterrows():
#     driver_id = row["driver_id"]
#     date = row["date"]
#     hour = row["hour"]
#     schedule[(driver_id, date)].add(hour)

* Driver Attempts

In [None]:
# attempts = defaultdict(list)

# for _, row in driver_data.iterrows():
#     order_id = row["order_id"]
#     driver_id = row["driver_id"]
#     datetime = row["datetime"]
#     lat, lon, area = row["driver_lat"], row["driver_lon"], row["driver_area"]
#     work_time_minutes = row["work_time_minutes"]
#     attempts[order_id].append((driver_id, datetime, lat, lon, area, work_time_minutes))

In [None]:
# len(driver_data['driver_area'].unique().tolist())
# len(order_data['pickup_area2'].unique().tolist())
# len(order_data['dropoff_area2'].unique().tolist())

---

# Define Ride Hail Env

In [None]:
# import gym
import gymnasium as gym
from gymnasium import spaces
from collections import defaultdict


class DeliveryEnv(gym.Env):
    """
    Environment that offers each order of a day to candidate drivers, one
    order-driver pair per step.

    The action is the commission rate (platform share of the customer price)
    offered for the current order. The reward is the platform's share of the
    price when the driver accepts, and 0 otherwise.

    NOTE(review): `reset` returns only the observation and `step` returns a
    4-tuple ``(obs, reward, done, info)``. That is the legacy gym calling
    convention, not the gymnasium one; it is kept as-is so existing callers
    keep working — confirm which convention the training code expects.
    """

    # Divisor that scales customer_price into the [0, 1] observation range.
    MAX_CUSTOMER_PRICE = 10100000.0

    def __init__(
        self,
        orders: list,
        driver_data: pd.DataFrame,
        schedule_data: pd.DataFrame,
        weather_service: WeatherService,
        acceptance_model=None,
    ):
        """
        Args:
            orders (list): Order objects; they are sorted by `datetime`.
            driver_data (pd.DataFrame): One row per recorded driver/order
                attempt with columns order_id, driver_id, datetime,
                driver_lat, driver_lon, driver_area, work_time_minutes.
            schedule_data (pd.DataFrame): Driver schedule with columns
                driver_id, date, hour.
            weather_service (WeatherService): Weather lookup used for observations.
            acceptance_model (optional): Classifier assigned to every Driver.
                When omitted, the notebook-level variable `model` is used if
                it exists.
        """
        super().__init__()

        self.orders = sorted(orders, key=lambda o: o.datetime)
        self.weather = weather_service
        # Kept because Driver.decide_acceptance needs the schedule table itself.
        self.schedule_data = schedule_data

        # Drivers cannot predict without a model; fall back to the notebook's
        # trained `model` so the four-argument constructor call keeps working.
        if acceptance_model is None:
            acceptance_model = globals().get("model")
        self.acceptance_model = acceptance_model

        self.driver_schedule = self._load_driver_schedule(schedule_data)
        self.driver_attempts = self._load_driver_attempts(driver_data)
        self.drivers_by_id = {}  # Cache all drivers by ID
        self.area_drivers = self._group_drivers_by_area()
        # Drivers whose position was changed by the simulation (accepted an order)
        self.drivers_with_updated_location = set()
        # Define action space (continuous commission rate between 0 and 1)
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

        # Define state space (order + specific driver attributes)
        # NOTE(review): the Discrete sizes assume area ids < 501 and weather
        # codes < 4 — confirm against the data.
        self.observation_space = spaces.Dict(
            {
                "customer_price": spaces.Box(
                    low=0.0, high=1.0, shape=(1,), dtype=np.float32
                ),
                "pickup_area": spaces.Discrete(501),
                "dropoff_area": spaces.Discrete(501),
                "hour_of_day": spaces.Discrete(24),
                "day_of_week": spaces.Discrete(7),
                "weather": spaces.Discrete(4),
                "driver_area": spaces.Discrete(501),
                "working_status": spaces.Discrete(2),
            }
        )

        # Orders grouped by calendar day, each group in chronological order.
        self.orders_by_day = defaultdict(list)
        order: Order
        for order in self.orders:
            self.orders_by_day[order.datetime.date()].append(order)

        # Tracking variables
        self.assigned_order = 0  # Number of orders accepted in the current episode
        self.current_day_index = 0  # Tracks training epoch (day index)
        self.current_order_index = 0  # Tracks current order within the day
        self.current_driver_index = 0  # Position inside the current driver pool
        self.current_day = None  # Current date being trained
        self.updated_drivers = (
            set()
        )  # Track drivers who have accepted at least one order
        self.next_order = False  # True when the last step moved on to a new order
        self.episode_rewards = 0
        self.episode_steps = 0
        self.total_driver_commission = 0.0
        self.max_steps = 30000

        # No day is selected until reset(). Start with empty lists instead of
        # indexing the defaultdict with None, which would insert a bogus key.
        self.current_orders = []
        self.current_order_driver_pool = []

    def _load_driver_schedule(self, schedule_data: pd.DataFrame):
        """
        Build a lookup ``(driver_id, date) -> set of scheduled hours``.

        The 'date' column is converted to ``datetime.date`` objects because
        lookups are made with ``order.datetime.date()``; raw CSV strings would
        never match those keys.
        """
        schedule = defaultdict(set)
        schedule_days = pd.to_datetime(schedule_data["date"]).dt.date
        for driver_id, day, hour in zip(
            schedule_data["driver_id"], schedule_days, schedule_data["hour"]
        ):
            schedule[(driver_id, day)].add(hour)
        return schedule

    def _load_driver_attempts(self, driver_data: pd.DataFrame):
        """
        Build a lookup ``order_id -> list of attempts``.

        Each attempt is the tuple
        ``(driver_id, datetime, lat, lon, area, work_time_minutes)``, kept in
        the row order of `driver_data`.
        """
        attempts = defaultdict(list)
        attempt_columns = [
            "order_id",
            "driver_id",
            "datetime",
            "driver_lat",
            "driver_lon",
            "driver_area",
            "work_time_minutes",
        ]
        # itertuples avoids building a Series per row, unlike iterrows.
        for order_id, *attempt in driver_data[attempt_columns].itertuples(
            index=False, name=None
        ):
            attempts[order_id].append(tuple(attempt))
        return attempts

    def _group_drivers_by_area(self):
        """
        Create one Driver per distinct driver_id and group them by area.

        A driver is created from the first attempt in which it appears and is
        given the environment's acceptance model. Also fills `drivers_by_id`.
        """
        area_drivers = defaultdict(list)
        for driver_attempts in self.driver_attempts.values():
            for (
                driver_id,
                _recorded_at,
                lat,
                lon,
                area,
                work_time_minutes,
            ) in driver_attempts:
                if driver_id in self.drivers_by_id:
                    continue
                driver = Driver(
                    driver_id=driver_id,
                    current_lat=lat,
                    current_lon=lon,
                    current_area=area,
                    work_time_minutes=work_time_minutes,
                )
                # Without a model Driver.calculate_accept_prob raises ValueError.
                driver.model = self.acceptance_model
                self.drivers_by_id[driver_id] = driver
                area_drivers[area].append(driver)
        return area_drivers

    def _get_driver_pool(self, order: Order):
        """
        Return the drivers that may be offered `order`, without duplicates.

        The pool is built from two sources:
          1. Drivers recorded in the historical attempts for this order. A
             driver that has not been moved by the simulation is placed at the
             position of its first recorded attempt for this order. A driver
             that was moved keeps its simulated position and is only included
             if that position is in the order's pickup area.
          2. Drivers currently in the pickup area that are scheduled to work at
             the order's time.
        """
        valid_drivers = []
        seen_driver_ids = set()

        # 1) Historical attempts recorded for this order.
        for driver_id, _recorded_at, lat, lon, area, _work_time in self.driver_attempts.get(
            order.order_id, []
        ):
            if driver_id in seen_driver_ids:
                # Later rows for the same driver are location updates; the
                # first row is the driver's position when the order was offered.
                continue
            driver: Driver = self.drivers_by_id.get(driver_id)
            if not driver:
                continue
            seen_driver_ids.add(driver_id)

            if driver_id in self.drivers_with_updated_location:
                # Driver moved to another area, so exclude them; otherwise keep
                # the simulated (updated) location untouched.
                if driver.current_area != order.pickup_area:
                    continue
            else:
                driver.current_lat, driver.current_lon, driver.current_area = (
                    lat,
                    lon,
                    area,
                )

            valid_drivers.append(driver)

        # 2) Drivers currently mapped to the pickup area.
        for driver in self.area_drivers.get(order.pickup_area, []):
            if driver.driver_id in seen_driver_ids:
                continue
            # The area mapping can lag behind a driver's position; re-check it.
            if driver.current_area != order.pickup_area:
                continue
            if self._is_driver_working(driver, order.datetime):
                seen_driver_ids.add(driver.driver_id)
                valid_drivers.append(driver)

        if len(valid_drivers) == 0:
            print(
                f"WARNING: No available drivers for Order {order.order_id} in pickup area {order.pickup_area}!"
            )
        return valid_drivers

    def _select_assignable_order(self) -> bool:
        """
        Load the driver pool for the current order, skipping orders without drivers.

        Starting at `current_order_index`, advances past every order whose pool
        is empty (such an order cannot be offered to anyone). Returns True when
        an order with at least one candidate driver is selected, False when the
        day's orders are exhausted.
        """
        while self.current_order_index < len(self.current_orders):
            pool = self._get_driver_pool(self.current_orders[self.current_order_index])
            if pool:
                self.current_order_driver_pool = pool
                self.current_driver_index = 0
                return True
            self.current_order_index += 1

        self.current_order_driver_pool = []
        self.current_driver_index = 0
        return False

    def _normalize_state(self, order: Order):
        """Return the order attributes used in observations, scaled where needed."""
        return {
            "pickup_area": int(order.pickup_area),
            "dropoff_area": int(order.dropoff_area),
            "hour_of_day": int(order.hour_of_day),
            "weather_code": int(self.weather.get_weather_code(order.datetime)),
            # Scaled by the maximum price threshold and capped so the value
            # stays inside the declared [0, 1] observation bounds.
            "customer_price": min(order.customer_price / self.MAX_CUSTOMER_PRICE, 1.0),
            # NOTE(review): this divides by 100, which is only right if
            # commissionPercent is on a 0-100 scale at this point; step() stores
            # a 0-1 fraction. The value is not part of the observation — confirm intent.
            "commissionPercent": order.commissionPercent / 100.0,
        }

    def _get_observation(self):
        """
        Build the observation for the current order-driver pair.

        Returns None when the day's orders are exhausted or no driver pool is loaded.
        """
        if self.current_order_index >= len(self.current_orders):
            return None
        if not self.current_order_driver_pool:
            return None

        order: Order = self.current_orders[self.current_order_index]
        driver: Driver = self.current_order_driver_pool[self.current_driver_index]
        normalized_state = self._normalize_state(order)

        obs_dict = {
            "customer_price": np.array(
                [normalized_state["customer_price"]], dtype=np.float32
            ),
            "pickup_area": normalized_state["pickup_area"],
            "dropoff_area": normalized_state["dropoff_area"],
            "hour_of_day": normalized_state["hour_of_day"],
            "day_of_week": order.datetime.weekday(),
            "weather": normalized_state["weather_code"],
            "driver_area": driver.current_area,
            "working_status": 1 if driver.available else 0,
        }

        return obs_dict

    def _is_done(self, current_time=None):
        """
        Return True once every order of the current day has been processed.

        `current_time` is accepted for compatibility with callers that pass
        the order time; it is not used.
        """
        if self.current_order_index >= len(self.current_orders):
            print("Current training day completed!")
            print("# of assigned orders ", self.assigned_order)
            return True  # End the current day and reset

        return False

    def _is_driver_working(self, driver: Driver, datetime: datetime):
        """Checks if a driver is scheduled to work at a given time."""
        date = datetime.date()
        hour = datetime.hour
        return hour in self.driver_schedule.get((driver.driver_id, date), set())

    def _get_next_order_time(self):
        """Return the timestamp of the next order, or of the current one if it is the last."""
        current_order: Order
        if self.current_order_index + 1 < len(self.current_orders):
            current_order = self.current_orders[self.current_order_index + 1]
        else:
            current_order = self.current_orders[self.current_order_index]
        return current_order.datetime

    def reset(self, *, seed=None, options=None):
        """
        Start a new episode for the day selected by `current_day_index`.

        Args:
            seed (int, optional): When given, seeds NumPy's global random
                generator, which is the source of acceptance thresholds.
            options: Accepted for API compatibility; not used.

        Returns:
            The first observation, or None if the day has no assignable order.
        """
        if seed is not None:
            np.random.seed(seed)

        days = list(self.orders_by_day.keys())
        if days:
            self.current_day = days[self.current_day_index]
            self.current_orders = self.orders_by_day[self.current_day]
        else:
            self.current_day = None
            self.current_orders = []

        self.current_order_index = 0
        self.current_driver_index = 0
        self.updated_drivers.clear()
        self.drivers_with_updated_location.clear()
        self.next_order = False

        self.episode_rewards = 0
        self.episode_steps = 0
        self.assigned_order = 0
        self.total_driver_commission = 0.0

        # Count order-driver pairs for the whole day (debug information).
        self.current_day_order_driver_pairs = sum(
            len(self._get_driver_pool(order)) for order in self.current_orders
        )
        print(
            f"DEBUG: Resetting for day {self.current_day}, Orders: {len(self.current_orders)}, Order-Driver Pairs: {self.current_day_order_driver_pairs}"
        )

        if not self._select_assignable_order():
            print("DEBUG: no order at all!")

        return self._get_observation()

    def step(self, action):
        """
        Offer the current order to the current driver at the given commission rate.

        Args:
            action: Array-like whose first element is the commission rate; it
                is clipped to [0, 1].

        Returns:
            tuple: ``(obs, reward, done, info)``. `reward` is
            ``customer_price * commission rate`` if the driver accepts, else 0.
            `info["episode"]` holds the episode totals when `done` is True.
        """
        order: Order
        driver: Driver

        self.next_order = False

        order = self.current_orders[self.current_order_index]
        driver = self.current_order_driver_pool[self.current_driver_index]

        # Offer commission rate
        commission_rate = float(
            np.clip(np.asarray(action, dtype=float).reshape(-1)[0], 0.0, 1.0)
        )
        order.commissionPercent = commission_rate
        order.driver_commission = order.customer_price * (1 - commission_rate)
        order.platform_revenue = order.customer_price * commission_rate

        # decide_acceptance expects the schedule table as its second argument.
        # The threshold is drawn here so every offer gets a fresh random value.
        accepted = driver.decide_acceptance(
            order,
            schedule_data=self.schedule_data,
            threshold=np.random.random(),
        )

        # Update working status for next order no matter what
        driver.available = self._is_driver_working(driver, self._get_next_order_time())

        move_to_next_order = False
        if accepted:
            self.assigned_order += 1
            reward = order.customer_price * order.commissionPercent
            # Only an accepted offer pays the driver.
            self.total_driver_commission += order.driver_commission

            # Track old area before moving
            old_area = driver.current_area

            # Move the driver to the dropoff location.
            driver.current_lat = order.dropoff_lat
            driver.current_lon = order.dropoff_lon
            driver.current_area = order.dropoff_area
            self.updated_drivers.add(driver.driver_id)
            self.drivers_with_updated_location.add(driver.driver_id)

            # Update area_drivers mapping
            if driver in self.area_drivers.get(old_area, []):
                self.area_drivers[old_area].remove(driver)
            if driver not in self.area_drivers[driver.current_area]:
                self.area_drivers[driver.current_area].append(driver)

            move_to_next_order = True
        else:
            # Move to next driver for the same order
            reward = 0
            self.current_driver_index += 1
            if self.current_driver_index >= len(self.current_order_driver_pool):
                # Every candidate declined: the order stays unassigned.
                move_to_next_order = True

        if move_to_next_order:
            self.current_order_index += 1
            self.next_order = True
            # Load the pool for the next order that has at least one driver.
            self._select_assignable_order()

        self.episode_steps += 1
        self.episode_rewards += reward

        done = self._is_done()

        info = {}

        if done:
            info["episode"] = {
                "r": self.episode_rewards,  # Report FINAL totals
                "l": self.episode_steps,
                "a": self.assigned_order,
                "o": len(self.current_orders),
                "c": self.total_driver_commission,
            }

        obs = self._get_observation()

        return obs, reward, done, info

In [None]:
def load_data(data_dir="./data", last_day="2025-04-16", start_hour=8, end_hour=24):
    """Load the order, driver and schedule CSVs and filter the orders.

    Orders are kept only when all of the following hold:

    * ``outside == 0`` and ``pickup_area`` is not null,
    * the hour of the ``datetime`` column is in ``[start_hour, end_hour)``,
    * the ``date`` column is on or before ``last_day``.

    The ``datetime`` column is returned exactly as read from the CSV; it is
    only parsed temporarily to evaluate the hour filter. The ``date`` column is
    converted to ``datetime.date`` objects.

    Parameters
    ----------
    data_dir : str or path-like, default "./data"
        Folder containing ``order.csv``, ``driver_update2.csv`` and
        ``driver_schedule.csv``.
    last_day : str or date-like, default "2025-04-16"
        Latest order date (inclusive) to keep; anything ``pd.to_datetime``
        accepts.
    start_hour : int, default 8
        First hour of day (inclusive) to keep.
    end_hour : int, default 24
        Hour of day (exclusive) at which to stop keeping orders.

    Returns
    -------
    tuple
        ``(orders_df, driver_data, schedule_data, valid_days)`` where
        ``orders_df`` is the filtered orders restricted to the columns listed
        below, ``driver_data`` and ``schedule_data`` are the unmodified CSV
        contents, and ``valid_days`` is the list of distinct order dates that
        survived filtering, in order of first appearance.
    """
    orders_raw = pd.read_csv(os.path.join(data_dir, "order.csv"))
    driver_data = pd.read_csv(os.path.join(data_dir, "driver_update2.csv"))
    schedule_data = pd.read_csv(os.path.join(data_dir, "driver_schedule.csv"))

    in_scope = orders_raw.loc[
        (orders_raw["outside"] == 0) & (orders_raw["pickup_area"].notnull())
    ]

    # Parse the timestamp once and reuse it for both bounds of the hour window.
    order_hour = pd.to_datetime(in_scope["datetime"]).dt.hour
    # Explicit copy: a column is assigned below, and writing into a filtered
    # slice is what triggers pandas' SettingWithCopyWarning.
    orders_df = in_scope.loc[
        (order_hour >= start_hour) & (order_hour < end_hour)
    ].copy()

    orders_df["date"] = pd.to_datetime(orders_df["date"]).dt.date
    cutoff = pd.to_datetime(last_day).date()
    orders_df = orders_df[orders_df["date"] <= cutoff]
    valid_days = orders_df["date"].unique().tolist()

    orders_df = orders_df[
        [
            "order_id",
            "datetime",
            "pickup_area",
            "dropoff_area",
            "pickup_lat",
            "pickup_lon",
            "dropoff_lat",
            "dropoff_lon",
            "customer_price",
            "commissionPercent",
            "date",
        ]
    ]

    return orders_df, driver_data, schedule_data, valid_days


# Load the filtered orders, the driver and schedule tables, and the list of
# order dates that survived filtering. This rebinds the `driver_data` and
# `schedule_data` names loaded near the top of the notebook. The environment
# itself is constructed in a later cell.
orders_df, driver_data, schedule_data, valid_days = load_data()

In [None]:
# Work with the first date returned by load_data().
selected_day = valid_days[0]

weather_service = WeatherService("./data/weather.csv")

# Rows for the selected day only, restricted to the order fields below
# (the helper "date" column is left out).
daily_orders = orders_df.loc[
    orders_df["date"] == selected_day,
    [
        "order_id",
        "datetime",
        "pickup_area",
        "dropoff_area",
        "pickup_lat",
        "pickup_lon",
        "dropoff_lat",
        "dropoff_lon",
        "customer_price",
        "commissionPercent",
    ],
]

In [None]:
# Display the selected day's orders as the cell output for a visual check.
daily_orders

In [None]:
# Build the delivery environment from the orders, driver table, driver
# schedule and weather service.
# NOTE(review): `orders` is not assigned in any of the visible cells around
# this one — confirm it is defined earlier in the notebook (Restart & Run All),
# or whether `daily_orders` / `orders_df` was the intended argument.
env = DeliveryEnv(orders, driver_data, schedule_data, weather_service)