In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    roc_auc_score,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
)

from imblearn.ensemble import BalancedRandomForestClassifier
import joblib

from geopy.distance import geodesic

---

# Load the Data

In [2]:
schedule_data = pd.read_csv("./data/driver_schedule.csv", engine="pyarrow")
driver_data = pd.read_csv("./data/driver_update2.csv", engine="pyarrow")
order_data = pd.read_csv("./data/order.csv", engine="pyarrow")
order_driver_data = pd.read_csv("./data/order_driver.csv", engine="pyarrow")

---

## Train Driver-Order Accept Model

In [3]:
model_output_folder = "./out"
os.makedirs(model_output_folder, exist_ok=True)
model_path = "./Out/acceptance_model.pkl"

# if the model exists, decide if re-train the model is needed
retrain_model: bool = False

In [4]:
order_driver_data

Unnamed: 0,id,order_id,driver_id,delivered,seen,ignored,silent,timedout,accepted,cancelled,...,date,weather_code,commission,status,outside,driver_area,time_gap_minutes,new_start_time,work_time_minutes,working
0,33510101,4863457,8,0,0,0,0,0,0,0,...,2025-04-07,0.0,54400.0,5,0.0,35,,2025-04-07 08:20:00,0.000000,0
1,33510106,4863457,8,1,1,0,0,1,0,0,...,2025-04-07,0.0,54400.0,5,0.0,35,0.150000,2025-04-07 08:20:00,0.150000,1
2,33510116,4863457,8,1,1,0,0,1,0,0,...,2025-04-07,0.0,54400.0,5,0.0,35,3.966667,2025-04-07 08:20:00,4.116667,1
3,33510120,4863459,8,1,1,0,0,1,0,0,...,2025-04-07,0.0,62050.0,5,,35,2.983333,2025-04-07 08:20:00,7.100000,1
4,33510123,4863457,8,1,1,0,0,0,0,0,...,2025-04-07,0.0,54400.0,5,0.0,35,0.100000,2025-04-07 08:20:00,7.200000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336539,33846028,4890389,21730,1,1,0,1,0,0,0,...,2025-04-21,3.0,42500.0,5,0.0,36,19.900000,2025-04-21 19:09:59,77.383333,1
336540,33846064,4890389,21730,1,1,0,1,1,0,0,...,2025-04-21,3.0,42500.0,5,0.0,36,0.966667,2025-04-21 19:09:59,78.350000,1
336541,33846089,4890396,21730,1,1,0,1,1,0,0,...,2025-04-21,3.0,51000.0,5,0.0,36,2.233333,2025-04-21 19:09:59,80.583333,1
336542,33846126,4890400,21730,1,1,0,1,0,0,0,...,2025-04-21,3.0,59500.0,5,0.0,36,4.583333,2025-04-21 19:09:59,85.166667,1


In [5]:
if os.path.exists(model_path) and not retrain_model:
    print(f"Model found at {model_path}. Loading model...")
    # with open(model_path, "rb") as f:
    #     model = pickle.load(f)
    model = joblib.load(model_path)
    print("Model loaded successfully.")
else:
    order_driver_data = pd.read_csv("./data/order_driver.csv")

    order_driver_data = order_driver_data.loc[
        (order_driver_data["status"] == 5) & (
            order_driver_data["outside"] == 0)
    ]
    print(order_driver_data.shape)
    print(order_driver_data["accept"].describe())

    # Define features & target variable
    order_features = order_driver_data[
        ["commission", "driver_distance", "hour",
            "weather_code", "work_time_minutes"]
    ]
    # order_features = order_driver_data[
    #     ["commission", "distance", "hour", "weather_code", "work_time_minutes"]
    # ]
    print(order_features.head())
    acceptance_status = order_driver_data["accept"]
    # Define features & target variable
    # Train-test split
    features_train, features_test, target_train, target_test = train_test_split(
        order_features, acceptance_status, test_size=0.2, random_state=42
    )

    # Train model (BalancedRandomForest handles imbalance natively)
    model = BalancedRandomForestClassifier(random_state=42)
    model.fit(features_train, target_train)

    # Make predictions
    y_pred = model.predict(features_test)
    y_probs = model.predict_proba(features_test)[:, 1]

    # Evaluate model performance
    print("Precision:", precision_score(target_test, y_pred))
    print("Recall:", recall_score(target_test, y_pred))
    print("F1 Score:", f1_score(target_test, y_pred))
    print("Classification Report:\n", classification_report(target_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(target_test, y_pred))
    print("ROC AUC Score:", roc_auc_score(target_test, y_probs))
    # Save trained model (only the classifier, without SMOTE)
    joblib.dump(model, "./Out/acceptance_model.pkl")

Model found at ./Out/acceptance_model.pkl. Loading model...
Model loaded successfully.


---

## Get Weather Code

In [6]:
class WeatherService:
    def __init__(self, weather_csv_path: str):
        df = pd.read_csv(weather_csv_path)
        # Convert 'datetime' column to datetime objects and normalize to the hour start
        df["datetime"] = pd.to_datetime(df["datetime"]).dt.floor("h")
        # Set 'datetime' as index and convert 'weather_code' to a dictionary
        self.weather_data = df.set_index("datetime")["weather_code"].to_dict()

    def get_weather_code(self, dt) -> int:
        """Get weather code for the hour containing datetime dt"""
        hour_key = dt.replace(minute=0, second=0)
        # Default: 1 (sunny)
        return self.weather_data.get(hour_key, 1)


weather_service = WeatherService(weather_csv_path="./data/weather.csv")

---

## Define the Order

In [7]:
class Order:
    """
    Represents a single customer order with details about pickup, dropoff, pricing,
    and calculated commission/revenue.
    """

    def __init__(
        self,
        order_id: int,
        datetime_str: str,
        pickup_area: int,
        dropoff_area: int,
        pickup_lat: float,
        pickup_lon: float,
        dropoff_lat: float,
        dropoff_lon: float,
        customer_price: float,
        commissionPercent: float,
        complete_time: float,
    ):
        """
        Initializes an Order object.

        Args:
            order_id (int): Unique identifier for the order.
            datetime_str (str): Date and time of the order creation in '%Y-%m-%d %H:%M:%S.%f' format.
            pickup_area (int): Identifier for the pickup geographical area.
            dropoff_area (int): Identifier for the dropoff geographical area.
            pickup_lat (float): Latitude coordinate of the pickup location.
            pickup_lon (float): Longitude coordinate of the pickup location.
            dropoff_lat (float): Latitude coordinate of the dropoff location.
            dropoff_lon (float): Longitude coordinate of the dropoff location.
            customer_price (float): The total price paid by the customer for the order.
            commissionPercent (float): The percentage of the customer price taken as platform commission (e.g., 0.20 for 20%).
        """
        self.order_id = order_id
        # Convert datetime string to a datetime object for easier manipulation
        # self.datetime = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S.%f")
        # self.datetime = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")
        self.datetime: datetime = (
            datetime_str.to_pydatetime()
        )  # Convert Timestamp to datetime.datetime object

        self.pickup_area = pickup_area
        self.dropoff_area = dropoff_area
        self.pickup_lat = pickup_lat
        self.pickup_lon = pickup_lon
        self.dropoff_lat = dropoff_lat
        self.dropoff_lon = dropoff_lon
        self.customer_price = customer_price
        self.commissionPercent = commissionPercent
        self.complete_time = complete_time
        # These calculations were previously in __post_init__
        # Calculate the driver's earnings from the order
        # self.driver_commission = self.customer_price * (1 - self.commissionPercent)
        self.driver_commission = self.customer_price * self.commissionPercent
        # Calculate the platform's revenue from the order
        # self.platform_revenue = self.customer_price * self.commissionPercent
        self.platform_revenue = self.customer_price * \
            (1 - self.commissionPercent)
        # Extract the hour of the day when the order was placed (0-23)
        self.hour_of_day = self.datetime.hour
        self.weather_code = weather_service.get_weather_code(self.datetime)

    def __repr__(self):
        """
        Returns a string representation of the Order object for easy debugging and display.
        """
        return (
            f"Order(\n"
            f"    order_id={self.order_id},\n"
            f"    datetime={self.datetime},\n"
            f"    pickup_area={self.pickup_area},\n"
            f"    dropoff_area={self.dropoff_area},\n"
            f"    pickup_lat={self.pickup_lat},\n"
            f"    pickup_lon={self.pickup_lon},\n"
            f"    dropoff_lat={self.dropoff_lat},\n"
            f"    dropoff_lon={self.dropoff_lon},\n"
            f"    customer_price={self.customer_price:.2f},\n"
            f"    commissionPercent={self.commissionPercent:.2f},\n"
            f"    driver_commission={self.driver_commission:.2f},\n"
            f"    platform_revenue={self.platform_revenue:.2f},\n"
            f"    hour_of_day={self.hour_of_day}\n"
            f"    weather_code={self.weather_code}\n"
            f"    complete_time={self.complete_time}\n"
            f")"
        )

---

## Define the Driver

In [8]:
class Driver:
    def __init__(
        self,
        driver_id: int,
        current_lat: float,
        current_lon: float,
        current_area: int,
        work_time_minutes: float,
        available: bool = True,
        accepted_order: bool = False,
    ):
        """
        Initializes a Driver object.

        Args:
            driver_id (int): Unique identifier for the driver.
            current_lat (float): Current latitude coordinate of the driver's location.
            current_lon (float): Current longitude coordinate of the driver's location.
            current_area (int): Identifier for the driver's current geographical area.
            work_time_minutes (float): Total minutes the driver has worked.
            available (bool, optional): True if the driver is available for new orders, False otherwise. Defaults to True.
            accepted_order (bool, optional): True if the driver has accepted an order and is en route, False otherwise. Defaults to False.
        """
        self.driver_id = driver_id
        self.current_lat = current_lat
        self.current_lon = current_lon
        self.current_area = current_area
        self.work_time_minutes = work_time_minutes
        self.available = available
        self.accepted_order = accepted_order
        self.model = None  # Model still needs to be set externally

        print(
            f"Driver {self.driver_id} is initialized with location ({self.current_lat}, {self.current_lon})"
        )

    def distance_to(self, order: Order) -> float:
        """Calculate Euclidean distance in kilometers (approx)."""
        # original_distance = (
        #     math.sqrt(
        #         (self.current_lat - order.pickup_lat) ** 2
        #         + (self.current_lon - order.pickup_lon) ** 2
        #     )
        #     * 111
        # ) * 1000

        # print(f"The distance calculated by traditional method is {original_distance}")
        """Geodesic Distance (Calculate the geodesic distance in meters from the driver's current location to the pickup location of a given order.)"""
        point_current = (self.current_lat, self.current_lon)
        point_pickup = (order.pickup_lat, order.pickup_lon)
        distance = geodesic(point_current, point_pickup).m

        print(f"The distance calculated by geodesic is {distance}")

        return distance

    def calculate_accept_prob(self, order: Order,
                              #   weather_code: int,
                              ) -> float:
        """Predict acceptance probability using logistic regression."""
        if self.model is None:
            raise ValueError(
                "Driver model not initialized! Must be set in DeliverySimulator."
            )

        features = {
            "commission": [order.driver_commission],
            # "distance": [self.distance_to(order)],
            "driver_distance": [self.distance_to(order)],
            "hour": [order.hour_of_day],
            # "weather_code": [weather_code],
            "weather_code": [order.weather_code],
            "work_time_minutes": [self.work_time_minutes],
        }
        print("Features input to the model for prediction:")
        print(features)
        return self.model.predict_proba(pd.DataFrame(features))[0][1]

    def decide_acceptance(
        self,
        order: Order,
        # weather_code: int,
        schedule_data: pd.DataFrame,
        threshold: float = np.random.random(),
    ) -> bool:
        """Make acceptance decision based on probability."""

        # check if the driver is avaliable:
        order_data_ymd = order.datetime.date()
        order_hour = order.hour_of_day
        driver_schedule_data: pd.DataFrame = schedule_data[
            (schedule_data['driver_id'] == self.driver_id) &
            (pd.to_datetime(schedule_data['date']).dt.date == order_data_ymd) &
            (schedule_data['hour'] == order_hour)
        ]

        if driver_schedule_data.empty:
            self.available = False

        if not self.available:
            print(
                f"Driver {self.driver_id} is not scheduled to work at {order_data_ymd} {order_hour:02d}:00.")
            return False
        else:
            print(
                f"Driver {self.driver_id} can work at {order_data_ymd} {order_hour:02d}:00.")

        # random_value = np.random.random()
        random_value = threshold
        # print(f"Random Value: {random_value:.2f}")
        # prob = self.calculate_accept_prob(order, weather_code)
        prob = self.calculate_accept_prob(order)

        accepted = bool(random_value < prob)
        if accepted:
            print(
                f"Driver {self.driver_id} accept the order with probability of {prob} and threshold {threshold}"
            )
            self.accepted_order = True
        else:
            print(
                f"Driver {self.driver_id} did not accept the order with probability of {prob} and threshold {threshold}"
            )
        return accepted

    # def update_location(self, order: Order):
    #     """Update location only if the driver has taken an order."""
    #     if self.accepted_order:
    #         self.current_lat = order.dropoff_lat
    #         self.current_lon = order.dropoff_lon
    #         self.current_area = order.dropoff_area
    #         print(
    #             f"Driver {self.driver_id} location moves to ({self.current_lat}, {self.current_lon})"
    #         )
    #     else:
    #         print(f"Driver {self.driver_id} keeps the same location")

---

## Test Order and Driver

In [9]:
for index, order in order_data.iterrows():

    order_id = order["order_id"]
    datetime_str = order["datetime"]
    pickup_area = order["pickup_area2"]
    dropoff_area = order["dropoff_area2"]
    pickup_lat = order["pickup_lat"]
    pickup_lon = order["pickup_lon"]
    dropoff_lat = order["dropoff_lat"]
    dropoff_lon = order["dropoff_lon"]

    customer_price = order["customer_price"]
    complete_time = order['complete_time']

    # Check for any missing values before proceeding
    if (
        pd.isna(order_id)
        or pd.isna(datetime_str)
        or pd.isna(pickup_area)
        or pd.isna(dropoff_area)
        or pd.isna(pickup_lat)
        or pd.isna(pickup_lon)
        or pd.isna(dropoff_lat)
        or pd.isna(dropoff_lon)
        or pd.isna(customer_price)
        # complete_time
        or pd.isna(complete_time)
    ):
        # Skip to the next iteration if any value is missing
        print()
        print(f"--- Skipping Order (Index: {index}) due to missing values ---")
        continue
    order = Order(
        order_id=order_id,
        datetime_str=datetime_str,
        pickup_area=pickup_area,
        dropoff_area=dropoff_area,
        pickup_lat=pickup_lat,
        pickup_lon=pickup_lon,
        dropoff_lat=dropoff_lat,
        dropoff_lon=dropoff_lon,
        customer_price=customer_price,
        commissionPercent=0.20,
        complete_time=complete_time,
    )
    weather_code = weather_service.get_weather_code(order.datetime)
    print()
    print(f"--- Order Details (Index: {index}) ---")
    # print(f"Order ID: {order_id}")
    # print(f"Datetime: {datetime_str}")
    # print(f"Pickup Area: {pickup_area}")
    # print(f"Dropoff Area: {dropoff_area}")
    # print(f"Pickup Latitude: {pickup_lat}")
    # print(f"Pickup Longitude: {pickup_lon}")
    # print(f"Dropoff Latitude: {dropoff_lat}")
    # print(f"Dropoff Longitude: {dropoff_lon}")
    # print(f"Customer Price: {customer_price}")
    # print(f"The weather code is: {weather_code}")
    print(order)
    print("---------------------------------")

    if index == 100:
        break


--- Skipping Order (Index: 0) due to missing values ---

--- Order Details (Index: 1) ---
Order(
    order_id=4863452,
    datetime=2025-04-07 08:08:52,
    pickup_area=598.0,
    dropoff_area=328.0,
    pickup_lat=32.6959297,
    pickup_lon=51.7367204,
    dropoff_lat=32.6326779,
    dropoff_lon=51.6529232,
    customer_price=96000.00,
    commissionPercent=0.20,
    driver_commission=19200.00,
    platform_revenue=76800.00,
    hour_of_day=8
    weather_code=0.0
    complete_time=31.600000000000005
)
---------------------------------

--- Order Details (Index: 2) ---
Order(
    order_id=4863453,
    datetime=2025-04-07 08:10:17,
    pickup_area=396.0,
    dropoff_area=595.0,
    pickup_lat=32.651796,
    pickup_lon=51.6078153,
    dropoff_lat=32.69834,
    dropoff_lon=51.7077533,
    customer_price=90000.00,
    commissionPercent=0.20,
    driver_commission=18000.00,
    platform_revenue=72000.00,
    hour_of_day=8
    weather_code=0.0
    complete_time=21.266666666666666
)
--------

In [10]:
# # manully set threshold
# decide_pred = test_driver.decide_acceptance(
#     test_order, test_weather_code, threshold=0.5
# )


# # random threshold
# decide_pred = test_driver.decide_acceptance(test_order, test_weather_code)

# location_pred = test_driver.update_location(test_order)

In [11]:
test_driver = Driver(
    driver_id=19852,
    current_lat=34.0,
    current_lon=-118.0,
    current_area=100,
    work_time_minutes=300,
    available=True,
)
test_driver.model = model
decide_pred = test_driver.decide_acceptance(
    order=order, schedule_data=schedule_data, threshold=0.5

)

print(decide_pred)

Driver 19852 is initialized with location (34.0, -118.0)
Driver 19852 can work at 2025-04-07 09:00.
The distance calculated by geodesic is 12551071.364283588
Features input to the model for prediction:
{'commission': [8400.0], 'driver_distance': [12551071.364283588], 'hour': [9], 'weather_code': [0.0], 'work_time_minutes': [300]}
Driver 19852 accept the order with probability of 0.66 and threshold 0.5
True


### Factors of Rider Accepting an Order
1. "commission": [order.driver_commission]
2. "driver_distance": [self.distance_to(order)]
3. "hour": [order.hour_of_day]
4. "weather_code": [weather_code]
5. *"work_time_minutes": [self.work_time_minutes]*

* original_driver_set for a specific order (using order): order_id, we need to know the system order assignment for this order and get the unique driver ids
* get the driver's info with original_driver_assign_ids in the driver_data: driver_id, driver_lat & driver_lon, driver_area
* update_driver_set, it contains driver_id, driver_lat & driver_lon, driver_area
    * create this df with driver_id, driver_lat & driver_lon, driver_area columns, it should be empty when init the environment
    * check if there is any driver_id in original_driver_assign_ids

In [12]:
class DriverManager:

    """
    Manages the driver information DataFrame.
    """

    def __init__(
        self,
        order_driver_data: pd.DataFrame,
        driver_data: pd.DataFrame,
    ):
        self.order_driver_data = order_driver_data
        self.driver_data = driver_data

        self.update_driver_set = pd.DataFrame(
            columns=["driver_id", "driver_lat", "driver_lon",
                     "driver_area", "work_time_minutes"]
        )

        self.original_driver_set = None

    def get_original_driver_set(
        self,
        order: Order
    ) -> pd.DataFrame:
        """
        Get the original driver set for a specific order.
        """
        original_driver_assign_set = self.order_driver_data[self.order_driver_data["order_id"]
                                                            == order.order_id]
        original_driver_assign_ids = original_driver_assign_set['driver_id'].unique(
        ).tolist()

        # select the target driver
        original_driver_set = driver_data[
            (driver_data['driver_id'].isin(original_driver_assign_ids)) &
            (driver_data['order_id'] == order.order_id)
        ]

        original_driver_set = original_driver_set[["driver_id", "driver_lat", "driver_lon",
                                                  "driver_area", "work_time_minutes"]]
        # original_driver_set = original_driver_set.reset_index(drop=True)
        # Because in the driver data set, if a rider accepts an order, the platform will continue to record the update of his location
        # keep the last record to get the rider's original position for the order
        # original_driver_set = original_driver_set.drop_duplicates(
        #     subset=['driver_id'], keep='last')
        # no need to drop duplicates

        # self.original_driver_set = original_driver_set

        return original_driver_set


    def get_driver_pool(self, order: Order) -> pd.DataFrame:
        """
        Constructs the driver pool, prioritizing updated driver information
        and including new drivers based on matching pickup area.
        """
        original_driver_set = self.get_original_driver_set(order)
        driver_pool = original_driver_set.copy()

        # New Logic: Add drivers from update_driver_set if their area matches order.pickup_area
        # and they are not already in the original driver pool.

        # 1. Filter update_driver_set for drivers matching the pickup area
        area_matched_drivers_from_updates = self.update_driver_set[
            self.update_driver_set['driver_area'] == order.pickup_area
        ].copy()  # Use .copy() to ensure an independent DataFrame

        # 2. Identify truly new drivers (not in original_driver_set) from the area-matched set
        truly_new_drivers_to_add = area_matched_drivers_from_updates[
            ~area_matched_drivers_from_updates['driver_id'].isin(
                driver_pool['driver_id'])
        ]

        # 3. Add these truly new drivers to the driver_pool
        if not truly_new_drivers_to_add.empty:
            driver_pool = pd.concat(
                [driver_pool, truly_new_drivers_to_add], ignore_index=True)
            print(
                f"Added new drivers to the pool based on matching 'driver_area' ('{order.pickup_area}'): {truly_new_drivers_to_add['driver_id'].tolist()}")
        else:
            print(
                f"No new drivers from update_driver_set with matching 'driver_area' ('{order.pickup_area}') to add.")

        # Prepare for update: Set 'driver_id' as index for both DataFrames.
        # driver_pool now might contain newly added drivers.
        driver_pool_indexed = driver_pool.set_index('driver_id')
        update_set_indexed = self.update_driver_set.set_index('driver_id')

        # Check if driver_pool (which now includes original + potentially new area-matched drivers)
        # and update_set_indexed have common driver_ids.
        # These are the drivers whose information will be updated.
        drivers_to_be_updated = driver_pool_indexed.index.intersection(
            update_set_indexed.index)

        # Perform the update operation. This will update rows in driver_pool_indexed
        # where the 'driver_id' exists in update_set_indexed.
        driver_pool_indexed.update(update_set_indexed)

        # Reset the index to 'driver_id' column again for the final DataFrame
        driver_pool = driver_pool_indexed.reset_index()

        # Check if any updates actually happened based on common_driver_ids
        if not drivers_to_be_updated.empty:
            print(
                f"Using data in update_driver_set to update the following drivers in the pool (original and new area-matched): {drivers_to_be_updated.tolist()}")
        else:
            print("update_driver_set has no matched driver ID for existing drivers in the pool, no update from it.")

        # Randomize the order of the driver_pool
        driver_pool = driver_pool.sample(frac=1).reset_index(drop=True)
        print("Driver pool has been randomized.")

        return driver_pool

In [13]:
driver_manager = DriverManager(
    order_driver_data=order_driver_data, driver_data=driver_data)
# driver_manager.original_driver_set
# original_driver_set = driver_manager.get_original_driver_set(order=order)

# test data
driver_manager.update_driver_set = pd.DataFrame({
    "driver_id": [1, 2, 3, 4, 5, 6, 15528],
    "driver_lat": [34.05, 34.06, 34.07, 34.08, 34.09, 34.10, 0],
    "driver_lon": [-118.24, -118.25, -118.26, -118.27, -118.28, -118.29, 0],
    "driver_area": [1, 285, 3, 285, 5, 6, 285],
    "work_time_minutes": [120, 180, 90, 240, 150, 60, 90],
})

In [20]:
driver_pool = driver_manager.get_driver_pool(order=order)
driver_pool

Added new drivers to the pool based on matching 'driver_area' ('285.0'): [2, 4]
Using data in update_driver_set to update the following drivers in the pool (original and new area-matched): [15528, 2, 4]
Driver pool has been randomized.


Unnamed: 0,driver_id,driver_lat,driver_lon,driver_area,work_time_minutes
0,2,34.06,-118.25,285,180.0
1,20627,32.624953,51.650667,283,0.766667
2,15528,0.0,0.0,285,90.0
3,4,34.08,-118.27,285,240.0


In [15]:
original_driver_set = driver_manager.get_original_driver_set(order=order)
driver_pool

Unnamed: 0,driver_id,driver_lat,driver_lon,driver_area,work_time_minutes
0,20627,32.624953,51.650667,283,0.766667
1,4,34.08,-118.27,285,240.0
2,15528,0.0,0.0,285,90.0
3,2,34.06,-118.25,285,180.0


In [16]:
driver_manager.update_driver_set

Unnamed: 0,driver_id,driver_lat,driver_lon,driver_area,work_time_minutes
0,1,34.05,-118.24,1,120
1,2,34.06,-118.25,285,180
2,3,34.07,-118.26,3,90
3,4,34.08,-118.27,285,240
4,5,34.09,-118.28,5,150
5,6,34.1,-118.29,6,60
6,15528,0.0,0.0,285,90


---

# Define Ride Hail Env

In [None]:
def load_data():
    """Load and preprocess environment data."""
    orders_df = pd.read_csv("./data/order.csv")
    driver_data = pd.read_csv("./data/driver_update2.csv")
    schedule_data = pd.read_csv("./data/driver_schedule.csv")

    orders_df = orders_df.loc[
        (orders_df["outside"] == 0) & (orders_df["pickup_area"].notnull())
    ]
    orders_df = orders_df[
        (pd.to_datetime(orders_df["datetime"]).dt.hour >= 8)
        & (pd.to_datetime(orders_df["datetime"]).dt.hour < 24)
    ]

    orders_df["date"] = pd.to_datetime(orders_df["date"]).dt.date
    orders_df = orders_df[orders_df["date"] <=
                          pd.to_datetime("2025-04-16").date()]
    valid_days = orders_df["date"].unique().tolist()

    # orders_df['revenue'] = orders_df.loc[orders_df['status'] == 5]['customer_price'] * (orders_df.loc[orders_df['status'] == 5]['commissionPercent'] / 100)
    # print('total revenue ', orders_df['revenue'].sum())
    # pdb.set_trace()

    orders_df = orders_df[
        [
            "order_id",
            "datetime",
            "pickup_area",
            "dropoff_area",
            "pickup_lat",
            "pickup_lon",
            "dropoff_lat",
            "dropoff_lon",
            "customer_price",
            "commissionPercent",
            "date",
        ]
    ]

    # orders = [Order(**row) for _, row in orders_df.iterrows()]
    return orders_df, driver_data, schedule_data, valid_days


# Load data & initialize environment
orders_df, driver_data, schedule_data, valid_days = load_data()

In [None]:
selected_day = valid_days[0]

weather_service = WeatherService("./data/weather.csv")
daily_orders = orders_df[orders_df["date"] == selected_day]
daily_orders = daily_orders[
    [
        "order_id",
        "datetime",
        "pickup_area",
        "dropoff_area",
        "pickup_lat",
        "pickup_lon",
        "dropoff_lat",
        "dropoff_lon",
        "customer_price",
        "commissionPercent",
    ]
]

In [None]:
daily_orders