In [1]:
import pandas as pd
import numpy as np
import math
from datetime import datetime
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    roc_auc_score,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
)

from imblearn.ensemble import BalancedRandomForestClassifier
import joblib

---

## Train Driver-Order Acceptance Model

In [None]:
# Output directory for model artifacts; created if it does not already exist.
model_output_folder = "./out"
os.makedirs(name=model_output_folder, exist_ok=True)

In [None]:
# Load training data
df = pd.read_csv("./data/order_driver.csv")

# Keep only rows with status == 5 and outside == 0
df = df.loc[(df["status"] == 5) & (df["outside"] == 0)]
print(df.shape)
print(df["accept"].describe())

# Define features & target variable.
# These names (and their order) must match the single-row frame built in
# Driver.calculate_accept_prob: scikit-learn validates feature names at
# predict time against the names seen during fit.
feature_columns = [
    "commission",
    "driver_distance",
    "hour",
    "weather_code",
    "work_time_minutes",
]
X = df[feature_columns]
print(X.head())
y = df["accept"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model (BalancedRandomForest handles imbalance natively)
model = BalancedRandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]

# Evaluate model performance
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, y_probs))

# Save the trained classifier into model_output_folder (created by the
# config cell) instead of a separately spelled "./Out" path, which only
# resolves to the same directory on case-insensitive filesystems.
joblib.dump(model, os.path.join(model_output_folder, "acceptance_model.pkl"))

(301066, 39)
count    301066.000000
mean          0.081208
std           0.273155
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: accept, dtype: float64
   commission  driver_distance  hour  weather_code  work_time_minutes
0     54400.0             1662     8           0.0           0.000000
1     54400.0             1667     8           0.0           0.150000
2     54400.0             1680     8           0.0           4.116667
4     54400.0             1667     8           0.0           7.200000
5     54400.0             1668     8           0.0           8.116667
Precision: 0.1472621596818599
Recall: 0.4890288500609508
F1 Score: 0.22636008839986835
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.75      0.83     55292
           1       0.15      0.49      0.23      4922

    accuracy                           0.73     60214
   macro avg       0.54  

['./Out/acceptance_model.pkl']

In [None]:
class WeatherService:
    """Hourly weather-code lookup backed by a CSV file.

    The CSV must provide a ``datetime`` column and a ``weather_code`` column.
    Timestamps are floored to the start of their hour, so every hour maps to
    a single weather code.
    """

    def __init__(self, weather_csv_path: str):
        """Load the CSV and build the hour -> weather_code mapping.

        Args:
            weather_csv_path (str): Path to the weather CSV file.
        """
        df = pd.read_csv(weather_csv_path)
        # Normalize every timestamp to the start of its hour. The lowercase
        # "h" alias is used because the uppercase "H" alias is deprecated in
        # recent pandas releases.
        df["datetime"] = pd.to_datetime(df["datetime"]).dt.floor("h")
        # Build {hour_start: weather_code}; if several rows fall in the same
        # hour, the last row wins.
        self.weather_data = df.set_index("datetime")["weather_code"].to_dict()

    def get_weather_code(self, dt) -> int:
        """Get weather code for the hour containing datetime dt.

        Args:
            dt: A datetime-like object supporting ``replace(minute=...,
                second=..., microsecond=...)``.

        Returns:
            The weather code stored for that hour, or 1 when the hour is not
            present in the loaded data.
        """
        # Zero every sub-hour field, microseconds included; otherwise a
        # timestamp with fractional seconds never equals an hour-start key
        # and silently falls through to the default.
        hour_key = dt.replace(minute=0, second=0, microsecond=0)
        # Default: 1 (sunny)
        return self.weather_data.get(hour_key, 1)

In [None]:
class Order:
    """
    Represents a single customer order with details about pickup, dropoff, pricing,
    and calculated commission/revenue.
    """

    # Accepted timestamp layouts, tried in order. The second one covers
    # whole-second timestamps written without a fractional part.
    _DATETIME_FORMATS = ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S")

    def __init__(
        self,
        order_id: int,
        datetime_str: str,
        pickup_area: int,
        dropoff_area: int,
        pickup_lat: float,
        pickup_lon: float,
        dropoff_lat: float,
        dropoff_lon: float,
        customer_price: float,
        commissionPercent: float,
    ):
        """
        Initializes an Order object.

        Args:
            order_id (int): Unique identifier for the order.
            datetime_str (str): Date and time of the order creation in
                '%Y-%m-%d %H:%M:%S.%f' format; the fractional-seconds part
                may be omitted ('%Y-%m-%d %H:%M:%S').
            pickup_area (int): Identifier for the pickup geographical area.
            dropoff_area (int): Identifier for the dropoff geographical area.
            pickup_lat (float): Latitude coordinate of the pickup location.
            pickup_lon (float): Longitude coordinate of the pickup location.
            dropoff_lat (float): Latitude coordinate of the dropoff location.
            dropoff_lon (float): Longitude coordinate of the dropoff location.
            customer_price (float): The total price paid by the customer for the order.
            commissionPercent (float): The fraction of the customer price taken as
                platform commission (e.g., 0.20 for 20%).

        Raises:
            ValueError: If datetime_str matches none of the accepted formats.
        """
        self.order_id = order_id
        # Convert datetime string to a datetime object for easier manipulation
        self.datetime = self._parse_datetime(datetime_str)
        self.pickup_area = pickup_area
        self.dropoff_area = dropoff_area
        self.pickup_lat = pickup_lat
        self.pickup_lon = pickup_lon
        self.dropoff_lat = dropoff_lat
        self.dropoff_lon = dropoff_lon
        self.customer_price = customer_price
        self.commissionPercent = commissionPercent

        # Derived values
        # Driver's earnings: the customer price minus the platform's share
        self.driver_commission = self.customer_price * (1 - self.commissionPercent)
        # Platform's revenue from the order
        self.platform_revenue = self.customer_price * self.commissionPercent
        # Hour of the day when the order was placed (0-23)
        self.hour_of_day = self.datetime.hour

    @staticmethod
    def _parse_datetime(datetime_str: str) -> datetime:
        """Parse datetime_str using the first matching accepted format."""
        for fmt in Order._DATETIME_FORMATS:
            try:
                return datetime.strptime(datetime_str, fmt)
            except ValueError:
                continue
        raise ValueError(
            f"time data {datetime_str!r} does not match any of the formats "
            f"{Order._DATETIME_FORMATS}"
        )

In [None]:
# Hand-built sample order for quick interactive checks of the Order class.
test_order = Order(
    order_id=1,
    datetime_str="2023-01-15 10:30:00.000000",
    # Pickup side
    pickup_area=101,
    pickup_lat=34.0,
    pickup_lon=-118.0,
    # Dropoff side
    dropoff_area=202,
    dropoff_lat=34.1,
    dropoff_lon=-118.1,
    # Pricing: the platform keeps 20% of the customer price
    customer_price=100.0,
    commissionPercent=0.20,
)

In [None]:
# Display the sample order's parsed creation time (a datetime object built
# by Order.__init__ from datetime_str) as a quick sanity check.
test_order.datetime

In [None]:
class Driver:
    """A driver who probabilistically accepts or declines offered orders.

    The acceptance decision is driven by a classifier stored in ``self.model``,
    which is not created here and must be assigned externally before
    ``calculate_accept_prob`` / ``decide_acceptance`` are used.
    """

    # Rough planar conversion used by distance_to.
    _KM_PER_DEGREE = 111
    _METERS_PER_KM = 1000

    def __init__(
        self,
        driver_id: int,
        current_lat: float,
        current_lon: float,
        current_area: int,
        work_time_minutes: float,
        available: bool = True,
        accepted_order: bool = False,
    ):
        """Initialize a driver.

        Args:
            driver_id (int): Identifier of the driver.
            current_lat (float): Current latitude in degrees.
            current_lon (float): Current longitude in degrees.
            current_area (int): Identifier of the driver's current area.
            work_time_minutes (float): Working time in minutes; passed to the
                model as the ``work_time_minutes`` feature.
            available (bool): When False, decide_acceptance always declines.
            accepted_order (bool): Set to True once an order is accepted;
                update_location only moves the driver while this is True.
        """
        self.driver_id = driver_id
        self.current_lat = current_lat
        self.current_lon = current_lon
        self.current_area = current_area
        self.work_time_minutes = work_time_minutes
        self.available = available
        self.accepted_order = accepted_order
        self.model = None  # Model still needs to be set externally

    def distance_to(self, lat: float, lon: float) -> float:
        """Approximate straight-line distance to (lat, lon) in meters.

        Degrees are treated as planar coordinates: the Euclidean distance in
        degrees is multiplied by 111 km per degree and then converted to
        meters. Longitude is not scaled by latitude, so this is only a rough
        estimate.
        """
        delta_lat = self.current_lat - lat
        delta_lon = self.current_lon - lon
        return (
            math.sqrt(delta_lat**2 + delta_lon**2) * self._KM_PER_DEGREE
        ) * self._METERS_PER_KM

    def calculate_accept_prob(self, order: "Order", weather_code: int) -> float:
        """Predict the acceptance probability with the attached classifier.

        Builds a single-row DataFrame with the columns ``commission``,
        ``driver_distance``, ``hour``, ``weather_code`` and
        ``work_time_minutes`` and returns the second column of the model's
        ``predict_proba`` output for that row.

        Args:
            order: Order being offered; its ``driver_commission``,
                ``pickup_lat``, ``pickup_lon`` and ``hour_of_day`` are read.
            weather_code (int): Weather code for the order's hour.

        Returns:
            float: Probability of the model's second class (assumed to be
            ``accept == 1`` -- confirm against the trained model's classes).

        Raises:
            ValueError: If no model has been assigned to this driver.
        """
        if self.model is None:
            raise ValueError(
                "Driver model not initialized! Must be set in DeliverySimulator."
            )

        features = {
            "commission": [order.driver_commission],
            "driver_distance": [self.distance_to(order.pickup_lat, order.pickup_lon)],
            "hour": [order.hour_of_day],
            "weather_code": [weather_code],
            "work_time_minutes": [self.work_time_minutes],
        }

        return self.model.predict_proba(pd.DataFrame(features))[0][1]

    def decide_acceptance(self, order: "Order", weather_code: int) -> bool:
        """Make acceptance decision based on probability.

        An unavailable driver always declines. Otherwise a uniform random
        value in [0, 1) is compared against the predicted acceptance
        probability; on acceptance ``accepted_order`` is set to True.

        Returns:
            bool: True if the driver accepts the order, False otherwise.
        """
        if not self.available:
            return False

        # Draw before predicting so that every decision by an available
        # driver consumes exactly one value from NumPy's global RNG.
        random_value = np.random.random()
        prob = self.calculate_accept_prob(order, weather_code)

        # Cast so callers get a builtin bool rather than a NumPy boolean.
        accepted = bool(random_value < prob)
        if accepted:
            self.accepted_order = True
        return accepted

    def update_location(self, lat: float, lon: float, new_area: int):
        """Update location only if the driver has taken an order."""
        if self.accepted_order:
            self.current_lat = lat
            self.current_lon = lon
            self.current_area = new_area