In [None]:
from models import WeatherService
from models import Order
from models import DriverManager
from models import DriverRecord

import pandas as pd
import numpy as np
import os
import joblib
import random

from geopy.distance import geodesic

import gymnasium as gym
from gymnasium import spaces

from imblearn.ensemble import BalancedRandomForestClassifier

In [None]:
class DeliveryEnv(gym.Env):
    """Gymnasium environment that replays historical orders, one order per step.

    At every step the agent chooses a commission rate in ``[0, 1]`` for the
    current order. The resulting ``Order`` is handed to
    ``DriverManager.get_driver_attampt``; the reward is
    ``order.platform_revenue`` when that call returns a truthy value and ``0``
    otherwise. An episode terminates once every order id of the day-shuffled
    order list has been simulated.
    """

    def __init__(
        self,
        order_data: pd.DataFrame,
        driver_data: pd.DataFrame,
        order_driver_data: pd.DataFrame,
        schedule_data: pd.DataFrame,
        acceptance_model: BalancedRandomForestClassifier,
        weather_service: WeatherService,
    ):
        """Store the data sources, build the first episode and define the spaces.

        Args:
            order_data: Orders to replay. Must provide the columns listed in
                ``_get_order_data_specific_day_concatenated``.
            driver_data: Driver table; ``driver_area`` and
                ``work_time_minutes`` are read here for the space bounds.
            order_driver_data: Order/driver table; ``complete_time`` and
                ``driver_distance`` are read here for the space bounds.
            schedule_data: Passed through to ``DriverManager``.
            acceptance_model: Passed through to ``DriverManager``.
            weather_service: Passed to every ``Order`` that is created.
        """
        super().__init__()

        self.order_data = order_data
        self.driver_data = driver_data
        self.order_driver_data = order_driver_data
        self.schedule_data = schedule_data
        self.acceptance_model = acceptance_model
        self.weather_service = weather_service

        self.state = None
        self.steps = 0
        self._start_new_episode()

        # Define action space (continuous commission rate between 0 and 1)
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

        # Area columns are used directly as Discrete values, so the space size
        # is driven by the largest area code rather than the number of areas.
        obs_area_num = int(
            max(
                self.driver_data["driver_area"].max(),
                self.order_data["pickup_area2"].max(),
                self.order_data["dropoff_area2"].max(),
            )
        )
        # Define state space (order + specific driver attributes)
        self.observation_space = spaces.Dict(
            {
                "customer_price": spaces.Box(
                    low=self.order_data["customer_price"].min(),
                    high=self.order_data["customer_price"].max(),
                    shape=(1,),
                    dtype=np.float32,
                ),
                "pickup_area": spaces.Discrete(obs_area_num + 1),
                "dropoff_area": spaces.Discrete(obs_area_num + 1),
                "hour_of_day": spaces.Discrete(24 + 1),
                "day_of_week": spaces.Discrete(7 + 1),
                "complete_time": spaces.Box(
                    low=self.order_driver_data["complete_time"].min(),
                    high=self.order_driver_data["complete_time"].max(),
                    shape=(1,),
                    dtype=np.float32,
                ),
                # NOTE(review): bounds come from ``driver_distance`` while step()
                # reports the pickup -> dropoff geodesic distance in metres;
                # confirm both use the same unit and range.
                "distance": spaces.Box(
                    low=self.order_driver_data["driver_distance"].min(),
                    high=self.order_driver_data["driver_distance"].max(),
                    shape=(1,),
                    dtype=np.float32,
                ),
                "weather": spaces.Discrete(4 + 1),
                # TODO: need to check this
                "driver_count": spaces.Box(
                    low=0, high=1000, shape=(1,), dtype=np.float32
                ),
                "driver_distance_mean": spaces.Box(
                    low=self.order_driver_data["driver_distance"].min(),
                    high=self.order_driver_data["driver_distance"].max(),
                    shape=(1,),
                    dtype=np.float32,
                ),
                "driver_work_min_mean": spaces.Box(
                    low=self.driver_data["work_time_minutes"].min(),
                    high=self.driver_data["work_time_minutes"].max(),
                    shape=(1,),
                    dtype=np.float32,
                ),
            }
        )

    def _start_new_episode(self):
        """(Re)build every piece of per-episode state used by ``step``.

        Shared by ``__init__`` and ``reset`` so both always stay in sync.
        """
        self.driver_record = DriverRecord()
        self.driver_manager = DriverManager(
            order_driver_data=self.order_driver_data,
            driver_data=self.driver_data,
            schedule_data=self.schedule_data,
            acceptance_model=self.acceptance_model,
            driver_record=self.driver_record,
        )
        self.order_data_specific_day_concatenated = (
            self._get_order_data_specific_day_concatenated()
        )
        self.order_ids = (
            self.order_data_specific_day_concatenated["order_id"].unique().tolist()
        )
        # important info to judge if need to update driver_manager.update_driver_set
        # when change to a new day
        self.current_order_index: int = 0
        self.previous_order_date = None
        self.order_length = len(self.order_ids)

    def reset(self, seed=None, options=None):
        """Start a new episode with freshly shuffled days.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``; it also makes
                the day shuffle reproducible because the shuffle draws from
                ``self.np_random``.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self._start_new_episode()
        self.steps = 0
        # NOTE(review): the initial observation is a random sample of the
        # observation space, not a description of the first order.
        self.state = self.observation_space.sample()
        return self.state, {}

    def step(self, action: np.ndarray):
        """Simulate the current order with the commission rate in ``action``.

        Args:
            action: Array whose first element is the commission rate.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. The
            observation describes the order that was just simulated together
            with aggregates of ``driver_manager.driver_pool``; ``truncated`` is
            always ``False`` and ``info`` is an empty dict.

        Raises:
            IndexError: If called after the last order without ``reset()``.
        """
        if self.current_order_index >= self.order_length:
            raise IndexError(
                "All orders of this episode have been simulated; call reset() "
                "before stepping again."
            )

        commission_percent = float(action[0])
        current_order_id = self.order_ids[self.current_order_index]
        episode_orders: pd.DataFrame = self.order_data_specific_day_concatenated

        # order_ids is de-duplicated, so take the first row carrying this id.
        order_info: pd.Series = episode_orders[
            episode_orders["order_id"] == current_order_id
        ].iloc[0]
        current_order_datetime = order_info["date"]

        if current_order_datetime != self.previous_order_date:
            print(
                f"The date of Current Order {current_order_datetime} is different from Previous Order {self.previous_order_date}"
            )
            print("Need to set update driver set to empty")
            # head(0) keeps the columns but drops every row.
            self.driver_manager.update_driver_set = (
                self.driver_manager.update_driver_set.head(0)
            )

        # ---------------------------------------------------
        order = Order(
            order_id=order_info["order_id"],
            datetime_str=order_info["datetime"],
            pickup_area=order_info["pickup_area2"],
            dropoff_area=order_info["dropoff_area2"],
            pickup_lat=order_info["pickup_lat"],
            pickup_lon=order_info["pickup_lon"],
            dropoff_lat=order_info["dropoff_lat"],
            dropoff_lon=order_info["dropoff_lon"],
            customer_price=order_info["customer_price"],
            commissionPercent=commission_percent,
            complete_time=order_info["complete_time"],
            weather_service=self.weather_service,
        )
        print(order)

        # ---------------------------------------------------
        accept_order = self.driver_manager.get_driver_attampt(order=order)
        reward = order.platform_revenue if accept_order else 0

        # ---------------------------------------------------
        # update current order index for the next order
        self.current_order_index += 1
        self.previous_order_date = current_order_datetime
        self.steps += 1
        print(f"Update current_order_index to {self.current_order_index}")

        # ---------------------------------------------------
        # check the termination condition
        terminated = self.current_order_index >= self.order_length
        if terminated:
            print("Reach terminated condition: Finish simulating all the orders.")

        # ---------------------------------------------------
        # no truncated condition
        truncated = False
        driver_count = len(self.driver_manager.driver_pool)
        driver_distance_mean = self._driver_pool_mean(
            "distance", "driver_distance_mean"
        )
        driver_work_min_mean = self._driver_pool_mean(
            "work_time_minutes", "driver_work_min_mean"
        )

        self.state = {
            "customer_price": np.array([order.customer_price], dtype=np.float32),
            "pickup_area": np.int64(order.pickup_area),
            "dropoff_area": np.int64(order.dropoff_area),
            "hour_of_day": np.int64(order.hour_of_day),
            "day_of_week": np.int64(order.datetime.isoweekday()),
            "complete_time": np.array([order.complete_time], dtype=np.float32),
            "distance": np.array(
                [
                    geodesic(
                        (order.pickup_lat, order.pickup_lon),
                        (order.dropoff_lat, order.dropoff_lon),
                    ).m
                ],
                dtype=np.float32,
            ),
            "weather": np.int64(order.weather_code),
            "driver_count": np.array([driver_count], dtype=np.float32),
            "driver_distance_mean": np.array([driver_distance_mean], dtype=np.float32),
            "driver_work_min_mean": np.array([driver_work_min_mean], dtype=np.float32),
        }
        return self.state, reward, terminated, truncated, {}

    def _driver_pool_mean(self, column: str, obs_key: str) -> float:
        """Return the mean of ``column`` over the current driver pool.

        The mean is NaN when the pool has no usable values (for example when
        it is empty); a NaN observation would propagate into the policy
        network, so the lower bound of the matching observation space is
        returned instead.
        """
        mean_value = self.driver_manager.driver_pool[column].mean()
        if pd.isna(mean_value):
            return float(self.observation_space[obs_key].low[0])
        return float(mean_value)

    def render(self, mode="human"):
        """Print the number of steps taken this episode and the last observation."""
        print(f"Step: {self.steps}, State: {self.state}")

    def close(self):
        """Nothing to release."""
        pass

    def _get_order_data_specific_day_concatenated(self):
        """Build the episode's order table: days in random order, time-sorted within a day.

        Rows with a missing value in any required column are dropped. The day
        order is drawn from ``self.np_random`` so that ``reset(seed=...)``
        yields a reproducible episode.

        Returns:
            DataFrame restricted to the required columns.

        Raises:
            ValueError: If no order row has all required columns populated.
        """
        order_data_columns = [
            "order_id",
            "datetime",
            "date",
            "pickup_area2",
            "dropoff_area2",
            "pickup_lat",
            "pickup_lon",
            "dropoff_lat",
            "dropoff_lon",
            "customer_price",
            "complete_time",
        ]
        # Clean once up front instead of repeating the work for every day.
        complete_orders = self.order_data.dropna(subset=order_data_columns)[
            order_data_columns
        ]
        if complete_orders.empty:
            raise ValueError(
                "order_data has no rows with all required columns populated: "
                f"{order_data_columns}"
            )

        valid_days = complete_orders["date"].unique().tolist()
        self.np_random.shuffle(valid_days)

        all_processed_days_data = [
            complete_orders[complete_orders["date"] == day].sort_values(
                "datetime", ascending=True
            )
            for day in valid_days
        ]
        return pd.concat(all_processed_days_data)

---

## Test Delivery Environment

In [None]:
# Every CSV input is parsed with the pyarrow engine.
csv_options = {"engine": "pyarrow"}

# Only a fixed 1% random sample of the orders is kept; the row index is
# renumbered after sampling.
order_data = (
    pd.read_csv("./data/order.csv", **csv_options)
    .sample(frac=1 / 100, random_state=42)
    .reset_index(drop=True)
)
order_driver_data = pd.read_csv("./data/order_driver.csv", **csv_options)
driver_data = pd.read_csv("./data/driver_update2.csv", **csv_options)
schedule_data = pd.read_csv("./data/driver_schedule.csv", **csv_options)

weather_service = WeatherService(weather_csv_path="./data/weather.csv")

In [None]:
# Historical platform revenue per order: what the customer paid minus what
# was paid out to the driver.
order_data["platform_revenue"] = order_data["customer_price"].sub(
    order_data["driver_commission"]
)

In [None]:
# Average historical platform revenue per order in the sampled data.
order_data.loc[:, "platform_revenue"].mean()

In [None]:
# Location of the pre-trained driver-acceptance classifier. The folder is
# derived from the model path so that the directory created here is always the
# one the model is read from; previously the two differed in letter case
# ("./out" vs "./Out"), which only matches on case-insensitive filesystems.
model_path = "./Out/acceptance_model.pkl"
model_output_folder = os.path.dirname(model_path)
os.makedirs(model_output_folder, exist_ok=True)

# if the model exists, decide if re-train the model is needed
retrain_model: bool = False

# Fail here with a clear message instead of leaving `model` undefined (or
# stale from an earlier kernel run) for the cells below.
if retrain_model:
    raise NotImplementedError(
        "retrain_model=True, but this notebook has no training step for the "
        "acceptance model; train it separately or set retrain_model=False."
    )
if not os.path.exists(model_path):
    raise FileNotFoundError(
        f"Acceptance model not found at {model_path}; train and save it first."
    )

print(f"Model found at {model_path}. Loading model...")
# NOTE: joblib.load unpickles arbitrary Python objects; only load files that
# come from a trusted source.
model = joblib.load(model_path)
print("Model loaded successfully.")

In [None]:
# Gather the environment inputs by keyword first, then build the environment.
env_inputs = dict(
    order_data=order_data,
    driver_data=driver_data,
    order_driver_data=order_driver_data,
    schedule_data=schedule_data,
    acceptance_model=model,
    weather_service=weather_service,
)
env = DeliveryEnv(**env_inputs)

In [None]:
from stable_baselines3.common.env_checker import check_env

# Validate the environment against the Gym API expected by Stable-Baselines3;
# problems are reported as warnings or assertion errors.
check_env(env, warn=True)

In [None]:
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3 import PPO

log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)

# Always wrap the bare environment so that re-running this cell does not
# stack one Monitor on top of another.
base_env = env.unwrapped
env = Monitor(base_env, log_dir)

# Evaluate on a separate environment instance: evaluating on the training
# environment would reset it in the middle of a rollout and corrupt the
# episode PPO is currently collecting. The inputs are taken from the training
# environment so both always share the same data and acceptance model.
eval_env = Monitor(
    DeliveryEnv(
        order_data=base_env.order_data,
        driver_data=base_env.driver_data,
        order_driver_data=base_env.order_driver_data,
        schedule_data=base_env.schedule_data,
        acceptance_model=base_env.acceptance_model,
        weather_service=base_env.weather_service,
    )
)

# eval_freq=1 and total_timesteps=3 are smoke-test values that only check the
# training loop runs end to end; raise both for a real training run.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=log_dir,
    log_path=log_dir,
    eval_freq=1,
    deterministic=True,
    render=False,
)

# NOTE: from here on `model` refers to the PPO agent, no longer to the
# acceptance classifier loaded earlier in the notebook.
model = PPO("MultiInputPolicy", env, verbose=1, tensorboard_log=log_dir)
model.learn(total_timesteps=3, callback=eval_callback)