In [15]:
# Load Libraries
import gym
from gym import spaces
import pandas as pd
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

In [2]:
# Read the synthetic ride-hailing data and replace every missing value with 0
df = pd.read_csv("synthetic_ride_hailing_dataset.csv").fillna(0)

In [3]:
import numpy as np

# Attach a synthetic base fare to every ride: uniform draw between $8 and $15.
# The global NumPy seed is fixed so the column is reproducible across runs.
np.random.seed(45)
df['base_fare'] = np.random.uniform(low=8, high=15, size=df.shape[0])


In [4]:
# Preview the first rows to confirm the frame now carries the base_fare column
df.head()

Unnamed: 0,Pickup Location,Request to Pickup,Hour of Day,Time of Day,Month of Year,Surge Pricing Indicator,DWF Reward Applied,Historical Demand Forecast,RPI,incentive,fare_adjustment,DPI,CR,base_fare
0,91,316.0,8,morning,1,1,0.0,0.656204,0.34473,1.047148,0.089986,0.013614,0.5505,14.923081
1,177,356.0,16,afternoon,10,0,0.0,0.381314,0.213189,0.545256,-0.05148,0.0,0.580529,11.846813
2,80,556.0,3,night,8,0,3.12,0.132054,0.11724,0.789231,-0.082679,0.018303,0.607068,9.970131
3,196,423.0,17,evening,7,1,4.08,0.883895,0.138296,1.826744,-0.040154,0.017422,0.5961,8.541027
4,165,155.0,21,night,1,0,0.0,0.505134,0.879757,4.083967,-0.030278,0.006524,0.436169,11.111286


In [32]:
def evaluate_cost_by_outcome(model_path, env_class, df):
    """Roll a saved PPO policy through ``df`` once and average ride cost by outcome.

    The cost of a ride is its ``base_fare`` plus the rider incentive chosen by
    the policy (second component of the action). Rides are bucketed using the
    ``CR`` column as a proxy for completion: ``CR < 0.5`` counts as a
    successful ride, anything else as a cancelled one.

    Args:
        model_path: Path of the saved model, handed to ``PPO.load``.
        env_class: Environment class, instantiated as ``env_class(df)``.
            Assumed to consume one row of ``df`` per step, in row order.
        df: DataFrame with at least ``base_fare`` and ``CR`` columns.

    Returns:
        Tuple ``(mean_success_cost, mean_cancel_cost)``. A bucket that received
        no rides yields ``nan``.
    """
    env = DummyVecEnv([lambda: env_class(df)])
    model = PPO.load(model_path)

    obs = env.reset()
    success_costs = []
    cancel_costs = []

    # Step ``idx`` of the episode is matched with ``df.iloc[idx]``. Bounding the
    # loop by len(df) keeps that lookup in range even if the environment never
    # reports the end of the episode.
    for idx in range(len(df)):
        action, _ = model.predict(obs, deterministic=True)
        obs, _, dones, _ = env.step(action)

        row = df.iloc[idx]
        # The vectorised env wraps a single environment, so its action is at
        # index 0; component 1 of that action is the rider incentive.
        incentive = action[0][1]
        cost = row['base_fare'] + incentive

        # Use CR as a proxy for completion
        if row['CR'] < 0.5:
            success_costs.append(cost)
        else:
            cancel_costs.append(cost)

        # ``dones`` is a length-1 array; read the flag explicitly rather than
        # relying on the truthiness of an array.
        if dones[0]:
            break

    # np.mean([]) returns nan but also emits a RuntimeWarning; produce the nan
    # directly for an empty bucket.
    mean_success = np.mean(success_costs) if success_costs else float('nan')
    mean_cancel = np.mean(cancel_costs) if cancel_costs else float('nan')
    return mean_success, mean_cancel


In [34]:
class RideHailingEnv_Baseline(gym.Env):
    """Baseline replay environment using only the four basic ride features.

    Observation: ``Pickup Location``, ``Request to Pickup``, ``Time of Day`` and
    ``Month of Year`` of the current row as float32 (no RPI, DPI, CR or HDF).
    All four columns must therefore hold values convertible to float.

    Action: ``[fare_adjustment, rider_incentive]`` bounded by
    ``[-0.15, 0.15]`` and ``[0.0, 5.0]``.

    Reward: starts at 1.0; 0.5 is subtracted when the incentive exceeds 3.0
    and 0.3 when the fare adjustment is below -0.10.

    The episode walks through ``df`` row by row; a step taken while positioned
    on the last row ends the episode with reward 0.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.current_idx = 0

        # Only 4 features: no RPI, DPI, CR, HDF
        # NOTE(review): bounds are declared as [0, 1] but _get_observation
        # returns the raw column values unscaled — confirm whether the
        # features are meant to be normalised.
        self.observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)
        self.action_space = spaces.Box(low=np.array([-0.15, 0.0]), high=np.array([0.15, 5.0]), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_idx = 0
        return self._get_observation()

    def step(self, action):
        """Score ``action`` and advance one row.

        Returns the gym 4-tuple ``(observation, reward, done, info)``.
        """
        # Positioned on (or past) the last row: finish without scoring.
        if self.current_idx >= len(self.df) - 1:
            return self._get_observation(), 0, True, {}

        fare_adjustment, rider_incentive = action

        # No completion signal is used here: every ride is assumed to complete
        # and the reward is only reduced for costly actions.
        reward = 1.0
        if rider_incentive > 3.0:
            reward -= 0.5
        if fare_adjustment < -0.10:
            reward -= 0.3

        self.current_idx += 1
        done = self.current_idx >= len(self.df)
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Return the four baseline features of the current row as float32."""
        row = self.df.iloc[self.current_idx]
        # The original built this array but never returned it, so reset() and
        # step() handed None to the agent.
        return np.array([
            row['Pickup Location'],
            row['Request to Pickup'],
            row['Time of Day'],
            row['Month of Year']
        ], dtype=np.float32)

In [19]:
class RideHailingEnv_RPI(gym.Env):
    """Replay environment whose observation is the four basic features plus RPI.

    Each step consumes one row of ``df`` in order and the episode ends once
    every row has been visited. The action is accepted but does not influence
    the reward: +1.0 for a ride that was not cancelled, -1.0 otherwise.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.current_idx = 0
        self.action_space = spaces.Box(low=np.array([-0.15, 0.0]), high=np.array([0.15, 5.0]), dtype=np.float32)
        # NOTE(review): bounds are declared as [0, 1] but _get_observation
        # returns raw column values unscaled — confirm intended normalisation.
        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_idx = 0
        return self._get_observation()

    def step(self, action):
        """Reward the current row's outcome and advance to the next row.

        Returns the gym 4-tuple ``(observation, reward, done, info)``.
        """
        if self.current_idx >= len(self.df):
            # Episode already exhausted: report done instead of indexing past the data.
            return self._get_observation(), 0.0, True, {}

        row = self.df.iloc[self.current_idx]
        if 'Cancelled' in row.index:
            cancelled = row['Cancelled'] != 0
        else:
            # No explicit outcome column in the data: fall back to the same
            # proxy the evaluation helper in this notebook uses, where
            # CR < 0.5 is treated as a completed ride.
            cancelled = row['CR'] >= 0.5
        reward = -1.0 if cancelled else 1.0

        self.current_idx += 1
        done = self.current_idx >= len(self.df)
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Return the feature vector of the current row as float32."""
        # After the final step current_idx equals len(df); clamp so the
        # terminal observation repeats the last row instead of raising
        # IndexError.
        idx = min(self.current_idx, len(self.df) - 1)
        row = self.df.iloc[idx]
        return np.array([
            row['Pickup Location'],
            row['Request to Pickup'],
            row['Time of Day'],
            row['Month of Year'],
            row['RPI']
        ], dtype=np.float32)


In [20]:
class RideHailingEnv_RPI_DPI(gym.Env):
    """Replay environment whose observation is the four basic features plus RPI and DPI.

    Each step consumes one row of ``df`` in order and the episode ends once
    every row has been visited. The action is accepted but does not influence
    the reward: +1.0 for a ride that was not cancelled, -1.0 otherwise.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.current_idx = 0
        self.action_space = spaces.Box(low=np.array([-0.15, 0.0]), high=np.array([0.15, 5.0]), dtype=np.float32)
        # NOTE(review): bounds are declared as [0, 1] but _get_observation
        # returns raw column values unscaled — confirm intended normalisation.
        self.observation_space = spaces.Box(low=0, high=1, shape=(6,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_idx = 0
        return self._get_observation()

    def step(self, action):
        """Reward the current row's outcome and advance to the next row.

        Returns the gym 4-tuple ``(observation, reward, done, info)``.
        """
        if self.current_idx >= len(self.df):
            # Episode already exhausted: report done instead of indexing past the data.
            return self._get_observation(), 0.0, True, {}

        row = self.df.iloc[self.current_idx]
        if 'Cancelled' in row.index:
            cancelled = row['Cancelled'] != 0
        else:
            # No explicit outcome column in the data: fall back to the same
            # proxy the evaluation helper in this notebook uses, where
            # CR < 0.5 is treated as a completed ride.
            cancelled = row['CR'] >= 0.5
        reward = -1.0 if cancelled else 1.0

        self.current_idx += 1
        done = self.current_idx >= len(self.df)
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Return the feature vector of the current row as float32."""
        # After the final step current_idx equals len(df); clamp so the
        # terminal observation repeats the last row instead of raising
        # IndexError.
        idx = min(self.current_idx, len(self.df) - 1)
        row = self.df.iloc[idx]
        return np.array([
            row['Pickup Location'],
            row['Request to Pickup'],
            row['Time of Day'],
            row['Month of Year'],
            row['RPI'],
            row['DPI']
        ], dtype=np.float32)


In [21]:
class RideHailingEnv_RPI_DPI_CR(gym.Env):
    """Replay environment whose observation is the four basic features plus RPI, DPI and CR.

    Each step consumes one row of ``df`` in order and the episode ends once
    every row has been visited. The action is accepted but does not influence
    the reward: +1.0 for a ride that was not cancelled, -1.0 otherwise.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.current_idx = 0
        self.action_space = spaces.Box(low=np.array([-0.15, 0.0]), high=np.array([0.15, 5.0]), dtype=np.float32)
        # NOTE(review): bounds are declared as [0, 1] but _get_observation
        # returns raw column values unscaled — confirm intended normalisation.
        self.observation_space = spaces.Box(low=0, high=1, shape=(7,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_idx = 0
        return self._get_observation()

    def step(self, action):
        """Reward the current row's outcome and advance to the next row.

        Returns the gym 4-tuple ``(observation, reward, done, info)``.
        """
        if self.current_idx >= len(self.df):
            # Episode already exhausted: report done instead of indexing past the data.
            return self._get_observation(), 0.0, True, {}

        row = self.df.iloc[self.current_idx]
        if 'Cancelled' in row.index:
            cancelled = row['Cancelled'] != 0
        else:
            # No explicit outcome column in the data: fall back to the same
            # proxy the evaluation helper in this notebook uses, where
            # CR < 0.5 is treated as a completed ride.
            cancelled = row['CR'] >= 0.5
        reward = -1.0 if cancelled else 1.0

        self.current_idx += 1
        done = self.current_idx >= len(self.df)
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Return the feature vector of the current row as float32."""
        # After the final step current_idx equals len(df); clamp so the
        # terminal observation repeats the last row instead of raising
        # IndexError.
        idx = min(self.current_idx, len(self.df) - 1)
        row = self.df.iloc[idx]
        return np.array([
            row['Pickup Location'],
            row['Request to Pickup'],
            row['Time of Day'],
            row['Month of Year'],
            row['RPI'],
            row['DPI'],
            row['CR']
        ], dtype=np.float32)


In [22]:
class RideHailingEnv_HDF(gym.Env):
    """Replay environment whose observation is the four basic features plus
    ``Historical Demand Forecast``.

    Each step consumes one row of ``df`` in order and the episode ends once
    every row has been visited. The action is accepted but does not influence
    the reward: +1.0 for a ride that was not cancelled, -1.0 otherwise.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.current_idx = 0
        self.action_space = spaces.Box(low=np.array([-0.15, 0.0]), high=np.array([0.15, 5.0]), dtype=np.float32)
        # NOTE(review): bounds are declared as [0, 1] but _get_observation
        # returns raw column values unscaled — confirm intended normalisation.
        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_idx = 0
        return self._get_observation()

    def step(self, action):
        """Reward the current row's outcome and advance to the next row.

        Returns the gym 4-tuple ``(observation, reward, done, info)``.
        """
        if self.current_idx >= len(self.df):
            # Episode already exhausted: report done instead of indexing past the data.
            return self._get_observation(), 0.0, True, {}

        row = self.df.iloc[self.current_idx]
        if 'Cancelled' in row.index:
            cancelled = row['Cancelled'] != 0
        else:
            # No explicit outcome column in the data: fall back to the same
            # proxy the evaluation helper in this notebook uses, where
            # CR < 0.5 is treated as a completed ride.
            cancelled = row['CR'] >= 0.5
        reward = -1.0 if cancelled else 1.0

        self.current_idx += 1
        done = self.current_idx >= len(self.df)
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Return the feature vector of the current row as float32."""
        # After the final step current_idx equals len(df); clamp so the
        # terminal observation repeats the last row instead of raising
        # IndexError.
        idx = min(self.current_idx, len(self.df) - 1)
        row = self.df.iloc[idx]
        return np.array([
            row['Pickup Location'],
            row['Request to Pickup'],
            row['Time of Day'],
            row['Month of Year'],
            row['Historical Demand Forecast']
        ], dtype=np.float32)


In [23]:
class RideHailingEnv(gym.Env):
    """Full-feature replay environment: the four basic features plus RPI, DPI,
    CR and ``Historical Demand Forecast`` (8 values per observation).

    Each step consumes one row of ``df`` in order and the episode ends once
    every row has been visited. The action is accepted but does not influence
    the reward: +1.0 for a ride that was not cancelled, -1.0 otherwise.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.current_idx = 0
        self.action_space = spaces.Box(low=np.array([-0.15, 0.0]), high=np.array([0.15, 5.0]), dtype=np.float32)
        # NOTE(review): bounds are declared as [0, 1] but _get_observation
        # returns raw column values unscaled — confirm intended normalisation.
        self.observation_space = spaces.Box(low=0, high=1, shape=(8,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_idx = 0
        return self._get_observation()

    def step(self, action):
        """Reward the current row's outcome and advance to the next row.

        Returns the gym 4-tuple ``(observation, reward, done, info)``.
        """
        if self.current_idx >= len(self.df):
            # Episode already exhausted: report done instead of indexing past the data.
            return self._get_observation(), 0.0, True, {}

        row = self.df.iloc[self.current_idx]
        if 'Cancelled' in row.index:
            cancelled = row['Cancelled'] != 0
        else:
            # No explicit outcome column in the data: fall back to the same
            # proxy the evaluation helper in this notebook uses, where
            # CR < 0.5 is treated as a completed ride.
            cancelled = row['CR'] >= 0.5
        reward = -1.0 if cancelled else 1.0

        self.current_idx += 1
        done = self.current_idx >= len(self.df)
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Return the feature vector of the current row as float32."""
        # After the final step current_idx equals len(df); clamp so the
        # terminal observation repeats the last row instead of raising
        # IndexError.
        idx = min(self.current_idx, len(self.df) - 1)
        row = self.df.iloc[idx]
        return np.array([
            row['Pickup Location'],
            row['Request to Pickup'],
            row['Time of Day'],
            row['Month of Year'],
            row['RPI'],
            row['DPI'],
            row['CR'],
            row['Historical Demand Forecast']
        ], dtype=np.float32)


In [None]:
# Ablation variants to evaluate: display label -> (saved model path, environment class).
# NOTE(review): each saved model must have been trained with the same
# observation shape as the environment it is paired with here; the evaluation
# run in this notebook fails with "Unexpected observation shape (1, 5) ...
# please use (4,)", which points at a mismatched pairing — verify each entry.
models = {
    "Baseline": ("dwf_rl_baseline_model", RideHailingEnv_Baseline),
    "RPI": ("dwf_rl_RPI_model", RideHailingEnv_RPI),
    "RPI+DPI": ("dwf_rl_RPI_DPI_model", RideHailingEnv_RPI_DPI),
    "RPI+DPI+CR": ("dwf_rl_RPI_DPI_CR_model", RideHailingEnv_RPI_DPI_CR),
    "HDF": ("dwf_rl_HDF_model", RideHailingEnv_HDF),
    "Full DWF": ("dwf_rl_pricing_model_v6", RideHailingEnv)  # assumes this model was trained on all 8 features — TODO confirm
}

In [26]:
from sklearn.preprocessing import LabelEncoder

# Label-encode the categorical feature columns, skipping any column that is
# already numeric so the cell can be re-run safely.
for column in ('Time of Day', 'Month of Year', 'Pickup Location'):
    if df[column].dtype == 'object':
        df[column] = LabelEncoder().fit_transform(df[column])


In [33]:
# Evaluate every model variant and collect its mean cost per outcome,
# keeping the three lists aligned by position for plotting.
labels = []
success_costs = []
cancel_costs = []

for variant, spec in models.items():
    checkpoint, env_cls = spec
    mean_success, mean_cancel = evaluate_cost_by_outcome(checkpoint, env_cls, df)
    labels.append(variant)
    success_costs.append(mean_success)
    cancel_costs.append(mean_cancel)


ValueError: Error: Unexpected observation shape (1, 5) for Box environment, please use (4,) or (n_env, 4) for the observation shape.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Grouped bar chart: one success/cancel pair of bars per model variant.
bar_width = 0.35
positions = np.arange(len(labels))

fig, ax = plt.subplots(figsize=(12, 6))
bar_groups = (
    (-bar_width / 2, success_costs, 'Success', 'blue'),
    (bar_width / 2, cancel_costs, 'Cancel', 'lightblue'),
)
for shift, heights, legend_label, colour in bar_groups:
    # Shift each series half a bar width left/right of the tick position.
    ax.bar(positions + shift, heights, bar_width, label=legend_label, color=colour)

ax.set_ylabel('Average Cost per Ride ($)')
ax.set_title('Cost per Successful vs Cancelled Ride by Model Variant')
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=45)
ax.legend()

plt.tight_layout()
plt.show()
