In [2]:
import sys
from pathlib import Path
repo_root = Path.cwd().parent
sys.path.insert(0, str(repo_root / "src"))

import numpy as np
import pandas as pd
import random

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed

from GurobiParamEnv import InexactGBDEnv 

from policies import PPOPolicy, OptimalPolicy, RandomPolicy, ExponentialPolicy

In [7]:
def make_env():
    """Build one fresh ``InexactGBDEnv`` with its default arguments.

    Kept as a zero-argument factory because ``DummyVecEnv`` is given a list
    of callables rather than ready-made environments.
    """
    gbd_env = InexactGBDEnv()
    return gbd_env

# 1) Wrap the env factory in a single-env DummyVecEnv, then restore the saved
#    VecNormalize wrapper around it. Done once; every policy below shares `env`.
# NOTE(review): the path is relative to the notebook's working directory —
# confirm it points at the statistics saved alongside the trained model.
raw_env = DummyVecEnv([make_env])
env = VecNormalize.load("../models/vecnormalize_benders.pkl", raw_env)
env.training = False    # freeze running stats
env.norm_reward = False # use true rewards

Configuring transition blocks...
Configuring initial transition blocks...
Configuring scheduling block...
Adding noise...
In scheduling_opt, calculating transition time bounds...
In scheduling_opt, calculating initial transition time bounds...
Configuring transition cost bounds...
Configuring initial transition cost bounds...


In [3]:
# 3) Roll out one policy for a single seeded episode
def evaluate_n(agent, env, policy_name, seed=100):
    """Run ``agent`` on ``env`` until the episode ends and summarise it.

    Despite the name, exactly one episode is played per call; callers loop
    over seeds to collect several episodes.

    Parameters
    ----------
    agent : object exposing ``predict(obs, deterministic=True)`` that returns
        an ``(action, state)`` pair.
    env : environment whose ``step`` returns ``(obs, reward, done, info)``
        where ``reward`` and ``info`` are indexable per sub-environment; only
        index 0 is read here (assumes a single-env vectorised wrapper).
    policy_name : label stored under the ``"policy"`` key of the record.
    seed : value used to seed Python's ``random`` and NumPy's global RNG.

    Returns
    -------
    (rec, info) : ``rec`` is a dict with the keys ``policy``,
        ``total_reward``, ``avg_mptime``, ``total_mptime``, ``total_sptime``
        and ``iterations``; ``info`` is whatever the last ``env.step``
        returned as its fourth element.
    """
    # Only the two global RNGs are seeded here; the environment itself is not
    # re-seeded by this function.
    random.seed(seed)
    np.random.seed(seed)

    observation = env.reset()
    episode_reward = 0.0
    master_times, sub_times = [], []

    # The loop body always runs at least once, so the timing lists are
    # never empty when they are averaged below.
    while True:
        chosen_action, _state = agent.predict(observation, deterministic=True)
        observation, reward, done, info = env.step(chosen_action)

        step_info = info[0]
        episode_reward += reward[0]
        master_times.append(step_info["mp_time"])
        sub_times.append(step_info["sp_time"])

        if done:
            break

    master_total = np.sum(master_times)
    sub_total = np.sum(sub_times)

    rec = {
        "policy": policy_name,
        "total_reward": episode_reward,
        "avg_mptime": np.mean(master_times),
        "total_mptime": master_total,
        "total_sptime": sub_total,
        "iterations": len(master_times),
    }
    print('mptime =', master_total)
    print('sptime =', sub_total)
    return rec, info

In [4]:
# Restore the trained PPO agent and bind it to the evaluation env.
# NOTE(review): this path is relative to the working directory, while the
# VecNormalize statistics are read from ../models/ — confirm both locations.
model = PPO.load("ppo_benders_model", env=env)

# Policies under comparison; all of them are constructed on the same `env`.
rl_policy = PPOPolicy(model, env)
baseline_rand = RandomPolicy(env)
# 0.3 and 0.8 are ExponentialPolicy's two positional settings; their meaning
# is defined in policies.py, not in this notebook.
baseline_exp = ExponentialPolicy(env, 0.3, 0.8)
baseline_opt = OptimalPolicy(env)

In [None]:
n_episodes = 50
# 4) Run evaluations
recs_ppo, recs_opt, recs_const, recs_rand = [], [], [], []

# (policy, label written to the "policy" column, list that collects its records)
# Order matters: within an episode the policies run RL -> Optimal ->
# Exponential -> Random. `recs_const` holds the Exponential baseline's records.
contenders = (
    (rl_policy, "RL", recs_ppo),
    (baseline_opt, "Optimal", recs_opt),
    (baseline_exp, "Exponential", recs_const),
    (baseline_rand, "Random", recs_rand),
)

for episode in range(n_episodes):
    # Every policy gets the same seed for a given episode.
    episode_seed = episode + 10
    for policy, label, sink in contenders:
        # evaluate_n returns (record, last_info); only the record is kept.
        sink.append(evaluate_n(policy, env, label, seed=episode_seed)[0])

In [None]:
# df = pd.read_csv('computational_results.csv')
# One row per (policy, episode), built from the records gathered by the
# evaluation loop.
df = pd.DataFrame(recs_ppo + recs_opt + recs_const + recs_rand)

# Total runtime of an episode is the sum of its two recorded time totals.
df['total_runtime'] = df['total_mptime'] + df['total_sptime']

# Per-policy mean of every metric.
summary = df.groupby("policy").mean().reset_index()
# Column names must exist in `df`; the per-step average recorded by
# evaluate_n is "avg_mptime" (there is no "avg_runtime" column).
metrics = ["total_reward", "avg_mptime", "total_runtime", "iterations"]
x = np.arange(len(metrics))

# Per-policy slices. The labels must match the names passed to evaluate_n
# ("RL", "Optimal", "Exponential", "Random"); filtering on the older labels
# "DRL" / "Constant" silently produced empty frames.
# reset_index returns a new frame, so each slice is independent of `df`.
df_base = df.loc[df['policy'] == 'Optimal'].reset_index(drop=True)
df_base_rand = df.loc[df['policy'] == 'Random'].reset_index(drop=True)
df_base_const = df.loc[df['policy'] == 'Exponential'].reset_index(drop=True)
df_ppo = df.loc[df['policy'] == 'RL'].reset_index(drop=True)

In [None]:
# Uncomment to persist the results; the file name matches the one read back below.
# df.to_csv('test_stats_data.csv', index=False, encoding='utf-8')

In [5]:
def data_clean(df):
    """
    Drop, for every policy, the single row with the largest ``total_mptime``.

    One row is removed per distinct value of ``policy`` (not one row overall).
    The input frame is left untouched; the returned frame has a fresh
    0..n-1 index.
    """
    # Index label of the slowest master-time row within each policy group.
    slowest_per_policy = df.groupby('policy')['total_mptime'].idxmax()

    return df.drop(index=slowest_per_policy).reset_index(drop=True)
# Load the saved per-episode results and strip each policy's slowest episode.
# Note: this rebinds `df`, replacing any frame built earlier in the notebook.
df = data_clean(pd.read_csv('test_stats_data.csv'))
# Every row in this file belongs to the five-product instance.
df['num_prod'] = 5


In [6]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Fixed colour per policy label so both panels use the same colours.
color_map = {
    'RL':         'rgb(255,127,14)',
    'Random':     'rgb(44,160,44)',
    'Exponential':'rgb(148,103,189)',
    'Optimal':    'rgb(31,119,180)',
}

# Display name drawn on the x-axis for each policy label found in df['policy'].
label_map = {
    'RL':          'RL-iGBD',
    'Random':      'Rand-iGBD',
    'Exponential': 'Exp-iGBD',
    'Optimal':     'GBD'
}

# Left-to-right order of the boxes inside each panel.
policy_order = ['RL', 'Random', 'Exponential', 'Optimal']

scales = sorted(df['num_prod'].unique())

# One row, two panels: master runtime (left) and total runtime (right).
cols = 2
rows = 1

# (subplot column, df column plotted in it)
panels = [(1, 'total_mptime'), (2, 'total_runtime')]

# Titles are hard-coded for the five-product instance.
subplot_titles = [
    "<span style='font-size:20px;'> Five Products: Master Runtime </span>",
    "<span style='font-size:20px;'> Five Products: Total Runtime </span>",
]

fig = make_subplots(
    rows=rows, cols=cols,
    subplot_titles=subplot_titles,
    horizontal_spacing=0.12,
    vertical_spacing=0.12
)

# Every scale present in `df` is drawn into the same two panels; with a
# single num_prod value that is exactly one box per policy per panel.
for i, scale in enumerate(scales):
    d = df[df['num_prod'] == scale]
    for panel_col, metric in panels:
        for policy in policy_order:
            fig.add_trace(
                go.Box(
                    y=d.loc[d['policy'] == policy, metric],
                    name=label_map[policy],
                    boxpoints='all',
                    marker_color=color_map[policy],
                    # Only the first scale's left-panel traces may carry a
                    # legend entry (the legend itself is disabled in the
                    # layout below).
                    showlegend=(panel_col == 1 and i == 0)
                ),
                row=1, col=panel_col
            )

# Axis titles and fonts.
fig.update_xaxes(tickfont=dict(family="Times New Roman"))
fig.update_yaxes(row=1, col=1, title_text="Cumulative master runtime (s)",
                 title_font=dict(family="Times New Roman", size=20))

fig.update_yaxes(row=1, col=2, title_text="Cumulative total runtime (s)",
                 title_font=dict(family="Times New Roman", size=20))

# Boxed axes with outside ticks; horizontal grid lines only.
fig.update_xaxes(showgrid=False, gridcolor='lightgrey', gridwidth=1,
                 zeroline=False, showline=True, mirror=True,
                 linecolor='black', ticks='outside')
fig.update_yaxes(showgrid=True, gridcolor='lightgrey', gridwidth=1,
                 zeroline=False, showline=True, mirror=True,
                 linecolor='black', ticks='outside')

fig.update_layout(
    font=dict(family="Times New Roman", color="black", size=18),
    height=440 * rows,
    width=420 * cols,
    showlegend=False,
    margin=dict(l=70, r=10, t=25, b=0),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# pad the x-axis so boxes aren't jammed against the edges
# NOTE(review): with four categories this gives the range [-0.6, 3.4], i.e.
# slightly more room on the left than on the right — confirm that is intended.
pad_left  = -0.6
pad_right = len(label_map) - 0.6
for cc in range(1, cols+1):
    fig.update_xaxes(range=[pad_left, pad_right], row=1, col=cc)

fig.show()

In [None]:
# fig.write_image("statistical_total.png", scale=5)