## Environment

In [78]:

import numpy as np
import matplotlib.pyplot as plt

import imp_act
from imp_act import make


### Define the environment

In [77]:
# Environment instance shared by every rollout in this notebook.
# Exactly one preset is active; the commented lines are the alternative
# presets (labelled small / medium / large by their trailing comments).
env = make("ToyExample-v1") # toy
#env = make("Montenegro-v1") # small
# env = make("Denmark-v1") # medium
# env = make("Belgium-v1") # large

In [92]:
def _plot_rollout_summary(store_rewards):
    """Draw the three-panel summary of one episode and release the figure.

    Panels: total travel time per step, reward and its two components per
    step, and a pie chart of the absolute summed reward components.
    """
    fig, ax = plt.subplots(1, 3, figsize=(16, 4))

    # plot total travel time over episode
    ax[0].plot(store_rewards['total_travel_time'])
    ax[0].set_yscale('symlog')
    ax[0].set_xlabel('timestep')
    ax[0].set_title('total travel time')
    ax[0].grid()

    # plot rewards and reward elements over episode
    ax[1].plot(store_rewards['reward'], color='tab:red', label=r'($r_{tt}$ + $r_m$)')
    ax[1].plot(store_rewards['travel_time_reward'], color='tab:blue', alpha=0.5, label=r'travel_time ($r_{tt}$)')
    ax[1].plot(store_rewards['maintenance_reward'], color='tab:green', alpha=0.5, label=r'maintenance ($r_m$)')
    ax[1].tick_params(axis='y', labelcolor='tab:red')
    ax[1].set_title('reward')
    ax[1].set_xlabel('timestep')
    ax[1].grid()
    ax[1].legend()

    # pie chart of the reward elements (magnitudes, since pie sizes must be non-negative)
    labels = 'travel-time', 'maintenance'
    sizes = [abs(np.sum(store_rewards['travel_time_reward'])),
             abs(np.sum(store_rewards['maintenance_reward']))]
    explode = (0, 0.1)
    ax[2].pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
              shadow=True, startangle=90)
    ax[2].axis('equal')
    ax[2].set_title('reward breakdown')
    plt.show()

    # Every slider change triggers a new rollout and therefore a new figure;
    # close it explicitly so figures do not pile up in pyplot's registry.
    plt.close(fig)


def rollout(env, policy, verbose=False, plot=False, current_time=0, output_area=None):
    """Run one episode of `env` under `policy` and collect per-step rewards.

    Parameters
    ----------
    env : object
        Environment exposing `reset()` (returning a mapping with an
        'edge_observations' entry) and `step(actions)` (returning
        `(obs, reward, done, info)`).
    policy : callable
        Called as `policy(env, edge_observations, current_time)`; must return
        the actions passed to `env.step`.
    verbose : bool
        Print the reward and total travel time of every step.
    plot : bool
        Draw the episode summary. Only takes effect when `output_area` is
        also given.
    current_time : int
        Initial value of the step counter handed to `policy`; incremented by
        one after every step.
    output_area : context manager or None
        Output widget (or similar) the summary figure is rendered into.

    Returns
    -------
    (total_reward, store_rewards)
        The summed reward and a dict of per-step lists with keys 'reward',
        'travel_time_reward', 'maintenance_reward' and 'total_travel_time'.
    """
    obs = env.reset()
    done = False
    total_reward = 0
    store_rewards = {'reward': [],
                     'travel_time_reward': [],
                     'maintenance_reward': [],
                     'total_travel_time': []}

    while not done:
        actions = policy(env, obs['edge_observations'], current_time)
        obs, reward, done, info = env.step(actions)
        current_time += 1

        if verbose:
            print(f"reward: {reward:.3f}, total_travel_time: {info['total_travel_time']:.3f}")

        total_reward += reward
        store_rewards['reward'].append(reward)
        store_rewards['total_travel_time'].append(info['total_travel_time'])
        # reward_elements[0] / [1] are stored as the travel-time / maintenance parts.
        store_rewards['travel_time_reward'].append(info['reward_elements'][0])
        store_rewards['maintenance_reward'].append(info['reward_elements'][1])

    if plot and output_area is not None:
        # Imported here so this function does not depend on a later cell
        # having already pulled clear_output into the notebook namespace.
        from IPython.display import clear_output

        with output_area:
            clear_output(wait=True)
            _plot_rollout_summary(store_rewards)

    return total_reward, store_rewards

In [80]:
def do_nothing_policy(env, edge_obs, current_time=None):
    """Baseline policy: action 0 for every segment of every edge.

    Original author's note: the env forces segments to be replaced when they
    fail.

    `current_time` is accepted (and ignored) because `rollout` calls every
    policy as `policy(env, edge_obs, current_time)`; it is optional so that
    two-argument calls keep working.
    """
    return [[0 for _ in edge] for edge in edge_obs]

def failure_replace_policy(env, edge_obs, current_time=None):
    """Baseline policy: action 3 for segments whose observation equals 3, else 0.

    `current_time` is accepted (and ignored) because `rollout` calls every
    policy as `policy(env, edge_obs, current_time)`; it is optional so that
    two-argument calls keep working.

    NOTE(review): maintenance_policy labels action 3 "Major repair" and
    action 4 "Reconstruction" -- confirm 3 is the intended action here.
    """
    return [[3 if obs == 3 else 0 for obs in edge] for edge in edge_obs]

def always_replace_policy(env, edge_obs, current_time=None):
    """Baseline policy: action 4 for every segment of every edge.

    `current_time` is accepted (and ignored) because `rollout` calls every
    policy as `policy(env, edge_obs, current_time)`; it is optional so that
    two-argument calls keep working.
    """
    return [[4 for _ in edge] for edge in edge_obs]

In [94]:
def maintenance_policy(env, edge_obs, current_time):
    """Time-based policy: apply the same scheduled action to every segment.

    Each slider holds an interval in timesteps. At `current_time > 0`, the
    first interval (checked in the order reconstruction, major repair, minor
    repair, inspection) that divides `current_time` selects the action;
    otherwise the action is 0 (do nothing). Observations are not inspected.

    An interval that is not positive is treated as "never" instead of raising
    ZeroDivisionError from the modulo.

    Returns a nested list shaped like `edge_obs` filled with the chosen action.
    """
    # (action, interval) pairs in priority order, matching the original if/elif chain.
    schedule = (
        (4, reconstruction_4_slider.value),  # Reconstruction
        (3, major_repair_3_slider.value),    # Major repair
        (2, minor_repair_2_slider.value),    # Minor repair
        (1, inspection_1_slider.value),      # Inspection
    )

    # The decision depends only on the clock, so compute it once per call.
    action = 0  # Do nothing
    if current_time > 0:
        for candidate, interval in schedule:
            if interval > 0 and current_time % interval == 0:
                action = candidate
                break

    return [[action for _ in edge] for edge in edge_obs]

In [95]:
import ipywidgets as widgets
from IPython.display import display
from IPython.display import clear_output
# Output area that rollout() renders its summary figure into.
output = widgets.Output()

# One slider per maintenance action; each value is the interval (in timesteps)
# read by maintenance_policy. The minimum is 1 because the policy takes
# `current_time % value`, which raises ZeroDivisionError for a value of 0.
inspection_1_slider = widgets.FloatSlider(value=5, min=1, max=50, step=1, description='Inspection:', continuous_update=False)
minor_repair_2_slider = widgets.FloatSlider(value=10, min=1, max=50, step=1, description='Minor Repair:', continuous_update=False)
major_repair_3_slider = widgets.FloatSlider(value=12, min=1, max=50, step=1, description='Major Repair:', continuous_update=False)
reconstruction_4_slider = widgets.FloatSlider(value=30, min=1, max=50, step=1, description='Reconstruction:', continuous_update=False)

# Define the function to run when a slider's value changes
def on_slider_change(change):
    """Slider callback: wipe the output area and re-run the rollout.

    `change` is the notification payload handed over by the widget; it is not
    inspected. The fresh results are published through the module-level
    names `total_reward` and `store_rewards`.
    """
    global env, total_reward, store_rewards

    with output:
        clear_output(wait=True)

    results = rollout(env, maintenance_policy, plot=True, output_area=output)
    total_reward, store_rewards = results

# Re-run the rollout whenever any interval slider reports a new value.
_interval_sliders = (
    inspection_1_slider,
    minor_repair_2_slider,
    major_repair_3_slider,
    reconstruction_4_slider,
)
for _slider in _interval_sliders:
    _slider.observe(on_slider_change, names='value')

# Show the controls, then the area the figure is drawn into.
display(*_interval_sliders)
display(output)

# Trigger one rollout up front so `total_reward` and `store_rewards` exist
# before any slider is touched.
on_slider_change({'new': inspection_1_slider.value})




FloatSlider(value=5.0, continuous_update=False, description='Inspection:', max=50.0, step=1.0)

FloatSlider(value=10.0, continuous_update=False, description='Minor Repair:', max=50.0, step=1.0)

FloatSlider(value=12.0, continuous_update=False, description='Major Repair:', max=50.0, step=1.0)

FloatSlider(value=30.0, continuous_update=False, description='Reconstruction:', max=50.0, step=1.0)

Output()

In [86]:
# Summed reward of the most recent rollout run by on_slider_change.
total_reward

-1445945677.4193978

In [84]:
# Per-step reward traces of the most recent rollout run by on_slider_change.
# NOTE(review): this echoes every timestep of every trace; consider showing
# a slice or summary instead of the full dict.
store_rewards

{'reward': [-408707067.7034704,
  -24000.0,
  -24000.0,
  -24000.0,
  -24000.0,
  -35314757.75392421,
  -24000.0,
  -24000.0,
  -24000.0,
  -24000.0,
  -16410231.526322346,
  -24000.0,
  -142238315.80193916,
  -24000.0,
  -24000.0,
  -80000.0,
  -24000.0,
  -51474787.07042186,
  -24000.0,
  -24000.0,
  -49668248.53696035,
  -24000.0,
  -24000.0,
  -24000.0,
  -61705802.90790661,
  -80000.0,
  -24000.0,
  -24000.0,
  -24000.0,
  -51474787.07042186,
  -408707067.7034704,
  -24000.0,
  -24000.0,
  -24000.0,
  -24000.0,
  -80000.0,
  -61705802.90790661,
  -24000.0,
  -24000.0,
  -24000.0,
  -16410231.526322346,
  -24000.0,
  -24000.0,
  -24000.0,
  -24000.0,
  -80000.0,
  -24000.0,
  -44474787.07042186,
  -96541789.83990978,
  -24000.0],
 'travel_time_reward': [-128707067.70347041,
  -0.0,
  -0.0,
  -0.0,
  -0.0,
  -244757.75392421611,
  -0.0,
  -0.0,
  -0.0,
  -0.0,
  -386231.52632234676,
  -0.0,
  -13220315.80193916,
  -0.0,
  -0.0,
  -0.0,
  -0.0,
  -9453787.070421858,
  -0.0,
  -0.0,
 

In [None]:
from IPython.display import clear_output
import matplotlib.pyplot as plt

# NOTE(review): `x` and `y` are not defined in any visible cell, so this
# cell would raise NameError on a fresh kernel -- it looks like a leftover
# example of the "clear then redraw" pattern; confirm and remove or supply data.

# Generate the new plot
fig, ax = plt.subplots()
ax.plot(x, y)

# Clear the previous output
clear_output(wait=True)

# Display the new plot
plt.show()


In [None]:
# NOTE(review): `act` is not defined in any visible cell; this looks like a
# stray leftover that would raise NameError if executed -- confirm and remove.
act

Baselines: do-nothing, failure-replace, always-replace

In [None]:
# Number of episodes each baseline policy is evaluated on.
NUM_EPISODES = 100

# One total-reward slot per episode for each baseline.
store_do_nothing_rewards = np.zeros(NUM_EPISODES)
store_failure_replace_rewards = np.zeros(NUM_EPISODES)
store_always_replace_rewards = np.zeros(NUM_EPISODES)

# Within an episode index the policies run in this fixed order.
_baselines = (
    (do_nothing_policy, store_do_nothing_rewards),
    (failure_replace_policy, store_failure_replace_rewards),
    (always_replace_policy, store_always_replace_rewards),
)

for episode in range(NUM_EPISODES):
    for _policy, _episode_totals in _baselines:
        _episode_totals[episode], _ = rollout(env, _policy)

print(f'Mean reward (do-nothing): {np.mean(store_do_nothing_rewards):.3e}')
print(f'Mean reward (failure replace): {np.mean(store_failure_replace_rewards):.3e}')
print(f'Mean reward (always replace): {np.mean(store_always_replace_rewards):.3e}')

In [None]:
# plot bar chart
import matplotlib.pyplot as plt

# Compare the mean episode reward of the three baseline policies.
fig, ax = plt.subplots(figsize=(8, 4))

_x = ['do-nothing', 'failure-replace', 'always-replace']
_y = np.array([
    episode_totals.mean()
    for episode_totals in (
        store_do_nothing_rewards,
        store_failure_replace_rewards,
        store_always_replace_rewards,
    )
])

ax.bar(_x, _y)

# Symmetric-log axis: the means are plotted as signed values on a log-like scale.
ax.set_yscale('symlog')
ax.set_xlabel('Policy')
ax.set_ylabel('Mean reward')
plt.show()