## Environment

In [1]:

import numpy as np
import matplotlib.pyplot as plt

from imp_act import make
!pip install ipywidgets




### Define the environment

In [2]:
# env = make("ToyExample-v1") # toy
env = make("Montenegro-v1") # small
# env = make("Denmark-v1") # medium
# env = make("Belgium-v1") # large

In [3]:
def rollout(env, policy, verbose=False, plot=False, current_time=0, output_area=None):

    obs = env.reset()
    done = False
    total_reward = 0
    store_rewards = {'reward': [], 
                    'travel_time_reward':[],
                    'maintenance_reward': [],
                    'total_travel_time': []}

    while not done:
        actions = policy(env, obs['edge_observations'], current_time)
        # print(state, actions)
        next_obs, reward, done, info = env.step(actions)

        obs = next_obs
        current_time += 1

        if verbose:
            # print(f"reward: {reward}, travel times: {info}['travel_times], total_travel_time: {info['total_travel_time']}")
            print(f"reward: {reward:.3f}, total_travel_time: {info['total_travel_time']:.3f}")

        total_reward += reward
        store_rewards['reward'].append(reward)
        store_rewards['total_travel_time'].append(info['total_travel_time'])
        store_rewards['travel_time_reward'].append(info['reward_elements'][0])
        store_rewards['maintenance_reward'].append(info['reward_elements'][1])

    store_reward_m=np.array(store_rewards['maintenance_reward'], dtype=np.float64)
    store_reward_t=np.array(store_rewards['travel_time_reward'], dtype=np.float64)
    if plot and output_area is not None:
        with output_area:
            clear_output(wait=True)

            fig, ax = plt.subplots(1, 3, figsize=(16, 4))

            # plot total travel time over episode
            ax[0].plot(store_rewards['total_travel_time'])
            ax[0].set_yscale('symlog')
            ax[0].set_xlabel('timestep')
            ax[0].set_title('total travel time')
            ax[0].grid()

            # plot rewards and reward elements over episode
            ax[1].plot(store_rewards['reward'], color='tab:red', label=r'($r_{tt}$ + $r_m$)')
            ax[1].plot(store_rewards['travel_time_reward'], color='tab:blue', alpha=0.5, label=r'travel_time ($r_{tt}$)')
            ax[1].plot(store_rewards['maintenance_reward'], color='tab:green', alpha=0.5, label=r'maintenance ($r_m$)')
            ax[1].tick_params(axis='y', labelcolor='tab:red')
            ax[1].set_title('reward')
            ax[1].set_xlabel('timestep')
            ax[1].grid()
            ax[1].legend()

            # pie chart of the reward elements
            labels = 'travel-time', 'maintenance'
            sizes = [-np.sum(store_reward_t),-np.sum(store_reward_m)] 
            explode = (0, 0.1)
            ax[2].pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                    shadow=True, startangle=90)
            ax[2].axis('equal')
            ax[2].set_title('reward breakdown')
            plt.show()

        # Now close the plot to avoid consuming memory
        #plt.close(fig)


    return total_reward, store_rewards

In [4]:
def do_nothing_policy(env, edge_obs): # env forces segments to be replaced when they fail
    return [[0 for obs in e] for e in edge_obs]

def failure_replace_policy(env, edge_obs):
    action = [[3 if obs == 3 else 0 for obs in e] for e in edge_obs]
    return action

def always_replace_policy(env, edge_obs):
    return [[4 for obs in e] for e in edge_obs]

In [5]:
def maintenance_policy(env, edge_obs, current_time):
    actions = []
    for e in edge_obs:
        edge_actions = []
        for obs in e:
            if obs >= recon == 0:
                edge_actions.append(4)  # Reconstruction
            elif obs >= major == 0:
                edge_actions.append(3)  # Major repair
            elif obs >= minor == 0:
                edge_actions.append(2)  # Minor repair
            elif current_time % inspection == 0:
                edge_actions.append(1)  # Inspection
            else:
                edge_actions.append(0)  # Do nothing
        actions.append(edge_actions)
    return actions

In [9]:
# Example ranges for each threshold, adjust these based on your specific needs
reconstruction_thresholds = range(0, 5, 1)  # Example range for reconstruction threshold
major_repair_thresholds = range(0, 5, 1)  # Example range for major repair threshold
minor_repair_thresholds = range(0, 5, 1)  # Example range for minor repair threshold
inspection_intervals = range(1, 50, 5)  # Example range for inspection interval

# Store the mean reward for each combination
mean_rewards = {}
NUM_EPISODES=100
for recon in reconstruction_thresholds:
    
  #  reconstruction_4_slider.value=recon
    for major in major_repair_thresholds:
      #  major_repair_3_slider.value=major
        for minor in minor_repair_thresholds:
          #  minor_repair_2_slider.value=minor
            for inspection in inspection_intervals:
                print(inspection)
              #  inspection_1_slider.value=inspection
                # Update your policy function to use these thresholds
                # This might involve modifying the maintenance_policy function or
                # setting global variables that it uses for thresholds
                
                # Initialize storage for rewards
                rewards = np.zeros(NUM_EPISODES)
                
                # Run 100 episodes
                for episode in range(NUM_EPISODES):
                    rewards[episode], _ = rollout(env, maintenance_policy)
                
                # Calculate the mean reward for this combination
                mean_reward = np.mean(rewards)
                mean_rewards[(recon, major, minor, inspection)] = mean_reward
best_combination = max(mean_rewards, key=mean_rewards.get)
best_reward = mean_rewards[best_combination]

print(f"Best combination: Reconstruction={best_combination[0]}, Major Repair={best_combination[1]}, Minor Repair={best_combination[2]}, Inspection Interval={best_combination[3]} with mean reward: {best_reward}")


1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
36
41
46
1
6
11
16
21
26
31
3

Baselines: do-nothing, failure-replace, always-replace