In [10]:
# Autoreload
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import time

import numpy as np

from gym_routing.envs.tsp import TSPEnv
import stable_baselines3 as sb3
from stable_baselines3.common.env_checker import check_env

In [12]:
tsp_env = TSPEnv(render_mode='human')

In [13]:
tsp_env.reset()

({'nodes': array([[0.34226986, 0.52179924],
         [0.65923336, 0.2693412 ],
         [0.10351433, 0.39014623],
         [0.51968294, 0.55767278],
         [0.89153578, 0.61221266]]),
  'current_node': 0,
  'visited_nodes': array([1., 0., 0., 0., 0.])},
 {'distances': 1.4155259004016902, 'visited': 1.0, 'visit_order': [0]})

In [14]:
order = np.arange(1, tsp_env.size)
np.random.shuffle(order)
np.append(order, 0)
for i in order:
    time.sleep(1)
    tsp_env.step(i)

In [15]:
tsp_env.close()

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import gym_routing
import gymnasium as gym
from gymnasium.wrappers import FlattenObservation
from stable_baselines3.common.env_checker import check_env

print("Checking TSPEnv")
tsp_env = gym.make("gym_routing/TSP-v0")
check_env(tsp_env)
print("Done checking TSPEnv")

print("Checking FlattenObservation")
flattened_env = FlattenObservation(tsp_env)
check_env(flattened_env)
print("Done checking FlattenObservation")

Checking TSPEnv
Done checking TSPEnv
Checking FlattenObservation
Done checking FlattenObservation




In [3]:
am_tsp_env = gym.make("gym_routing/ActionMaskedTSP-v0", size=10, render_mode='human')
flattened_env = FlattenObservation(am_tsp_env)
flattened_env.reset()

(array([1.        , 0.        , 0.        , 0.        , 0.        ,
        0.79236318, 0.32100158, 0.12167897, 0.14218514, 0.7524331 ,
        0.0945132 , 0.07769063, 0.44208484, 0.49718899, 0.25051977,
        1.        , 0.        , 0.        , 0.        , 0.        ]),
 {'n_visited_nodes': 1,
  'visit_order': [0],
  'action_mask': array([False,  True,  True,  True,  True])})

In [4]:
# from sb3_contrib.common.wrappers import ActionMasker

# def mask_fn(env: gym.Env) -> np.ndarray:
#     action_mask = env.unwrapped.action_mask()
#     return action_mask

# print(mask_fn(flattened_env))


In [5]:
from sb3_contrib import MaskablePPO

# flattened_env = FlattenObservation(tsp_env)
model = MaskablePPO("MlpPolicy", flattened_env, gamma=0.4, seed=32, verbose=1)
model.learn(total_timesteps=5_000, progress_bar=True)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 2.46     |
| time/              |          |
|    fps             | 1139     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 2.35         |
| time/                   |              |
|    fps                  | 949          |
|    iterations           | 2            |
|    time_elapsed         | 4            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0070371423 |
|    clip_fraction        | 0.0719       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.632       |
|    explained_variance   | -0.0521      |
|    learning_r

<sb3_contrib.ppo_mask.ppo_mask.MaskablePPO at 0x135af7610>

In [16]:
from stable_baselines3 import PPO

size = 10
reg_env = gym.make("gym_routing/TSP-v0", size=size, render_mode='human')
flattened_env = FlattenObservation(reg_env)
model = PPO("MlpPolicy", flattened_env, verbose=1)
model.learn(total_timesteps=100)


KeyboardInterrupt: 

: 

In [14]:
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=100)

In [15]:
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

In [6]:
import time

env = gym.make("gym_routing/TSP-v0", render_mode="human")
flattened_env = FlattenObservation(env)
observation, info = flattened_env.reset()
terminated = False
truncated = False
while not terminated and not truncated:
    action, _states = model.predict(observation, deterministic=True)
    observation, reward, terminated, truncated, info = flattened_env.step(action)
    flattened_env.render()
    time.sleep(1)


  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")


In [7]:
info

{'n_visited_nodes': 1, 'visit_order': [0, 0]}

# OR-Tools

In [13]:
from gym_routing.envs import TSPEnv


def create_data_model(env: TSPEnv):
    data = {}
    try:
        data['distance_matrix'] = env._distance_matrix
    except AttributeError:
        raise ValueError("env has not been reset before calling create_data_model()")
    data['num_vehicles'] = 1
    data['depot'] = 0
    return data

env = TSPEnv(render_mode="human")
env.reset()
data = create_data_model(env)

In [14]:
from ortools.constraint_solver import pywrapcp
from ortools.constraint_solver import routing_enums_pb2

manager = pywrapcp.RoutingIndexManager(
    len(data['distance_matrix']),
    data['num_vehicles'],
    data['depot'],
)
routing = pywrapcp.RoutingModel(manager)

def distance_callback(from_index, to_index):
    from_node = manager.IndexToNode(from_index)
    to_node = manager.IndexToNode(to_index)
    return data['distance_matrix'][from_node][to_node]

transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

# search_parameters = pywrapcp.DefaultRoutingSearchParameters()
# search_parameters.first_solution_strategy = (
#     routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
# )

search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.local_search_metaheuristic = (
    routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
search_parameters.time_limit.seconds = 30
search_parameters.log_search = True

def print_solution(manager, routing, solution):
    """Prints solution on console."""
    print(f'Objective: {solution.ObjectiveValue()}')
    index = routing.Start(0)
    plan_output = 'Route for vehicle 0:\n'
    route_distance = 0
    while not routing.IsEnd(index):
        plan_output += f'{manager.IndexToNode(index)} -> '
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += env._distance_matrix[manager.IndexToNode(previous_index)][manager.IndexToNode(index)]
        env.step(manager.IndexToNode(index))
    plan_output += f'{manager.IndexToNode(index)}\n'
    plan_output += f'Distance of the route: {route_distance}m\n'
    print(plan_output)

In [15]:
solution = routing.SolveWithParameters(search_parameters)
if solution:
    print_solution(manager, routing, solution)

Objective: 0


I0000 00:00:1708815434.215104 1135810 search.cc:282] Start search (memory used = 146.07 MB)
I0000 00:00:1708815434.215137 1135810 search.cc:282] Root node processed (time = 0 ms, constraints = 28, memory used = 146.07 MB)
I0000 00:00:1708815434.215275 1135810 search.cc:282] Solution #0 (0, time = 0 ms, branches = 34, failures = 1, depth = 33, memory used = 146.08 MB, limit = 0%)
I0000 00:00:1708815434.215406 1135810 search.cc:282] Finished search tree (time = 0 ms, branches = 34, failures = 35, memory used = 146.08 MB)
I0000 00:00:1708815434.215418 1135810 search.cc:282] End search (time = 1 ms, branches = 34, failures = 35, memory used = 146.08 MB, speed = 34000 branches/s)


Route for vehicle 0:
0 -> 4 -> 3 -> 2 -> 1 -> 0
Distance of the route: 2.325333645795407m

