In [1]:
# Importing required libraries

import os 
import sys
import random
import argparse
import warnings
warnings.simplefilter('ignore')


import gym
from gym import spaces

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import pickle
from numba import njit

import torch
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  long_ = _make_signed(np.long)


In [2]:
import tianshou
from typing import Any, Callable, List, Optional, Tuple, Union, Dict
from tianshou.env import DummyVectorEnv
from tianshou.data import Batch, to_torch, to_torch_as
from tianshou.policy import BasePolicy

from tianshou.trainer import OnpolicyTrainer, onpolicy_trainer
from tianshou.utils import TensorboardLogger


from tianshou.env.worker import (
    DummyEnvWorker,
    EnvWorker,
    RayEnvWorker,
    SubprocEnvWorker,
)

  and should_run_async(code)
  from ._conv import register_converters as _register_converters


In [3]:
from env.VRPEnv import VRPEnv
from data.Graph_Generator import OSM_graph
from data.VRPCollector import Collector
from data.BufferManager import ReplayBuffer, VectorReplayBuffer

from nets.attention_model import AttentionModel
from policy.VRPPolicy import REINFORCEPolicy

  and should_run_async(code)


In [4]:
# Read the saved train / test graph datasets back from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these paths must only ever point at files from a trusted source.

train_data_path = "./data/train/"+"train_graphs_nu_1000x10.pickle"
with open(train_data_path, mode='rb') as train_handle:
    load_train_graphs = pickle.load(train_handle)

test_data_path = "./data/test/"+"test_graphs_nu_100x10.pickle"
with open(test_data_path, mode='rb') as test_handle:
    load_test_graphs = pickle.load(test_handle)

  and should_run_async(code)


In [5]:
# Sanity check of the loaded datasets: report how many graphs each split
# holds, then show the node features of one sample graph per split.
print(
    f"# of Training graphs: {len(load_train_graphs)}",
    f"# of Test graphs: {len(load_test_graphs)}",
    sep="\n",
)

n = 10  # position of the sample graph inspected in both splits
print(load_train_graphs[n]["node_features"])
print("\n", load_test_graphs[n]["node_features"])

# of Training graphs: 1000
# of Test graphs: 100
tensor([[1.0000, 0.0000, 0.0000, 0.3009, 0.3019],
        [0.0000, 1.0000, 0.2600, 0.3019, 0.3017],
        [0.0000, 1.0000, 0.1200, 0.2996, 0.3011],
        [0.0000, 1.0000, 0.2400, 0.2996, 0.3017],
        [0.0000, 1.0000, 0.1200, 0.3010, 0.3018],
        [0.0000, 1.0000, 0.4600, 0.3005, 0.3012],
        [0.0000, 1.0000, 0.2400, 0.3052, 0.3016],
        [0.0000, 1.0000, 0.2700, 0.3019, 0.3013],
        [0.0000, 1.0000, 0.3800, 0.3044, 0.3018],
        [0.0000, 1.0000, 0.1000, 0.3002, 0.3012],
        [0.0000, 1.0000, 0.4800, 0.3014, 0.3014]])

 tensor([[1.0000, 0.0000, 0.0000, 0.3019, 0.3018],
        [0.0000, 1.0000, 0.1800, 0.3014, 0.3017],
        [0.0000, 1.0000, 0.4600, 0.3001, 0.3015],
        [0.0000, 1.0000, 0.4100, 0.2997, 0.3017],
        [0.0000, 1.0000, 0.1100, 0.3048, 0.3010],
        [0.0000, 1.0000, 0.3000, 0.3006, 0.3015],
        [0.0000, 1.0000, 0.5300, 0.3014, 0.3018],
        [0.0000, 1.0000, 0.2200, 0.3023, 0.3015]

In [6]:
# Build one vectorized environment per dataset split; every graph gets its own VRPEnv.

def _vrp_env_fns(graphs):
    """Return one VRPEnv factory (callable with no arguments) per graph in ``graphs``.

    Each lambda binds its graph and its position through default arguments,
    so every factory keeps its own (instance, idx) pair instead of sharing
    the comprehension's loop variables.
    """
    return [lambda instance=graph, idx=i: VRPEnv(instance, idx) for i, graph in enumerate(graphs)]


train_envs = DummyVectorEnv(_vrp_env_fns(load_train_graphs))
test_envs = DummyVectorEnv(_vrp_env_fns(load_test_graphs))

In [7]:
# Instantiate the attention model with the hyperparameters used in this notebook.
model = AttentionModel(
    embedding_dim=64,
    hidden_dim=16,
    n_encode_layers=2,
    n_heads=8,
    normalization='batch',
    tanh_clipping=10.0,
    mask_inner=True,
    mask_logits=True,
    checkpoint_encoder=False,
    shrink_size=None,
)

# Adam over all model parameters; the optimizer and the model are then handed
# to the REINFORCE policy that the cells below use.
optim = torch.optim.Adam(model.parameters(), lr=1e-5)
VRPpolicy = REINFORCEPolicy(model, optim)

  @overload(np.MachAr)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)


In [8]:
# Reset the vectorized test environments and wrap what they return in a Batch.
initial_reset = test_envs.reset()
init_obs = Batch(initial_reset)

# Display the last entry of the edge features
# (presumably the last test environment's edge matrix; confirm against VRPEnv).
init_obs.edge_features[-1]

  and should_run_async(code)


tensor([[ -0.0000, -32.1157, -16.7518,  -5.5887, -24.2328, -17.4735,  -4.0078,
          -6.8287,  -5.2380,  -4.3609,  -7.5235],
        [-34.2876,  -0.0000, -48.0456, -23.9146, -11.0286, -44.7945, -37.1529,
         -28.9573, -43.3069, -31.5637, -33.8855],
        [-16.5722, -45.7372,  -0.0000, -21.9506, -37.8543, -28.3257, -13.0659,
         -22.1283, -12.9903, -17.9547, -18.1834],
        [ -5.4611, -23.4132, -21.9878,  -0.0000, -13.7675, -17.7903,  -9.2438,
          -6.8447, -10.3153,  -8.1392, -10.4610],
        [-24.1986, -11.0326, -37.9567, -13.8256,  -0.0000, -27.2828, -27.0639,
         -17.2468, -33.2179, -21.4747, -23.7966],
        [-17.5278, -43.3740, -28.4183, -18.2265, -37.4426,  -0.0000, -19.0960,
         -11.2965, -16.5368, -21.5281, -24.9208],
        [ -3.8145, -34.8929, -12.9117,  -9.1929, -27.0100, -19.1639,  -0.0000,
          -9.4733,  -3.9865,  -7.1105,  -7.3916],
        [ -6.8463, -28.9124, -22.1851,  -7.2146, -17.4165, -11.2231,  -9.3955,
          -0.0000,

In [9]:
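# Disabled debugging snippet: manual step-by-step rollout of VRPpolicy on test_envs, kept for reference.
# NOTE(review): `i` is never incremented in the loop below, so re-enabling it as-is would print "Iter 0" every time.
#test_envs.reset()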
#obs_next = init_obs
#solved = False
#i = 0
#while not solved:
##for _ in range (20):
#    print(f"\nIter {i}------------------------------------------------")
#    action2 = VRPpolicy(obs_next)
#    result2 = test_envs.step(action2.act)
#    obs_next, rew, done, info = result2
#    obs_next = Batch(obs_next)
#    print(done)
#    print(obs_next)
#    solved = done.all()
#    #print(obs_next["curr_pos_idx"])

In [10]:
# Size passed to each vectorized replay buffer.
buffer_size = 100_000

# One sub-buffer per environment: buffer_num matches the number of graphs
# (and therefore environments) in the corresponding split.
test_replaybuffer = VectorReplayBuffer(
    buffer_size,
    buffer_num=len(load_test_graphs),
)
train_replaybuffer = VectorReplayBuffer(
    buffer_size,
    buffer_num=len(load_train_graphs),
)

# Collectors tie the shared policy to each set of environments and its buffer.
# The test collector is created first, as before.
test_collector = Collector(
    VRPpolicy,
    test_envs,
    test_replaybuffer,
)
train_collector = Collector(
    VRPpolicy,
    train_envs,
    train_replaybuffer,
)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)


VectorReplayBuffer()
Collector reset!
VectorReplayBuffer()
Collector reset!


In [11]:
# Smoke test of the test collector: roll out 10 episodes and summarise them.

collect_result = test_collector.collect(n_episode=10)
print(collect_result)

# Each pair is (message template, key of collect_result to report).
for message_template, result_key in (
    ("\nRewards of all episodes are {}", "rews"),
    ("\nAverage episode reward is {}.", "rew"),
    ("\nAverage episode length is {}.", "len"),
):
    print(message_template.format(collect_result[result_key]))

  and should_run_async(code)


{'n/ep': 10, 'n/st': 130, 'rews': array([-118.56819916, -123.47800064, -151.58570051,  -68.28569961,
       -134.17100143, -122.25099969, -141.29860353, -175.48810005,
       -144.99900389, -140.45909989]), 'bl_rews': array([ -57.30839884,  -78.69490027,  -89.54709959,  -47.11660004,
        -91.2872982 ,  -79.67569995,  -87.18690085,  -98.10980082,
       -101.1696035 ,  -87.31719744]), 'lens': array([13, 13, 13, 13, 13, 13, 13, 13, 13, 13]), 'idxs': array([   0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000]), 'rew': -132.0584408402443, 'bl_rew': -81.74134995043278, 'len': 13.0, 'rew_std': 26.48411020587299, 'len_std': 0.0}

Rewards of all episodes are [-118.56819916 -123.47800064 -151.58570051  -68.28569961 -134.17100143
 -122.25099969 -141.29860353 -175.48810005 -144.99900389 -140.45909989]

Average episode reward is -132.0584408402443.

Average episode length is 13.0.
