## NOTE: ENSURE BEFORE RUNNING FINAL EXPERIMENT:

- CHECKS: 
    - Check and update train and test sizes
    - Check and update experiment_idx, training params and model params
    - Check saving directories are in place and ensure no overwriting of existing files there (due to file_paths where saving
    
- After experiment run:
    - Check all saved results and buffers + model
    - Get saved results, observe and document data/plots below
    - Generate tours from buffer & results using the function generate_tours
    - Document required data points in a final consolidated table

In [3]:
# Importing required libraries
!pip install -r requirements.txt

import os 
import sys
import random
import argparse
import warnings
warnings.simplefilter('ignore')
from functools import partial

import gym
from gym import spaces

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import pickle
from ray import tune

import torch
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter

Collecting gym
  Using cached gym-0.26.2.tar.gz (721 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting tianshou
  Using cached tianshou-0.5.0-py3-none-any.whl (162 kB)
Collecting ray
  Using cached ray-2.3.0-cp39-cp39-manylinux2014_x86_64.whl (58.6 MB)
Collecting gym-notices>=0.0.4
  Using cached gym_notices-0.0.8-py3-none-any.whl (3.0 kB)
Collecting numba>=0.51.0
  Using cached numba-0.56.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.5 MB)
Collecting gymnasium>=0.26.0
  Using cached gymnasium-0.27.1-py3-none-any.whl (883 kB)
Collecting virtualenv>=20.0.24
  Using cached virtualenv-20.21.0-py3-none-any.whl (8.7 MB)
Collecting jax-jumpy>=0.2.0
  Using cached jax_jumpy-1.0.0-py3-none-any.whl (20 kB)
Collecting gymnasium-notices>=0.0.1
  Using cached gymnasium_notices-0.0.1-py3-none-any.whl (2.8 kB)
Collecting llvmlite<0.40,>=0.39.0dev0
  

In [4]:
# Modules from tianshou framework

import tianshou
from typing import Any, Callable, List, Optional, Tuple, Union, Dict
from tianshou.env import DummyVectorEnv
from tianshou.data import Batch, to_torch, to_torch_as
from tianshou.policy import BasePolicy
from tianshou.utils import TensorboardLogger


from tianshou.env.worker import (
    DummyEnvWorker,
    EnvWorker,
    RayEnvWorker,
    SubprocEnvWorker,
)

In [5]:
# Derived modules and custom defined classes

from env.VRPEnv import VRPEnv
from policy.VRPPolicy import REINFORCEPolicy
from nets.attention_model_D2 import AttentionModel

from data.VRPCollector import Collector
from data.BufferManager import ReplayBuffer, VectorReplayBuffer
from policy.VRPtrainer import OnpolicyTrainer, onpolicy_trainer
from data.Graph_Viz import decode_buffer, plot_vehicle_routes

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device == "cuda":
    torch.cuda.get_device_properties(device)

In [9]:
## Parameters of dataset
def load_data(data_dir):
    """
    Load the saved dataset/graphs
    """

    train_data_path = data_dir + "/train/train_graphs_50000x20.pickle"
    test_data_path = data_dir + "/test/test_graphs_10000x20.pickle"

    with open(train_data_path, 'rb') as train_handle:
        load_train_graphs = pickle.load(train_handle)

    with open(test_data_path, 'rb') as test_handle:
        load_test_graphs = pickle.load(test_handle) 
        
    return load_train_graphs, load_test_graphs

In [10]:
# Training process setup
data_dir = "./data"
load_train_graphs, load_test_graphs = load_data(data_dir)

# Training parameters
training_params = {
"experiment_idx" : "A10x10",

# Optimization
"learning_rate" : 0.0001,
"betas" : (0.9, 0.99), # coefficients used for computing running averages of gradient and its square
"weight_decay" : 0.01,  # weight decay coefficient for regularization
"n_epochs" : 30,
"batch_size" : 64,

#Model configuration
"embedding_dim" : 64,
"hidden_dim" : 16,
"n_encode_layers" : 2,

# Trainer and Collector setup (will remain almost same, increase buffer sizes for larger datasets)
"graph_size" : load_train_graphs[0]["node_features"].shape[0] - 1,
"train_graphs" : len(load_train_graphs),
"test_graphs" : len(load_test_graphs),
"train_buffer_size" : 100000,
"test_buffer_size" : 100000,
"repeat_per_collect" : 1,
"test_in_train" : True}

training_params["episode_per_collect"] = training_params["episode_per_test"] = training_params["train_graphs"]
training_params["step_per_epoch"] = training_params["graph_size"] * training_params["train_graphs"]

print(training_params)


model = AttentionModel(
    embedding_dim = training_params["embedding_dim"],
    hidden_dim = training_params["hidden_dim"],
    n_encode_layers = training_params["n_encode_layers"],
    graph_size = training_params["graph_size"],
    tanh_clipping = 10,
    mask_inner = True, 
    mask_logits = True,
    normalization = 'batch',
    n_heads = 8,
    checkpoint_encoder = False,
    shrink_size = None)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optim = torch.optim.AdamW(
    model.parameters(), 
    lr = training_params["learning_rate"],
    betas = training_params["betas"], 
    weight_decay = training_params["weight_decay"],
    eps = 1e-08)

VRPpolicy = REINFORCEPolicy(model, optim)


# Setting up Vectorized environments for train and test datasets
train_envs = DummyVectorEnv([lambda instance=graph, idx=i: VRPEnv(instance, idx) for i,graph in enumerate(load_train_graphs)])
test_envs = DummyVectorEnv([lambda instance=graph, idx=i: VRPEnv(instance, idx) for i,graph in enumerate(load_test_graphs)])

# Setting up Replay Buffers and Collectors
test_replaybuffer = VectorReplayBuffer(training_params["test_buffer_size"], buffer_num=training_params["test_graphs"])
train_replaybuffer = VectorReplayBuffer(training_params["train_buffer_size"], buffer_num=training_params["train_graphs"])
test_collector = Collector(VRPpolicy, test_envs, test_replaybuffer)
train_collector = Collector(VRPpolicy, train_envs, train_replaybuffer)

# Setting up trainer 
logdir = "./logs/"
exp_num = training_params["experiment_idx"]
# Setup Tensorboard logger
log_path = os.path.join(logdir, f"VRPtraining_exp{exp_num}")
writer = SummaryWriter(log_path)
logger = TensorboardLogger(writer)
train_collector.reset()
test_collector.reset()
train_replaybuffer.reset()
test_replaybuffer.reset()
trainer = OnpolicyTrainer(
    VRPpolicy,
    train_collector,
    test_collector,
    max_epoch = training_params["n_epochs"],
    step_per_epoch = training_params["step_per_epoch"],
    repeat_per_collect = training_params["repeat_per_collect"],
    episode_per_test = training_params["episode_per_test"],
    episode_per_collect = training_params["episode_per_collect"],
    batch_size = training_params["batch_size"],
    logger=logger)

{'experiment_idx': 'A10x10', 'learning_rate': 0.0001, 'betas': (0.9, 0.99), 'weight_decay': 0.01, 'n_epochs': 30, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dim': 16, 'n_encode_layers': 2, 'graph_size': 20, 'train_graphs': 50000, 'test_graphs': 10000, 'train_buffer_size': 100000, 'test_buffer_size': 100000, 'repeat_per_collect': 1, 'test_in_train': True, 'episode_per_collect': 50000, 'episode_per_test': 50000, 'step_per_epoch': 1000000}


TypeError: isinstance() arg 2 must be a type or tuple of types

In [None]:
# Train the model and store epoch stats in a dataframe
losses = []
train_stat = []
for epoch, epoch_stat, info in trainer:
    losses.append(-epoch_stat["loss"])
    epoch_stat["epoch"] = epoch
    train_stat.append(epoch_stat)
    print("\n", epoch_stat)
    
    #with tune.checkpoint_dir(epoch) as checkpoint_dir:
    #    path = os.path.join(checkpoint_dir, "checkpoints")
    #    torch.save((model.state_dict(), optim.state_dict()), path)
        
#train_df_cols = epoch_stat.keys()
#train_df = pd.DataFrame(train_stat, columns = train_df_cols)

print("Finished Training")

In [None]:
# Saving the trained model and results

exp_idx = f"model_exp{exp_num}_g{training_params['graph_size']}_train{len(load_train_graphs)}_test{len(load_test_graphs)}"
file_path = "./trained_models/models/" + f"{exp_idx}.pth"
torch.save(model.state_dict(), file_path)


# Collecting test and train results and buffers data
train_result = train_collector.collect(n_episode=len(load_train_graphs))
test_result = test_collector.collect(n_episode=len(load_test_graphs))
train_buffer_df = decode_buffer(train_replaybuffer)
test_buffer_df = decode_buffer(test_replaybuffer)

print("\n\n")
print(f"Train Results: {test_result}")
print(f"Test Results: {test_result}")

res_path = "./trained_models/results/"
#train_df.to_csv(res_path + f"train_df_{exp_idx}", index=False)
train_buffer_df.to_csv(res_path + f"train_bufferdf_{exp_idx}", index=False)
test_buffer_df.to_csv(res_path + f"test_bufferdf_{exp_idx}", index=False)


with open(res_path + f"test_result_{exp_idx}.pickle", 'wb') as handle: 
        pickle.dump(test_result, handle, protocol=pickle.HIGHEST_PROTOCOL)
        

with open(res_path + f"train_result_{exp_idx}.pickle", 'wb') as handle: 
        pickle.dump(train_result, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
with open(res_path + f"params_{exp_idx}.pickle", 'wb') as handle: 
        pickle.dump(training_params, handle, protocol=pickle.HIGHEST_PROTOCOL) 

In [None]:
##################### EXECUTE TRAINING SCRIPT #################################################
#! python "final_model_training_A1.py"

In [None]:
## LOADING SAVED OBJECTS -- USE IF AND WHEN REQUIRED

# Loading the results for model you wanna explore results
#graph_size = 10
#train_size = 100000
#test_size = 100000
#
## Save directories
#exp_idx = f"model_expA1_g{graph_size}_train{train_size}_test{test_size}"
#m_dir = "./trained_models/models/"
#res_dir = "./trained_models/results/"
#
## Loading training params
#with open(res_dir + "params_" + exp_idx, 'rb') as handle:
#    training_params = pickle.load(handle)
#
#    
## Loading saved model
#model = AttentionModel(
#        embedding_dim=training_params["embedding_dim"],
#        hidden_dim=training_params["hidden_dim"],
#        graph_size = training_params["graph_size"],
#        n_encode_layers=training_params["n_encode_layers"],
#        tanh_clipping=10.,
#        mask_inner=True, 
#        mask_logits=True,
#        normalization='batch',
#        n_heads=8,
#        checkpoint_encoder=False,
#        shrink_size=None)
#
#model.load_state_dict(torch.load(m_dir + f"{exp_idx}.pth"))
#
#
## Loading saved results
#with open(res_dir + "test_result_" + exp_idx, 'rb') as handle:
#    train_result = pickle.load(handle)
#    
#with open(res_dir + "test_result_" + exp_idx, 'rb') as handle:
#    train_result = pickle.load(handle)
#    
#train_df = pd.read_csv(res_dir + f"train_df_{exp_idx}")
#train_buffer_df = pd.read_csv(res_dir + f"train_bufferdf_{exp_idx}")
#test_buffer_df = pd.read_csv(res_dir + f"test_bufferdf_{exp_idx}")

In [None]:
# Test result
print(test_result)

In [None]:
#test_buffer_df

In [None]:
# Plotting training loss and avg rewards over iterations

n_epochs = training_params["n_epochs"]
train_df["loss"] = losses
x = [e for e in range (n_epochs)]
default_x_ticks = range(len(x))
fig = plt.figure(figsize=(20 ,5))


plt.subplot(121)
train_df["loss"].plot(style='o--', label="train loss")
train_df['loss'].expanding().mean().plot(style='k-', label="cumm_loss")
plt.legend()
plt.xlabel("epoch")
plt.xticks(default_x_ticks, x, rotation=20)
plt.ylabel("training_loss")


plt.subplot(122)
train_df["rew"].plot(style='bo--', label="avg train reward")
train_df['rew'].expanding().mean().plot(style='k-', label="cumm_rew")
train_df["bl_rew"].plot(style='r--', label="baseline reward")
plt.xlabel("epoch")
plt.xticks(default_x_ticks, x, rotation=20)
plt.ylabel("training vs baseline average rewards")


plt.legend()
plt.show()

In [None]:
# Epoch-wise training stats
train_df

In [None]:
# Studying and Plotting collected train and test solutions (distance values are abs(rewards))

def get_distances(rewards):
    distances = np.array([round(abs(rew),3) for rew in rewards])
    return distances


# Get computed distance values from the reward list
test_distances = get_distances(test_result["rews"])
test_bl_distances = get_distances(test_result["bl_rews"])

train_distances = get_distances(train_result["rews"])
train_bl_distances = get_distances(train_result["bl_rews"])

In [None]:
avg_test_rew, best_test_rew  = round(np.mean(test_distances, axis=0),3), np.min(test_distances, axis=0)
avg_test_rew_bl, best_test_rew_bl = round(np.mean(test_bl_distances, axis=0),3), np.min(test_bl_distances, axis=0)


avg_train_rew, best_train_rew  = round(np.mean(train_distances, axis=0),3), np.min(train_distances, axis=0)
avg_train_rew_bl, best_train_rew_bl = round(np.mean(train_bl_distances, axis=0),3), np.min(train_bl_distances, axis=0)


print("TEST RESULTS")
print(f"Mean test reward: {avg_test_rew}")
print(f"Best test reward: {best_test_rew}")
print(f"\nMean baseline reward: {avg_test_rew_bl}")
print(f"Best baseline reward: {best_test_rew_bl}")

print("\n----------------\n")
print("TRAIN RESULTS")
print(f"Mean train reward: {avg_train_rew}")
print(f"Best train reward: {best_train_rew}")
print(f"\nMean baseline reward: {avg_train_rew_bl}")
print(f"Best baseline reward: {best_train_rew_bl}")

In [None]:
# Plotting test and train distance values

fig = plt.figure(figsize=(15 ,8))

pd.Series(train_distances).plot.kde(style='b-', label="train rewards")
pd.Series(test_distances).plot.kde(style='r-', label="test rewards")

pd.Series(train_bl_distances).plot.kde(style='b--', label="train baseline rewards")
pd.Series(test_bl_distances).plot.kde(style='r--', label="test baseline rewards")

plt.xlabel("Distance")
plt.ylabel("Probability Density")
plt.title("Model vs Baseline Distances (Test/Train)", size=16)
plt.legend()

In [None]:
# Gap between test distances and corresponding baseline solutions
fig = plt.figure(figsize=(8 ,5)) 

gaps = []
for i, dist in enumerate(test_distances):
    gap = (test_distances[i] - test_bl_distances[i]) / test_bl_distances[i]
    gaps.append(gap)
    
    
gaps = np.sort(np.array(gaps))
plt.hist(gaps, bins=10)
#plt.plot(base)

plt.xlabel("Sorting index")
plt.ylabel("Gap % vs Greedy Baseline")
plt.title("Test Distances Gap", size=16)
plt.legend()

In [None]:
# Test gaps -- greedy vs experiment

test_rewards_df = test_buffer_df.groupby(['env_id']).agg(action_count=('action', 'count'), 
                                        reward=('reward', 'sum'), 
                                       bl_reward=('bl_reward', 'sum'))


test_rewards_df["avg_reward"] = test_rewards_df["reward"] * training_params["graph_size"] / test_rewards_df["action_count"]
test_rewards_df["avg_bl_reward"] = test_rewards_df["bl_reward"] * training_params["graph_size"] / test_rewards_df["action_count"]
test_rewards_df["reward_gap"] = test_rewards_df["avg_reward"] - test_rewards_df["avg_bl_reward"]
test_rewards_df["gap"] = test_rewards_df["reward_gap"]*100 / test_rewards_df["avg_bl_reward"]

test_rewards_df.sort_values("gap")

In [None]:
tours, bl_tours = generate_tours(test_buffer_df, test_result)

In [None]:
idx = 50

graph_data = load_test_graphs[idx]
graph_route = tours[idx]
graph_route_bl = bl_tours[idx]

fig, ax = plt.subplots(figsize=(5, 5))
plot_vehicle_routes(graph_data, graph_route, ax, visualize_demands=False, demand_scale=50, round_demand=True)

#plt.subplot(122)
#plot_vehicle_routes(graph_data, graph_route_bl, ax, visualize_demands=False, demand_scale=50, round_demand=True)

In [None]:
fig, ax = plt.subplots(figsize=(5, 5))
plot_vehicle_routes(graph_data, graph_route_bl, ax, visualize_demands=False, demand_scale=50, round_demand=True)