In [1]:
import torch
import os
from itertools import zip_longest


def export_train_test_stats(args, start_epoch, train_stats, test_stats):
    fpath = os.path.join(args.output_dir, "loss_gap.csv")
    with open(fpath, 'a') as f:
        f.write((' '.join("{: >16}" for _ in range(8)) + '\n').format(
            "#EP", "#LOSS", "#PROB", "#VAL", "#C_Val", "#TEST_MU", "#TEST_STD", "#TEST_GAP"
        ))
        for epoch, (train, test) in enumerate(zip_longest(train_stats, test_stats), start=start_epoch):
            # train = train if train is not None else ['nan'] * 5
            test = test if test is not None else [0.0, 0.0, 0.0]
            f.write(("{: >16d}" + ' '.join("{: >16.3g}" for _ in range(7)) + '\n').format(
                epoch, *train, *test))


def save_checkpoint(args, epoch, model, optim, lr_scheduler=None):
    checkout = {'epoch': epoch,
                'model': model.state_dict(),
                'optim': optim.state_dict()}

    if args.rate_decay is not None:
        checkout['lr_scheduler'] = lr_scheduler.state_dict()
    torch.save(checkout, os.path.join(args.output_dir, "checkout_epoch{}.pth".format(epoch+1)))


def load_checkpoint(args, model, optim, baseline=None, lr_scheduler=None):
    checkout = torch.load(args.resume_state)
    model.load_state_dict(checkout['model'])
    optim.load_state_dict(checkout['optim'])

    if args.rate_decay is not None:
        lr_scheduler.load_state_dict(checkout['lr_scheduler'])

    return checkout['epoch']



In [2]:
import torch


class DVRPSR_Environment:
    vehicle_feature = 8  # vehicle coordinates(x_i,y_i), veh_time_time_budget, total_travel_time, last_customer,
    # next(destination) customer, last rewards, next rewards
    customer_feature = 4

    # TODO: change pending cost for rewards

    def __init__(self, data, nodes=None, customer_mask=None, edges_attributes=None,
                 pending_cost=1,
                 dynamic_reward=0.2,
                 budget_penalty=10):

        self.vehicle_count = data.vehicle_count
        self.vehicle_speed = data.vehicle_speed
        self.vehicle_time_budget = data.vehicle_time_budget

        self.nodes = data.nodes if nodes is None else nodes
        self.edge_index = data.edges_index
        self.edge_attributes = data.edges_attributes if edges_attributes is None else edges_attributes
        self.init_customer_mask = data.customer_mask if customer_mask is None else customer_mask

        self.minibatch, self.nodes_count, _ = self.nodes.size()
        self.distance_matrix = self.edge_attributes.view((self.minibatch, self.nodes_count, self.nodes_count))
        self.pending_cost = pending_cost
        self.dynamic_reward = dynamic_reward
        self.budget_penalty = budget_penalty

    def _update_current_vehicles(self, dest, customer_index, tau=0):

        # calculate travel time
        # TODO: 1) in real world setting we need to calculate the distance of arc
        # If nodes i and j are directly connected by a road segment (i, j) ∈ A, then t(i,j)=t_ij;
        # otherwise, t(i,j)=t_ik1 +t_k1k2 +...+t_knj, where k1,...,kn ∈ V are the nodes along the
        # shortest path from node i to node j.
        #      2) calculate stating time for each vehicle $\tau $, currently is set to zero

        # update vehicle previous and next customer id
        self.current_vehicle[:, :, 4] = self.current_vehicle[:, :, 5]
        self.current_vehicle[:, :, 5] = customer_index

        # get the distance from current vehicle to its next destination
        dist = torch.zeros((self.minibatch, 1)).to(self.nodes.device)
        for i in range(self.minibatch):
            dist[i, 0] = self.distance_matrix[i][int(self.current_vehicle[i, :, 4])][int(self.current_vehicle[i, :, 5])]

        # total travel time
        tt = dist / self.vehicle_speed

        # customers which are dynamicaly appeared
        dyn_cust = (dest[:, :, 3] > 0).float()

        # budget left while travelling to destination nodes
        budget = tt + dest[:, :, 2]
        # print(budget, tau, tt, dest[:,:,2])

        # update vehicle features based on destination nodes
        self.current_vehicle[:, :, :2] = dest[:, :, :2]
        self.current_vehicle[:, :, 2] -= budget
        self.current_vehicle[:, :, 3] += tt
        self.current_vehicle[:, :, 6] = self.current_vehicle[:, :, 7]
        self.current_vehicle[:, :, 7] = -dist

        # update vehicles states
        self.vehicles = self.vehicles.scatter(1,
                                              self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                            self.vehicle_feature),
                                              self.current_vehicle)

        return dist, dyn_cust

    def _done(self, customer_index):

        self.vehicle_done.scatter_(1, self.current_vehicle_index, torch.logical_or((customer_index == 0),
                                                                                   (self.current_vehicle[:, :,
                                                                                    2] <= 0)))
        # print(self.veh_done, cust_idx==0,self.cur_veh[:,:,2]<=0, (cust_idx==0) | (self.cur_veh[:,:,2]<=0))
        self.done = bool(self.vehicle_done.all())

    def _update_mask(self, customer_index):

        self.new_customer = False
        self.served.scatter_(1, customer_index, customer_index > 0)

        # cost for a vehicle to go to customer and back to deport considering service duration
        cost = torch.zeros((self.minibatch, self.nodes_count, 1)).to(self.nodes.device)
        for i in range(self.minibatch):
            for j in range(self.nodes_count):
                dist_vehicle_customer_depot = self.distance_matrix[i][int(self.current_vehicle[i, :, 4])][j] + \
                                              self.distance_matrix[i][j][0]
                cost[i, j] = dist_vehicle_customer_depot

        cost = cost / self.vehicle_speed

        cost += self.nodes[:, :, None, 2]

        overtime_mask = self.current_vehicle[:, :, None, 2] - cost
        overtime_mask = overtime_mask.squeeze(2).unsqueeze(1)
        overtime = torch.zeros_like(self.mask).scatter_(1,
                                                        self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                      self.nodes_count),
                                                        overtime_mask < 0)

        self.mask = self.mask | self.served[:, None, :] | overtime | self.vehicle_done[:, :, None]
        self.mask[:, :, 0] = 0  # depot

    # updating current vehicle to find the next available vehicle
    def _update_next_vehicle(self, veh_index=None):

        if veh_index is None:
            avail = self.vehicles[:, :, 3].clone()
            avail[self.vehicle_done] = float('inf')
            self.current_vehicle_index = avail.argmin(1, keepdim=True)
        else:
            self.current_vehicle_index = veh_index

        self.current_vehicle = self.vehicles.gather(1, self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                     self.vehicle_feature))
        self.current_vehicle_mask = self.mask.gather(1, self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                      self.nodes_count))

    def _update_dynamic_customers(self):

        time = self.current_vehicle[:, :, 3].clone()

        if self.init_customer_mask is None:
            reveal_dyn_reqs = torch.logical_and((self.customer_mask), (self.nodes[:, :, 3] <= time))
        else:
            reveal_dyn_reqs = torch.logical_and((self.customer_mask ^ self.init_customer_mask),
                                                (self.nodes[:, :, 3] <= time))

        if reveal_dyn_reqs.any():
            self.new_customer = True
            self.customer_mask = self.customer_mask ^ reveal_dyn_reqs
            self.mask = self.mask ^ reveal_dyn_reqs[:, None, :].expand(-1, self.vehicle_count, -1)
            self.vehicle_done = torch.logical_and(self.vehicle_done, (reveal_dyn_reqs.any(1) ^ True).unsqueeze(1))
            self.vehicles[:, :, 3] = torch.max(self.vehicles[:, :, 3], time)
            self._update_next_vehicle()

    def reset(self):
        # reset vehicle (minibatch*veh_count*veh_feature)
        self.vehicles = self.nodes.new_zeros((self.minibatch, self.vehicle_count, self.vehicle_feature))
        self.vehicles[:, :, :2] = self.nodes[:, :1, :2]
        self.vehicles[:, :, 2] = self.vehicle_time_budget

        # reset vehicle done
        self.vehicle_done = self.nodes.new_zeros((self.minibatch, self.vehicle_count), dtype=torch.bool)
        self.done = False

        # reset cust_mask
        self.customer_mask = self.nodes[:, :, 3] > 0
        if self.init_customer_mask is not None:
            self.customer_mask = self.customer_mask | self.init_customer_mask

        # reset new customers and served customer since now to zero (all false)
        self.new_customer = True
        self.served = torch.zeros_like(self.customer_mask)

        # reset mask (minibatch*veh_count*nodes)
        self.mask = self.customer_mask[:, None, :].repeat(1, self.vehicle_count, 1)

        # reset current vehicle index, current vehicle, current vehicle mask
        self.current_vehicle_index = self.nodes.new_zeros((self.minibatch, 1), dtype=torch.int64)

        self.current_vehicle = self.vehicles.gather(1,
                                                    self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                  self.vehicle_feature))
        self.current_vehicle_mask = self.mask.gather(1,
                                                     self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                   self.nodes_count))

    def step(self, customer_index, veh_index=None):
        dest = self.nodes.gather(1, customer_index[:, :, None].expand(-1, -1, self.customer_feature))
        dist, dyn_cust = self._update_current_vehicles(dest, customer_index)

        # cust = (dest[:, :, 3] >= 0).float()

        self._done(customer_index)
        self._update_mask(customer_index)
        self._update_next_vehicle(veh_index)

        # reward = -dist * (1 - dyn_cust*self.dynamic_reward)
        reward = self.current_vehicle[:, :, 7] - self.current_vehicle[:, :, 6] + self.dynamic_reward * dyn_cust
        pending_static_customers = torch.logical_and((self.served ^ True),
                                                     (self.nodes[:, :, 3] == 0)).float().sum(-1, keepdim=True) - 1

        reward -= self.pending_cost * pending_static_customers

        if self.done:

            if self.init_customer_mask is not None:
                self.served += self.init_customer_mask
            # penalty for pending customers
            pending_customers = torch.logical_and((self.served ^ True),
                                                  (self.nodes[:, :, 3] >= 0)).float().sum(-1, keepdim=True) - 1

            # TODO: penalty for having unused time budget as well not serving customers
            reward -= self.dynamic_reward * pending_customers

        self._update_dynamic_customers()

        return reward

    def state_dict(self, dest_dict=None):
        if dest_dict is None:
            dest_dict = {'vehicles': self.vehicles,
                         'vehicle_done': self.vehicle_done,
                         'served': self.served,
                         'mask': self.mask,
                         'current_vehicle_index': self.current_vehicle_index}

        else:
            dest_dict["vehicles"].copy_(self.vehicles)
            dest_dict["vehicle_done"].copy_(self.vehicle_done)
            dest_dict["served"].copy_(self.served)
            dest_dict["mask"].copy_(self.mask)
            dest_dict["current_vehicle_index"].copy_(self.current_vehicle_index)

        return dest_dict

    def load_state_dict(self, state_dict):
        self.vehicles.copy_(state_dict["vehicles"])
        self.vehicle_done.copy_(state_dict["vehicle_done"])
        self.served.copy_(state_dict["served"])
        self.mask.copy_(state_dict["mask"])
        self.current_vehicle_index.copy_(state_dict["current_vehicle_index"])

        self.current_vehicle = self.vehicles.gather(1,
                                                    self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                  self.vehicle_feature))
        self.current_vehicle_mask = self.mask.gather(1, self.current_vehicle_index[:, :, None].expand(-1, -1,
                                                                                                      self.customer_feature))


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphMultiHeadAttention(nn.Module):

    def __init__(self, num_head, query_size, key_size=None, value_size=None, edge_dim_size=None, bias=False):

        super(GraphMultiHeadAttention, self).__init__()
        self.num_head = num_head
        self.query_size = query_size

        self.key_size = self.query_size if key_size is None else key_size
        self.value_size = self.key_size if value_size is None else value_size
        self.edge_dim_size = self.query_size // 2 if edge_dim_size is None else edge_dim_size

        self.scaling_factor = self.key_size ** -0.5

        self.keys_per_head = self.key_size // self.num_head
        self.values_per_head = self.value_size // self.num_head
        self.edge_size_per_head = self.edge_dim_size

        self.edge_embedding = nn.Linear(self.edge_dim_size, self.edge_size_per_head, bias=bias)
        self.query_embedding = nn.Linear(self.query_size, self.num_head * self.keys_per_head, bias=bias)
        self.key_embedding = nn.Linear(self.key_size, self.num_head * self.keys_per_head, bias=bias)
        self.value_embedding = nn.Linear(self.value_size, self.num_head * self.values_per_head, bias=bias)
        self.recombine = nn.Linear(self.num_head * self.values_per_head, self.value_size, bias=bias)

        self.K_project_pre = None
        self.V_project_pre = None

        self.initialize_weights()

    def initialize_weights(self):
        # TODO: add xavier initialziation as well

        nn.init.uniform_(self.query_embedding.weight, -self.scaling_factor, self.scaling_factor)
        nn.init.uniform_(self.key_embedding.weight, -self.scaling_factor, self.scaling_factor)
        inv_sq_dv = self.value_size ** -0.5
        nn.init.uniform_(self.value_embedding.weight, -inv_sq_dv, inv_sq_dv)

    def precompute(self, keys, values=None):

        values = keys if values is None else values

        size_KV = keys.size(-2)

        self.K_project_pre = self.key_embedding(keys).view(
            -1, size_KV, self.num_head, self.keys_per_head).permute(0, 2, 3, 1)

        self.V_project_pre = self.value_embedding(values).view(
            -1, size_KV, self.num_head, self.values_per_head).permute(0, 2, 1, 3)

    def forward(self, queries, keys=None, values=None, edge_attributes=None, mask=None, edge_mask=None):

        *batch_size, size_Q, _ = queries.size()

        # get queries projection
        Q_project = self.query_embedding(queries).view(
            -1, size_Q, self.num_head, self.keys_per_head).permute(0, 2, 1, 3)

        # get keys projection
        if keys is None:
            if self.K_project_pre is None:
                size_KV = size_Q
                K_project = self.key_embedding(queries).view(
                    -1, size_KV, self.num_head, self.keys_per_head).permute(0, 2, 3, 1)
            else:
                size_KV = self.K_project_pre.size(-1)
                K_project = self.K_project_pre
        else:
            size_KV = keys.size(-2)
            K_project = self.key_embedding(keys).view(
                -1, size_KV, self.num_head, self.keys_per_head).permute(0, 2, 3, 1)

        # get values projection
        if values is None:
            if self.V_project_pre is None:
                V_project = self.value_embedding(queries).view(
                    -1, size_KV, self.num_head, self.values_per_head).permute(0, 2, 1, 3)
            else:
                V_project = self.V_project_pre
        else:
            V_project = self.value_embedding(values).view(
                -1, size_KV, self.num_head, self.values_per_head).permute(0, 2, 1, 3)

        # calculate the compability
        attention = Q_project.matmul(K_project)
        attention *= self.scaling_factor

        # if edge attributes are required
        if edge_attributes is not None:
            # TODO: edge mask (is it required)
            edge_project = self.edge_embedding(edge_attributes).view(
                -1, size_Q, size_Q, self.edge_size_per_head)

            # get enhanced attention inclusing edge attributes
            attention_expanded = attention.unsqueeze(-1).expand(-1, -1, -1, -1, self.edge_size_per_head)

            # Expand edge attributes to match the number of attention heads
            edge_project_expanded = edge_project.unsqueeze(1).expand(-1, attention.size(1), -1, -1, -1)

            attention = attention_expanded * edge_project_expanded
            attention = attention.mean(-1)

            # print(attention.size())

        if mask is not None:

            if mask.numel() * self.num_head == attention.numel():
                m = mask.view(-1, 1, size_Q, size_KV).expand_as(attention)
            else:
                m = mask.view(-1, 1, 1, size_KV).expand_as(attention)

            attention[m.bool()] = -float('inf')

        attention = F.softmax(attention, dim=-1)
        attention = attention.matmul(V_project).permute(0, 2, 1, 3).contiguous().view(
            *batch_size, size_Q, self.num_head * self.values_per_head)

        output = self.recombine(attention)

        return output



In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphMultiHeadAttention(nn.Module):

    def __init__(self, num_head, query_size, key_size=None, value_size=None, edge_dim_size=None, bias=False):

        super(GraphMultiHeadAttention, self).__init__()
        self.num_head = num_head
        self.query_size = query_size

        self.key_size = self.query_size if key_size is None else key_size
        self.value_size = self.key_size if value_size is None else value_size
        self.edge_dim_size = self.query_size // 2 if edge_dim_size is None else edge_dim_size

        self.scaling_factor = self.key_size ** -0.5

        self.keys_per_head = self.key_size // self.num_head
        self.values_per_head = self.value_size // self.num_head
        self.edge_size_per_head = self.edge_dim_size

        self.edge_embedding = nn.Linear(self.edge_dim_size, self.edge_size_per_head, bias=bias)
        self.query_embedding = nn.Linear(self.query_size, self.num_head * self.keys_per_head, bias=bias)
        self.key_embedding = nn.Linear(self.key_size, self.num_head * self.keys_per_head, bias=bias)
        self.value_embedding = nn.Linear(self.value_size, self.num_head * self.values_per_head, bias=bias)
        self.recombine = nn.Linear(self.num_head * self.values_per_head, self.value_size, bias=bias)

        self.K_project_pre = None
        self.V_project_pre = None

        self.initialize_weights()

    def initialize_weights(self):
        # TODO: add xavier initialziation as well

        nn.init.uniform_(self.query_embedding.weight, -self.scaling_factor, self.scaling_factor)
        nn.init.uniform_(self.key_embedding.weight, -self.scaling_factor, self.scaling_factor)
        inv_sq_dv = self.value_size ** -0.5
        nn.init.uniform_(self.value_embedding.weight, -inv_sq_dv, inv_sq_dv)

    def precompute(self, keys, values=None):

        values = keys if values is None else values

        size_KV = keys.size(-2)

        self.K_project_pre = self.key_embedding(keys).view(
            -1, size_KV, self.num_head, self.keys_per_head).permute(0, 2, 3, 1)

        self.V_project_pre = self.value_embedding(values).view(
            -1, size_KV, self.num_head, self.values_per_head).permute(0, 2, 1, 3)

    def forward(self, queries, keys=None, values=None, edge_attributes=None, mask=None, edge_mask=None):

        *batch_size, size_Q, _ = queries.size()

        # get queries projection
        Q_project = self.query_embedding(queries).view(
            -1, size_Q, self.num_head, self.keys_per_head).permute(0, 2, 1, 3)

        # get keys projection
        if keys is None:
            if self.K_project_pre is None:
                size_KV = size_Q
                K_project = self.key_embedding(queries).view(
                    -1, size_KV, self.num_head, self.keys_per_head).permute(0, 2, 3, 1)
            else:
                size_KV = self.K_project_pre.size(-1)
                K_project = self.K_project_pre
        else:
            size_KV = keys.size(-2)
            K_project = self.key_embedding(keys).view(
                -1, size_KV, self.num_head, self.keys_per_head).permute(0, 2, 3, 1)

        # get values projection
        if values is None:
            if self.V_project_pre is None:
                V_project = self.value_embedding(queries).view(
                    -1, size_KV, self.num_head, self.values_per_head).permute(0, 2, 1, 3)
            else:
                V_project = self.V_project_pre
        else:
            V_project = self.value_embedding(values).view(
                -1, size_KV, self.num_head, self.values_per_head).permute(0, 2, 1, 3)

        # calculate the compability
        attention = Q_project.matmul(K_project)
        attention *= self.scaling_factor

        # if edge attributes are required
        if edge_attributes is not None:
            # TODO: edge mask (is it required)
            edge_project = self.edge_embedding(edge_attributes).view(
                -1, size_Q, size_Q, self.edge_size_per_head)

            # get enhanced attention inclusing edge attributes
            attention_expanded = attention.unsqueeze(-1).expand(-1, -1, -1, -1, self.edge_size_per_head)

            # Expand edge attributes to match the number of attention heads
            edge_project_expanded = edge_project.unsqueeze(1).expand(-1, attention.size(1), -1, -1, -1)

            attention = attention_expanded * edge_project_expanded
            attention = attention.mean(-1)

            # print(attention.size())

        if mask is not None:

            if mask.numel() * self.num_head == attention.numel():
                m = mask.view(-1, 1, size_Q, size_KV).expand_as(attention)
            else:
                m = mask.view(-1, 1, 1, size_KV).expand_as(attention)

            attention[m.bool()] = -float('inf')

        attention = F.softmax(attention, dim=-1)
        attention = attention.matmul(V_project).permute(0, 2, 1, 3).contiguous().view(
            *batch_size, size_Q, self.num_head * self.values_per_head)

        output = self.recombine(attention)

        return output



In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from nets import GraphMultiHeadAttention

class GraphEncoderlayer(nn.Module):

    def __init__(self, num_head, model_size, ff_size):
        super().__init__()

        self.attention = GraphMultiHeadAttention(num_head, query_size=model_size)
        self.BN1 = nn.BatchNorm1d(model_size)
        self.FFN_layer1 = nn.Linear(model_size, ff_size)

        self.FFN_layer2 = nn.Linear(ff_size, model_size)
        self.BN2 = nn.BatchNorm1d(model_size)

    def forward(self, h, e=None, mask=None):
        h_attn = self.attention(h, edge_attributes=e, mask=mask)
        h_bn = self.BN1((h_attn + h).permute(0, 2, 1)).permute(0, 2, 1)

        h_layer1 = F.relu(self.FFN_layer1(h_bn))
        h_layer2 = self.FFN_layer2(h_layer1)

        h_out = self.BN2((h_bn + h_layer2).permute(0, 2, 1)).permute(0, 2, 1)

        if mask is not None:
            h_out[mask] = 0

        return h_out


class GraphEncoder(nn.Module):

    def __init__(self, encoder_layer, num_head, model_size, ff_szie):
        super().__init__()

        for l in range(encoder_layer):
            self.add_module(str(l), GraphEncoderlayer(num_head, model_size, ff_szie))

    def forward(self, h_in, e_in=None, mask=None):

        h = h_in
        e = e_in

        for child in self.children():
            h = child(h, e, mask=mask)
        return h


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.categorical import Categorical
from nets import GraphMultiHeadAttention
from nets.Encoder import GraphEncoder

class GraphAttentionModel(nn.Module):

    def __init__(self, customer_feature, vehicle_feature, model_size=128, encoder_layer=3,
                 num_head=8, ff_size=128, tanh_xplor=10, edge_embedding_dim=64, greedy=False):
        super().__init__()

        # get models parameters for encoding-decoding
        self.model_size = model_size
        self.scaling_factor = self.model_size ** 0.5
        self.tanh_xplor = tanh_xplor
        self.greedy = greedy

        # Initialize encoder and embeddings
        self.customer_encoder = GraphEncoder(encoder_layer=3, num_head=8, model_size=128, ff_szie=512)
        self.customer_embedding = nn.Linear(customer_feature, model_size)
        self.depot_embedding = nn.Linear(customer_feature, model_size)

        # initialize edge embedding
        self.edge_embedding = nn.Linear(1, edge_embedding_dim)

        # Initialize vehicle embedding and encoding
        # self.vehicle_embedding = nn.Linear(vehicle_feature, ff_size, bias=False)

        self.fleet_attention = GraphMultiHeadAttention(num_head, vehicle_feature, model_size)

        self.vehicle_attention = GraphMultiHeadAttention(num_head, model_size)

        # customer projection
        self.customer_projection = nn.Linear(self.model_size, self.model_size)  # TODO: MLP instaed of nn.Linear

    def encode_customers(self, env, customer_mask=None):

        customer_emb = torch.cat((self.depot_embedding(env.nodes[:, :1, :]),
                                  self.customer_embedding(env.nodes[:, 1:, :])), dim=1)
        if customer_mask is not None:
            customer_emb[customer_mask] = 0

        edge_emb = self.edge_embedding(env.edge_attributes)

        self.customer_encoding = self.customer_encoder(customer_emb, edge_emb, mask=customer_mask)

        self.fleet_attention.precompute(self.customer_encoding)

        self.customer_representation = self.customer_projection(self.customer_encoding)
        if customer_mask is not None:
            self.customer_representation[customer_mask] = 0

    def vehicle_representation(self, vehicles, vehicle_index, vehicle_mask=None):

        # vehicles_embedding = self.vehicle_embedding(vehicles)

        # print(vehicles_embedding.size(), self.customer_representation.size())

        fleet_representation = self.fleet_attention(vehicles, mask=vehicle_mask)

        #         print(fleet_representation.size())

        vehicle_query = fleet_representation.gather(0, vehicle_index.unsqueeze(2).expand(-1, -1, self.model_size))

        self._vehicle_representation = self.vehicle_attention(vehicle_query,
                                                              fleet_representation,
                                                              fleet_representation)

        return self._vehicle_representation

    def score_customers(self, vehicle_representation):

        # print(vehicle_representation.size(), self.customer_representation.size())
        compact = torch.bmm(vehicle_representation,
                            self.customer_representation.transpose(2, 1))
        compact *= self.scaling_factor

        if self.tanh_xplor is not None:
            compact = self.tanh_xplor * compact.tanh()

        return compact

    def get_prop(self, compact, vehicle_mask=None):

        compact = compact

        compact[vehicle_mask] = -float('inf')
        compact = F.softmax(compact, dim=-1)
        return compact

    def step(self, env, old_action=None):

        _vehicle_representation = self.vehicle_representation(env.vehicles,
                                                              env.current_vehicle_index,
                                                              env.current_vehicle_mask)

        compact = self.score_customers(_vehicle_representation)
        prop = self.get_prop(compact, env.current_vehicle_mask)
        # print(compact.size())

        # step actions based on model act or evalaute
        if old_action is not None:

            # get entropy
            dist = Categorical(prop)
            old_actions_logp = dist.log_prob(old_action[:, 1].unsqueeze(-1))
            entropy = dist.entropy()

            is_done = float(env.done)

            entropy = entropy * (1. - is_done)
            old_actions_logp = old_actions_logp * (1. - is_done)
            return old_action[:, 1].unsqueeze(-1), entropy, old_actions_logp


        else:
            dist = Categorical(prop)

            if self.greedy:
                _, customer_index = p.max(dim=-1)
            else:
                customer_index = dist.sample()

            is_done = float(env.done)

            logp = dist.log_prob(customer_index)
            logp = logp * (1. - is_done)

            return customer_index, logp

    def forward(self, env, old_actions=None, is_update=False):

        if is_update:
            env.reset()
            entropys, old_actions_logps = [], []

            steps = old_actions.size(0)

            for i in range(steps):
                if env.new_customer:
                    self.encode_customers(env, env.customer_mask)

                if i < steps - 1:
                    old_action = old_actions[i, :, :]
                    next_action = old_actions[i + 1, :, :]
                else:
                    # this would be the last action which the agent takes and envrionment is done
                    old_action = old_actions[i, :, :]
                    next_action = old_actions[i, :, :]

                next_vehicle_index = next_action[:, 0].unsqueeze(-1)
                # print(next_vehicle_index)

                customer_index, entropy, logp = self.step(env, old_action)

                env.step(customer_index, next_vehicle_index)

                old_actions_logps.append(logp)
                entropys.append(entropy)

            entropys = torch.cat(entropys, dim=1)
            num_e = entropys.ne(0).float().sum(1)
            entropy = entropys.sum(1) / num_e

            old_actions_logps = torch.cat(old_actions_logps, dim=1)
            old_actions_logps = old_actions_logps.sum(1)

            return entropy, old_actions_logps, 0

        else:
            env.reset()
            actions, logps, rewards = [], [], []

            while not env.done:
                if env.new_customer:
                    self.encode_customers(env, env.customer_mask)

                customer_index, logp = self.step(env)
                actions.append((env.current_vehicle_index, customer_index))
                logps.append(logp)
                rewards.append(env.step(customer_index))

            # actions = torch.cat(actions, dim=1)
            logps = torch.cat(logps, dim=1)
            logp = logps.sum(dim=1)

            rewards = torch.cat(rewards, dim=1)
            rewards = rewards.sum(dim=1)

            return actions, logp, rewards





In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.categorical import Categorical

class Critic(nn.Module):

    # critic will take environment as imput and ouput the values for loss function
    # which is basically the estimation of complexity of actions

    def __init__(self, model, customers_count, ff_size=512):
        super(Critic, self).__init__()

        self.model = model
        self.ff_layer1 = nn.Linear(customers_count, ff_size)
        self.ff_layer2 = nn.Linear(ff_size, customers_count)

    def eval_step(self, env, compatibility, customer_index):
        compact = compatibility.clone()
        compact[env.current_vehicle_mask] = 0

        value = self.ff_layer1(compact)
        value = F.relu(value)
        value = self.ff_layer2(value)

        val = value.gather(2, customer_index.unsqueeze(1)).expand(-1, 1, -1)
        return val.squeeze(1)

    def __call__(self, env):
        self.model.encode_customers(env)
        env.reset()

        values = []

        while not env.done:
            _vehicle_presentation = self.model.vehicle_representation(env.vehicles,
                                                                      env.current_vehicle_index,
                                                                      env.current_vehicle_mask)
            compatibility = self.model.score_customers(_vehicle_presentation)
            prop = self.model.get_prop(compatibility, env.current_vehicle_mask)
            dist = Categorical(prop)
            customer_index = dist.sample()

            values.append(self.eval_step(env, compatibility, customer_index))

            return values[0]




In [8]:
import torch
import torch.nn as nn
from nets import GraphAttentionModel
from agents.Critic import Critic


class Actor_Critic(nn.Module):

    def __init__(self,
                 customer_feature,
                 vehicle_feature,
                 customers_count,
                 model_size=128,
                 encoder_layer=3,
                 num_head=8,
                 ff_size_actor=128,
                 ff_size_critic=512,
                 tanh_xplor=10,
                 edge_embedding_dim=64,
                 greedy=False):
        super(Actor_Critic, self).__init__()

        model = GraphAttentionModel(customer_feature, vehicle_feature, model_size, encoder_layer,
                                    num_head, ff_size_actor, tanh_xplor, edge_embedding_dim, greedy)
        self.actor = model

        self.critic = Critic(model, customers_count, ff_size_critic)

    def act(self, env, old_actions=None, is_update=False):
        actions, logps, rewards = self.actor(env)
        return actions, logps, rewards

    def evaluate(self, env, old_actions, is_update):
        entropys, old_logps, _ = self.actor(env, old_actions, is_update)
        values = self.critic(env)
        return entropys, old_logps, values


In [9]:
class Memory:

    def __init__(self):
        self.nodes = []
        self.edge_attributes = []
        self.actions = []
        self.rewards = []
        self.log_probs = []

    def clear(self):
        self.nodes.clear()
        self.edge_attributes.clear()
        self.actions.clear()
        self.rewards.clear()
        self.log_probs.clear()


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data
from torch.optim.lr_scheduler import LambdaLR
import time

from agents.Actor_Critic import Actor_Critic
from problems import DVRPSR_Environment


class AgentPPO:

    def __init__(self,
                 customer_feature,
                 vehicle_feature,
                 customers_count,
                 model_size=128,
                 encoder_layer=3,
                 num_head=8,
                 ff_size_actor=128,
                 ff_size_critic=512,
                 tanh_xplor=10,
                 edge_embedding_dim=64,
                 greedy=False,
                 learning_rate=3e-4,
                 ppo_epoch=3,
                 batch_size=256,
                 entropy_value=0.2,
                 epsilon_clip=0.2,
                 max_grad_norm = 2):

        self.policy = Actor_Critic(customer_feature, vehicle_feature, customers_count, model_size,
                                   encoder_layer, num_head, ff_size_actor, ff_size_critic,
                                   tanh_xplor, edge_embedding_dim, greedy)

        self.old_policy = Actor_Critic(customer_feature, vehicle_feature, customers_count, model_size,
                                       encoder_layer, num_head, ff_size_actor, ff_size_critic,
                                       tanh_xplor, edge_embedding_dim, greedy)

        self.old_policy.load_state_dict(self.policy.state_dict())

        # ppo update parameters
        # self.learning_rate = learning_rate
        self.ppo_epoch = ppo_epoch
        self.batch_size = batch_size
        self.entropy_value = entropy_value
        self.epsilon_clip = epsilon_clip
        self.batch_index = 1

        # initialize the Adam optimizer
        #self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=learning_rate)
        self.MSE_loss = nn.MSELoss()

        # actor-critic parameters
        self.customer_feature = customer_feature
        self.vehicle_feature = vehicle_feature
        self.customers_count = customers_count
        self.model_size = model_size
        self.encoder_layer = encoder_layer
        self.num_head = num_head
        self.ff_size_actor = ff_size_actor
        self.ff_size_critic = ff_size_critic
        self.tanh_xplor = tanh_xplor
        self.edge_embedding_dim = edge_embedding_dim
        self.greedy = greedy
        self.max_grad_norm = max_grad_norm

        self.times, self.losses, self.rewards, self.critic_rewards = [], [], [], []

    def advantage_normalization(self, advantage):

        std = advantage.std()

        assert std != 0. and not torch.isnan(std), 'Need nonzero std'

        norm_advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-8)
        return norm_advantage

    def pad_actions(self, actions):
        max_len = max([a.size(0) for a in actions])
        padded_actions = []
        for a in actions:
            pad_length = max_len - a.size(0)
            padded_a = F.pad(a, (0, 0, 0, pad_length))
            padded_actions.append(padded_a)
        return torch.stack(padded_actions), max_len

    def update(self, memory, epoch, data=None, env=None, optim=None, lr_scheduler=None, device=None):

        old_nodes = torch.stack(memory.nodes)
        old_edge_attributes = torch.stack(memory.edge_attributes)
        old_rewards = torch.stack(memory.rewards).unsqueeze(-1)
        old_log_probs = torch.stack(memory.log_probs).unsqueeze(-1)

        # preprocessing on old actions
        padded_actions, max_length = self.pad_actions(memory.actions)

        # create update data for PPO
        datas = []

        # print(memory.actions.size())

        for i in range(old_nodes.size(0)):
            data_to_load = Data(nodes=old_nodes[i],
                                edge_attributes=old_edge_attributes[i],
                                actions=padded_actions[i],
                                rewards=old_rewards[i],
                                log_probs=old_log_probs[i])

            datas.append(data_to_load)
        # print(datas[0], self.batch_size)

        self.policy.to(device)

        data_loader = DataLoader(datas, batch_size=self.batch_size, shuffle=False)

        # scheduler = LambdaLR(self.optimizer, lr_lambda=lambda f: 0.96 ** epoch)
        value_buffer = 0

        env = env if env is not None else DVRPSR_Environment

        for i in range(self.ppo_epoch):

            self.policy.train()
            epoch_start = time.time()
            start = epoch_start

            self.times, self.losses, self.rewards, self.critic_rewards = [], [], [], []

            for batch_index, minibatch_data in enumerate(data_loader):

                self.batch_index += 1
                minibatch_data.to(device)

                if data.customer_mask is None:
                    nodes = minibatch_data.nodes.to(device)
                    customer_mask = None
                    edge_attributes = minibatch_data.edge_attributes.to(device)

                nodes = nodes.view(self.batch_size, self.customers_count, self.customer_feature)
                edge_attributes = edge_attributes.view(self.batch_size, self.customers_count * self.customers_count, 1)

                old_actions_for_env = minibatch_data.actions.view(self.batch_size, max_length, 2).permute(1, 0, 2).to(device)
                #print(old_actions_for_env)

                dyna_env = env(data, nodes, customer_mask, edge_attributes)

                entropy, log_probs, values = self.policy.evaluate(dyna_env, old_actions_for_env, True)

                # normalize the rewards and get the MSE loss with critics values
                R = minibatch_data.rewards
                R_norm = self.advantage_normalization(R)

                mse_loss = self.MSE_loss(R_norm, values.squeeze(-1))

                # PPO ration (r(0)_t)
                ratio = torch.exp(log_probs - minibatch_data.log_probs)

                # PPO advantage
                advantage = R_norm - values.detach()

                # PPO overall loss function
                actor_loss1 = ratio * advantage
                actor_loss2 = torch.clamp(ratio, 1 - self.epsilon_clip, 1 + self.epsilon_clip) * advantage

                actor_loss = torch.min(actor_loss1, actor_loss2)

                # total loss
                loss = actor_loss + 0.5 * mse_loss - self.entropy_value * entropy

                # optimizer and backpropogation
                #self.optimizer.zero_grad()
                optim.zero_grad()
                loss.mean().backward()
                torch.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
                #self.optimizer.step()
                optim.step()

                lr_scheduler.step()

                self.rewards.append(torch.mean(R_norm.detach()).item())
                self.losses.append(torch.mean(loss.detach()).item())
                self.critic_rewards.append(torch.mean(values.detach()).item())

        self.old_policy.load_state_dict(self.policy.state_dict())

        return self.rewards, self.losses, self.critic_rewards

# if __name__ == '__main__':
#     raise Exception('Cannot be called from main')


In [11]:
### configuration
import argparse
from argparse import ArgumentParser
import sys
import json


def write_config_file(args, output_file):
    with open(output_file, 'w') as f:
        json.dump(vars(args), f, indent=4)


def ParseArguments(argv=None):
    parser = ArgumentParser()
    parser = argparse.ArgumentParser(description="Reinforcement Learning for Dynamic VRP with Stochastic Requests")

    parser.add_argument("--config-file", "-f", type=str, default=None,
                        help="configuration file")
    parser.add_argument("--verbose", "-v", action='store_true', default=True,
                        help="Showing information while processing")
    parser.add_argument("--gpu", action='store_true', default=True,
                        help="Use GPU to run the model")
    parser.add_argument("--seed", type=int, default=None, help="seed to regenerate same result")

    ### Data related arguments

    parser.add_argument_group("Data Generation for DVRPSR")
    parser.add_argument("--problem", "-p", type=str, default='DVRPSR',
                        help="problem to solve is DVRPSR")
    parser.add_argument("--vehicle-count", "-m", type=int, default=2,
                        help='number of vehicles for DVRPSR')
    parser.add_argument("--vehicle-speed", type=int, default=1.2,
                        help='speed of vehicle for DVRPSR')
    parser.add_argument("--Lambda", type=float, default=0.025,
                        help='Requests rate per minute')
    parser.add_argument("--dod", type=float, default=0.5,
                        help="Degree of dynamism")
    parser.add_argument("--horizon", type=int, default=400,
                        help='Working time for DVRPSR in minutes')
    parser.add_argument("--customers_count", type=int, default=None,
                        help='Working time for DVRPSR in minutes')
    parser.add_argument("--fDmean", type=int, default=10,
                        help="mean value for service duration of customers")
    parser.add_argument("--fDstd", type=float, default=2.5,
                        help="standard deviation for service duration of customers")
    parser.add_argument("--euclidean", action = 'store_true', default=True,
                        help="Wheather to use Euclidean distance or City street network for distance calculation")

    ### Environment related arguments
    parser.add_argument_group(" Environment for DVRPSR")
    parser.add_argument("--pending-cost", type=int, default=0.2,
                        help='Pending cost for not serving a static customers in routes')
    parser.add_argument("--dynamic-reward", type=int, default=0.5,
                        help="Reward for serving a Dynamic customer")

    parser.add_argument_group(" Graph Attention models ")
    parser.add_argument("--model-size", type=int, default=64,
                        help=" Size of for attention models")
    parser.add_argument("--encoder-layer", type=int, default=3,
                        help='Number of Encoder Layers')
    parser.add_argument("--num-head", type=int, default=8,
                        help='Number of heads in MultiHeadAttention modules')
    parser.add_argument("--ff-size-actor", type=int, default=128,
                        help=" Size of fully connected Feed Forward Networks")
    parser.add_argument("--ff-size-critic", type=int, default=128,
                        help=" Size of fully connected Feed Forward Networks")
    parser.add_argument("--tanh-xplor", type=int, default=10)
    parser.add_argument("--edge_embedding_dim", type=int, default=64,
                        help = 'Edge embedding dimention for edge attributes')

    # PPO Agent Training related arguments
    parser.add_argument_group(" Training PPO Agnet ")
    parser.add_argument("--greedy", action='store_true', default=False,
                        help='weather to use greedy or smapling')
    parser.add_argument("--learning-rate", type=int, default=3e-4,
                        help='Learning rate for PPO agent')
    parser.add_argument("--ppo-epoch", type=int, default=2,
                        help='Epoch for PPO to run the sample and evaluate')
    parser.add_argument("--entropy-value", type=int, default=0.2)
    parser.add_argument("--epsilon-clip", type=int, default=0.2)
    parser.add_argument("--timestep", type=int, default=1)

    parser.add_argument("--epoch-count", "-e", type=int, default=20)
    parser.add_argument("--iter-count", "-i", type=int, default=10)
    parser.add_argument("--batch-size", "-b", type=int, default=32)
    parser.add_argument("--rate-decay", '-d', type=float, default=0.96)
    parser.add_argument("--max-grad-norm", type=float, default=2)
    parser.add_argument("--grad-norm-decay", type=float, default=None)

    ### Testing Related arguments
    parser.add_argument("--test-batch-size", type=int, default=8)

    ### Saving paramters
    parser.add_argument_group("Checkpointing")
    parser.add_argument("--output-dir", "-o", type=str, default=None)
    parser.add_argument("--checkpoint-period", "-c", type=int, default=5)
    parser.add_argument("--resume-state", type=str, default=None)

    args = parser.parse_args(argv)
    if args.config_file is not None:
        with open(args.config_file) as f:
            parser.set_defaults(**json.load(f))

    return parser.parse_args(argv)


In [13]:
from nets import *
from agents import *
from problems import *
from utils import *
from TrainPPOAgent import *
from utils.config import *
from utils.ortool import *
from utils.Misc import *
from utils.save_load import *

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR
from torch.nn.utils import clip_grad_norm_

import time
import os
from itertools import chain
import tqdm
from tqdm import tqdm

ortool_available = True

def run(args):
    device = torch.device("cpu" if torch.backends.mps.is_available() and args.gpu else "cpu")
    print(device)

    if args.seed is not None:
        torch.manual_seed(args.seed)

    if args.verbose:
        verbose_print = print
    else:
        def verbose_print(*args, **kwargs):
            pass

    ## load DVRPSR problem

    verbose_print("Uploading data for training {}".format(args.iter_count * args.batch_size), end=" ", flush=True)
    train_data = torch.load("./data/train/DVRPSR_{}_{}_{}_{}/normalized_train.pth".format(args.Lambda,
                                                                                     args.dod,
                                                                                     args.vehicle_count,
                                                                                     args.horizon))
    verbose_print("Done")

    verbose_print("Uploading data for testing {}".format(args.test_batch_size), end=" ", flush=True)
    # test data is not normalized
    test_data = torch.load("./data/test/DVRPSR_{}_{}_{}_{}/unnormalized_test.pth".format(args.Lambda,
                                                                                         args.dod,
                                                                                         args.vehicle_count,
                                                                                         args.horizon))
    verbose_print("Done")

    if ortool_available:
        reference_routes = ortool_solve(test_data)
    else:
        reference_routes = None
        verbose_print(" No reference to calculate optimality gap", end=" ", flush=True)

    test_data.normalize()

    ## Defining Environemnt for DVRPSR
    env = {"DVRPSR": DVRPSR_Environment}.get(args.problem)
    env_params = [args.pending_cost,
                  args.dynamic_reward]
    env_test = env(test_data, None, None, None, *env_params)

    if reference_routes is not None:
        reference_costs = eval_apriori_routes(env_test, reference_routes, 100)
        print("Reference cost on test dataset {:5.2f} +- {:5.2f}".format(reference_costs.mean(),
                                                                         reference_costs.std()))

    env_test.nodes = env_test.nodes.to(device)
    env_test.edge_attributes = env_test.edge_attributes.to(device)

    ## PPO agent for DVRPSR
    customer_feature = 4  # customer and vehicle features are fixed
    vehicle_feature = 8

    ## customer counts
    if args.customers_count is None:
        args.customers_count = train_data.customer_count+1
    print(args.customers_count)

    trainppo = TrainPPOAgent(customer_feature, vehicle_feature, args.customers_count, args.model_size,
                             args.encoder_layer, args.num_head, args.ff_size_actor, args.ff_size_critic, args.tanh_xplor,
                             args.edge_embedding_dim, args.greedy, args.learning_rate, args.ppo_epoch, args.batch_size,
                             args.entropy_value, args.epsilon_clip, args.epoch_count, args.timestep, args.max_grad_norm)

    ## Checkpoints
    verbose_print("Creating Output directry...", end=" ", flush=True)
    args.output_dir = "./output/{}_{}_{}_{}_{}".format(
                                                        args.problem.upper(),
                                                        args.Lambda,
                                                        args.dod,
                                                        args.vehicle_count,
                                                        time.strftime("%y%m%d")
    ) if args.output_dir is None else args.output_dir

    os.makedirs(args.output_dir, exist_ok=True)
    write_config_file(args, os.path.join(args.output_dir, "args.json"))
    verbose_print("Create Output dir {}".format(args.output_dir), end=" ", flush=True)

    ## Optimizer and LR Scheduler
    verbose_print("Initializing ADAM Optimizer ...", end=" ", flush=True)
    lr_scheduler = None

    optim = Adam([{"params": trainppo.agent.policy.parameters(), 'lr': args.learning_rate}])

    if args.rate_decay is not None:
        lr_scheduler = LambdaLR(optim,
                                lr_lambda=[lambda epoch: args.learning_rate * args.rate_decay ** epoch])

    if args.resume_state is None:
        start_epoch = 0
    else:
        start_epoch = load_checkpoint(args, trainppo.agent.old_policy, optim, lr_scheduler)

    verbose_print("Running PPO models ")
    train_stats = []
    test_stats = []

    try:
        for epoch in range(start_epoch, args.epoch_count):

            #print('running epoch {}'.format(epoch+1))
            train_stats.append(trainppo.run_train(args, train_data, env, env_params, optim, lr_scheduler, device, epoch))

            agent = trainppo.agent.old_policy

            if reference_routes is not None:
                test_stats.append(trainppo.test_epoch(args, env_test, agent, reference_costs))
            if args.rate_decay is not None:
                lr_scheduler.step()
            if args.grad_norm_decay is not None:
                args.max_grad_norm *= args.grad_norm_decay
            if (epoch + 1) % args.checkpoint_period == 0:
                save_checkpoint(args, epoch, agent, optim, lr_scheduler)

    except KeyboardInterrupt:
        save_checkpoint(args, epoch, agent, optim, lr_scheduler)
    finally:
        export_train_test_stats(args, start_epoch, train_stats, test_stats)


if __name__ == "__main__":
    run(ParseArguments())


cpu
Uploading data for training 640 Done
Uploading data for testing 8 Done


Calling ORTools: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:02<00:00,  3.29it/s]


Reference cost on test dataset -16.82 +-  5.95
21
Creating Output directry... Create Output dir ./output/DVRPSR_0.025_0.5_2_230628 Initializing ADAM Optimizer ... Running PPO models 


Epoch #  1/20 :  10%|███████████▌                                                                                                        | 2/20 [00:29<04:26, 14.81s/it, l=0.8052 p=  0.01814 val=-29.86 c_val=0.2486][E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
Epoch #  1/20 :  10%|███████████▌                                                                                                        | 2/20 [00:37<05:36, 18.67s/it, l=0.8052 p=  0.01814 val=-29.86 c_val=0.2486]


UnboundLocalError: local variable 'agent' referenced before assignment