In [None]:
import torch
import torch.nn as nn
import time
import argparse

import os
import datetime
import torch.nn.functional as F
import random

from torch.distributions.categorical import Categorical
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
import time
from tqdm import tqdm_notebook
from tqdm import tqdm_notebook
import math
import numpy as np
import torch
import tqdm
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from scipy.spatial import distance
# visualization 
%matplotlib inline
from IPython.display import set_matplotlib_formats, clear_output
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('png2x','pdf')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Choose which physical GPU(s) PyTorch may see, then fall back to the CPU
# when no CUDA device is available.
gpu_id = '0'      # single GPU; use e.g. '2,3' to expose several GPUs
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('GPU name: {:s}, gpu_id: {:s}'.format(torch.cuda.get_device_name(0),gpu_id))

print(device)

# HPN Large

In [None]:
def compute_tour_length(x, tour): 
    """
    Compute the length of a batch of closed tours (the last node connects back to the first).
    Inputs : x of size (bsz, nb_nodes, 2) batch of tsp tour instances
             tour of size (bsz, nb_nodes) batch of sequences (node indices) of tsp tours
    Output : L of size (bsz,)             batch of lengths of each tsp tour
    The result is computed under torch.no_grad(), so it carries no gradient.
    A single-node tour has length 0.
    """
    bsz = x.shape[0]
    nb_nodes = x.shape[1]
    L = torch.zeros(bsz, device=x.device)
    with torch.no_grad():
        # row index broadcast against the tour columns, size (bsz, 1)
        batch_rows = torch.arange(bsz, device=x.device).unsqueeze(1)
        # cities in visiting order, size(ordered)=(bsz, nb_nodes, 2)
        ordered = x[batch_rows, tour[:, :nb_nodes]]
        # successor of every visited city; rolling by -1 makes the last city
        # point back to the first one, which closes the tour
        successors = torch.roll(ordered, shifts=-1, dims=1)
        segment_lengths = torch.sum((successors - ordered)**2, dim=2)**0.5 # size (bsz, nb_nodes)
        L += segment_lengths.sum(dim=1)
    return L

class TransEncoderNet(nn.Module):
    """
    Encoder network based on self-attention transformer.
    Each layer is: multi-head self-attention -> residual + norm ->
    2-layer ReLU feed-forward -> residual + norm.
    Constructor arguments :
      nb_layers  number of encoder layers
      dim_emb    embedding dimension (must be divisible by nb_heads)
      nb_heads   number of attention heads
      dim_ff     hidden width of the feed-forward sub-layer
      batchnorm  True -> nn.BatchNorm1d, False -> nn.LayerNorm
    Inputs :  
      h of size      (bsz, nb_nodes, dim_emb)    batch of input cities
    Outputs :  
      h of size      (bsz, nb_nodes, dim_emb)    batch of encoded cities
      score of size  (bsz, nb_nodes, nb_nodes)   attention scores of the LAST layer
                                                 (None when nb_layers == 0)
    """
    
    def __init__(self, nb_layers, dim_emb, nb_heads, dim_ff, batchnorm):
        super(TransEncoderNet, self).__init__()
        assert dim_emb % nb_heads == 0, "dim_emb must be divisible by nb_heads"
        self.MHA_layers = nn.ModuleList( [nn.MultiheadAttention(dim_emb, nb_heads) for _ in range(nb_layers)] )
        self.linear1_layers = nn.ModuleList( [nn.Linear(dim_emb, dim_ff) for _ in range(nb_layers)] )
        self.linear2_layers = nn.ModuleList( [nn.Linear(dim_ff, dim_emb) for _ in range(nb_layers)] )   
        norm_cls = nn.BatchNorm1d if batchnorm else nn.LayerNorm
        self.norm1_layers = nn.ModuleList( [norm_cls(dim_emb) for _ in range(nb_layers)] )
        self.norm2_layers = nn.ModuleList( [norm_cls(dim_emb) for _ in range(nb_layers)] )
        self.nb_layers = nb_layers
        self.nb_heads = nb_heads
        self.batchnorm = batchnorm

    def _normalize(self, norm, h):
        """Apply `norm` to h of size (nb_nodes, bsz, dim_emb) and return the same layout."""
        if self.batchnorm:
            # PyTorch nn.BatchNorm1d requires input size (bsz, dim, seq_len)
            h = h.permute(1,2,0).contiguous() # size(h)=(bsz, dim_emb, nb_nodes)
            h = norm(h)                       # size(h)=(bsz, dim_emb, nb_nodes)
            return h.permute(2,0,1).contiguous() # size(h)=(nb_nodes, bsz, dim_emb)
        return norm(h)                        # size(h)=(nb_nodes, bsz, dim_emb)
        
    def forward(self, h):      
        # PyTorch nn.MultiheadAttention requires input size (seq_len, bsz, dim_emb) 
        h = h.transpose(0,1) # size(h)=(nb_nodes, bsz, dim_emb)  
        # stays None if there are no layers, instead of raising UnboundLocalError
        score = None
        # L layers
        for i in range(self.nb_layers):
            # self-attention sub-layer with residual connection
            h_rc = h # size(h_rc)=(nb_nodes, bsz, dim_emb)
            h, score = self.MHA_layers[i](h, h, h) # size(h)=(nb_nodes, bsz, dim_emb), size(score)=(bsz, nb_nodes, nb_nodes)
            h = self._normalize(self.norm1_layers[i], h_rc + h)
            # feedforward sub-layer with residual connection
            h_rc = h
            h = self.linear2_layers[i](torch.relu(self.linear1_layers[i](h)))
            h = self._normalize(self.norm2_layers[i], h_rc + h)
        # Transpose h
        h = h.transpose(0,1) # size(h)=(bsz, nb_nodes, dim_emb)
        return h, score
    
class Attention(nn.Module):
    """
    Additive (pointer-style) attention: u_j = v^T tanh(Wq q + Wref ref_j).

    The module is created on the CPU like every other layer; move it with
    `.to(device)` / `.cuda()` after construction.
    Inputs :
      q of size    (B, dim)          one query vector per batch element
      ref of size  (B*size, dim)     flattened reference vectors
    Outputs :
      u of size    (B, size)         unnormalised attention score per reference
      ref of size  (B, size, dim)    projected references Wref(ref)
    """
    def __init__(self, n_hidden):
        super(Attention, self).__init__()
        self.size = 0
        self.batch_size = 0
        self.dim = n_hidden
        
        # scoring vector v; no hard-coded .cuda() so the model also builds on
        # CPU-only machines
        bound = 1/math.sqrt(n_hidden)
        self.v  = nn.Parameter(torch.empty(n_hidden).uniform_(-bound, bound))
        
        # parameters for pointer attention
        self.Wref = nn.Linear(n_hidden, n_hidden)
        self.Wq = nn.Linear(n_hidden, n_hidden)
    
    
    def forward(self, q, ref):       # query and reference
        self.batch_size = q.size(0)
        self.size = ref.size(0) // self.batch_size
        q = self.Wq(q)     # (B, dim)
        ref = self.Wref(ref)
        ref = ref.view(self.batch_size, self.size, self.dim)  # (B, size, dim)
        
        # q is broadcast over the `size` axis: (B, 1, dim) + (B, size, dim)
        # contracting the last axis with v gives (B, size)
        u = torch.matmul(torch.tanh(q.unsqueeze(1) + ref), self.v)
        
        return u, ref
    
class LSTM(nn.Module):
    """
    Single LSTM step whose input, forget and output gates also read the cell
    state through their own linear maps (wci, wcf, wco).
    Inputs :
      x  current input            (last dim n_hidden)
      h  previous hidden state    (last dim n_hidden)
      c  previous cell state      (last dim n_hidden)
    Outputs :
      h  new hidden state
      c  new cell state
    """
    def __init__(self, n_hidden):
        super(LSTM, self).__init__()

        def square_linear():
            return nn.Linear(n_hidden, n_hidden)

        # input gate: maps for x_t, h_t and c_t
        self.Wxi = square_linear()
        self.Whi = square_linear()
        self.wci = square_linear()

        # forget gate: maps for x_t, h_t and c_t
        self.Wxf = square_linear()
        self.Whf = square_linear()
        self.wcf = square_linear()

        # candidate cell value: maps for x_t and h_t
        self.Wxc = square_linear()
        self.Whc = square_linear()

        # output gate: maps for x_t, h_t and the updated c_t
        self.Wxo = square_linear()
        self.Who = square_linear()
        self.wco = square_linear()


    def forward(self, x, h, c):
        input_gate = torch.sigmoid(self.Wxi(x) + self.Whi(h) + self.wci(c))
        forget_gate = torch.sigmoid(self.Wxf(x) + self.Whf(h) + self.wcf(c))

        candidate = torch.tanh(self.Wxc(x) + self.Whc(h))
        next_c = forget_gate * c + input_gate * candidate

        # the output gate looks at the already-updated cell state
        output_gate = torch.sigmoid(self.Wxo(x) + self.Who(h) + self.wco(next_c))
        next_h = output_gate * torch.tanh(next_c)

        return next_h, next_c

class HPN(nn.Module):
    """
    Hybrid pointer network: one decoding step of a TSP policy.

    Two pointer (additive attention) heads are summed: one over a per-step
    context built from the cities' position relative to the current city, and
    one over a transformer encoding of all cities that is computed once on the
    first step and then passed back in by the caller.

    Constructor arguments :
      n_feature  number of coordinates per city
      n_hidden   hidden/embedding dimension; must be divisible by 8 (the number
                 of transformer heads)
    """
    def __init__(self, n_feature, n_hidden):

        super(HPN, self).__init__()
        self.city_size = 0
        self.batch_size = 0
        self.dim = n_hidden
        
        # pointer layers: one over the per-step context, one over the transformer context
        self.pointer = Attention(n_hidden)
        self.TransPointer = Attention(n_hidden)
        
        # lstm encoder
        self.encoder = LSTM(n_hidden)
        
        # trainable first hidden and cell state
        bound = 1/math.sqrt(n_hidden)
        self.h0 = nn.Parameter(torch.empty(n_hidden).uniform_(-bound, bound))
        self.c0 = nn.Parameter(torch.empty(n_hidden).uniform_(-bound, bound))
        
        # trainable mixing coefficients of the three context layers (start at 1)
        self.r1 = nn.Parameter(torch.ones(1))
        self.r2 = nn.Parameter(torch.ones(1))
        self.r3 = nn.Parameter(torch.ones(1))
        
        # embedding
        self.embedding_x = nn.Linear(n_feature, n_hidden)
        self.embedding_all1 = nn.Linear(n_feature, n_hidden)          # first step: raw coordinates
        self.embedding_all2 = nn.Linear(n_feature + 1, n_hidden)      # later steps: distance + offset
        # 6 layers, 8 heads, feed-forward width 4*n_hidden; equals the former
        # hard-coded (6, 128, 8, 512) for n_hidden=128 and keeps the encoder
        # consistent with the embeddings for other hidden sizes
        self.Transembedding_all = TransEncoderNet(6, n_hidden, 8, 4 * n_hidden, batchnorm=True)
        
        # vector to start decoding 
        self.start_placeholder = nn.Parameter(torch.randn(n_hidden))
        
        # weights for GNN
        self.W1 = nn.Linear(n_hidden, n_hidden)
        self.W2 = nn.Linear(n_hidden, n_hidden)
        self.W3 = nn.Linear(n_hidden, n_hidden)
        
        # aggregation function for GNN
        self.agg_1 = nn.Linear(n_hidden, n_hidden)
        self.agg_2 = nn.Linear(n_hidden, n_hidden)
        self.agg_3 = nn.Linear(n_hidden, n_hidden)
    
    
    def forward(self, Transcontext, x, X_all, mask, h=None, c=None, latent=None):
        '''
        Inputs (B: batch size, size: city size, dim: hidden dimension)
        
        Transcontext: transformer encoding of all cities (B*size, dim) as returned
                      by the first call; ignored (recomputed) on the first step
        x: current city coordinate (B, n_feature); ignored on the first step,
           where the trainable start placeholder is used instead
        X_all: all cities' coordinates (B, size, n_feature)
        mask: additive mask (B, size), -inf for visited cities and 0 otherwise
        h: hidden variable (B, dim); None on the first step
        c: cell gate (B, dim); None on the first step
        latent: unused, kept for interface compatibility
        
        Outputs
        
        Transcontext: transformer encoding to feed back into the next call (B*size, dim)
        softmax: probability distribution of next city (B, size)
        h: hidden variable (B, dim)
        c: cell gate (B, dim)
        latent_u: summed pointer scores before clipping and masking (B, size)
        '''
        
        self.batch_size = X_all.size(0)
        self.city_size = X_all.size(1)
        
        # Check if this iteration is the first one
        if h is None or c is None:
            # Letting the placeholder be the first input
            x = self.start_placeholder
            # init-embedding for all cities
            context = self.embedding_all1(X_all)
            # Transformer context, computed once and reused by the caller
            Transcontext, _ = self.Transembedding_all(context)
            Transcontext = Transcontext.reshape(-1, self.dim) # (B*size, dim)
            
            # trainable initial hidden and cell state, one copy per batch element
            h = self.h0.unsqueeze(0).expand(self.batch_size, self.dim).contiguous()
            c = self.c0.unsqueeze(0).expand(self.batch_size, self.dim).contiguous()
        else:
            # =============================
            # Feature context
            # =============================
            current = x.unsqueeze(1) # (B, 1, n_feature)
            # per city: [distance to current city, offset from current city]
            features = torch.cat((torch.cdist(X_all, current, p=2), X_all - current), 2) # (B, size, n_feature+1)
            # sequential input embedding 
            x = self.embedding_x(x)
            # init-embedding for all cities
            context = self.embedding_all2(features)
            
        # =============================
        # graph neural network encoder
        # =============================
        context = context.reshape(-1, self.dim)           # (B*size, dim)
        # guard against division by zero for a single-city instance
        nb_others = max(self.city_size - 1, 1)
        context = self.r1 * self.W1(context) + (1-self.r1) * F.relu(self.agg_1(context/nb_others))
        context = self.r2 * self.W2(context) + (1-self.r2) * F.relu(self.agg_2(context/nb_others))
        context = self.r3 * self.W3(context) + (1-self.r3) * F.relu(self.agg_3(context/nb_others))
        # LSTM encoder
        h, c = self.encoder(x, h, c)
        
        # =============================
        # Decoding Phase
        # ============================= 
        
        u1, _ = self.pointer(h, context)
        u2, _ = self.TransPointer(h, Transcontext)
        u = u1 + u2
        latent_u = u.clone()
        # clip the scores to [-100, 100], then add the mask so visited cities
        # get probability 0 after the softmax
        u = 100 * torch.tanh(u) + mask
        return Transcontext, F.softmax(u, dim=1), h, c, latent_u

In [None]:
# ----- problem sizes -----
size = 50            # number of cities per training instance
size_val = 500       # number of cities per in-training validation instance

# ----- numerical constants -----
TOL  =  1e-3         # margin by which the actor must beat the baseline
TINY =  1e-15        # added inside log() to avoid log(0)

# ----- optimisation schedule -----
learn_rate = 1e-3    # learning rate
B = 128              # batch_size
B_val = 64           # validation size
B_valLoop = 20       # number of evaluation batches at the end of each epoch
steps = 2500         # training steps
n_epoch = 100        # epochs

print(
    '=========================',
    'prepare to train',
    '=========================',
    'Hyperparameters:',
    'size {}'.format(size),
    'size_val {}'.format(size_val),
    'learning rate {}'.format(learn_rate),
    'batch size {}'.format(B),
    'validation size {}'.format(B_val),
    'steps {}'.format(steps),
    'epoch {}'.format(n_epoch),
    '=========================',
    sep='\n',
)

###################
# Instantiate a training network and a baseline network
###################

# Drop models left over from a previous run of this cell so their memory can be
# released before new ones are allocated. Only a missing name is tolerated;
# any other error is allowed to surface.
try:
    del Actor
except NameError:
    pass
try:
    del Critic
except NameError:
    pass

# Build both networks and move them to the target device BEFORE creating the
# optimizer, so the optimizer is constructed over the parameters in their
# final location.
Actor  = HPN(n_feature=2, n_hidden=128).to(device)
Critic = HPN(n_feature=2, n_hidden=128).to(device)
# The critic is only ever used for greedy baseline rollouts
Critic.eval()

optimizer = optim.Adam(Actor.parameters(), lr=learn_rate)
# multiply the learning rate by lr_decay_rate every lr_decay_step scheduler steps
lr_decay_step = 2500
lr_decay_rate = 0.96
opt_scheduler = lr_scheduler.MultiStepLR(optimizer, range(lr_decay_step, lr_decay_step*1000,lr_decay_step), gamma=lr_decay_rate)

########################
# Remember to first initialize the model and optimizer, then load the dictionary locally.
#######################
# Defaults for a fresh run; overwritten when resuming from a checkpoint
epoch_ckpt = 0
tot_time_ckpt = 0
plot_performance_train = []
plot_performance_baseline = []

#********************************************# Uncomment these lines to re-start training with saved checkpoint #********************************************#

#checkpoint_file = "../input/hpnlarge487e/checkpoint_21-08-08--09-15-31-n50-gpu0.pkl"
#checkpoint = torch.load(checkpoint_file, map_location=device)
#epoch_ckpt = checkpoint['epoch'] + 1
#tot_time_ckpt = checkpoint['tot_time']
#plot_performance_train = checkpoint['plot_performance_train']
#plot_performance_baseline = checkpoint['plot_performance_baseline']
#Critic.load_state_dict(checkpoint['model_baseline'])
#Actor.load_state_dict(checkpoint['model_train'])
#optimizer.load_state_dict(checkpoint['optimizer'])

#print('Re-start training with saved checkpoint file={:s}\n  Checkpoint at epoch= {:d} and time={:.3f}min\n'.format(checkpoint_file,epoch_ckpt-1,tot_time_ckpt/60))
#del checkpoint

#*********************************************# Uncomment these lines to re-start training with saved checkpoint #********************************************#


###################
#  Main training loop 
###################

def _rollout(model, X, greedy, with_logprobs=False):
    """
    Decode one closed tour per instance with `model` and return its length.

    model         : network called as model(Transcontext, x=, X_all=, h=, c=, mask=)
                    and returning (Transcontext, probabilities, h, c, latent)
    X             : (bsz, nb_nodes, 2) city coordinates
    greedy        : True -> pick the most probable city, False -> sample
    with_logprobs : also accumulate the log-probability of the chosen cities

    Returns (tour_length, logprobs): tour_length is (bsz,); logprobs is (bsz,)
    or None when with_logprobs is False. Gradient tracking is left to the
    caller (wrap the call in torch.no_grad() for evaluation).
    """
    bsz, nb_nodes = X.size(0), X.size(1)
    batch_idx = torch.arange(bsz, device=X.device) # [0,1,...,bsz-1]
    mask = torch.zeros(bsz, nb_nodes, device=X.device)
    tour_length = torch.zeros(bsz, device=X.device)
    logprobs = torch.zeros(bsz, device=X.device) if with_logprobs else None
    # x is ignored by the model on the first step (h and c are None)
    x = X[:, 0, :]
    h = None
    c = None
    Transcontext = None
    first_city = None
    previous_city = None
    for k in range(nb_nodes):
        Transcontext, output, h, c, _ = model(Transcontext, x=x, X_all=X, h=h, c=c, mask=mask)
        if greedy:
            idx = torch.argmax(output, dim=1)
        else:
            idx = torch.distributions.Categorical(output).sample()
        current_city = X[batch_idx, idx] # advanced indexing already returns a copy
        if k == 0:
            first_city = current_city
        else:
            tour_length = tour_length + torch.sum((current_city - previous_city)**2 , dim=1 )**0.5
        previous_city = current_city
        x = current_city
        if with_logprobs:
            logprobs = logprobs + torch.log(output[batch_idx, idx] + TINY)
        # forbid the chosen city for the remaining steps
        mask[batch_idx, idx] += -np.inf
    # close the tour: last city back to the first one
    tour_length = tour_length + torch.sum((previous_city - first_city)**2 , dim=1 )**0.5
    return tour_length, logprobs


start_training_time = time.time()
time_stamp = datetime.datetime.now().strftime("%y-%m-%d--%H-%M-%S")
C = 0     # baseline
R = 0     # reward
zero_to_bsz = torch.arange(B, device=device) # [0,1,...,bsz-1]
tour_len = None # last in-training validation length; stays None if none was run
for epoch in range(0,n_epoch):
    # re-start training with saved checkpoint
    epoch += epoch_ckpt
    ###################
    # Train model for one epoch
    ###################
    start = time.time()
    Actor.train()
    for i in range(1,steps+1):
        X = torch.rand(B, size, 2, device=device)

        # Actor sampling phase (keeps the graph for the policy gradient)
        R, logprobs = _rollout(Actor, X, greedy=False, with_logprobs=True)

        # Critic baseline phase: greedy tours, no gradient
        with torch.no_grad():
            C, _ = _rollout(Critic, X, greedy=True)
       
        ###################
        # Loss and backprop handling 
        ###################
        
        # REINFORCE with a greedy rollout baseline: advantage (R - C) times
        # the log-probability of the sampled tour
        loss = torch.mean((R - C) * logprobs)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(Actor.parameters(),1.0, norm_type=2)
        optimizer.step()
        opt_scheduler.step() # the schedule is counted in training steps, not epochs
        
        if i % 50 == 0:
            print("epoch:{}, batch:{}/{}, reward:{}".format(epoch, i, steps, R.mean().item()))
            
            # Greedy validation on larger instances. Run in eval mode and
            # without autograd: otherwise a size_val-step graph is kept in
            # memory and BatchNorm running statistics are updated by
            # validation data.
            Actor.eval()
            with torch.no_grad():
                X_val = torch.rand(B_val, size_val, 2, device=device)
                R_val, _ = _rollout(Actor, X_val, greedy=True)
            Actor.train()
            tour_len = R_val.mean().item()
            print('validation tour length:', tour_len)

                
    time_one_epoch = time.time() - start
    time_tot = time.time() - start_training_time + tot_time_ckpt
    
    ###################
    # Evaluate train model and baseline on B_valLoop batches of B random TSP instances
    ###################
    Actor.eval()    
    mean_tour_length_actor = 0
    mean_tour_length_critic = 0
    with torch.no_grad():
        for step in range(0,B_valLoop):
            # compute greedy tours for model and baseline on the same instances
            X = torch.rand(B, size, 2, device=device)
            R, _ = _rollout(Actor, X, greedy=True)
            C, _ = _rollout(Critic, X, greedy=True)
            mean_tour_length_actor  += R.mean().item()
            mean_tour_length_critic += C.mean().item()
    mean_tour_length_actor  =  mean_tour_length_actor  / B_valLoop
    mean_tour_length_critic =  mean_tour_length_critic / B_valLoop
    # evaluate train model and baseline and update if train model is better
    update_baseline = mean_tour_length_actor + TOL < mean_tour_length_critic
    print('Avg Actor {} --- Avg Critic {}'.format(mean_tour_length_actor,mean_tour_length_critic))
    if update_baseline:
        Critic.load_state_dict(Actor.state_dict())
        print('My actor is going on the right road Hallelujah :) Updated')
        
    # For checkpoint
    plot_performance_train.append([(epoch+1), mean_tour_length_actor])
    plot_performance_baseline.append([(epoch+1), mean_tour_length_critic])
    # Compute optimality gap against a reference tour length
    # NOTE(review): 5.692 / 7.765 are presumably the average optimal lengths for
    # random TSP50 / TSP100 -- confirm the source. -1.0 means "no reference".
    if size==50: gap_train = mean_tour_length_actor/5.692- 1.0
    elif size==100: gap_train = mean_tour_length_actor/7.765- 1.0
    else: gap_train = -1.0
        
    # Print the epoch summary
    mystring_min = 'Epoch: {:d}, epoch time: {:.3f}min, tot time: {:.3f}day, L_actor: {:.3f}, L_critic: {:.3f}, gap_train(%): {:.3f}, update: {}'.format(
        epoch, time_one_epoch/60, time_tot/86400, mean_tour_length_actor, mean_tour_length_critic, 100 * gap_train, update_baseline)
    
    print(mystring_min)
    print('Save Checkpoints')
    
    # Saving checkpoint (the same file is overwritten every epoch of this run)
    checkpoint_dir = os.path.join("checkpoint")
    os.makedirs(checkpoint_dir, exist_ok=True)
        
    torch.save({
        'epoch': epoch,
        'time': time_one_epoch,
        'tot_time': time_tot,
        'loss': loss.item(),
        'plot_performance_train': plot_performance_train,
        'plot_performance_baseline': plot_performance_baseline,
        'mean_tour_length_val': tour_len,
        'model_baseline': Critic.state_dict(),
        'model_train': Actor.state_dict(),
        'optimizer': optimizer.state_dict(),
        # extra key so a resumed run can restore the learning-rate schedule
        'scheduler': opt_scheduler.state_dict(),
        }, os.path.join(checkpoint_dir, "checkpoint_" + time_stamp + "-n{}".format(size) + "-gpu{}".format(gpu_id) + ".pkl"))