In [1]:
import numpy as np
import pandas as pd

from utils import collate_batch_local_search,split_train_test,collate_batch_ptr
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import DataLoader
import torch.nn as nn
import torch
import time


%reload_ext autoreload
%autoreload 2


In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else  "cpu"
print(DEVICE)

cpu


# Evaluation Metric

**Gap**: $100*\frac{model\_time - optimal\_time}{optimal\_time}$

# Load Data

In [38]:
instances = np.load('data/instances.npy')
instances_orders = np.load('data/instances_orders_unpadded.npy')

X,y = instances,instances_orders

# Unpad data

Needed for the graph network

In [9]:
from utils import unpad

for seq in y:
    seq = unpad(seq)

In [38]:
np.save('data/instances_orders_unpadded.npy',y)

# Split data

In [39]:
train_dataset,test_dataset = split_train_test(X,y,0.1)

# Solving TSP with Local Search

In [None]:
from local_search import test_local_search

In [7]:
# We only need to test the model on the test set
test_dataloader = DataLoader(test_dataset,batch_size=256,collate_fn=collate_batch_local_search) #Large batch size to leverage the multiprocessing
test_local_search(test_dataloader)



  2%|▏         | 1/43 [00:21<14:53, 21.28s/it]


KeyboardInterrupt: 

# Graph Convolutional Network

You can run this file with your own hyperparameters to launch the training of a Graph Conv Net. The models checkpoints will automatically be saved under 'graphconvnet/ckpts'

bs :BATCH SIZE
lr : Starting learning rate
epochs : Number of epochs for training

## Training

In [4]:
! python train_gcn.py --epochs 3 --bs 32 --lr 0.01

cuda
3it [00:04,  1.33s/it]^C


## Load model and evaluate it

In [5]:
from graphconvnet.model.gcn_model import ResidualGatedGCNModel
from graphconvnet.model.model_utils import test_gcn
from utils import collate_batch_gcn

In [6]:
net = ResidualGatedGCNModel(torch.cuda.FloatTensor, torch.cuda.LongTensor).to(DEVICE)
checkpoint = torch.load("/home/aymenkallala/TSP_DL/graphconvnet/ckpts/checkpoint_epoch3.pth")
net.load_state_dict(checkpoint['model_state_dict'])

test_dataloader = DataLoader(test_dataset,
                            batch_size=10, #Cannot be larger due to memory concerns (computing beamsearch while generating final preds uses a lot)
                            shuffle=True,
                            collate_fn=collate_batch_gcn)

In [10]:
for  (x_edges,
    x_edges_values,
    x_nodes,
    x_nodes_coord,
    y_edges,
    y_nodes) in test_dataloader:


    edge_labels = y_edges.cpu().numpy().flatten()
    edge_cw = compute_class_weight(
                    "balanced", classes=np.unique(edge_labels), y=edge_labels
                )
    

    pred,loss = net.decode(x_edges,x_edges_values,x_nodes,x_nodes_coord,y_edges,edge_cw)

    print(pred[0])
    print(y_nodes[0])
    break



tensor([ 0,  2, 33, 37,  8, 25, 10,  4,  7, 19, 22,  1, 21, 18, 31, 27, 39, 23,
        11, 24, 26, 28,  5, 16, 14, 38, 34, 12, 15, 29, 35,  9, 20, 36,  3, 32,
        17, 13, 46, 48, 43, 42, 47, 41, 40, 44, 49, 45, 30,  6],
       device='cuda:0')
tensor([ 0, 21,  1, 22, 19, 31, 18, 27, 39, 23, 11, 24, 26, 13, 17, 32,  3, 28,
         5, 16, 14, 36,  7,  4, 10, 35,  9, 20, 38, 34, 12, 15, 29, 30,  6, 25,
         8, 37, 33,  2, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       device='cuda:0')


In [14]:
test_gcn(test_dataloader,net,1,torch.cuda.FloatTensor,torch.cuda.LongTensor)

0it [00:00, ?it/s]




ValueError: not enough values to unpack (expected 6, got 3)

# Transformer Net

In [3]:
from transformernet.transformernet import TSP_net
from tqdm import tqdm
from graphconvnet.model.graph_utils import total_tour_len_nodes


instances = np.load('data/instances.npy')
instances_orders = np.load('data/instances_orders.npy')

X,y = instances,instances_orders
y[y==-1] = 50
train_dataset,test_dataset = split_train_test(X,y,0.1)

In [4]:
LR = 0.01
BATCH_SIZE = 64
NUM_EPOCHS = 1

train_loader = DataLoader(train_dataset,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        collate_fn=collate_batch_ptr)

test_loader = DataLoader(test_dataset,
                        batch_size=16,
                        shuffle=True,
                        collate_fn=collate_batch_ptr)

In [6]:
model_train = TSP_net(2, 128, 512, 
              6, 2, 8, 1000,
              False).to("cuda")

RuntimeError: No CUDA GPUs are available

In [36]:
criterion = torch.nn.NLLLoss().to(DEVICE)
optimizer = torch.optim.Adam(model_train.parameters(),lr = LR)

In [37]:
def train_transformernet(dataloader, model, optimizer, criterion, epoch):
    model.train()
    total_loss,total_gap, total_count = 0,0,0
    log_interval = 100
    running_tour_length = 0
    running_gt_length = 0

    for idx, (x, y,mask,dm) in tqdm(enumerate(dataloader)):
        optimizer.zero_grad()
        total_count += y.shape[0]
        #print(dm.shape)

        y_pred,probs,_ = model(x,mask)
        probs = probs.permute(0, 2, 1)

        # Get the loss
        loss = criterion(probs,y)#.view(-1))
        total_loss  += loss.item()
        # Do back propagation
        loss.backward()
        # Clip the gradients at 0.1
        #nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        # Do an optimization step
        optimizer.step()
        
        # Compute the tour lengths
        running_tour_length += total_tour_len_nodes(dm, y_pred)
        running_gt_length += total_tour_len_nodes(dm, y)
        
        
        if idx % log_interval == 0 and idx > 0:
            gap = (
                    100 * (running_tour_length - running_gt_length) / running_gt_length
                )
            print(
                "| epoch {:3d} | {:5d}/{:5d} batches"
                "| gap {:8.3f}".format(epoch, idx, len(dataloader), gap),
                f"| loss {total_loss/total_count}"
            )
            running_tour_length,running_gt_length, total_count,total_loss = 0, 0, 0,0

In [38]:
train_transformernet(test_loader,model_train,optimizer,criterion,1)

0it [00:00, ?it/s]

101it [00:29,  3.43it/s]

| epoch   1 |   100/  679 batches| gap  138.632 | loss -0.0006815082904200374


201it [00:58,  3.36it/s]

| epoch   1 |   200/  679 batches| gap  141.155 | loss -0.0006882467085961252


301it [01:27,  3.48it/s]

| epoch   1 |   300/  679 batches| gap  139.688 | loss -0.0006779999448917806


359it [01:44,  3.44it/s]


KeyboardInterrupt: 