In [10]:
import numpy as np
import time
from tqdm import tqdm


import torch
import torch.optim as optim
from options import get_options
import wandb

import torch.nn.functional as F 
from torch.nn import Linear, BatchNorm1d, ModuleList
from torch_geometric.nn import TransformerConv, TopKPooling 
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

from VRP_dataset import VRPDataset
from torch_geometric.loader import DataLoader

import warnings
warnings.filterwarnings("ignore", message="indexing with dtype torch.uint8 is now deprecated, please use a dtype torch.bool instead.")


### Config

In [11]:
opts = get_options()

# Seed both torch and numpy so repeated runs draw the same random numbers.
torch.manual_seed(opts.seed)
np.random.seed(opts.seed)

wandb.init(project="experiments")

# Resolve the compute device. CUDA is used when the options request it;
# otherwise prefer Apple's MPS backend only if it is actually available and
# fall back to the CPU, so the stored device string is always usable.
if opts.use_cuda:
    _device_name = "cuda:0"
elif torch.backends.mps.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"
opts.device = str(torch.device(_device_name))

### Load data

In [12]:
train_dataset = VRPDataset('vrp_data/train')
val_dataset = VRPDataset('vrp_data/validation')

# Number of training samples; train_epoch uses it to derive the global step.
opts.epoch_size = len(train_dataset)


# Training batches: reshuffled every epoch from a fixed-seed generator, and the
# trailing partial batch is dropped so every optimisation step sees a full batch.
train_dataloader = DataLoader(train_dataset,
                                batch_size=opts.batch_size,
                                num_workers=opts.num_workers,
                                shuffle=True,
                                drop_last=True,
                                generator=torch.Generator().manual_seed(1234)
                                )

# Validation batches: evaluate every sample exactly once in a fixed order.
# Shuffling adds nothing to an averaged metric, and dropping the last batch
# would silently exclude samples (or yield no batches at all when the
# validation set is smaller than one batch).
valid_dataloader = DataLoader(val_dataset,
                                batch_size=opts.batch_size,
                                num_workers=opts.num_workers,
                                shuffle=False,
                                drop_last=False,
                                generator=torch.Generator().manual_seed(1234)
                                )

Processing...


num_neighbor 11
adj matrix tensor([[1, 0, 0,  ..., 0, 0, 0],
        [0, 1, 0,  ..., 0, 0, 0],
        [0, 0, 1,  ..., 1, 0, 0],
        ...,
        [0, 0, 1,  ..., 1, 0, 0],
        [0, 0, 0,  ..., 0, 1, 1],
        [0, 0, 0,  ..., 0, 1, 1]], dtype=torch.uint8)
edge attributes tensor([[0.0000],
        [0.8216],
        [0.3925],
        [0.6909],
        [0.9258],
        [0.6215],
        [0.6468],
        [0.1632],
        [0.1537],
        [0.5218],
        [0.2332],
        [0.8793],
        [0.0000],
        [0.7337],
        [1.0151],
        [0.8169],
        [0.7617],
        [0.2720],
        [0.7608],
        [0.6498],
        [0.3496],
        [0.4701],
        [0.7893],
        [0.5132],
        [0.0000],
        [0.6955],
        [0.6154],
        [0.2567],
        [0.3562],
        [0.6827],
        [0.3884],
        [0.2713],
        [0.5254],
        [0.3891],
        [0.2170],
        [0.4120],
        [0.6955],
        [0.0000],
        [0.2232],
        [0.3946],


Done!


### Define the model

In [13]:
from models.GNN import GNN

# Hyper-parameters forwarded to the GNN constructor. The architecture sizes
# come from the parsed options; the remaining values are fixed here.
# NOTE(review): model_edge_dim is None although edge_attr is passed to the
# model during training/validation -- confirm GNN expects that.
model_params = dict(
    model_embedding_size=opts.embedding_dim,
    model_attention_heads=opts.n_heads,
    model_layers=opts.n_encode_layers,
    model_dropout_rate=0.2,
    model_top_k_ratio=0.5,
    model_top_k_every_n=1,
    model_dense_neurons=256,
    model_edge_dim=None,
)

# First positional argument is presumably the per-node feature count -- TODO confirm against GNN.
model = GNN(4, model_params)

### Train

In [14]:
def move_to(var, device):
    """Recursively place ``var`` on ``device``.

    A dict is rebuilt with the same keys, each value being moved by a
    recursive call; any other object is moved through its own ``.to(device)``.

    Args:
        var: A dict (possibly nested) or an object exposing ``.to(device)``.
        device: Target device, forwarded unchanged to ``.to``.

    Returns:
        A new dict mirroring ``var``, or the result of ``var.to(device)``.
    """
    if not isinstance(var, dict):
        return var.to(device)

    moved = {}
    for key, value in var.items():
        moved[key] = move_to(value, device)
    return moved

def calc_loss(graph_pred, targets):
    """Mean absolute percentage deviation of predictions from targets.

    Computes ``mean(|graph_pred / targets - 1| * 100)``.

    If the two tensors hold the same number of elements but have different
    shapes (e.g. predictions of shape ``(B, 1)`` against targets of shape
    ``(B,)``), both are flattened first. Without this, the division would
    broadcast to a ``(B, B)`` matrix and compare every prediction with every
    target, silently producing a wrong loss. When the element counts differ,
    the inputs are left untouched and torch's normal broadcasting rules (or
    its shape error) apply.

    Args:
        graph_pred: Tensor of predicted values.
        targets: Tensor of reference values; entries equal to zero yield
            inf/nan, as with any relative error.

    Returns:
        A 0-dim tensor holding the mean percentage gap.
    """
    if graph_pred.shape != targets.shape and graph_pred.numel() == targets.numel():
        graph_pred = graph_pred.reshape(-1)
        targets = targets.reshape(-1)
    return torch.mean(torch.abs(graph_pred / targets - 1) * 100)
    
def validate(model, valid_dataloader):
    """Evaluate ``model`` on every batch of ``valid_dataloader``.

    Puts the model in eval mode, computes ``calc_loss`` for each batch with
    gradients disabled, logs each batch value to wandb under
    ``'val optimality gap'`` and returns the mean of the per-batch values.

    Each batch value is converted to a Python float before it is stored or
    logged, so the final average does not depend on converting a list of
    tensors with numpy (which fails for tensors that live on a non-CPU device).

    Args:
        model: Callable taking ``(x, edge_attr, edge_index, batch)``.
        valid_dataloader: Iterable of batches exposing ``x``, ``y``,
            ``edge_attr``, ``edge_index`` and ``batch``.

    Returns:
        Mean of the per-batch gaps as a float; NaN if the loader yields no batch.
    """
    print('\nValidating...')
    model.eval()
    opt_gap = []

    with torch.no_grad():
        for bat in valid_dataloader:
            targets = bat.y.float()

            graph_pred = model(bat.x.float(),
                               bat.edge_attr,
                               bat.edge_index,
                               bat.batch)
            # .item() yields a plain float detached from any device.
            opt_gap_batch = calc_loss(graph_pred, targets).item()
            opt_gap.append(opt_gap_batch)

            wandb.log({'val optimality gap': opt_gap_batch})

    if not opt_gap:
        # Nothing was evaluated; make the "no data" result explicit.
        return float('nan')
    return float(np.mean(opt_gap))

def train_epoch(
        epoch,
        model,
        train_dataloader,
        valid_dataloader,
        optimizer,
        lr_scheduler,
        opts
    ):
    """Run one training epoch, then validate and log the result.

    For every batch: forward pass, ``calc_loss``, backward pass, optimizer
    step. Every ``opts.log_step`` global steps the loss, predictions and
    targets are logged to wandb. After the loop the learning-rate scheduler
    is advanced by one epoch, the model is validated with ``validate`` and
    the returned value is logged as ``'validation AOG'``.

    Args:
        epoch: Zero-based index of the epoch; used for messages and to
            offset the global step counter.
        model: Callable taking ``(x, edge_attr, edge_index, batch)``.
        train_dataloader: Iterable of training batches.
        valid_dataloader: Iterable of validation batches, passed to ``validate``.
        optimizer: Optimizer updating the model parameters.
        lr_scheduler: Scheduler stepped once per epoch.
        opts: Options object; reads ``epoch_size``, ``batch_size`` and ``log_step``.
    """
    print("\nStart train epoch {}".format(epoch))
    start_time = time.time()
    # Continue the global step count from where the previous epoch stopped.
    step = epoch * (opts.epoch_size // opts.batch_size)
    log_every = int(opts.log_step)

    model.train()
    optimizer.zero_grad()

    for batch in train_dataloader:

        graph_pred = model(batch.x.float(),
                           batch.edge_attr,
                           batch.edge_index,
                           batch.batch)

        targets = batch.y.float()

        loss = calc_loss(graph_pred, targets)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        step += 1

        if step % log_every == 0:
            # Log a plain scalar and detached CPU copies so the logger never
            # holds on to the autograd graph or to device memory.
            wandb.log({
                'loss': loss.item(),
                'graph pred': graph_pred.detach().cpu(),
                'targets': targets.detach().cpu(),
            })

    # Advance the schedule by one epoch. The explicit ``epoch`` argument of
    # ``step`` is deprecated, and passing the just-finished epoch index left
    # the next epoch running with this epoch's learning rate.
    lr_scheduler.step()
    epoch_duration = time.time() - start_time
    print("Finished epoch {}, took {} s".format(epoch, time.strftime('%H:%M:%S', time.gmtime(epoch_duration))))

    epoch_avg_opt_gap = validate(model, valid_dataloader)

    wandb.log({'validation AOG': epoch_avg_opt_gap})

In [15]:

# One Adam parameter group covering every model parameter.
adam_groups = [{'params': model.parameters(), 'lr': opts.lr_model}]
optimizer = optim.Adam(adam_groups)

# Multiplicative decay: the base learning rate is scaled by lr_decay ** n,
# where n is the scheduler's epoch counter.
lr_scheduler = optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=lambda epoch_idx: opts.lr_decay ** epoch_idx,
)

# Let wandb hook into the model before training starts.
wandb.watch(model)

for epoch in range(opts.n_epochs):
    train_epoch(
        epoch=epoch,
        model=model,
        train_dataloader=train_dataloader,
        valid_dataloader=valid_dataloader,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        opts=opts,
    )


Start train epoch 0


RuntimeError: The size of tensor a (8) must match the size of tensor b (29681) at non-singleton dimension 1

In [9]:
# Display the configured number of training epochs.
# NOTE(review): this cell's execution counter (9) is lower than that of the
# cells above it, so it was run out of order -- re-run top to bottom or drop it.
opts.n_epochs

10