In [22]:
import cbs
import torch
import torch.nn as nn
import torch.nn.functional as F
import ctraining_data

# basic RESNET style convolution block, 
# these blocks make up the bulk of the network
class SimpleResidualConv2D(nn.Module):
    def __init__(self, channels: int, H: int, W: int, kernel_size: int):
        super().__init__()
        self.params = nn.ParameterList()
        pad = int((kernel_size-1)//2)
        self.conv1 = nn.LazyConv2d(channels, kernel_size, padding=pad, groups=1)
        self.params.extend(self.conv1.parameters())
        self.bn1 = nn.LazyBatchNorm2d()
        self.params.extend(self.bn1.parameters())
        self.conv2 = nn.LazyConv2d(channels, kernel_size, padding=pad, groups=1)
        self.params.extend(self.conv2.parameters())
        self.bn2 = nn.LazyBatchNorm2d()
        self.params.extend(self.bn2.parameters())
    
    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        return F.relu(Y)+X
    
    def parameters(self):
        return self.params

# The "JoeyBlock" chains a SimpleResidualConv2D unit with a 1x1 convolution, merely to 
# introduce an extra layer of full-connections between channels.
# This might be over-kill
class JoeyBlock(nn.Module):
    def __init__(self, channels_in, 
                 channels_out, 
                 Hin, Win, 
                 ckernel_size):
        super().__init__()
        self.params = nn.ParameterList()
        self.channels_in = channels_in
        self.channels_out = channels_out
        self.res = SimpleResidualConv2D(channels_in, Hin, Win, ckernel_size)
        self.params.extend(self.res.parameters())
        self.conv1x1 = nn.Conv2d(channels_in, channels_out, 1)
        self.params.extend(self.conv1x1.parameters())
    
    def forward(self, X):
        return F.relu(self.conv1x1(self.res(X)))
    
    def parameters(self):
        return self.params

# "JoeyNet" consists of a series of "JoeyBlocks", followed by a head stage
# consisting of 3 fully-connected layers that consume the flattened convolution channels.
class JoeyNet(nn.Module):
    def __init__(self, channels_in, Hin, Win, num_blocks, ckernel_size):
        super().__init__()
        self.params = nn.ParameterList()
        features_out = channels_in-1
        self.blocks = [] 
        for i in range(num_blocks):
            block = JoeyBlock(channels_in, channels_in, Hin, Win, ckernel_size)
            self.blocks.append(block)
            self.params.extend(block.parameters())
        
        self.fc = []
        for i in range(1,4):
            fc = nn.LazyLinear(features_out)
            self.fc.append(fc)
            self.params.extend(fc.parameters())
    
    """
    Inputs to the forward pass of the network
    maps: a (N+1, H, W) tensor.
        - the first channel is a map of the empty environment
        - the remaining channels encode A* shortest paths for single agents that start
            on the boundary, and merely the goal location of all other agents. Hence,
            there is uncertainty built into the model.

    astar_delays: a (N,) tensor containing the length of the A* shortest paths for
        each agent that starts on the boundary. This is added to the output of
        the network, so that in effect, the network learns a "correction" to path-lengths
        predicted by A*.
    
    output_mask: a (N,) tensor of 1's and 0's, used to mask (zero) the output channels of 
        network which are associated with agents who's initial position in the environment
        is not known, but whos goal positions are known.
    """
    def forward(self, maps, astar_delays, output_mask):
        Y = maps
        for i in range(len(self.blocks)):
            Y = self.blocks[i](Y)
        Y = torch.flatten(Y, start_dim=1)
        for i in range(len(self.fc)-1):
            Y = F.relu(self.fc[i](Y))
        Y = self.fc[-1](Y)
        return output_mask*(astar_delays + Y)
    
    def parameters(self):
        return self.params
        

In [23]:
"""
Import the training dataset, and construct a data loader.
"""
import ctraining_data
import torch.optim as optim
from torch.utils.data import DataLoader
dataset = ctraining_data.ColumnLatticeDataset('data')
batch_size = 500

# note that multiple processes can be assigned in this constructor,
# so that loading of large batches is sped up (this is a good idea when doing GPU training)
dataloader = DataLoader(dataset, batch_size=batch_size)

In [25]:
"""
Construct the network. In this test, there are N=13 agents maximum
in the environment at any given time. This results in N+1=14 input channels.

I used 10 "JoeyBlocks" to construct the body of the network. They arent massive, but 
it may be overlarge. I'm not sure.

Inside the "JoeyBlocks" are convolution channels, the kernel size is set by ckernel_size.
These blocks, due to their residual nature, include padding to maintain the channel dimensions.
"""
channels_in = 14
Hin = 14
Win = 14
num_blocks = 10
ckernel_size = 5
model = JoeyNet(channels_in, Hin, Win, num_blocks, ckernel_size)

# run a forward pass with gradients disabled to initialize lazy modules
features, labels = next(iter(dataloader))
with torch.no_grad():
    model(*features)

# Using ADAM as our optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Sum of squared errors as loss function
loss_fn = nn.MSELoss(reduction='sum')

# Training for 20 epochs
epochs = 20
epoch_losses = []
for i in range(epochs):
    epoch_loss = 0
    for features, labels in iter(dataloader):
        Y = model(*features)
        loss = loss_fn(Y,labels)
        epoch_loss += loss 
        loss.backward()
        optimizer.step()
    epoch_losses.append(epoch_loss/len(dataset))
    print(f"Epoch loss: {epoch_loss/len(dataset)}")



Epoch loss: 6.966172695159912
Epoch loss: 5.768726348876953
Epoch loss: 4.663089752197266
Epoch loss: 3.6063694953918457
Epoch loss: 2.856055974960327
Epoch loss: 2.4778196811676025
Epoch loss: 2.50622820854187
Epoch loss: 2.914788246154785
Epoch loss: 3.449183702468872
Epoch loss: 3.6854705810546875
Epoch loss: 3.4314069747924805
Epoch loss: 2.901149272918701
Epoch loss: 2.3619682788848877
Epoch loss: 1.9962055683135986
Epoch loss: 1.8531579971313477
Epoch loss: 1.8933385610580444
Epoch loss: 2.040192127227783
Epoch loss: 2.2066726684570312
Epoch loss: 2.2698140144348145
Epoch loss: 2.1687710285186768


In [37]:
# comparing just A* solutions to the CBS solutions
astar_losses = torch.zeros(len(dataset), dtype=torch.float)
i = 0
with torch.no_grad():
    for features, labels in iter(dataloader):
        N_batch = labels.size()[0]
        astar_losses[i:i+N_batch] = loss_fn(features[1], labels)
        i += N_batch
astar_losses.sum()/len(dataset) # about 1000x worse than running the congestion model on the same data.

tensor(-3114.)
tensor(-3040.)
tensor(-3080.)
tensor(-3134.)
tensor(-157.)


tensor(3528.7036)

In [31]:
astar_losses[2]

tensor(3582.)

In [None]:
i = iter(dataloader)
print(len(dataset))

2025


In [None]:
features,labels = next(i)
print(f'Predicted: {model(*features)[1,:]}')
print(f'Actual: {labels[1,:]}')

Predicted: tensor([ 4.1784, 13.7605, 10.7260,  6.5246, 22.0122, 17.6655, -0.0000,  0.0000,
         0.0000, -0.0000, -0.0000, -0.0000, -0.0000], grad_fn=<SliceBackward0>)
Actual: tensor([ 4., 14., 11.,  7., 22., 18.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
