In [2]:
# Directories holding the already-converted samples (.pt files), one entry per split.
CONVERTED_DIRS = {'train':'./dataset/converted_train',
                 'val':'./dataset/converted_validation',
                 'test':'./dataset/converted_test'
                 }

# Directories of the raw challenge datasets, one entry per split.
# The f-string placeholders are constant literals, so every value is simply
# './dataset/gnnet-ch21-dataset-<split>'.
RAW_DIRS = {'train':f'./dataset/gnnet-ch21-dataset-{"train"}',
           'val':f'./dataset/gnnet-ch21-dataset-{"validation"}',
           'test':f'./dataset/gnnet-ch21-dataset-{"test"}'
           }

# Debug: optionally point a split at a faster location (e.g. an SSD) by
# overriding its entry, for example:
#CONVERTED_DIRS['train'] = '/usr/converted_train/converted_train'

## Convert dataset to pytorch data

In [3]:

import os
import os.path as osp
import torch
import re
from torch_geometric.data import DataLoader, Dataset

import numpy as np
import torch.nn.functional as F




class ChallengeDataset(Dataset):
    """
    Dataset of already-converted challenge samples.

    We assume that the conversion process is already done, i.e. ``root_dir``
    contains pytorch Data objects stored in ``.pt`` files, all in the same
    folder. Each sample is loaded lazily in ``__getitem__`` and passed
    through ``challenge_transform`` before being returned.
    """

    def challenge_transform(self,data,converted_path=None,debug=False):
        """
        Turn a converted sample into the tensors consumed by the models.

        The sample is modified in place and also returned.

        Args:
            data: sample loaded from a ``.pt`` file; it must still carry the
                ``p_*``, ``l_*``, ``g_*`` and ``n_*`` attributes read below.
            converted_path (string, optional): if given, the transformed
                sample is additionally saved there with ``torch.save``.
            debug (bool): if True, first verify that the attributes that are
                dropped as "constant" really have their expected values.

        Returns:
            The same ``data`` object, now exposing
                ``P``: one row per path, columns in the order of ``p_params``,
                ``G``: the 4 global attributes, repeated for every entry of
                       ``data.type``,
                ``L``: one row per link (capacity / mean packet rate),
                ``mean_pkts_rate``, ``n_paths``, ``n_links``: the respective
                       scalar broadcast to the shape of ``data.type``.

        Raises:
            AssertionError: if the mean of ``p_time_AvgPktsLambda`` is not
                positive, or (debug only) a constant attribute differs.
            Exception: (debug only) if one of the fixed size/time parameters
                is not close to its expected value.
        """
        # Parameter names listed by the original author, kept for reference
        # (only EqLambda and AvgPktsLambda are used below):
        #   time: EqLambda, AvgPktsLambda, ExpMaxFactor, MinPktLambda,
        #         MaxPktLambda, StdDev, PktsLambdaOn, AvgTOff, AvgTOn,
        #         BurstGenLambda, Bitrate, ParetoMinSize, ParetoMaxSize,
        #         ParetoAlfa
        #   size: MinSize, MaxSize, AvgPktSize, PktSize1, PktSize2,
        #         NumCandidates, Size_i, Prob_i

        # 1a. Assert that some stuff remains constant...
        if debug:
            assert all(data.p_SizeDist == 2)
            assert all(data.p_TimeDist == 0)
            assert all(data.n_levelsQoS == 1)
            assert all(data.p_ToS == 0.0)
            assert all(data.n_queueSizes == 32)
            assert data.n_schedulingPolicy[0] == "FIFO"
            for a,v in zip(['p_size_AvgPktSize','p_size_PktSize1',
                            'p_size_PktSize2', 'p_time_ExpMaxFactor'],
                        [1000.0,300.0,1700.0,10.0]):
                if not torch.allclose(getattr(data,a),
                                      v*torch.ones_like(getattr(data,a)),rtol=1e-05):
                    raise Exception(f"{a} was expected to have the value close to {v}")

        del data.p_SizeDist, data.p_TimeDist, data.p_ToS
        del data.n_queueSizes, data.n_levelsQoS, data.n_schedulingPolicy

        # 1b. p_SizeDist / p_TimeDist could be turned into one-hot vectors,
        #     but they do not change in the training dataset, so we skip it:
        #data.p_SizeDist= (F.one_hot(data.p_SizeDist,4))
        #data.p_TimeDist= (F.one_hot(data.p_SizeDist,6))

        # 2. Path attributes.
        #    All size parameters have the same value everywhere, so they are
        #    dropped instead of being concatenated into a feature matrix.
        #    p_time_ExpMaxFactor is always equal to 10.0, so it is deleted too.
        delattr(data,'p_time_ExpMaxFactor')

        sizeparams = [f'p_size_{a}' for a in ['AvgPktSize','PktSize1',
                     'PktSize2']]
        timeparams = [f'p_time_{a}' for a in ['EqLambda', 'AvgPktsLambda']]

        p_params = timeparams + ['p_TotalPktsGen','p_PktsGen','p_AvgBw']

        # Every path feature is expressed relative to the sample's mean
        # packet rate.
        mean_pkts_rate = data.p_time_AvgPktsLambda.mean().item()

        assert mean_pkts_rate > 0
        for p in p_params:
            setattr(data,p,getattr(data,p)/mean_pkts_rate)

        data.p_time_EqLambda /= 1000.0
        data.p_AvgBw /= 1000.0
        #data.p_time_EqLambda *= 0.0
        # p_TotalPktsGen is kept as a column of P but zeroed out (unused).
        data.p_TotalPktsGen *= 0.0

        # Columns of P, in order:
        #   p_time_EqLambda      (additionally divided by 1000)
        #   p_time_AvgPktsLambda
        #   p_TotalPktsGen       (zeroed)
        #   p_PktsGen
        #   p_AvgBw              (additionally divided by 1000)
        data.P = torch.cat([getattr(data,a).view(-1,1) for a in p_params],dim=1)

        for p in p_params + sizeparams:
            delattr(data,p)

        # 3. Global attributes, repeated once per entry of data.type.
        global_attrs = ['g_delay','g_packets','g_losses','g_AvgPktsLambda']
        data.G = torch.as_tensor([getattr(data,a) for a in global_attrs],device=data.P.device)
        data.G = torch.tile(data.G.view(1,-1),(data.type.shape[0],1))
        for a in global_attrs:
            delattr(data,a)

        # 4. Link attributes (capacity relative to the mean packet rate).
        data.L = data.l_capacity.clone().view(-1,1) / mean_pkts_rate
        data.mean_pkts_rate = mean_pkts_rate*torch.ones_like(data.type)

        data.n_paths = data.P.shape[0]*torch.ones_like(data.type)
        data.n_links = data.L.shape[0]*torch.ones_like(data.type)

        delattr(data,'l_capacity')

        if converted_path is not None:
            torch.save(data,converted_path)

        return data

    def __init__(self, root_dir,filenames=None,map_location='cuda'):
        """
        Args:
            root_dir (string): Directory with all the .pt files.
            filenames (list of string, optional): Names of the files (inside
                ``root_dir``) that make up the dataset, in dataset order.
                If None, every ``.pt`` file found in ``root_dir`` is used,
                sorted by name.
            map_location (optional): forwarded to ``torch.load`` when a
                sample is read; defaults to 'cuda'.
        """
        self.root_dir = root_dir
        self.map_location = map_location
        if filenames is None:
            # os.listdir returns entries in arbitrary order; sort them so
            # that indices refer to the same file on every run and machine.
            self.filenames = sorted(
                f for f in os.listdir(self.root_dir)
                if f.endswith('.pt') and osp.isfile(osp.join(self.root_dir, f))
            )
        else:
            self.filenames = filenames

    def __len__(self):
        """Number of sample files in the dataset."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """
        Load sample ``idx`` from disk and return it transformed.

        Raises:
            RuntimeError: if the file cannot be loaded; the original error is
                attached as ``__cause__``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        filename = self.filenames[idx]
        pt_path = osp.join(self.root_dir, filename)

        try:
            sample = torch.load(pt_path,map_location=self.map_location)
        except Exception as err:
            # `except Exception` lets KeyboardInterrupt through untouched.
            # Fail here with the offending path instead of continuing with
            # an unbound `sample`.
            raise RuntimeError(f"Couldn't load {pt_path}") from err

        return self.challenge_transform(sample,converted_path=None)

    


## Split the validation dataset into its 3 settings and initialize the datasets/dataloaders


In [4]:
# Full validation / test datasets; used in this cell to enumerate the available .pt files.
ds_val =  ChallengeDataset(root_dir=CONVERTED_DIRS['val'])
ds_test =  ChallengeDataset(root_dir=CONVERTED_DIRS['test'])

# NOTE(review): filenames_val is not read again in the visible part of the notebook.
filenames_val = ds_val.filenames

import re
import pandas as pd
import numpy as np
import datanetAPI
import os.path as osp
def converted_filenames_metadata(filenames,path_to_original_dataset):
    """
    Build a metadata table describing where each converted sample came from.

    Every filename must start with ``<split>_<file_num>_<sample_num>`` where
    ``<split>`` is ``validation``, ``train`` or ``test``. ``file_num`` indexes
    the list returned by ``DatanetAPI.get_available_files()`` for the raw
    dataset, and ``sample_num`` is kept as-is.

    Args:
        filenames: names of the converted ``.pt`` files.
        path_to_original_dataset: root directory of the raw dataset, handed
            to ``datanetAPI.DatanetAPI``.

    Returns:
        A DataFrame indexed by filename with the columns ``full_path``,
        ``num_nodes``, ``validation_setting``, ``sample_num`` and
        ``file_num``, sorted by (validation_setting, num_nodes, file_num,
        sample_num). ``validation_setting`` is -1 unless the first filename
        belongs to the validation or test split.

    Raises:
        ValueError: if a filename does not follow the expected pattern.
    """
    # Raw string: the original non-raw pattern relied on invalid escape
    # sequences ("\_", "\d"), which newer Python versions warn about.
    pattern = re.compile(r"(validation|train|test)_(\d+)_(\d+).*")

    def parse(filename):
        match = pattern.match(filename)
        if match is None:
            raise ValueError(
                f"Unexpected converted filename {filename!r}; expected "
                "'<validation|train|test>_<file_num>_<sample_num>...'")
        split, file_num, sample_num = match.groups()
        return [split, int(file_num), int(sample_num)]

    parsed = [parse(f) for f in filenames]
    reader = datanetAPI.DatanetAPI(path_to_original_dataset)
    files_num = np.array([p[1] for p in parsed],dtype=np.int32)
    samples_num = np.array([p[2] for p in parsed],dtype=np.int32)

    all_paths = np.array(reader.get_available_files())
    # Kept from the original cell: shows the second field of the first raw
    # file entry as a quick sanity check of the raw dataset path.
    print(all_paths[0,1])

    df = pd.DataFrame(index=filenames,columns=['full_path','num_nodes','validation_setting'])
    df['full_path'] = all_paths[files_num,0]
    df['sample_num'] = samples_num
    df['file_num'] = files_num

    # The last path component of full_path is parsed as the number of nodes.
    df['num_nodes'] = np.array([osp.basename(f) for f in df['full_path'].values],dtype=np.int32)

    if parsed[0][0] in ['validation','test']:
        # The last character of the parent directory is parsed as the setting.
        df['validation_setting'] = np.array([osp.dirname(f)[-1] for f in df['full_path'].values],dtype=np.int32)
    else:
        df['validation_setting'] = -1

    # Put it in correct order
    df = df.sort_values(by=['validation_setting','num_nodes','file_num','sample_num'])
    return df
        
        
# Metadata for every validation sample, sorted by setting / number of nodes / file / sample.
df_val = converted_filenames_metadata(ds_val.filenames,RAW_DIRS['val'])
"""
    We opt to make the validation set smaller (as it is more time consuming to run)
"""
print(df_val['validation_setting'].value_counts())
# Keep the filename as a regular column; it is read back when the per-setting datasets are built.
df_val['filenames'] = df_val.index.values
# Keep only the first 10 rows of each group of rows sharing the same 'full_path'.
df_val = df_val.groupby('full_path').head(10)
df_test =  converted_filenames_metadata(ds_test.filenames,RAW_DIRS['test'])
display(df_test)

results_100_400-2000_0_9.tar.gz
1    1040
2    1040
3    1040
Name: validation_setting, dtype: int64
results_100_400-2000_0_9.tar.gz


Unnamed: 0,full_path,num_nodes,validation_setting,sample_num,file_num
test_42_0.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,50,1,0,42
test_42_1.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,50,1,1,42
test_42_2.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,50,1,2,42
test_42_3.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,50,1,3,42
test_42_4.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,50,1,4,42
...,...,...,...,...,...
test_61_5.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,300,3,5,61
test_61_6.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,300,3,6,61
test_61_7.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,300,3,7,61
test_61_8.pt,./dataset/gnnet-ch21-dataset-test/ch21-test-se...,300,3,8,61


In [5]:
# Batch sizes; every split that is not a training split uses the 'val' size (see loop below).
BATCH_SIZE = {'train':16,'val':4}
# 'train' and 'val' list every .pt file of their directory; 'test' follows the
# sorted order of df_test so that predictions are produced in that order.
datasets = {"train": ChallengeDataset(root_dir=CONVERTED_DIRS['train']),
            "val":ChallengeDataset(root_dir=CONVERTED_DIRS['val']),
            "test":ChallengeDataset(root_dir=CONVERTED_DIRS['test'],
                                   filenames=list(df_test.index) )
           }
# One reduced validation dataset per validation setting (1, 2, 3), built from
# the rows kept in df_val.
for i in range(3):
    which_files = list(df_val[df_val['validation_setting']==i+1]['filenames'].values)
    ds = ChallengeDataset(root_dir=CONVERTED_DIRS['val'],
                         filenames=which_files)
    datasets[f'val_{i+1}'] = ds
    
    
    
    
# For every dataset key k: dataloaders[k] iterates in order,
# dataloaders[k + "_s"] iterates over the same dataset shuffled.
dataloaders = {}
for k in datasets.keys():
    if k.startswith('train'):
        batch_size = BATCH_SIZE['train']
    else:
        batch_size = BATCH_SIZE['val']
    dataloaders[k] = DataLoader(datasets[k],batch_size=batch_size,shuffle=False)
    dataloaders[k+"_s"] = DataLoader(datasets[k],batch_size=batch_size,shuffle=True)
    print(f"Dataset Len ({k}): {len(datasets[k])}")
    print(f"Dataloader Len ({k}): {len(dataloaders[k])}")



Dataset Len (train): 120000
Dataloader Len (train): 7500
Dataset Len (val): 3120
Dataloader Len (val): 780
Dataset Len (test): 1560
Dataloader Len (test): 390
Dataset Len (val_1): 260
Dataloader Len (val_1): 65
Dataset Len (val_2): 260
Dataloader Len (val_2): 65
Dataset Len (val_3): 260
Dataloader Len (val_3): 65


<h2><b>(Debug)</b></h2> Optionally, run the following to ensure that the training, validation and test datasets are correctly loaded

In [250]:
from tqdm import tqdm
import numpy as np

# Load (and transform) every sample of each listed split once, so that any
# file that cannot be read or transformed surfaces here.
DEBUG_MODES  = ['train','val','test']
for mode in DEBUG_MODES:
    for i in tqdm(range(len(datasets[mode]))):
        X = datasets[mode][i]
       


 17%|█▋        | 259/1560 [00:02<00:14, 87.76it/s] 


KeyboardInterrupt: 

In [6]:
%matplotlib inline
import matplotlib.pyplot as plt

import warnings
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats._continuous_distns import _distn_names
import matplotlib
# Notebook-wide plotting defaults: default figure size (16 x 12) and the 'ggplot' style.
matplotlib.rcParams['figure.figsize'] = (16.0, 12.0)
matplotlib.style.use('ggplot')

def get_train_distribution_statistics(ds_train,attrs_to_normalize,calc_distr=False):
    """
    Estimate per-column mean and standard deviation of some sample attributes.

    The statistics are computed on a single shuffled batch of (up to) 500
    samples of ``ds_train``, not on the whole dataset. The global torch seed
    is set to 42 first so that the same batch is drawn on every call.

    Args:
        ds_train: dataset to draw the batch from.
        attrs_to_normalize: names of the batch attributes to summarise.
        calc_distr (bool): unused; kept for backward compatibility.

    Returns:
        (means, std): two dicts mapping each attribute name to a tensor with
        the mean / standard deviation over dimension 0 of that attribute.

    Raises:
        ValueError: if ``ds_train`` yields no batch at all.
    """
    torch.manual_seed(42)
    dl_train_oneshot = DataLoader(ds_train,batch_size=500,shuffle=True)

    # Only the first batch is needed.
    sample = next(iter(dl_train_oneshot), None)
    if sample is None:
        raise ValueError("Cannot compute statistics: the training dataset is empty")

    print(sample)
    means = {k: torch.mean(getattr(sample,k).float(),dim=0)
             for k in attrs_to_normalize}
    std = {k: torch.std(getattr(sample,k).float(),dim=0)
           for k in attrs_to_normalize}
    return means, std

# Normalisation statistics (from one shuffled batch of the training set) for the
# model inputs P, G, L and for the targets out_occupancy / out_delay.
means, stds = get_train_distribution_statistics(datasets['train'],['P','G','L','out_occupancy','out_delay'])



Batch(G=[836909, 4], L=[92884, 1], P=[725230, 5], batch=[836909], edge_index=[2, 4427010], edge_type=[4427010], mean_pkts_rate=[836909], n_links=[836909], n_paths=[836909], out_delay=[725230], out_occupancy=[92884], ptr=[501], type=[836909])


In [10]:
import torch
import pandas as pd
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

from torch.nn import Sequential, Linear, BatchNorm1d, ReLU, Sigmoid
from torch_geometric.nn.conv import GATConv,TransformerConv,GCNConv,GINConv

from torch_geometric_temporal.nn.recurrent import GConvGRU
import torch.nn.functional as F
from torch_scatter import scatter

def separate_edge_timesteps(edge_index,edge_type):
    """
    Group the edges of each edge type by their position within their source's run.

    For every edge type in {0, 1, 2} the matching columns of ``edge_index``
    are selected and each edge receives a "time step": its 0-based position
    inside the run of consecutive edges that share the same source (row 0).
    The edges are then split into one tensor per time step.

    Args:
        edge_index: tensor with 2 rows (source, destination) and one column
            per edge.
        edge_type: tensor with one entry per edge; the values 0, 1 and 2 are
            looked up.

    Returns:
        A list with 3 elements (one per edge type). Element ``et`` is a list
        whose ``k``-th entry is the ``(2, n_k)`` tensor of the edges of type
        ``et`` that sit at time step ``k``.

    NOTE(review): this presumably relies on edges with the same source being
    stored contiguously, and on every edge type having at least one edge --
    confirm against the conversion code.
    """
    all_edges= [[] for _ in range(3)]
    for et in [0,1,2]:
        et_edges = edge_index[:,edge_type==et]

        # 1 for the very first edge, then the difference between consecutive sources.
        init_tensor = torch.cat([torch.ones(1,device=et_edges.device).long(),torch.diff(et_edges[0,:])],dim=0)
        # NOTE(review): the clip bounds are floats; whether the result stays an
        # integer tensor depends on the torch version -- confirm.
        init_tensor = torch.clip(torch.abs(init_tensor),0.,1.)
        # e.g. [1, 0, 0, 0, 1, 0, 0, 1]: 1 where a new source starts, 0 where the source repeats
        """  Debug: 
        init_tensor = torch.as_tensor([1,0, 0, 0, 1, 0, 0, 1] ) 
        sol =  [0,1,2,3,0,1,2,0]
        """
        # [0, 1, 1, 1, 0, 1, 1, 0]: 1 where the source repeats, 0 where a new source starts
        init_tensor = 1 - init_tensor
        # Positions where a new source starts: [0, 4, 7] in the example.
        count_tensor = torch.nonzero(1-init_tensor).view(-1)
        # At every start except the first, store (1 - length of the previous run):
        # [0, 1, 1, 1, -3, 1, 1, -2] in the example, so that the cumulative sum resets to 0.
        init_tensor[count_tensor[1:]] = -torch.diff(count_tensor) +1
        # [0, 1, 2, 3, 0, 1, 2, 0]: position of each edge within its source's run
        init_tensor = init_tensor.cumsum(axis=0)

        # Will list all dsts that were the first linked to some src, then all second, etc..
        # The added ramp in [0, 0.5] is a tie-breaker meant to keep the original
        # edge order among edges sharing a time step; it stays below 1, so time
        # steps never get mixed.
        ensure_stable = torch.linspace(start=0.0,end=0.5,steps=init_tensor.shape[0],device=init_tensor.device)
        encountered_order = torch.sort(init_tensor+ensure_stable)[1]
        et_edges = et_edges[:,encountered_order]
        
        
        # vals[i] == number of edges that belong to time step i
        idxs, vals = torch.unique(init_tensor,return_counts=True)
        # Cut the reordered edges into consecutive chunks, one per time step.
        vs = [x for x in torch.split_with_sizes(et_edges,tuple(vals),dim=1)]
         
        
        # Disabled sanity check: the chunk sizes should add up to the number of edges.
        #if not torch.as_tensor([v.shape[1] for v in vs]).sum().item() == et_edges.shape[1]:
        #    raise f"Sum of disjoint timesteps is {torch.as_tensor([v.shape[0] for v in vs]).sum()} but should be {et_edges.shape[1]}"

        all_edges[et] = vs
        
    return all_edges

import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree


class Baseline(torch.nn.Module):
    """
    Parameter-free analytical baseline for link occupancy and path delay.

    The module has no trainable weights. It runs a fixed-point iteration in
    which (a) the traffic offered to every link is derived from the path
    demands, thinned by the blocking probability of the links a path has
    already crossed, and (b) the blocking probability of every link is
    recomputed from its utilisation, using a finite-buffer formula of the
    form (1 - rho) * rho^B / (1 - rho^(B+1)) with B = 32.
    """

    def __init__(self,num_iterations=5,G_dim = 4,P_dim =5,L_dim = 1,**kwargs):
        """
        Args:
            num_iterations (int): number of traffic / blocking-probability
                update rounds performed in ``forward``.
            G_dim, P_dim, L_dim (int): number of columns of ``data.G``,
                ``data.P`` and ``data.L``.
        """
        super(Baseline, self).__init__(**kwargs)
        self.num_iterations = num_iterations
        self.G_dim, self.P_dim, self.L_dim = G_dim, P_dim, L_dim
        # Only used to lay out the columns of the internal feature matrix.
        self.H_n, self.H_p, self.H_l = 2,2,2

    def forward(self,data,means,stds,naive=True,mode=None):
        """
        Compute the baseline estimates for one batch.

        Args:
            data: batch produced by the dataloaders (``type``, ``edge_index``,
                ``edge_type``, ``P``, ``L``, ``G`` and ``mean_pkts_rate`` are
                read).
            means, stds, naive, mode: unused; accepted so that the baseline
                can be called like ``ChallengeModel``.

        Returns:
            (delay, occupancy): ``delay`` has one entry per path (entries of
            ``data.type == 0``) and ``occupancy`` one entry per link
            (``data.type == 1``).
        """
        edge_index = data.edge_index.long()
        edge_type = data.edge_type.clone()

        is_p = data.type==0
        is_l = data.type==1

        # pl_at_time[k] holds all (path, link) edges whose link is the k-th
        # link visited by its path.
        pl_at_time = separate_edge_timesteps(edge_index,edge_type)[0]
        edges_pl = edge_index[:,edge_type==0]

        G_dim, P_dim, L_dim = self.G_dim, self.P_dim, self.L_dim
        H_n, H_p, H_l = self.H_n, self.H_p, self.H_l

        # Undo the per-sample normalisation applied by the dataset transform.
        P = data.P * data.mean_pkts_rate[is_p].view(-1,1)
        L = data.L * data.mean_pkts_rate[is_l].view(-1,1)
        L = L / 1000
        G = data.G

        # Column layout of the feature matrix X (one row per graph entry):
        # [node hidden | G | link hidden | L | path hidden | P]
        cnt = 0
        cnt, node_hidden = cnt+H_n,   slice(cnt,cnt+H_n)
        cnt, node_og     = cnt+G_dim, slice(cnt,cnt+G_dim)
        cnt, link_hidden = cnt+H_l,   slice(cnt,cnt+H_l)
        cnt, link_og     = cnt+L_dim, slice(cnt,cnt+L_dim)
        cnt, path_hidden = cnt+H_p,   slice(cnt,cnt+H_p)
        cnt, path_og     = cnt+P_dim, slice(cnt,cnt+P_dim)

        # Allocate on the input's device instead of a hard-coded 'cuda'.
        X = torch.zeros(data.G.size(0),H_n+G_dim+H_p+P_dim+H_l+L_dim,device=data.G.device)
        X[:,node_og] = G[:,:]
        X[is_l,link_og] = L
        X[is_p,path_og] = P

        # Per-path demand: the second-to-last column of P (zero for entries
        # that are not paths).
        A = X[:,path_og.stop-2].view(-1,).clone()
        # Service rate of every link: first column of the rescaled L.
        link_rate = X[is_l,link_og.start]

        blocking_probs =  0.3*torch.ones_like(A)
        max_numpaths = len(pl_at_time)
        buffer_size = 32

        def update_traffic(blocking_probs):
            """Traffic arriving at every link, given the blocking probabilities."""
            T = torch.zeros(X.size(0),device=A.device)
            for k in range(max_numpaths):
                if k == 0:
                    # Just map the demand on the respective path
                    traffic = A.clone()
                else:
                    # Thin each path's traffic by what was dropped at the
                    # link it crossed at the previous position.
                    prev_paths = pl_at_time[k-1][0,:]
                    prev_edges = pl_at_time[k-1][1,:]
                    prev_edges_block_probs = torch.gather(blocking_probs,dim=0,
                                                         index=prev_edges)
                    traffic[prev_paths] *= (1.0 - prev_edges_block_probs)

                which_paths = pl_at_time[k][0,:]
                which_edges = pl_at_time[k][1,:]
                T += scatter(src=torch.gather(traffic,0,which_paths),
                             index=which_edges,
                             dim=0,dim_size=X.size(0),reduce='sum')
            return T

        def update_blocking_probs(T):
            """Blocking probability of every link, given its offered traffic."""
            rhos = torch.zeros_like(T)
            rhos[is_l] = T[is_l] / link_rate

            blocking_probs_num = (1.0 - rhos) * torch.pow(rhos,buffer_size)
            blocking_probs_den = 1.0 - torch.pow(rhos,buffer_size+1)
            # The small constant guards against a zero denominator.
            return blocking_probs_num/(blocking_probs_den+1e-08)

        T =  torch.zeros(X.size(0),device=A.device)
        for _ in range(self.num_iterations):
            T = update_traffic(blocking_probs)
            blocking_probs = update_blocking_probs(T)

        # Average queue occupancy from the final traffic estimate only (the
        # original recomputed it in every iteration and kept the last value):
        # pi_j = pi_0 * rho^j for j = 0..B, weighted by j (with the j = 0
        # term weighted by 1, as in the original), divided by B.
        rhos = T[is_l] / link_rate
        pi_j = (1 - rhos)/(1-torch.pow(rhos,buffer_size+1))
        occupancy = pi_j.clone()
        for j in range(buffer_size):
            pi_j = pi_j*rhos
            occupancy += (j+1)*pi_j
        occupancy = occupancy/buffer_size

        # Path delay is approximated as the sum over the path's links of
        #     occupancy_i * (queue_size_i / link_capacity_i)
        # where queue_size_i = 32000 for every link of this dataset.
        link_delay = torch.zeros(X.size(0), device=X.device)
        link_capacity = data.L.squeeze(-1) * data.mean_pkts_rate[is_l]
        link_delay[is_l] = occupancy.squeeze(-1)  * 32000.0  / link_capacity
        E   = torch.gather(link_delay,index=edges_pl[1,:],dim=0)
        res = scatter(src=E,index=edges_pl[0,:],dim=0,dim_size=X.size(0),reduce='sum')
        res = res[is_p]
        return res, occupancy
        
        

class ChallengeModel(torch.nn.Module):
    """
    Graph neural network that predicts per-link occupancy and per-path delay.

    Entries of a batch are typed through ``data.type`` (0, 1, 2 are handled as
    paths, links and nodes via the ``is_p`` / ``is_l`` / ``is_n`` masks) and
    edges through ``data.edge_type`` (0: path-link, 1: path-node,
    2: link-node). The outputs of the analytical baseline are injected as the
    first hidden feature of links and paths.

    NOTE(review): the layer set and the statement order of ``forward`` define
    what the pre-trained checkpoint computes; do not "clean up" the flagged
    spots below without retraining.
    """

    
    def __init__(self,num_iterations=3,G_dim = 4,P_dim =5,L_dim = 1,**kwargs):
        """
        Args:
            num_iterations (int): number of message-passing rounds; one set of
                convolution layers is created per round.
            G_dim, P_dim, L_dim (int): number of columns of ``data.G``,
                ``data.P`` and ``data.L``.
        """
        super(ChallengeModel, self).__init__(**kwargs)
        self.num_iterations = num_iterations
        self.G_dim = G_dim
        self.P_dim = P_dim
        self.L_dim = L_dim
        
        # Hidden sizes for paths, links and nodes.
        # NOTE(review): self.H is not read anywhere in this class.
        self.H = 64
        self.H_p = 64
        self.H_l = 64
        self.H_n = 64
        
        # One layer per iteration for each message direction; the "_r" lists
        # hold the layers for the reverse direction.
        self.conv_pn_1 = []
        self.conv_ln_1 = []
        self.conv_pn_r_1 = []
        self.conv_ln_r_1 = []
        self.conv_pl_1 = []
        self.conv_pl_r_1 = []
        
        for i in range(self.num_iterations):
            self.conv_pn_1.append(GATConv(self.H_p+self.P_dim,self.H_n).cuda())
            self.conv_ln_1.append(GATConv(self.H_l+L_dim,self.H_n).cuda())
            self.conv_pl_1.append(GATConv(self.H_p+P_dim,self.H_l).cuda())

            self.conv_pn_r_1.append(GATConv(self.H_n+G_dim,self.H_p,flow="target_to_source").cuda())
            self.conv_ln_r_1 .append(GATConv(self.H_n+G_dim,self.H_l,flow="target_to_source").cuda())
        
            self.conv_pl_r_1.append(GConvGRU(self.H_l+L_dim,self.H_p,K=2).cuda())
        
        # NOTE(review): finalconv is not used in forward(); it is still part of
        # the state dict, so removing it would change the checkpoint keys.
        self.finalconv = GCNConv(self.H_p+P_dim,self.H_l,normalize=True).cuda()
        
        # Wrap the plain lists in ModuleLists so their parameters are registered.
        for c in ['conv_pn','conv_ln','conv_pl']:
            setattr(self,c+'_1',torch.nn.ModuleList(getattr(self,c+'_1')))
            setattr(self,c+'_r_1',torch.nn.ModuleList(getattr(self,c+'_r_1')))
        
        # Read-out MLP applied to the link features (lin1-3) and the input
        # mixing MLP applied to the whole feature matrix (xlin1-2).
        self.lin1 = torch.nn.Linear(L_dim+self.H_l,512)
        self.lin2 = torch.nn.Linear(512,512)
        self.lin3 = torch.nn.Linear(512,1)
        self.xlin1 = torch.nn.Linear(self.H_n+self.G_dim+self.H_p+self.P_dim+self.H_l+self.L_dim,128)
        self.xlin2 = torch.nn.Linear(128,self.H_n+self.G_dim+self.H_p+self.P_dim+self.H_l+self.L_dim)
        
    
    def forward(self,data,means,stds,naive=True,mode=None,baseline_occup=None,baseline_out=None):
        """
        Predict path delays and link occupancies for one batch.

        Args:
            data: batch produced by the dataloaders.
            means, stds: dicts with the normalisation statistics of 'P', 'G'
                and 'L'.
            naive, mode: not read in this method.
            baseline_occup: baseline occupancy, one value per link.
            baseline_out: baseline delay, one value per path.
                NOTE(review): both default to None but are assigned into the
                feature matrix unconditionally, so callers are presumably
                expected to always pass them -- confirm.

        Returns:
            (res, L): ``res`` holds one predicted delay per path; ``L`` is the
            sigmoid output of the read-out MLP, one row per link.
        """
        edge_index = data.edge_index.long()
        edge_type = data.edge_type.clone()
        
        is_p = data.type==0
        is_l = data.type==1
        is_n = data.type==2       
        
        
        # vs[k]: path-link edges (edge type 0) at position k of their path.
        all_edges = separate_edge_timesteps(edge_index,edge_type)
        vs = all_edges[0]
        
        edges_pl = edge_index[:,edge_type==0]
        edges_pn = edge_index[:,edge_type==1]
        edges_ln = edge_index[:,edge_type==2]    
        
        #print(f"Edges between path and link: {edges_pl.shape[1]}")
        #print(f"Edges between path and node: {edges_pn.shape[1]}")
        #print(f"Edges between link and node: {edges_ln.shape[1]}")
    
        
        G_dim, P_dim, L_dim = self.G_dim, self.P_dim, self.L_dim
        H_n, H_p, H_l = self.H_n, self.H_p, self.H_l
        
        n_p = torch.sum(is_p)
        n_l = torch.sum(is_l)
        n_n = torch.sum(is_n)
        
        # Standardise the inputs with the training statistics.
        P = (data.P - means['P'])/(1e-08+stds['P'])
        #P =  data.P
        # The global features are multiplied by 0.0, i.e. effectively disabled.
        G = 0.0*(data.G - means['G'])/stds['G']
        G[:,2] = 0*data.G[:,2]
        L = (data.L - means['L'])/stds['L']
 
        
        
        
        # Feature matrix with one row per graph entry and the column layout
        # [node hidden | G | link hidden | L | path hidden | P].
        X = torch.zeros(data.G.size(0),H_n+G_dim+H_p+P_dim+H_l+L_dim,device='cuda')
        cnt = 0
        cnt, node_hidden = cnt+H_n,   slice(cnt,cnt+H_n)
        cnt, node_og     = cnt+G_dim, slice(cnt,cnt+G_dim)
        cnt, link_hidden = cnt+H_l,   slice(cnt,cnt+H_l)
        cnt, link_og     = cnt+L_dim, slice(cnt,cnt+L_dim)
        cnt, path_hidden = cnt+H_p,   slice(cnt,cnt+H_p)
        cnt, path_og     = cnt+P_dim, slice(cnt,cnt+P_dim)
        
        # Slices covering hidden + original features of each entry kind.
        cnt = 0
        cnt, node_all = cnt+H_n+G_dim, slice(cnt,cnt+H_n+G_dim)
        cnt, link_all = cnt+H_l+L_dim, slice(cnt,cnt+H_l+L_dim)
        cnt, path_all = cnt+H_p+P_dim, slice(cnt,cnt+H_p+P_dim)
        
        
        X[:,node_og] = G[:,:]
        X[is_l,link_og] = L
        X[is_p,path_og] = P
        
        # Inject the baseline estimates as the first hidden feature.
        X[is_l,link_hidden.start] = baseline_occup
        X[is_p,path_hidden.start] = baseline_out
        
        
        X = F.leaky_relu(self.xlin1(X))
        X = F.leaky_relu(self.xlin2(X))
        

        def act(x):
            return F.leaky_relu(x)
        for i in range(self.num_iterations):
            # NOTE(review): this assignment appears to be fully overwritten by
            # the GRU result below before the path hidden state is read.
            X[is_p,path_hidden] =  act(self.conv_pn_r_1[i](X[:,node_all].clone(),edges_pn)[is_p,:])
            
            # Links -> paths: run the recurrent convolution over the links of
            # each path in visiting order (edges are flipped to link -> path).
            x = X[:,link_all].clone()
            H = None
            max_numpaths = len(vs)
            for k in range(max_numpaths):
                e = torch.cat([vs[k][1,:].unsqueeze(0),vs[k][0,:].unsqueeze(0)],axis=0)
                # NOTE(review): index 0 is used in every iteration (not i), so
                # conv_pl_r_1[1:] are never applied -- intentional? Changing it
                # alters what the pre-trained weights compute.
                H  = self.conv_pl_r_1[0](X=x,H=H,edge_index=e)    
            X[is_p,path_hidden] = act(H[is_p,:]/max_numpaths)
            # Re-inject the baseline delay after every update.
            X[is_p,path_hidden.start] = baseline_out
            
            # Paths and links -> nodes.
            X[is_n,node_hidden] = \
                act(self.conv_pn_1[i](X[:,path_all].clone(), edges_pn)[is_n,:] +\
                              self.conv_ln_1[i](X[:,link_all].clone(),edges_ln)[is_n,:])

            
            # NOTE(review): the first of these two assignments is overwritten
            # by the second, which does not read the link hidden state.
            X[is_l,link_hidden] =  act(self.conv_ln_r_1[i](X[:,node_all].clone(),edges_ln)[is_l,:])
            X[is_l,link_hidden] =  act(self.conv_pl_1[i](X[:,path_all].clone(),
                                                         edges_pl)[is_l,:])
            # Re-inject the baseline occupancy after every update.
            X[is_l,link_hidden.start] = baseline_occup
        
            
        # Read-out: link features -> value in (0, 1) per link.
        L = X[is_l,link_all]
        L = self.lin1(L)
        
        L = F.leaky_relu(L)
        L = F.leaky_relu(self.lin2(L))
        #L = F.leaky_relu(L)
        L = torch.sigmoid(self.lin3(L)) 
        #lamb = (1/0.05)
        #L = -(1/lamb)* torch.log(1-0.99*L)
        
        
        # From here on X is reused as a per-entry delay vector.
        X = torch.zeros(X.size(0), device=X.device)
        
        
        """ To predict the node, we use the formula:
        
            path delay ~= \sum_{i=0}^{n_links} delay_link(i)
            where 
                delay_link(i) := avg_utilization_{i} * (queue_size_{i}/link_capacity_{i})
                
            Our NN predicts avg_utilization_{i} \in [0,1], \forall i. 
            For this dataset, we have \forall i: queue_size_{i} =32000
        """
        # Undo the per-sample normalisation of the link capacity.
        link_capacity = data.L.squeeze(-1) * data.mean_pkts_rate[is_l]
        
        
        
        X[is_l] = L.squeeze(-1)  * 32000.0  / link_capacity
        # Delay of the link at the end of every path-link edge ...
        E   = torch.gather(X,index=edges_pl[1,:],dim=0)
        
        #print(f"Shape after gather {E.shape}")
        # ... summed per path.
        res = scatter(src=E,index=edges_pl[0,:],dim=0,dim_size=X.size(0),reduce='sum')
        res = res[is_p]
        
        return res, L
               
# Instantiate the learned model and the analytical baseline with their default settings, on the GPU.
model = ChallengeModel().cuda()
baseline = Baseline().cuda()
       
       

<h2> Use the cell below to train from scratch.</h2> <br/>
Warning: Unlike the other model, training this from scratch may take multiple hours!

In [11]:
from tqdm import tqdm,trange
from convertDataset import total_samples
from torch.nn import MSELoss
def MAPE(preds,actuals):
    """Mean absolute percentage error (in percent) of ``preds`` w.r.t. ``actuals``."""
    relative_error = (preds - actuals) / actuals
    return 100.0 * relative_error.abs().mean()

def mape_all(preds,actuals):
    """Element-wise absolute percentage error (in percent), without averaging."""
    relative_error = (preds - actuals) / actuals
    return 100.0 * relative_error.abs()
def lMAPE(preds,actuals):
    """
    Log-space variant of MAPE (in percent).

    The difference of the logarithms of ``preds`` and ``actuals`` is divided
    by ``actuals`` itself (not by its logarithm) before averaging.
    """
    log_gap = torch.log(preds) - torch.log(actuals)
    return 100.0 * (log_gap / actuals).abs().mean()

def MSE(preds,actuals):
    """Root of the mean squared error (despite the name, the square root is applied)."""
    residual = preds - actuals
    return residual.square().mean().sqrt()
import numpy as np


from torch.utils.tensorboard import SummaryWriter
import os

writer = SummaryWriter()
num_epochs = 300
opt = torch.optim.Adam(lr=1e-3,params=model.parameters())
step = 0

torch.manual_seed(420)

# A checkpoint is written after every epoch.
os.makedirs('./model',exist_ok=True)

# Uncomment to warm-start from the pre-trained weights:
#model.load_state_dict(torch.load(f'./22_setembro_modelo.pt'))

for epoch in range(num_epochs):

    for mode in ['train','val_1','val_2','val_3']:
        is_training = mode == 'train'
        if is_training:
            model.train()
        else:
            model.eval()
        stats = {'loss':[],
                 'out':[],
                 'occup':[],
                 'actual_out':[],
                 'actual_occup':[]
                }

        # One "epoch" only visits 1/30 of the (shuffled) training batches;
        # the validation loaders are traversed completely and in order.
        total=len(dataloaders[mode])//30 if is_training else len(dataloaders[mode])
        mode_maybe_shuffle = 'train_s' if is_training else mode
        for i,sample in tqdm(enumerate(dataloaders[mode_maybe_shuffle]),
                             total=total):
            # Stop before doing any work on the first batch past the budget.
            if i == total:
                break

            # The baseline has no trainable parameters: never track gradients.
            # `mode` is passed by keyword; passed positionally it would land
            # in the `naive` parameter of forward().
            with torch.set_grad_enabled(False):
                b_out, b_occup = baseline(sample,means,stds,mode=mode)

            with torch.set_grad_enabled(is_training):
                if is_training:
                    opt.zero_grad()
                out, occup = model(sample,means,stds,mode=mode,
                                   baseline_occup=b_occup,
                                   baseline_out=b_out)
                # MAPE is what gets reported ...
                loss = MAPE(out,sample.out_delay)
                if is_training:
                    # ... while the log-space variant is what gets optimised.
                    #MSE(occup,sample.out_occupancy).backward()
                    lMAPE(out,sample.out_delay).backward()
                    opt.step()
                elif i == 0:
                    # Show predictions next to targets for the first batch.
                    print(torch.cat([out.view(-1,1),sample.out_delay.view(-1,1)],axis=1))
                    print(loss)

                _stats = {'loss':loss,
                          'out':out.mean(),
                          'actual_out':sample.out_delay.mean(),
                          'occup':occup.mean(),
                          'actual_occup':sample.out_occupancy.mean(),
                         }
                for k, value in _stats.items():
                    stats[k].append(value.cpu().item())
                del _stats
        print("======================================")
        print(f"Epoch {epoch} - Avg stats ({mode})")
        for k in stats.keys():
            print(f'Mean {k}: {np.array(stats[k]).mean()}')
            writer.add_scalar(f"{k}/{mode}", np.array(stats[k]).mean(), step)
    writer.flush()
    print("Flushed")
    step += 1

    torch.cuda.empty_cache()

    torch.save(model.state_dict(),f'./model/model_{epoch}.pt')


  3%|▎         | 2/65 [00:01<00:41,  1.52it/s]

tensor([[0.1144, 0.1163],
        [0.1166, 0.1196],
        [0.1885, 0.1938],
        ...,
        [0.1437, 0.1463],
        [0.0993, 0.0999],
        [0.3237, 0.3315]], device='cuda:0')
tensor(1.9805, device='cuda:0')


100%|██████████| 65/65 [00:18<00:00,  3.61it/s]
  2%|▏         | 1/65 [00:00<00:07,  8.50it/s]

Epoch 0 - Avg stats (val_1)
Mean loss: 2.7116544760190524
Mean out: 0.44985095354226917
Mean occup: 0.03714364377351908
Mean actual_out: 0.4635581649266757
Mean actual_occup: 0.035495109111070636
tensor([[0.0517, 0.0517],
        [0.2008, 0.2010],
        [0.0314, 0.0312],
        ...,
        [0.0375, 0.0376],
        [0.7551, 0.7673],
        [0.0177, 0.0178]], device='cuda:0')
tensor(1.0508, device='cuda:0')


100%|██████████| 65/65 [00:22<00:00,  2.85it/s]
  2%|▏         | 1/65 [00:00<00:10,  6.29it/s]

Epoch 0 - Avg stats (val_2)
Mean loss: 1.332254853615394
Mean out: 0.07160714635482202
Mean occup: 0.08163771801269971
Mean actual_out: 0.07174499582212705
Mean actual_occup: 0.08150574467503108
tensor([[0.0438, 0.0444],
        [0.0484, 0.0489],
        [0.0819, 0.0816],
        ...,
        [0.0562, 0.0556],
        [0.0712, 0.0712],
        [0.0115, 0.0115]], device='cuda:0')
tensor(1.4480, device='cuda:0')


100%|██████████| 65/65 [00:37<00:00,  1.73it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 0 - Avg stats (val_3)
Mean loss: 1.6559356038387005
Mean out: 0.05248686512215779
Mean occup: 0.062194769027141425
Mean actual_out: 0.05243281760754494
Mean actual_occup: 0.06198185212337054
Flushed


  3%|▎         | 2/65 [00:00<00:10,  5.98it/s]

tensor([[0.1144, 0.1163],
        [0.1166, 0.1196],
        [0.1885, 0.1938],
        ...,
        [0.1437, 0.1463],
        [0.0993, 0.0999],
        [0.3237, 0.3315]], device='cuda:0')
tensor(1.9805, device='cuda:0')


 66%|██████▌   | 43/65 [00:08<00:04,  5.05it/s]


KeyboardInterrupt: 

## Use the cell below to load the pre-trained model.

You should see the message: \<All keys matched successfully\>

In [24]:

# Load the pre-trained weights into `model`; load_state_dict reports whether all keys matched.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(torch.load(f'./22_setembro_modelo.pt'))

<All keys matched successfully>

## Create Test prediction

In [25]:
model.eval()
import os
os.makedirs('./predictions',exist_ok=True)
FILENAME = '22_setembro_NEW.csv'
from tqdm import tqdm

# Output format: one line per test sample, holding the predicted delay of
# every path of that sample (rounded to 5 decimals) separated by ';'.
# Lines are separated by '\n' and there is no trailing newline.
cnt = 0
with open(f'./predictions/{FILENAME}', "w") as upload_file:
    for i,sample in tqdm(enumerate(dataloaders['test']),total=len(dataloaders['test'])):
        with torch.set_grad_enabled(False):
            # `mode` is passed by keyword; passed positionally it would land
            # in the `naive` parameter of forward().
            b_out, b_occup = baseline(sample,means,stds,mode='test')
            out, occup = model(sample,means,stds,mode='test',baseline_occup=b_occup,
                               baseline_out=b_out)

        # Sample index (within the batch) of every path.
        batch = sample.batch[sample.type==0]

        for b in range(sample.batch.max() + 1):
            if cnt > 0:
                upload_file.write("\n")
            cnt += 1
            # reshape(-1) keeps a 1-D array even for a sample with a single
            # path (np.squeeze would turn it into a 0-d array that cannot be
            # iterated).
            out_batch = out[batch==b].cpu().numpy().round(5).reshape(-1)
            upload_file.write(';'.join(str(value) for value in out_batch))

100%|██████████| 390/390 [04:42<00:00,  1.38it/s]


### Check if test file is OK

In [459]:

from itertools import zip_longest
def check_submission(FILENAME,PATHS_PER_SAMPLE):
    """
    Validate a submission file against the expected number of paths per sample.

    Both files are read line by line in parallel: line k of FILENAME must hold
    exactly as many ';'-separated float values as the integer written on line k
    of PATHS_PER_SAMPLE, and both files must have the same number of lines.
    Every problem found is printed as a WARNING, followed by a final verdict
    line. Nothing is returned.

    Args:
        FILENAME: path of the submission file to check.
        PATHS_PER_SAMPLE: path of the reference file (one integer per line).
    """
    error = False
    n_predictions = 0  # lines found in the submission file
    n_expected = 0     # lines found in the reference file
    with open(FILENAME, "r") as uploaded_file, open(PATHS_PER_SAMPLE, "r") as path_per_sample:
        # Load all files line by line (not at once)
        pairs = zip_longest(uploaded_file, path_per_sample)
        for sample_num, (prediction, n_paths) in enumerate(pairs):
            if prediction is not None:
                n_predictions += 1
            if n_paths is not None:
                n_expected += 1

            # zip_longest pads the shorter file with None. Such a line has no
            # counterpart to compare with, so only keep counting; the length
            # mismatch is reported once after the loop.
            if prediction is None or n_paths is None:
                continue

            # Remove the \n at the end of lines
            prediction = prediction.rstrip()
            n_paths = n_paths.rstrip()

            # Split the line and convert every field to float
            try:
                values = [float(field) for field in prediction.split(";")]
            except ValueError:
                print("WARNING in line {}: The line could not be parsed as ';'-separated floats".format(sample_num))
                error = True
                continue

            # Case 2: Wrong number of predictions in a sample
            if len(values) != int(n_paths):
                print("WARNING in line {}: The line should have size {} but it has size {}".format(sample_num, n_paths,
                                                                                                   len(values)))
                error = True

    # Case 1: Line count does not match.
    if n_predictions != n_expected:
        print("WARNING: File must contain {} lines in total (one per line of the reference file). "
              "Looks like the uploaded file has {} lines".format(n_expected, n_predictions))
        error = True

    if not error:
        print("Congratulations! The submission file has passed all the tests!")
    else:
        print("Error")
        
# Validate the submission written by the prediction cell (which defines
# FILENAME) against the reference list of path counts per test sample.
print("Checking the file...")

FILEPATH = f'./predictions/{FILENAME}'
PATHS_PER_SAMPLE = './paths_per_sample_test_dataset.txt'
check_submission(FILEPATH, PATHS_PER_SAMPLE)


Checking the file...
Congratulations! The submission file has passed all the tests!
