In [2]:
import numpy as np

import utils.network as network_class
# import utils.lstm_model as lstm_model_class
import utils.model_trainer as model_trainer_class
import utils.data_processer as data_processer_functions

import torch
import torch.nn as nn
import torch.nn.functional as fl

from importlib import reload
from tqdm import trange
from pprint import pprint
from datetime import datetime
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

In [3]:
# Report the compute environment this notebook runs in.
# torch.cuda.current_device() raises when no CUDA device is present, so the
# device name is only queried when CUDA is actually available; the rest of the
# notebook already falls back to the CPU in that case.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print('No CUDA device available; running on CPU')
print('CUDA Version', torch.version.cuda)
print('PyTorch Version', torch.__version__)

NVIDIA GeForce RTX 2080 Ti
CUDA Version 12.1
PyTorch Version 2.1.1


# Defining the network

In [4]:
num_nodes = 5
num_edges = 5

# node x edge adjacency matrix of the network (with all tunnels):
# one row per node, one column per edge
adjacency_nodexedge = np.array([[-1,0,0,0,0], # -1 => node is edge's source
                                [1,-1,-1,0,0], # 1 => node is edge's destination
                                [0,1,0,-1,0],
                                [0,0,1,1,-1],
                                [0,0,0,0,1]])

# fail fast if the literal above gets out of sync with the declared sizes
assert adjacency_nodexedge.shape == (num_nodes, num_edges)

In [5]:
# Tunnel level parameters
num_tunnels = 2

# one row per tunnel; columns index edges (nodes for the external arrival rates)
per_edge_shape = (num_tunnels, num_edges)
per_node_shape = (num_tunnels, num_nodes)

is_edge_in_tunnel      = np.zeros(per_edge_shape)
external_arrival_rates = np.zeros(per_node_shape)
overlay_service_rates  = np.zeros(per_edge_shape)
underlay_service_rates = np.zeros(per_edge_shape)

# tunnel 0
is_edge_in_tunnel[0]      = [1, 1, 0, 1, 1]  # 1 => edge is a part of the tunnel, 0 => otherwise
external_arrival_rates[0] = [0.9, 0, 0, 0, 0]
overlay_service_rates[0]  = [1, 0, 0, 0, 0]
underlay_service_rates[0] = [0, 1, 0, 1, 1]

# tunnel 1
is_edge_in_tunnel[1]      = [1, 0, 1, 0, 1]  # 1 => edge is a part of the tunnel, 0 => otherwise
external_arrival_rates[1] = [0.9, 0, 0, 0, 0]
overlay_service_rates[1]  = [1, 0, 0, 0, 0]
underlay_service_rates[1] = [0, 0, 1, 0, 1]

In [6]:
# get network class
# reload first so that edits made to the utils.network module since it was
# imported are picked up by this cell
reload(network_class)
# build the network from the topology and per-tunnel arrays defined in the cells above
# (overlay_service_rates is not passed here; it is supplied to simulate() instead)
my_network = network_class.Network(
    num_tunnels,
    num_nodes,
    num_edges,
    adjacency_nodexedge,
    is_edge_in_tunnel, 
    underlay_service_rates,
    external_arrival_rates)

In [7]:
# simulate network
# run the simulation under the given overlay service rates; simulate() returns two
# results, unpacked here as packets_in_flight and tunnel_backlogs
# NOTE(review): custom_seed presumably fixes the simulator's RNG for reproducibility — confirm in utils.network
packets_in_flight, tunnel_backlogs = my_network.simulate(overlay_service_rates, total_time = 100000, custom_seed = 5)

# Training the model

In [8]:
# Define the neural network architecture
class DNN_pool(nn.Module):
    """Fully connected network that max-pools over the second-to-last input axis.

    Each position along the second-to-last axis of the input goes through the
    same stack of Linear + ReLU layers. The element-wise maximum over that axis
    is then mapped to the output by one more Linear + ReLU layer.
    """

    def __init__(self,in_size,hid_sizes,out_size):
        """Create the input, hidden and output layers.

        Args:
            in_size: number of features on the last axis of the input.
            hid_sizes: widths of the hidden layers, in order. Needs at least one
                entry because both ``hid_sizes[0]`` and ``hid_sizes[-1]`` are read.
            out_size: number of output features.
        """
        super().__init__()
        self.input_size = in_size
        self.output_size = out_size
        self.hidden_sizes = hid_sizes
        print('hidden sizes is :',hid_sizes)

        # layers are created in input -> hidden -> output order
        self.input_layer = nn.Linear(in_size, hid_sizes[0])
        consecutive_widths = zip(hid_sizes[:-1], hid_sizes[1:])
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(width_in, width_out) for width_in, width_out in consecutive_widths]
        )
        self.output_layer = nn.Linear(hid_sizes[-1], out_size)

    def evaluate(self, x):
        """Run ``forward`` with gradient tracking disabled and return its result."""
        with torch.no_grad():
            return self.forward(x)

    def forward(self, x):
        """Compute the network output for ``x``.

        The second-to-last axis of ``x`` is removed by the max-pooling step, so
        ``x`` needs at least two dimensions.
        """
        hidden = fl.relu(self.input_layer(x))

        # every hidden layer is followed by a ReLU activation
        for layer in self.hidden_layers:
            hidden = fl.relu(layer(hidden))

        # pooling: element-wise maximum over the second-to-last axis
        pooled = hidden.max(dim=-2).values

        # final fully connected layer; the ReLU keeps the outputs non-negative
        # NOTE(review): a ReLU on a regression output can leave an output stuck
        # at zero during training — confirm this activation is intended
        return fl.relu(self.output_layer(pooled))

In [9]:
def prepare_data(device, tunnel_backlogs, packets_in_flight, look_back):
    """Turn the simulation traces into scaled train/test tensors.

    Args:
        device: torch device the tensors are moved to.
        tunnel_backlogs: prediction targets (converted with ``torch.Tensor``).
        packets_in_flight: model inputs (converted with ``torch.Tensor``).
        look_back: sequence-length hyperparameter passed to ``create_sequences``.

    Returns:
        ``(data_processer, x_train, y_train, x_test, y_test)``. The processor is
        returned as well because callers use it to invert the scaling.
    """
    # helper object that splits, scales, and rescales the data
    processor = data_processer_functions.DataProcessor()

    # inputs become time series sequences; targets are used as they are
    flight_tensor = torch.Tensor(packets_in_flight)
    sequences = processor.create_sequences(flight_tensor, look_back).to(device)
    targets = torch.Tensor(tunnel_backlogs).to(device)

    # feature engineering on the sequenced inputs
    features = processor.feature_transform(device, sequences)

    # train / test split happens before any scaling
    train_x_raw, train_y_raw, test_x_raw, test_y_raw = processor.split_train_test(features, targets)

    # scaling (to [0,1] per the processor's contract): the train split goes through
    # scale_train, the test split through scale_test
    x_train, y_train = processor.scale_train(train_x_raw, train_y_raw, is_x_sequenced = True)
    x_test, y_test = processor.scale_test(test_x_raw, test_y_raw)

    return processor, x_train, y_train, x_test, y_test

In [10]:
def define_and_train_model(device, hidden_sizes, learning_rate, num_epochs, x_train, y_train):
    """Build a ``DNN_pool`` sized to the data and train it with Adam on an MSE loss.

    Args:
        device: torch device the model is moved to.
        hidden_sizes: hidden layer widths passed to ``DNN_pool``.
        learning_rate: Adam learning rate.
        num_epochs: number of full-dataset training steps.
        x_train: training inputs; the last axis gives the model input size.
        y_train: training targets; the last axis gives the model output size.

    Returns:
        The trained model.
    """
    # fixed seeds for reproducibility (set before the model weights are created)
    torch.manual_seed(0)
    np.random.seed(0)

    # model dimensions follow the last axis of the data
    net = DNN_pool(x_train.shape[-1], hidden_sizes, y_train.shape[-1]).to(device)
    print(net)

    # training tools for the given hyperparameters
    trainer = model_trainer_class.modelTrainer(criterion = nn.MSELoss(), device = device)
    optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)

    # progress-bar fields that stay the same for the whole run
    run_info = {'look_back': x_train.shape[1], 'hidden_sizes': str(hidden_sizes), 'learning_rate': learning_rate}

    pbar = trange(num_epochs)
    for epoch in pbar:
        # one training step on the full x_train dataset
        # (future): if x_train is too large, break into batches, and use batches x_batch instead
        loss_value = trainer.batch_step(net, x_train, y_train, optimizer)
        pbar.set_postfix({'epoch': f'{epoch+1}/{num_epochs}', 'loss': loss_value, **run_info})

    return net

In [11]:
def train_model_and_get_performance(device, tunnel_backlogs, packets_in_flight, look_back, hidden_sizes, learning_rate, num_epochs):
    """Prepare the data, train one model, and measure its train/test error.

    Returns:
        ``(model, error_metrics)`` where ``error_metrics`` has the keys
        ``'train'`` and ``'test'``, each holding the error-rate dict produced by
        ``get_error_rates`` for that split.
    """
    # data preparation (the processor is needed later to undo the scaling)
    processor, x_train, y_train, x_test, y_test = prepare_data(device, tunnel_backlogs, packets_in_flight, look_back)

    # model definition and training
    trained_model = define_and_train_model(device, hidden_sizes, learning_rate, num_epochs, x_train, y_train)

    # performance metrics: train split first, then test split
    error_metrics = {}
    for split_name, x_split, y_split in (('train', x_train, y_train), ('test', x_test, y_test)):
        split_error_rates, _, _ = get_error_rates(trained_model, x_split, y_split, processor)
        error_metrics[split_name] = split_error_rates

    return trained_model, error_metrics

In [12]:
def get_error_rates(lstm_predictor, x, y, data_processer):
    """Compute RMSE and MAPE of a model's predictions after undoing the scaling.

    Args:
        lstm_predictor: trained model exposing ``eval()`` and ``evaluate(x)``.
            The parameter name is kept for backward compatibility; the model
            does not have to be an LSTM.
        x: scaled model inputs.
        y: scaled targets matching ``x``.
        data_processer: object whose ``inverse_scale`` maps scaled values back
            to the original units.

    Returns:
        ``(error_rates, y_unscaled, y_pred_unscaled)`` where ``error_rates`` is
        ``{'rmse': ..., 'mape': ...}`` and the other two are NumPy arrays
        (predictions rounded to whole numbers).
    """
    # get predictions (the model is left in eval mode afterwards)
    lstm_predictor.eval()
    y_pred = lstm_predictor.evaluate(x)

    # rescale back to queue sizes; x_scaled=0 is a placeholder because only the
    # y part of the result is used
    _, y_pred_unscaled = data_processer.inverse_scale(x_scaled=0, y_scaled=y_pred)
    _, y_unscaled = data_processer.inverse_scale(x_scaled=0, y_scaled=y)
    y_pred_unscaled = np.round(y_pred_unscaled.cpu().numpy())
    y_unscaled = y_unscaled.cpu().numpy()

    # root mean squared error: RMSE of each output column, averaged over the columns.
    # This matches mean_squared_error(..., squared=False) without relying on the
    # `squared` argument, which newer scikit-learn releases deprecate and remove.
    per_output_mse = mean_squared_error(y_unscaled, y_pred_unscaled, multioutput = 'raw_values')
    rmse = np.sqrt(per_output_mse).mean()

    # mean absolute percentage error, restricted to strictly positive targets so
    # that the relative error never divides by zero
    positive_targets = y_unscaled > 0
    mape = mean_absolute_percentage_error(y_unscaled[positive_targets], y_pred_unscaled[positive_targets])*100

    error_rates = {'rmse': rmse,
                   'mape': mape}

    return error_rates, y_unscaled, y_pred_unscaled

In [13]:
# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# hyperparameter values to explore (every combination is trained)
look_back_list = [10]
hidden_sizes_list = [[10,20,10,5], [5,5,5]]
learning_rate_list = [1e-2]  # other candidates: 5e-2, 1e-2, 5e-3
num_epochs = 250
saved_models = []

for look_back in look_back_list:
    for hidden_sizes in hidden_sizes_list:
        for learning_rate in learning_rate_list:
            # train and test the model for this combination
            model, error_metrics = train_model_and_get_performance(
                device, tunnel_backlogs, packets_in_flight,
                look_back, hidden_sizes, learning_rate, num_epochs)

            # keep the trained model together with the settings that produced it
            saved_models.append(dict(
                model = model,
                look_back = look_back,
                learning_rate = learning_rate,
                hidden_size = hidden_sizes,
                error_metrics = error_metrics,
            ))

# best model first: order by test rmse, ties broken by test mape
# (future): technically should have validation data and sort according to that
saved_models.sort(key = lambda entry: tuple(entry['error_metrics']['test'][metric] for metric in ('rmse', 'mape')))

hidden sizes is : [10, 20, 10, 5]
DNN_pool(
  (input_layer): Linear(in_features=5, out_features=10, bias=True)
  (hidden_layers): ModuleList(
    (0): Linear(in_features=10, out_features=20, bias=True)
    (1): Linear(in_features=20, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=5, bias=True)
  )
  (output_layer): Linear(in_features=5, out_features=2, bias=True)
)


100%|█| 250/250 [00:02<00:00, 103.45it/s, epoch=250/250, loss=0.175, look_back=10, hidden_sizes=[10, 20, 10, 5], learni


hidden sizes is : [5, 5, 5]
DNN_pool(
  (input_layer): Linear(in_features=5, out_features=5, bias=True)
  (hidden_layers): ModuleList(
    (0-1): 2 x Linear(in_features=5, out_features=5, bias=True)
  )
  (output_layer): Linear(in_features=5, out_features=2, bias=True)
)


100%|█| 250/250 [00:01<00:00, 170.47it/s, epoch=250/250, loss=0.0023, look_back=10, hidden_sizes=[5, 5, 5], learning_ra


In [14]:
# metrics from the most recent call in the sweep loop above, i.e. the last
# configuration trained — not necessarily the best-ranked entry of saved_models
error_metrics

{'train': {'rmse': 7.362185, 'mape': 11.26992553472519},
 'test': {'rmse': 7.0383043, 'mape': 11.253330111503601}}