## Generate Training Data for Contextual Games
This notebook shows how the training data was generated in the [Nash Fixed Point Networks](arxiv.com) paper. It assumes that the network folders in the original [Transportation Networks Research Project](https://github.com/bstabler/TransportationNetworks) are present in the local directory.

Specifically, this notebook first uses a script obtained from the Transportation Networks website to load the following real data:

1.   Incidence matrix for the chosen network dataset

2.   Capacity vector

3.   Free-flow time
4.   Origin-Destination (OD) pairs

Once these are loaded, this notebook then generates contextual data sampled randomly and uses Algorithm 3 in our [paper](arxiv.com) to generate their corresponding actions on the network.

The data is saved in the local directory.

### **Note:**
While we show how we generate the data in this notebook, we also provide the exact training data that was used in the paper. You can download the (already generated) data from this [link](https://drive.google.com/drive/folders/1XsyxT664yjyIXsnAG9gFeKYEtMlWWp-j?usp=sharing).



In [None]:
pip install openmatrix



In [None]:
import os
import sys
import numpy as np
import pandas as pd
import openmatrix as omx
import torch
import pickle as p

# Network selection: `data_set` is the network folder name and
# `trips_inner_directory` is the prefix of the *_trips.tntp / *_net.tntp files
# inside that folder. To switch network, replace both values with one of the
# commented pairs below.
data_set, trips_inner_directory = 'SiouxFalls', 'SiouxFalls'
# data_set = 'Anaheim'; trips_inner_directory = 'Anaheim'
# data_set = 'Berlin-Friedrichshain'; trips_inner_directory = 'friedrichshain-center'
# data_set = 'Berlin-Tiergarten'; trips_inner_directory = 'berlin-tiergarten'
# data_set = 'Eastern-Massachusetts'; trips_inner_directory = 'EMA'

In [None]:
# Parent directory of the current working directory
root = os.path.dirname(os.path.abspath('./'))

# Sub-directories of `root` (the first entry returned by os.listdir is skipped);
# most of them are expected to be TNTP instances
folders = list(filter(lambda name: os.path.isdir(os.path.join(root, name)),
                      os.listdir(root)[1:]))

# Function to convert a TNTP trips file into an OMX demand matrix
def import_matrix(matfile, omx_path='demand.omx'):
    """Parse a TNTP ``*_trips.tntp`` file and write it as an OMX matrix.

    The text after each ``Origin`` keyword is read as the origin zone id
    followed by ``destination : flow;`` entries. The result is stored as a
    dense ``zones x zones`` array under the table name ``'matrix'`` together
    with a ``'taz'`` mapping holding the zone ids ``1..zones``.

    Args:
        matfile: path of the TNTP trips file to read.
        omx_path: path of the OMX file to (over)write; defaults to
            ``'demand.omx'`` in the working directory.

    Returns:
        None. The matrix is only written to ``omx_path``.

    Raises:
        ValueError: if no ``Origin`` block is found, or if an entry cannot be
            parsed as ``int : float``.
    """
    # Context manager so the input file handle is always released
    with open(matfile, 'r') as f:
        all_rows = f.read()

    # Everything before the first 'Origin' keyword is metadata and is dropped
    blocks = all_rows.split('Origin')[1:]
    matrix = {}
    for block in blocks:
        lines = block.split('\n')
        orig = int(lines[0])

        # Entries are parsed explicitly instead of being passed to eval(), so
        # the content of the data file is never executed as code.
        destinations = {}
        for line in lines[1:]:
            for entry in line.split(';'):
                entry = entry.strip()
                if not entry:
                    continue
                dest, _, flow = entry.partition(':')
                destinations[int(dest)] = float(flow)
        matrix[orig] = destinations

    if not matrix:
        raise ValueError("no 'Origin' block found in " + str(matfile))

    zones = max(matrix.keys())
    mat = np.zeros((zones, zones))
    print('mat.shape = ', mat.shape)
    for i in range(zones):
        for j in range(zones):
            # We map values to a index i-1, as Numpy is base 0
            mat[i, j] = matrix.get(i+1,{}).get(j+1,0)

    index = np.arange(zones) + 1

    myfile = omx.open_file(omx_path, 'w')
    try:
        myfile['matrix'] = mat
        myfile.create_mapping('taz', index)
    finally:
        # Close the OMX file even if writing fails part-way
        myfile.close()


# Import the trip table of the selected dataset only.
# NOTE(review): the previous version first looped over every sub-folder of the
# parent directory, printed the folder name once per file, and imported every
# '*trips*.tntp' file it met, each time overwriting demand.omx. That scan was
# unrelated to the selected dataset and has been removed.
mod = os.path.join('.', data_set)
if not os.path.isdir(mod):
    raise FileNotFoundError(
        "network folder '" + mod + "' not found; copy it from the "
        "TransportationNetworks repository into the working directory")
mod_files = os.listdir(mod)
source_file = os.path.join(mod, trips_inner_directory + '_trips.tntp')
if not os.path.isfile(source_file):
    raise FileNotFoundError("trips file '" + source_file + "' not found")
import_matrix(source_file) # import data and save as demand.omx

# Re-open the demand file written by import_matrix and summarise its contents
Trip_data = omx.open_file('demand.omx')
print('Shape:', Trip_data.shape())
print('Number of tables:', len(Trip_data))
print('Table names:', Trip_data.list_matrices())

# Handle on the demand table and the number of origin zones (first dimension)
Trips = Trip_data['matrix']
num_origins = Trip_data.shape()[0]

# Read the tab-separated network file, skipping its first 8 lines
netfile = os.path.join('./' + data_set, trips_inner_directory + '_net.tntp')
net = pd.read_csv(netfile, skiprows=8, sep='\t')

# Normalise the column names: surrounding whitespace removed, lower case
trimmed = list(map(lambda name: name.strip().lower(), net.columns))
net.columns = trimmed

# Drop the first and last columns ('~' and ';'), which hold no data
net = net.drop(columns=['~', ';'])

import torch
# Per-edge network attributes as tensors. torch.tensor with an explicit dtype
# replaces the legacy IntTensor/FloatTensor constructors, and the pandas
# columns are converted to NumPy arrays first.
initial_nodes             = torch.tensor(net['init_node'].to_numpy(), dtype=torch.int32)
terminal_nodes            = torch.tensor(net['term_node'].to_numpy(), dtype=torch.int32)
capacity                  = torch.tensor(net['capacity'].to_numpy(), dtype=torch.float32)
free_flow_time            = torch.tensor(net['free_flow_time'].to_numpy(), dtype=torch.float32)

# Largest node id appearing on either end of an edge. Stored as a plain int
# (not a 0-dim tensor) because it is used below as an array dimension.
n_nodes = max(int(terminal_nodes.max()), int(initial_nodes.max()))
print('n_nodes = ', n_nodes)
n_edges = initial_nodes.shape[0]

# --------------------------------
# Generate OD Pairs
# --------------------------------
# One demand vector per OD pair with non-zero traffic: -traffic at the origin
# row, +traffic at the destination row, zero elsewhere.
num_vertices = int(n_nodes) # Number of vertices in the graph
print('num_vertices = ', num_vertices)
b_vectors = []
try:
    for i in range(num_origins):
        for j in range(num_origins):
            if i == j:
                # Intra-zonal demand would leave a single non-zero entry, which
                # no flow can satisfy because every column of the incidence
                # matrix sums to zero.
                continue
            Amount_of_traffic = Trips[i,j]
            if Amount_of_traffic != 0.0:
                b_new = np.zeros(num_vertices)
                b_new[i] = -Amount_of_traffic
                b_new[j] = Amount_of_traffic
                b_vectors.append(b_new)
finally:
    Trip_data.close() # Close the file, also when reading fails

# Stack in NumPy first: building a tensor from a list of ndarrays is slow.
# The reshape keeps a (0, num_vertices) shape when no OD pair was found.
b_matrix = np.array(b_vectors, dtype=np.float32).reshape(-1, num_vertices)
origin_destination_pairs = torch.tensor(b_matrix).t()

# --------------------------------
# Generate Incidence Matrix
# --------------------------------
# Column e holds -1 in the row of edge e's initial node and +1 in the row of
# its terminal node; node ids are shifted by one to obtain 0-based rows.
N = torch.zeros(n_nodes, n_edges)
edge_ids = torch.arange(n_edges)
N[(initial_nodes - 1).long(), edge_ids] = -1
N[(terminal_nodes - 1).long(), edge_ids] = 1

# Bundle everything loaded from the network files
state = dict(
    N=N,
    capacity=capacity,
    free_flow_time=free_flow_time,
    origin_destination_pairs=origin_destination_pairs,
)

OPT
OPT
OPT
ROOT
ROOT
ROOT
ROOT
ROOT
ROOT
ROOT
ROOT
ROOT
ROOT
ROOT
LIB64
LIB
LIB
LIB
LIB
LIB
LIB
LIB
LIB
LIB
LIB
LIB
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
ETC
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBIN
SBI

## Generate Training Data Context and Action
Now that we have loaded the incidence matrix, capacities, free-flow times, and OD pairs, we create training data by generating actions based on randomly sampled contexts.

In [None]:
# Seed PyTorch so the sampled contexts d_true and the matrix W are reproducible
seed = 0
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Aliases used only in function annotations further down. They point at the
# tensor TYPE; no alias is named `capacity` because that name holds the
# capacity tensor itself (assigned below).
context = torch.Tensor
action = torch.Tensor
edge = torch.Tensor
context_factor = -10.0

N               = state['N'].to(device)
capacity        = state['capacity'].to(device)
free_flow_time  = state['free_flow_time'].to(device)
b               = state['origin_destination_pairs'].to(device)

# compute (truncated) SVD decomposition used in projection:
# singular values below 1e-5 are discarded together with their vectors
U,s,VT = torch.linalg.svd(N, full_matrices=False)
ind = s >= 1e-5
s = s[ind]
U = U[:,ind]
VT = VT[ind, :]
V = VT.t()
# V diag(1/s) U^T (pseudo-inverse of N built from the kept singular triplets)
V_Sinv_UT = V.matmul( torch.diag(1/s).matmul(U.t()) )

VVT = V.matmul(VT)


n_nodes = N.shape[0]
n_edges = N.shape[1]
context_size = 10

n_train   = 5000
n_test    = 500
n_samples = n_train + n_test
# Contexts sampled uniformly in [0, 10)
d_true    = 10*torch.rand(n_samples, context_size)

# Uniform [0, 1) entries, except the first column which is scaled by
# context_factor
W = torch.rand(n_edges, context_size, device=device)
W[:,0] = context_factor * torch.rand(n_edges, device=device)

n_ODpairs = b.shape[1]

batch_size = 100

In [None]:
# Report the problem dimensions: OD pairs, nodes, edges and number of samples
print('n_ODpairs = ', n_ODpairs, 'n_nodes = ', n_nodes, ', n_edges = ', n_edges, ', n_samples = ', n_samples)

n_ODpairs =  528 n_nodes =  24 , n_edges =  76 , n_samples =  5500


In [None]:
def get_capacity(d: torch.Tensor) -> torch.Tensor:
  '''
    computes context-dependent edge capacities
      input:
        d = context with shape (batch_size x context_size)
      output:
        c = edge capacity with shape (batch_size x n_edges)

    The baseline `capacity` is scaled per edge by (1 + clamp(W d)), where the
    clamp limits the relative change to +/- cap_range.
  '''
  # Maximum relative capacity change; 'Anaheim' uses a wider range
  cap_range = 0.8 if data_set == 'Anaheim' else 3e-1
  Wd        = W.matmul(d.permute(1,0)).permute(1,0)
  # The result already lives on the device of W and d, so no transfer is needed
  Wd_clamp  = torch.clamp(Wd, min=-cap_range, max=cap_range)
  return capacity * (Wd_clamp + 1)

def get_time(x, d):
  """
    travel time on every edge for a given flow and context
      inputs:
        x = action with shape (batch_size x n_edges) (ASSUMES SUMMED OD Pairs)
        d = context with shape (batch_size x context_size)
      output:
        t = edge travel times with shape (batch_size x n_edges)

    t = free_flow_time * (1 + 0.5 * (x / capacity(d))^4)
  """
  # ratio of flow to the context-dependent capacity, raised to the 4th power
  congestion = (x/get_capacity(d))**4
  return free_flow_time * (1 + 0.5 * congestion)

def F(x, d):
  """
    game operator of the Wardrop equilibrium problem; it is exactly the
    per-edge travel time returned by get_time
      inputs:
        x = action with shape (batch_size x n_edges) (ASSUMES SUMMED OD Pairs)
        d = context with shape (batch_size x context_size)
      output:
        operator value with shape (batch_size x n_edges)
  """
  edge_times = get_time(x, d)
  return edge_times

def project_C1(z):
  '''Projection onto the nonnegative orthant: negative entries become zero'''
  return z.clamp(min=0)

def project_C2(z):
  """
    projects z onto the sets C_k^2 (one per OD pair b_k) and measures how well
    the result satisfies N Pz = b
    input:
      z   = action of size (batch_size x n_edges x n_ODpairs)
    output:
      Pz  = projected action of size (batch_size x n_edges x n_ODpairs)
      constraint_violation = ||N Pz - b|| / ||b|| (scalar tensor)
  """
  # leading batch dimension so that b broadcasts against z
  demand = b.unsqueeze(0)

  Pz = z - VVT.matmul(z) + V_Sinv_UT.matmul(demand)

  residual = N.matmul(Pz) - demand
  constraint_violation = torch.norm(residual)/torch.norm(demand)

  return Pz, constraint_violation

In [None]:
def get_game_solution(d: torch.Tensor, max_iter=100, eps=1e-2, alpha=1e-1, verbosity=False, device=device) -> torch.Tensor:
  """
    returns action v solution to VI given contexts d

    Iterates a three-operator splitting that alternates project_C1
    (nonnegativity), a step of size alpha along the operator F, and project_C2
    (flow conservation), until the update of z is smaller than eps or max_iter
    iterations have been performed.

      inputs:
        d         = context with shape (batch_size x context_size)
        max_iter  = maximum number of iterations
        eps       = stopping tolerance on the largest per-sample norm of z - z_prev
        alpha     = initial step size; divided by 10 every 20000 iterations
        verbosity = if True, print one progress line per iteration
        device    = device on which the iterates are allocated
      output:
        v = edge flows summed over OD pairs, shape (batch_size x n_edges),
            computed from the iterate at the start of the last iteration
  """
  # Local import so the function does not depend on a later cell importing time
  import time

  z = torch.zeros(d.shape[0], n_edges, n_ODpairs, device=device)
  # Defined before the loop so that max_iter == 0 returns a value instead of
  # raising UnboundLocalError
  v = torch.sum(project_C1(z), dim=2)

  fmt        = 'iter = {:4d}: diff_norm = {:7.3e} | time = {:7.3e} | constraint_violation = {:7.3e}  | diff_sample_norm = {:7.3e}' 
  decay_every = 20000  # number of iterations between step-size reductions

  for k in range(max_iter):
    start_time = time.time()
    z_prev = z.clone()
    x = project_C1(z)
    v = torch.sum(x, dim=2)
    Fv = F(v,d)
    y = 2*x - z - alpha*Fv.unsqueeze(2)
    y, constraint_violation = project_C2(y)
    z = z + y - x

    end_time = time.time()
    time_epoch = end_time - start_time

    # largest change of z over the batch (norm over edges, then over OD pairs)
    diff_norm = torch.norm(z - z_prev, dim=1)
    diff_norm = torch.max( torch.norm(diff_norm, dim=1) )
    if verbosity:
      # relative difference between the first and last sample of the batch;
      # only needed for the log line (nan on the first iteration, where v = 0)
      diff_sample_norm = torch.norm(v[0,:] - v[-1,:])/torch.norm(v[0,:])
      print(fmt.format(k+1, diff_norm.cpu().numpy(), time_epoch, constraint_violation, diff_sample_norm.cpu().numpy()))

    # NOTE(review): the previous condition `if iter % 20000:` was true on every
    # iteration that is NOT a multiple of 20000, so alpha shrank tenfold almost
    # every step. Data generated with the old condition will differ from data
    # generated with this one.
    if (k + 1) % decay_every == 0:
      alpha = alpha/10

    if diff_norm < eps:
      print('CONVERGED WITH diff_norm = ', diff_norm)
      break

  return v

In [None]:
import time

# Solve the game for batch_size contexts at a time. range() with a step covers
# every sample, including a final shorter batch when n_samples is not a
# multiple of batch_size.
save_dir = './'
state_path = save_dir + data_set + '_training_data.pth'

x_batches = []
for i, start in enumerate(range(0, n_samples, batch_size)):
  print('\n\n ----- GENERATING DATA BATCH', i+1, ' --------')

  d_batch = d_true[start:start + batch_size].to(device)
  x_true_temp = get_game_solution(d_batch, max_iter=int(1e6), eps=1e-2, alpha=1e-2, verbosity=True)

  # build data: all actions computed so far, in sample order
  x_batches.append(x_true_temp)
  x_true = torch.cat(x_batches, dim=0)

  # checkpoint after every batch; note that d_true holds ALL contexts while
  # x_true only holds the actions computed so far
  state = {
      'N': N.cpu(),
      'capacity': capacity.cpu(),
      'free_flow_time': free_flow_time.cpu(),
      'origin_destination_pairs': b.cpu(),
      'x_true': x_true.cpu(),
      'd_true': d_true.cpu(),
      'W': W.cpu()
  }

  torch.save(state, state_path)



 ----- GENERATING DATA BATCH 1  --------
iter =    1: diff_norm = 1.339e+04 | time = 1.822e-03 | constraint_violation = 4.085e-06  | diff_sample_norm =     nan
iter =    2: diff_norm = 9.465e+03 | time = 1.113e-03 | constraint_violation = 4.119e-06  | diff_sample_norm = 0.000e+00
iter =    3: diff_norm = 6.693e+03 | time = 6.735e-04 | constraint_violation = 1.527e-05  | diff_sample_norm = 3.743e-05
iter =    4: diff_norm = 4.747e+03 | time = 7.803e-04 | constraint_violation = 2.923e-05  | diff_sample_norm = 6.501e-05
iter =    5: diff_norm = 3.354e+03 | time = 6.440e-04 | constraint_violation = 3.635e-05  | diff_sample_norm = 6.139e-05
iter =    6: diff_norm = 1.481e+02 | time = 6.142e-04 | constraint_violation = 2.942e-05  | diff_sample_norm = 6.259e-05
iter =    7: diff_norm = 1.018e+01 | time = 6.149e-04 | constraint_violation = 2.935e-05  | diff_sample_norm = 6.274e-05
iter =    8: diff_norm = 1.053e+00 | time = 6.068e-04 | constraint_violation = 2.934e-05  | diff_sample_norm = 6

## Split Data into Training and Testing

In [None]:
# Reload the generated data from the checkpoint in the working directory
save_dir = './'
state_path = ''.join((save_dir, data_set, '_training_data.pth'))
state = torch.load(state_path)

In [None]:
d_true = state['d_true']
print('d_true.shape = ', d_true.shape)
x_true = state['x_true']
print('x_true.shape = ', x_true.shape)

# The checkpoint is rewritten after every batch with all contexts but only the
# actions computed so far, so an interrupted run leaves fewer actions than
# contexts. Splitting such a file would silently give mismatched sets.
if not (x_true.shape[0] == d_true.shape[0] == n_samples):
  raise ValueError(
      'expected ' + str(n_samples) + ' contexts and actions, found '
      + str(d_true.shape[0]) + ' contexts and ' + str(x_true.shape[0])
      + ' actions; was the data generation interrupted?')

# First n_train samples for training, the remaining ones for testing
d_train = d_true[0:n_train,:]
d_test  = d_true[n_train:n_samples, :]
x_train  = x_true[0:n_train,:]
x_test  = x_true[n_train:n_samples,:]
print('d_train.shape = ', d_train.shape, ', d_test.shape = ', d_test.shape) 
print('x_train.shape = ', x_train.shape, ', x_test.shape = ', x_test.shape)

d_true.shape =  torch.Size([5500, 10])
x_true.shape =  torch.Size([5500, 76])
d_train.shape =  torch.Size([5000, 10]) , d_test.shape =  torch.Size([500, 10])
x_train.shape =  torch.Size([5000, 76]) , x_test.shape =  torch.Size([500, 76])


In [None]:
# Final data set: network data, the train/test split and W, all moved to CPU
split_tensors = {
    'N': N,
    'capacity': capacity,
    'free_flow_time': free_flow_time,
    'origin_destination_pairs': b,
    'x_train': x_train,
    'd_train': d_train,
    'x_test': x_test,
    'd_test': d_test,
    'W': W,
}
state = {key: tensor.cpu() for key, tensor in split_tensors.items()}

# Same path as the per-batch checkpoint, so this overwrites that file
save_dir = './'
state_path = ''.join((save_dir, data_set, '_training_data.pth'))
torch.save(state, state_path)