In [1]:
import networkx as nx
import numpy as np
import cvxpy as cp
from tqdm import tqdm
from pathlib import Path

from src.load_data import (
    read_metadata_networks_tntp,
    read_graph_transport_networks_tntp,
    read_traffic_mat_transport_networks_tntp,
)

from src.models import SDModel, BeckmannModel, TwostageModel
from src.algs import subgd, ustm, frank_wolfe, cyclic
from src.cvxpy_solvers import get_max_traffic_mat_mul
from src.commons import Correspondences

import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 14})
%config InlineBackend.figure_format = 'retina'

%matplotlib inline

In [2]:
networks_path = Path("./TransportationNetworks")

# Active dataset. To switch, replace the triple below with one of:
#   "SiouxFalls", "SiouxFalls_net", "SiouxFalls_trips"
#   "Anaheim",    "Anaheim_net",    "Anaheim_trips"
#   "Barcelona",  "Barcelona_net",  "Barcelona_trips"
folder, net_name, traffic_mat_name = "Austin", "Austin_net", "Austin_trips_am"

# Both .tntp files live in the same per-dataset directory.
dataset_dir = networks_path / folder
net_file = dataset_dir / f"{net_name}.tntp"
traffic_mat_file = dataset_dir / f"{traffic_mat_name}.tntp"

# The traffic-matrix reader takes the metadata returned by the graph reader.
graph, metadata = read_graph_transport_networks_tntp(net_file)
correspondences = read_traffic_mat_transport_networks_tntp(
    traffic_mat_file, metadata
)
n = graph.number_of_nodes()

print(f"{graph.number_of_edges()=}, {graph.number_of_nodes()=}")

metadata["can_pass_through_zones"]=True
graph.number_of_edges()=18956, graph.number_of_nodes()=7388


In [3]:
# Marginals of the traffic matrix: row sums are used as departures,
# column sums as arrivals.
traffic_mat = correspondences.traffic_mat
departures = traffic_mat.sum(axis=1)
arrivals = traffic_mat.sum(axis=0)

In [73]:
# Display the graph's adjacency view (per-edge attribute dicts such as
# 'free_flow_times' and 'capacities'). NOTE: this renders the whole view,
# which is very long for a large network.
graph.adj

AdjacencyView({0: {1: {'free_flow_times': 4.296, 'capacities': 99999.0, 'rho': 0.83, 'mu': 0.18181818181818182}}, 1: {0: {'free_flow_times': 4.296, 'capacities': 99999.0, 'rho': 0.83, 'mu': 0.18181818181818182}, 42: {'free_flow_times': 2.074286, 'capacities': 1201.0, 'rho': 0.83, 'mu': 0.18181818181818182}}, 2: {3: {'free_flow_times': 1.728, 'capacities': 99999.0, 'rho': 0.83, 'mu': 0.18181818181818182}, 6571: {'free_flow_times': 3.816, 'capacities': 99999.0, 'rho': 0.83, 'mu': 0.18181818181818182}}, 3: {2: {'free_flow_times': 1.728, 'capacities': 99999.0, 'rho': 0.83, 'mu': 0.18181818181818182}, 4519: {'free_flow_times': 4.971429, 'capacities': 5000.0, 'rho': 0.83, 'mu': 0.18181818181818182}, 6571: {'free_flow_times': 4.028571, 'capacities': 1201.0, 'rho': 0.83, 'mu': 0.18181818181818182}}, 4: {5: {'free_flow_times': 1.026667, 'capacities': 4250.0, 'rho': 0.83, 'mu': 0.18181818181818182}, 1973: {'free_flow_times': 0.293333, 'capacities': 4250.0, 'rho': 0.83, 'mu': 0.18181818181818182}

In [74]:
# Show the two marginal vectors derived from traffic_mat
# (row sums and column sums respectively).
departures, arrivals

(array([122.54999986,   0.        ,  30.89999954, ..., 175.33000039,
        130.17999973, 371.56000046]),
 array([263.75999932,   0.        ,  19.44999965, ..., 191.41000022,
        176.91999993, 246.69000021]))

In [75]:
# Reference solver from the project's src.sinkhorn module.
from src.sinkhorn import Sinkhorn

# NOTE(review): the third positional argument is presumably the iteration cap
# (SinkhornGPU calls it max_iter) — confirm against src.sinkhorn.
sinkhorn = Sinkhorn(departures, arrivals, 100_000_000)

In [8]:
# sinkhorn.run(traffic_mat)

# numpy vs torch

In [38]:
# import torch
# from torch import logsumexp
# from torch import from_numpy
# from typing import Tuple

In [39]:
# from scipy.special import logsumexp

# a = np.arange(10)
# b = np.arange(10, 0, -1)

# r1 = logsumexp(a, b=b)
# r2 = np.log(np.sum(b * np.exp(a)))
# r3 = np.log(np.sum(np.exp(np.log(b) * a)))

In [40]:
# def logsumexp(input: torch.Tensor, dim: int, keepdim: bool, b: torch.Tensor) -> torch.Tensor:
#     return torch.log(torch.sum(b * torch.exp(input), dim=dim, keepdim=keepdim))

In [89]:
import torch
# from torch import logsumexp
# from torch import from_numpy
from typing import Tuple, Union

# Print tensors with 8 digits of precision so small differences between
# solver outputs are visible.
torch.set_printoptions(precision=8)


class SinkhornGPU:
    """Torch implementation of alternating (Sinkhorn-style) dual updates.

    Given target row sums ``departures`` (L_i), target column sums
    ``arrivals`` (W_j) and a matrix ``gammaT_ij``, the solver alternately
    updates the dual variables ``lambda_l_i`` and ``lambda_w_j`` until

        d_ij = exp(-(1 + gammaT_ij + lambda_w_j + lambda_l_i))

    has row sums close to L_i and column sums close to W_j.

    Args:
        departures: 1-D tensor of target row sums (L_i).
        arrivals: 1-D tensor of target column sums (W_j).
        max_iter: iteration budget; ``run`` raises ``RuntimeError`` as soon as
            the iteration counter reaches this value.
        eps: tolerance used by the stopping criterion.
        crit_check_period: the stopping criterion is evaluated only every
            ``crit_check_period`` iterations.
        device: stored on the instance only. Tensors are NOT moved to it, so
            every tensor handed to this class must already live on one common
            device.
        verbose: when True, print the iteration counter after every iteration.
            Off by default so long runs do not flood the output.

    NOTE(review): the updates use ``1 / departures`` and ``1 / arrivals`` as
    weights, so zero totals yield infinite weights; such inputs are not
    handled specially here.
    """

    def __init__(
        self,
        departures: torch.Tensor,
        arrivals: torch.Tensor,
        max_iter: int,
        eps=1e-6,
        crit_check_period=10,
        device="cpu",
        verbose: bool = False,
    ):
        self.L_i = departures
        self.W_j = arrivals
        self.n_types = self.L_i.shape[0]
        self.max_iter = max_iter
        self.eps = eps
        self.crit_check_period = crit_check_period
        self.device = device
        self.verbose = verbose

    @staticmethod
    def logsumexp(input: torch.Tensor, dim: int, keepdim: bool, b: torch.Tensor) -> torch.Tensor:
        """Weighted log-sum-exp: ``log(sum(b * exp(input)))`` along ``dim``.

        The per-slice maximum is subtracted before exponentiating and added
        back afterwards, so large-magnitude entries no longer overflow to
        ``inf`` or underflow to ``-inf`` as the naive formula does.

        Args:
            input: exponents.
            dim: dimension to reduce.
            keepdim: whether the reduced dimension is kept with size 1.
            b: weights; must broadcast against ``input`` without enlarging it.

        Returns:
            The reduced tensor.
        """
        if input.numel() == 0:
            # A max over an empty slice is undefined; the naive formula is
            # harmless here because there is nothing to overflow.
            return torch.log(torch.sum(b * torch.exp(input), dim=dim, keepdim=keepdim))

        shift = torch.amax(input, dim=dim, keepdim=True)
        # Subtracting an infinite maximum would create NaNs (inf - inf), so
        # such slices are left unshifted.
        shift = torch.where(torch.isfinite(shift), shift, torch.zeros_like(shift))

        weighted_sum = torch.sum(b * torch.exp(input - shift), dim=dim, keepdim=True)
        result = torch.log(weighted_sum) + shift
        return result if keepdim else result.squeeze(dim)

    @staticmethod
    def d_ij(
        lambda_l_i: torch.Tensor, lambda_w_j: torch.Tensor, gammaT_ij: torch.Tensor
    ) -> torch.Tensor:
        """Return ``exp(-(1 + gammaT_ij + lambda_w_j + lambda_l_i))``.

        ``lambda_l_i`` is broadcast along rows and ``lambda_w_j`` along
        columns of ``gammaT_ij``.
        """
        return torch.exp(-(1 + gammaT_ij + lambda_w_j + lambda_l_i.unsqueeze(1)))

    def _torch_iteration(
        self,
        k: int,
        gammaT_ij: torch.Tensor,
        lambda_w_j: torch.Tensor,
        lambda_l_i: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Perform one alternating update.

        Even ``k`` recomputes ``lambda_l_i`` (reduction over columns, weights
        ``1 / L_i``); odd ``k`` recomputes ``lambda_w_j`` (reduction over
        rows, weights ``1 / W_j``). The other variable is passed through.

        Returns:
            ``(lambda_w_j, lambda_l_i)`` — note the order.
        """
        if k % 2 == 0:
            lambda_l_i = self.logsumexp(
                -lambda_w_j.unsqueeze(0) - 1.0 - gammaT_ij,
                dim=1,
                keepdim=False,
                b=1.0 / self.L_i.unsqueeze(1),
            )
        else:
            lambda_w_j = self.logsumexp(
                (-lambda_l_i.unsqueeze(1) - 1.0 - gammaT_ij),
                dim=0,
                keepdim=False,
                b=1.0 / self.W_j.unsqueeze(0),
            )

        return lambda_w_j, lambda_l_i

    def run(
        self,
        gammaT_ij: torch.Tensor,
        lambda_l_i: Union[torch.Tensor, None] = None,
        lambda_w_j: Union[torch.Tensor, None] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Iterate until the stopping criterion holds.

        Args:
            gammaT_ij: matrix entering the exponent of ``d_ij``.
            lambda_l_i: optional starting value; zeros shaped like the
                departures when omitted.
            lambda_w_j: optional starting value; zeros shaped like the
                arrivals when omitted.

        Returns:
            ``(d_ij, lambda_l_i, lambda_w_j)`` at the final iterate.

        Raises:
            RuntimeError: when the iteration counter reaches ``max_iter``.
        """
        if lambda_l_i is None:
            lambda_l_i = torch.zeros_like(self.L_i)
        if lambda_w_j is None:
            lambda_w_j = torch.zeros_like(self.W_j)

        k = 0
        while True:
            # The criterion costs a full d_ij evaluation, so it is only
            # checked periodically (and never before the first iteration).
            if k > 0 and k % self.crit_check_period == 0:
                if self._criteria(lambda_l_i, lambda_w_j, gammaT_ij):
                    break

            lambda_w_j, lambda_l_i = self._torch_iteration(
                k, gammaT_ij, lambda_w_j, lambda_l_i
            )

            k += 1
            if k == self.max_iter:
                raise RuntimeError("Max iter exceeded in SinkhornGPU")
            if self.verbose:
                print(f'k: {k}')
        return self.d_ij(lambda_l_i, lambda_w_j, gammaT_ij), lambda_l_i, lambda_w_j

    def _criteria(
        self, lambda_l_i: torch.Tensor, lambda_w_j: torch.Tensor, gammaT_ij: torch.Tensor
    ) -> bool:
        """Return True when the current iterate satisfies the stopping rule.

        The rule requires both the norm of the marginal residuals
        (row sums minus L_i, column sums minus W_j) and the negated inner
        product of the dual variables with those residuals to be below
        ``eps``.
        """
        traffic_mat = self.d_ij(lambda_l_i, lambda_w_j, gammaT_ij)
        grad_l = traffic_mat.sum(dim=1) - self.L_i
        grad_w = traffic_mat.sum(dim=0) - self.W_j
        dual_grad = torch.cat((grad_l, grad_w))

        dual_grad_norm = torch.norm(dual_grad)
        inner_prod = -torch.cat((lambda_l_i, lambda_w_j)) @ dual_grad

        # Convert explicitly so callers get a Python bool, as annotated,
        # rather than a 0-d tensor.
        return bool(dual_grad_norm < self.eps) and bool(inner_prod < self.eps)


In [77]:
# Report which accelerator backends this PyTorch installation can use.
mps_backend = torch.backends.mps

# CUDA device present?
print(torch.cuda.is_available())

# MPS usable on this machine? (requires macOS 12.3+)
print(mps_backend.is_available())

# Was this PyTorch build compiled with MPS support?
print(mps_backend.is_built())

False
True
True


In [78]:
# Pick an accelerator that actually exists instead of hard-coding "mps", so
# the later `.to(device)` calls also work on machines without MPS support.
# `getattr` guards PyTorch builds that do not ship the MPS backend module.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
device

device(type='mps')

In [90]:
# Wrap the NumPy arrays as torch tensors. No dtype or device conversion is
# applied here, so despite the `_gpu` suffix nothing is moved to an
# accelerator in this cell.
arrivals_gpu, departures_gpu, traffic_mat_gpu = (
    torch.from_numpy(array) for array in (arrivals, departures, traffic_mat)
)

# arrivals_gpu = torch.from_numpy(arrivals).to(torch.float32)
# departures_gpu = torch.from_numpy(departures).to(torch.float32)
# traffic_mat_gpu = torch.from_numpy(traffic_mat).to(torch.float32)

# arrivals_gpu = torch.from_numpy(arrivals).to(torch.float32).to(device)
# departures_gpu = torch.from_numpy(departures).to(torch.float32).to(device)
# traffic_mat_gpu = torch.from_numpy(traffic_mat).to(torch.float32).to(device)

In [67]:
# NOTE(review): this rebinds `n`, which an earlier cell sets to the number of
# graph nodes — verify no later cell relies on the earlier value.
n = 24

# Dedicated seeded generator: the random inputs are identical on every run
# and torch's global RNG state is left untouched. Values are drawn on the CPU
# and then moved, so they do not depend on the selected device.
_rand_gen = torch.Generator().manual_seed(0)

gammaT_ij = torch.rand(n, n, generator=_rand_gen, dtype=torch.float32).to(device)
lambda_w_j = torch.rand(n, generator=_rand_gen, dtype=torch.float32).to(device)
lambda_l_i = torch.rand(n, generator=_rand_gen, dtype=torch.float32).to(device)

# Iteration index passed to the single-step call; an even value selects the
# lambda_l_i update in SinkhornGPU._torch_iteration.
k = 0

In [None]:
# lambda_w_j

# Sinkhorn GPU

In [36]:
%%time

# Time one update step of the torch solver on the small random inputs.
# NOTE(review): gammaT_ij / lambda_* are 24-sized random tensors, while the
# marginals come from the loaded network; the shapes only broadcast when that
# network has 24 nodes, and all tensors must share a device — verify before
# Restart & Run All.
sinkhorn_gpu = SinkhornGPU(
    departures=departures_gpu,
    arrivals=arrivals_gpu,
    max_iter=100_000_000,
)
sinkhorn_gpu._torch_iteration(
    k=k, gammaT_ij=gammaT_ij, lambda_w_j=lambda_w_j, lambda_l_i=lambda_l_i
)

CPU times: user 1.57 ms, sys: 778 µs, total: 2.35 ms
Wall time: 1.43 ms


(tensor([0.28971928, 0.05323982, 0.07325834, 0.68632615, 0.26884586, 0.72073251,
         0.93661356, 0.74966162, 0.53361517, 0.11230940, 0.13846499, 0.21764684,
         0.14696401, 0.24413043, 0.12781686, 0.93585235, 0.27062666, 0.59520233,
         0.59722275, 0.66702229, 0.34569651, 0.21214789, 0.65340692, 0.99606693],
        device='mps:0'),
 tensor([-7.79963064, -6.87870121, -6.74509192, -8.08072948, -7.31824636,
         -7.59804058, -8.01463413, -8.53214836, -8.39509487, -9.41696930,
         -8.69264507, -8.19888973, -8.18727303, -8.20376778, -8.68741798,
         -8.83623123, -8.74397087, -7.24996662, -8.17409420, -8.44331264,
         -7.95458841, -8.65174961, -8.23153496, -7.55255413], device='mps:0'))

In [None]:
# Full solve with the torch implementation. The dual variables start from
# their defaults; lambda_l_i / lambda_w_j can be passed to run() to start
# from other values.
sinkhorn_gpu = SinkhornGPU(
    departures=departures_gpu,
    arrivals=arrivals_gpu,
    max_iter=100_000_000,
    device="cpu",
)
sinkhorn_gpu_result = sinkhorn_gpu.run(gammaT_ij=traffic_mat_gpu)

# Sinkhorn

In [4]:
# Reference solver from the project's src.sinkhorn module, built from the
# same marginals as the torch solver.
from src.sinkhorn import Sinkhorn

# NOTE(review): the third positional argument is presumably the iteration cap
# — confirm against src.sinkhorn.
sinkhorn = Sinkhorn(departures, arrivals, 100_000_000)

In [None]:
# Run the reference solver on the NumPy traffic matrix (the torch solver is
# run on its from_numpy counterpart).
sinkhorn_result = sinkhorn.run(traffic_mat)

In [None]:
# Element-wise difference between the second items returned by the two
# solvers. For SinkhornGPU.run that item is lambda_l_i.
# NOTE(review): presumably Sinkhorn.run returns its items in the same order —
# confirm against src.sinkhorn.
sinkhorn_gpu_result[1].numpy() - sinkhorn_result[1]

In [None]:
# sinkhorn._scipy_iteration(k, gammaT_ij.detach().cpu().numpy(), 
#                           lambda_w_j.detach().cpu().numpy(), 
#                           lambda_l_i.detach().cpu().numpy())

## Comparison