In [None]:
import csv
import os
import pathlib
import sys
import typing

from matplotlib import pyplot as plt
import networkx as nx
import numpy as np
import ot

sys.path.append(str(pathlib.PurePath('..', '..', 'src')))
from linear_geodesic_optimization import plot
from linear_geodesic_optimization.data import input_network, utility

In [None]:
# Input locations, given relative to the notebook's working directory.
directory_measurements = pathlib.PurePath('measurements')
filename_probes = pathlib.PurePath('probes.csv')

# Every entry of the measurements directory, in lexicographic order.
filenames_measurements = os.listdir(directory_measurements)
filenames_measurements.sort()

In [None]:
def symmetrize_graph(graph: nx.Graph):
    """Build an undirected copy of `graph`, summing throughputs of merged edges.

    Every node is copied together with its attributes. Edges are visited in
    the iteration order of `graph.edges`; the first time an (unordered) pair
    of endpoints is seen, the edge is copied with all of its attributes. Any
    later edge joining the same pair only adds its 'throughput' to the edge
    that is already present; its other attributes are ignored.

    Parameters
    ----------
    graph
        The graph to symmetrize. Each edge that gets merged into an existing
        one must carry a 'throughput' attribute.

    Returns
    -------
    A new `nx.Graph`; `graph` itself is not modified.
    """
    undirected = nx.Graph()

    for name, attributes in graph.nodes(data=True):
        undirected.add_node(name, **attributes)

    for u, v, attributes in graph.edges(data=True):
        if not undirected.has_edge(u, v):
            # First occurrence of this pair: copy the edge wholesale
            undirected.add_edge(u, v, **attributes)
            continue
        # Reverse (or repeated) occurrence: accumulate the throughput only
        undirected.edges[u, v]['throughput'] += attributes['throughput']

    return undirected

In [None]:
# Read the first measurement snapshot as a directed graph, then collapse it
# into an undirected graph whose edge throughputs are summed.
graph = symmetrize_graph(
    input_network.get_graph_from_paths(
        filename_probes, directory_measurements / filenames_measurements[0],
        clustering_distance=1.,
        should_compute_curvatures=False,
        directed=True
    )
)

# Range of the edge throughputs, used to scale plot colors
throughputs = []
for _, _, data in graph.edges(data=True):
    throughputs.append(data['throughput'])
throughput_minimum = min(throughputs)
throughput_maximum = max(throughputs)

In [None]:
def plot_graph(
    graph, weight_label = 'ricciCurvature', with_map = True,
    color_min = -2., color_max = 2.
):
    """Draw `graph` on a fresh figure, optionally over a background image.

    Parameters
    ----------
    graph
        Graph to draw. When `with_map` is true, every node must carry 'long'
        and 'lat' attributes.
    weight_label
        Edge attribute name forwarded to `plot.get_network_plot`.
    with_map
        Whether to draw the image returned by `plot.get_image_data` behind
        the network.
    color_min, color_max
        Bounds forwarded to `plot.get_network_plot`.

    Returns
    -------
    Whatever `plot.get_network_plot` returns.
    """
    _, ax = plt.subplots()

    if with_map:
        # Project every node's (longitude, latitude) with the Mercator helper
        projected = []
        for _, attributes in graph.nodes(data=True):
            projected.append(
                utility.mercator(attributes['long'], attributes['lat'])
            )

        # NOTE(review): 1000 is presumably an image resolution; its meaning
        # is defined by plot.get_image_data -- confirm there.
        map_scale = 0.8
        image_data, extent = plot.get_image_data(
            np.array(projected), 1000, map_scale
        )
        ax.imshow(image_data, extent=extent)

    return plot.get_network_plot(
        graph, weight_label, color_min, color_max, ax=ax,
    )

In [None]:
# Draw the symmetrized network with edges colored by their 'throughput'
# attribute; the color range runs from zero to the largest throughput.
plot_graph(graph, weight_label = 'throughput', color_min = 0., color_max = throughput_maximum)
# plt.savefig('throughput.png', dpi=300)
plt.show()

In [None]:
def ricci_curvature_optimal_transport_old(
    graph: nx.Graph,
    edge_distance_label: typing.Optional[str] = None,
    edge_weight_label: typing.Optional[str] = None,
    alpha: float = 0.9999
):
    """Compute a transport-based Ricci curvature for every edge of `graph`.

    Each node gets a probability distribution over the nodes: mass `alpha`
    sits on the node itself and the remaining `1 - alpha` is shared among
    its neighbors, proportionally to the edge weight divided by the squared
    edge distance. A node whose neighbor masses sum to zero keeps all of its
    mass. The curvature of an edge (u, v) is

        (1 - W(u, v) / d(u, v)) / (1 - alpha),

    where W is the optimal transport cost between the two endpoint
    distributions under shortest-path distances and d is the edge distance.

    Parameters
    ----------
    graph
        The graph whose edges are assigned curvatures.
    edge_distance_label
        Edge attribute holding distances. If `None`, every edge has distance
        1 and shortest paths are hop counts.
    edge_weight_label
        Edge attribute holding the weights that bias the neighbor masses. If
        `None`, every edge has weight 1.
    alpha
        Mass kept at the node itself. The result is divided by `1 - alpha`.

    Returns
    -------
    A dictionary mapping each `(source, destination)` pair, as yielded by
    `graph.edges`, to its curvature.
    """
    # Fix an ordering of the nodes so that node names (generally strings)
    # can be used as array indices
    indices_to_nodes = list(graph.nodes)
    nodes_to_indices = {
        node: index for index, node in enumerate(indices_to_nodes)
    }
    n_nodes = len(indices_to_nodes)

    # Matrix of shortest-path lengths. Entries for pairs with no connecting
    # path are never written and therefore stay at zero.
    if edge_distance_label is not None:
        shortest_paths = nx.all_pairs_dijkstra_path_length(
            graph, weight=edge_distance_label
        )
    else:
        shortest_paths = nx.all_pairs_shortest_path_length(graph)
    distance_matrix = np.zeros((n_nodes, n_nodes))
    for source, lengths in shortest_paths:
        row = distance_matrix[nodes_to_indices[source]]
        for destination, length in lengths.items():
            row[nodes_to_indices[destination]] = length

    # Relative probability (scaled by `alpha`) that a random walk stays at
    # a node in one step.
    # TODO: Is this doing the right thing when the edges are weighted?
    node_weights = np.ones(n_nodes)

    # One distribution per node, in index order
    distributions = []
    for node_index, node in enumerate(indices_to_nodes):
        stay_probability = alpha * node_weights[node_index]
        distribution = np.zeros(n_nodes)

        neighbor_mass_total = 0.
        for neighbor in graph.neighbors(node):
            if edge_weight_label is None:
                mass = 1.
            else:
                mass = graph.edges[node, neighbor][edge_weight_label]
            if edge_distance_label is None:
                divisor = 1.
            else:
                divisor = graph.edges[node, neighbor][edge_distance_label]**2
            mass = mass / divisor

            distribution[nodes_to_indices[neighbor]] = mass
            neighbor_mass_total += mass

        if neighbor_mass_total != 0.:
            # Rescale the neighbor masses to total `1 - stay_probability`,
            # then put the rest on the node itself
            distribution *= (1. - stay_probability) / np.sum(distribution)
            distribution[node_index] += stay_probability
        else:
            distribution[node_index] = 1.

        distributions.append(distribution)

    # Transport cost between the endpoint distributions of each edge, turned
    # into an Ollivier-Ricci curvature
    ricci_curvatures = {}
    for source, destination, data in graph.edges(data=True):
        distribution_source = distributions[nodes_to_indices[source]]
        distribution_destination = distributions[nodes_to_indices[destination]]
        transport_distance = ot.emd2(
            distribution_source, distribution_destination, distance_matrix
        )
        if edge_distance_label is None:
            edge_distance = 1.
        else:
            edge_distance = data[edge_distance_label]
        ricci_curvatures[source, destination] = (
            (1. - transport_distance / edge_distance) / (1. - alpha)
        )

    return ricci_curvatures

In [None]:
def _endpoint_distribution(
    node, other, weights_node, node_to_index_augmented, n_nodes_augmented,
    alpha
):
    """Return the distribution of `node` used for the edge (node, other).

    Mass `alpha` is placed on `node`; the remaining `1 - alpha` is split
    among the neighbors of `node` proportionally to `weights_node`. The
    share belonging to `other` is placed on the pendant node
    `(node, other)` rather than on `other` itself. If the neighbor weights
    sum to zero, all of the mass stays on `node`.
    """
    distribution = np.zeros(n_nodes_augmented)
    for neighbor, neighbor_weight in weights_node.items():
        if neighbor == other:
            distribution[node_to_index_augmented[node, neighbor]] = neighbor_weight
        else:
            # Original nodes are their own indices in the augmented graph
            distribution[neighbor] = neighbor_weight

    distribution_sum = np.sum(distribution)
    if distribution_sum != 0.:
        distribution *= (1. - alpha) / distribution_sum
        distribution[node] = alpha
    else:
        distribution[node] = 1.
    return distribution


def ricci_curvature_optimal_transport(
    graph: nx.Graph,
    edge_distance_label: typing.Optional[str] = None,
    edge_weight_label: typing.Optional[str] = None,
    alpha: float = 0.9999
):
    """Compute a transport-based Ricci curvature on an augmented graph.

    The nodes are relabeled 0..n-1 (on a copy; `graph` is not modified).
    For every node `u` and every neighbor `v`, a pendant node `(u, v)` is
    attached to `u` by an edge that duplicates the edge (u, v). For an edge
    (u, v), the distribution of `u` puts mass `alpha` on `u` and splits
    `1 - alpha` over the neighbors of `u` proportionally to the edge
    weights, with the share of `v` placed on the pendant `(u, v)`; the
    distribution of `v` is built symmetrically. The curvature is

        (1 - W(u, v) / d(u, v)) / (1 - alpha),

    where W is the optimal transport cost under shortest-path distances in
    the augmented graph and d is the edge distance.

    Parameters
    ----------
    graph
        The graph whose edges are assigned curvatures.
    edge_distance_label
        Edge attribute holding distances. If `None`, every edge has distance
        1 and shortest paths are hop counts.
    edge_weight_label
        Edge attribute holding the weights that bias the neighbor masses. If
        `None`, every edge has weight 1.
    alpha
        Mass kept at the node itself. The result is divided by `1 - alpha`.

    Returns
    -------
    A dictionary with one entry per processed edge, keyed by the original
    node names `(u, v)`, where `u` does not come after `v` in the iteration
    order of `graph.nodes`.
    """
    # Create mappings between numerical IDs and node names
    index_to_node = list(graph.nodes)
    node_to_index = {node: index for index, node in enumerate(index_to_node)}
    n_nodes = len(index_to_node)

    # Relabel the graph (this returns a copy, so the caller's graph is safe
    # from the augmentation below)
    graph = nx.relabel_nodes(graph, node_to_index)

    # Create the augmented graph: one pendant node per (node, neighbor)
    # pair. Simultaneously, record the edge weights seen from each node.
    weights = []
    index_to_node_augmented = list(range(n_nodes))
    for index in range(n_nodes):
        weights_index = {}
        # Snapshot the neighbors, since the graph is mutated in the loop
        for neighbor in list(graph.neighbors(index)):
            weight = (
                1. if edge_weight_label is None
                else graph.edges[index, neighbor][edge_weight_label]
            )
            pendant_attributes = {'weight': weight}
            if edge_distance_label is not None:
                # The pendant edge duplicates (index, neighbor), so it must
                # have the same length. Without this attribute, networkx's
                # Dijkstra treats the edge as having length 1.
                pendant_attributes[edge_distance_label] = (
                    graph.edges[index, neighbor][edge_distance_label]
                )
            graph.add_edge(index, (index, neighbor), **pendant_attributes)
            weights_index[neighbor] = weight
            index_to_node_augmented.append((index, neighbor))
        weights.append(weights_index)
    n_nodes_augmented = len(index_to_node_augmented)
    node_to_index_augmented = {
        node: index for index, node in enumerate(index_to_node_augmented)
    }

    # Compute the distance matrix, which is just a matrix of shortest
    # paths. Entries for pairs with no connecting path stay at zero.
    distance_matrix = np.zeros((n_nodes_augmented, n_nodes_augmented))
    if edge_distance_label is None:
        distances_iterator = nx.all_pairs_shortest_path_length(graph)
    else:
        distances_iterator = nx.all_pairs_dijkstra_path_length(
            graph, weight = edge_distance_label
        )
    for source, distance_dict in distances_iterator:
        index_source = node_to_index_augmented[source]
        for destination, distance in distance_dict.items():
            index_destination = node_to_index_augmented[destination]
            distance_matrix[index_source, index_destination] = distance

    ricci_curvatures = {}
    for source in range(n_nodes):
        for destination in weights[source]:
            # Don't do duplicate work
            if source > destination:
                continue

            distribution_source = _endpoint_distribution(
                source, destination, weights[source],
                node_to_index_augmented, n_nodes_augmented, alpha
            )
            distribution_destination = _endpoint_distribution(
                destination, source, weights[destination],
                node_to_index_augmented, n_nodes_augmented, alpha
            )

            transport_distance = ot.emd2(
                distribution_source, distribution_destination, distance_matrix
            )
            edge_distance = (
                1. if edge_distance_label is None
                else graph.edges[source, destination][edge_distance_label]
            )
            ricci_curvatures[index_to_node[source], index_to_node[destination]] = (
                (1. - transport_distance / edge_distance) / (1. - alpha)
            )

    return ricci_curvatures

In [None]:
# Throughput-weighted curvature (hop-count distances) on the measured
# network, stored on the edges for plotting.
ricci_curvatures = ricci_curvature_optimal_transport(
    graph, edge_distance_label=None, edge_weight_label='throughput'
)
for source, destination, data in graph.edges(data=True):
    edge = (source, destination)
    data['ricciCurvature'] = ricci_curvatures[edge]

plot_graph(graph)
plt.show()

# Smallest and largest curvature found
print(min(ricci_curvatures.values()), max(ricci_curvatures.values()))

In [None]:
# Batch export of every measurement snapshot to GraphML and PNG. The loop is
# currently disabled (it iterates over an empty list); swap in the commented
# line to run it. Note that it overwrites `graph` and `ricci_curvatures`.
# for filename_measurements in filenames_measurements[178:]:
for filename_measurements in []:
    output_stem = os.path.splitext(filename_measurements)[0]

    graph = symmetrize_graph(
        input_network.get_graph_from_paths(
            filename_probes, directory_measurements / filename_measurements,
            clustering_distance=1.,
            should_compute_curvatures=False,
            directed=True
        )
    )

    ricci_curvatures = ricci_curvature_optimal_transport(
        graph, edge_distance_label=None, edge_weight_label='throughput'
    )
    for source, destination, data in graph.edges(data=True):
        data['ricciCurvature'] = ricci_curvatures[source, destination]

    # Drop the 'elements' node attribute before writing the GraphML file
    for _, data in graph.nodes(data=True):
        del data['elements']

    nx.write_graphml(graph, pathlib.PurePath('graphml', f'{output_stem}.graphml'))

    plot_graph(graph)
    plt.savefig(pathlib.PurePath('networks', f'{output_stem}.png'), dpi=300)
    # Release the figure so that figures do not pile up across iterations
    plt.clf()
    plt.close()

In [None]:
# Toy example: unweighted curvature with hop-count distances
directory_toy = pathlib.PurePath('..', '..', 'data', 'toy')
graph_toy = input_network.get_graph_from_paths(
    directory_toy / 'toy_probes.csv',
    directory_toy / 'toy_latencies.csv',
    0,
    should_compute_curvatures = False
)

ricci_curvatures_toy = ricci_curvature_optimal_transport(graph_toy)
for source, destination, data in graph_toy.edges(data=True):
    edge = (source, destination)
    data['ricciCurvature'] = ricci_curvatures_toy[edge]

In [None]:
# Draw the toy network using the defaults of plot.get_network_plot
plot.get_network_plot(graph_toy)
plt.show()

# Smallest and largest curvature found on the toy network
print(min(ricci_curvatures_toy.values()), max(ricci_curvatures_toy.values()))

In [None]:
# Closed-form curves plotted against n = 3, ..., 9 and labelled as the old
# curvature (alpha = 0 and alpha -> 1) and the new curvature on K_n.
xs = list(range(3, 10))
ys_graph_alpha_0 = [(x - 2) / (x - 1) for x in xs]
ys_graph_alpha_1 = [x / (x - 1) for x in xs]
ys_new = [(x - 4) / (x - 1) for x in xs]

fig, ax = plt.subplots(dpi=150)
ax.set_title(r'Comparison of Ricci Curvatures on $K_n$')
ax.set_xlabel(r'$n$')
ax.set_ylabel(r'$\kappa^{\mathrm{Ricci}}$')
for ys, line_format, label in (
    (ys_graph_alpha_0, 'r-', r'Old ($\alpha = 0$)'),
    (ys_graph_alpha_1, 'b-', r'Old ($\alpha \rightarrow 1$)'),
    (ys_new, 'g-', r'New'),
):
    ax.plot(xs, ys, line_format, label=label)
# Dashed reference line at zero curvature (no legend entry)
ax.plot(xs, [0] * len(xs), 'k--')
ax.legend()
plt.show()

In [None]:
# Weight the old curvature by a 'capacity' attribute derived from throughput.
# NOTE(review): 4e11 is presumably the link capacity in the units of
# 'throughput' -- confirm. This cell overwrites `ricci_curvatures` and the
# 'ricciCurvature' edge attribute set by earlier cells.
for source, destination in graph.edges:
    data = graph.edges[source, destination]
    data['capacity'] = 4e11 - data['throughput']

ricci_curvatures = ricci_curvature_optimal_transport_old(
    graph, edge_distance_label=None, edge_weight_label='capacity'
)
for source, destination in graph.edges:
    data = graph.edges[source, destination]
    data['ricciCurvature'] = ricci_curvatures[source, destination]

plot_graph(graph, color_min = -1, color_max = 1)
# plt.savefig('Internet2_with_distances_and_throughputs.png', dpi=300)
plt.show()

# Smallest and largest curvature found
print(min(ricci_curvatures.values()), max(ricci_curvatures.values()))

In [None]:
# Scatter the unweighted old curvature (x) against whatever is currently in
# `ricci_curvatures` (y), edge by edge. Every key of the unweighted result
# must also be a key of `ricci_curvatures`.
ricci_curvatures_unweighted = ricci_curvature_optimal_transport_old(graph, None, None)
x = np.array([
    curvature for curvature in ricci_curvatures_unweighted.values()
])
y = np.array([
    ricci_curvatures[edge] for edge in ricci_curvatures_unweighted
])
plt.plot(x, y, 'b.')
plt.show()

In [None]:
# Histogram of the per-edge differences between the two curvature arrays
# `x` and `y` built in the scatter-plot cell
plt.hist(x - y)
plt.show()

In [None]:
# Two cliques joined by a single zero-weight edge. Nodes are named
# (cluster, position); the left clique has edge weight 5 and the right
# clique has edge weight 1.
graph_two_clusters = nx.Graph()
size_left = 7
size_right = 7
for cluster, size, weight in ((0, size_left, 5.), (1, size_right, 1.)):
    for i in range(size):
        for j in range(i + 1, size):
            graph_two_clusters.add_edge((cluster, i), (cluster, j), weight=weight)
graph_two_clusters.add_edge((0, 0), (1, 0), weight=0.)

curvatures_two_clusters = ricci_curvature_optimal_transport(
    graph_two_clusters, None, 'weight', 0.
)
# for edge, curvature in curvatures_two_clusters.items():
#     print(f'{edge}: {curvature}')

In [None]:
# Compare the new curvature at alpha = 0 with a value of alpha close to 1 on
# a random graph with random edge weights. Both the graph and the weights
# are seeded so that the cell gives the same result on every run.
seed_random_graph = 0
rng = np.random.default_rng(seed_random_graph)
graph_random = nx.erdos_renyi_graph(20, 0.5, seed=seed_random_graph)
for _, _, data in graph_random.edges(data=True):
    # Weights lie in [0.2, 1.2); the fourth power skews them towards 0.2
    data['weight'] = rng.random()**4 + 0.2
curvatures_random_0 = ricci_curvature_optimal_transport(graph_random, None, 'weight', 0.)
# NOTE(review): the variable name says 0.9999 but the alpha passed is 0.999;
# confirm which one is intended.
curvatures_random_0_9999 = ricci_curvature_optimal_transport(graph_random, None, 'weight', 0.999)
differences = {
    edge: curvatures_random_0_9999[edge] - curvatures_random_0[edge]
    for edge in curvatures_random_0
}
plt.hist(list(differences.values()))
plt.show()


In [None]:
# Histogram of the alpha = 0 curvatures of the random graph
plt.hist(list(curvatures_random_0.values()))
plt.show()