In [47]:
from typing import Dict
import random

import numpy as np
import pandas as pd
import igraph as ig
import leidenalg as la
import matplotlib.pyplot as plt

In [3]:
# Wczytywanie danych
df = pd.read_csv("DaneSiec.csv", skiprows=3)
df

Unnamed: 0,<AS_nr>,<pref>,<1st_s>,<s_nr>,<ld_nr>,<del>,<jit>,<los>,<hop>
0,1,256,1,85,3356,39,47,0.0003,1
1,0,0,0,0,3705,23,10,0.0003,1
2,2,65536,86,85,34,21,7,0.0009,1
3,0,0,0,0,29076,49,37,0.0002,1
4,0,0,0,0,42226,33,46,0.0002,1
...,...,...,...,...,...,...,...,...,...
206969,393239,512,4191652,85,209,26,34,0.0008,1
206970,438802,5120,4191737,85,43802,10,49,0.0002,1
206971,12845938,256,4191822,85,12741,24,46,0.0004,1
206972,12845948,2048,4191907,85,20959,45,34,0.0001,1


In [4]:
def data_transform(df: pd.DataFrame) -> ig.Graph:
    df.drop((df[df["<ld_nr>"] == 0]).index, inplace=True)
    df.replace({"<AS_nr>": {0: np.nan}}, inplace=True)
    df["<AS_nr>"] = df["<AS_nr>"].ffill(axis=0).astype(int)
    df.drop_duplicates(inplace=True)
    nodes = set(df["<AS_nr>"].values).union(set(df["<ld_nr>"].values))
    labels = dict(zip(nodes, np.arange(len(nodes))))
    edges = []
    weights = []
    for row in df.itertuples():
        start = labels[row[1]]
        end = labels[row[5]]
        weight = float(row[6])
        edges.append((start, end))
        weights.append(weight)
    return ig.Graph(n=len(nodes), edges=edges, edge_attrs={"weight": weights}, directed=True), labels

In [5]:
graph, labels = data_transform(df)

In [6]:
# Nazwy node'ów, ale przelabelowane

all_nodes = list(labels.values())

In [7]:
x1, x2 = random.choice(all_nodes), random.choice(all_nodes)
print(f"Finding path from {x1} to {x2}")

Finding path from 32342 to 9596


In [33]:
graph.get_shortest_path(x1, to=x2)

[33096, 3278, 517, 424, 17997]

In [9]:
# 10000 losowych
N = 10_000
for _ in range(N):
    x1, x2 = random.choice(all_nodes), random.choice(all_nodes)
    graph.get_shortest_path(x1, to=x2)

In [10]:
# 10 losowych
N = 10
for _ in range(N):
    x1, x2 = random.choice(all_nodes), random.choice(all_nodes)
    graph.get_all_shortest_paths(x1, to=x2)

In [42]:
graph.get_all_shortest_paths(x1, to=x2)

[[15852, 783, 132, 24843]]

In [None]:
# Leiden Algorithm

partition = la.find_partition(
    graph=graph,
    partition_type=la.ModularityVertexPartition,
    n_iterations=10
)

hierarchy = partition.aggregate_partition()
membership = partition.membership

def generate_distances(membership: list[int], graph: ig.Graph) -> Dict[int, Dict[int, float]]:
    communities = set(membership)
    community_distances = {}
    for source_community in communities:
        community_distances[source_community] = {}
        source_nodes = [n for n, c in enumerate(membership) if c == source_community]
        for target_community in communities:
            target_nodes = [n for n, c in enumerate(membership) if c == target_community]
            min_dist = float('inf')
            for s in source_nodes:
                for t in target_nodes:
                    dist = graph.get_shortest_path(s, t, weights="weight", mode='OUT')[0]
                    if dist > 0:
                        print(dist)
                    min_dist = min(min_dist, dist)
            community_distances[source_community][target_community] = min_dist if min_dist != float('inf') else 0.0
    return community_distances

def generate_distances(membership: list[int], graph: ig.Graph) -> Dict[int, Dict[int, float]]:
    communities = set(membership)
    community_distances = {}
    for source_community in communities:
        community_distances[source_community] = {}
        source_nodes = [n for n, c in enumerate(membership) if c == source_community]
        for target_community in communities:
            target_nodes = [n for n, c in enumerate(membership) if c == target_community]
            min_dist = float('inf')
            for s in source_nodes:
                for t in target_nodes:
                    dist = graph.get_shortest_path(s, t, weights="weight", mode='OUT')[0]
                    if dist > 0:
                        print(dist)
                    min_dist = min(min_dist, dist)
            community_distances[source_community][target_community] = min_dist if min_dist != float('inf') else 0.0
    return community_distances
                
community_distances = generate_distances(membership, graph)
community_distances

5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5


KeyboardInterrupt: 

In [None]:
def generate_heurestic(community_distances: List[List[int]], membership: List[int]):
    def heurestic(graph: ig.Graph, x1: int, x2: int) -> float:
        x1, x2 = membership[x1], membership[x2]
        if x1 == x2:
            return 0.0
        try:
            distance = community_distances[x1][x2]
            return float(distance) if distance != float('inf') else 0.0
        except IndexError:
            return 0.0
    return heurestic

graph.get_shortest_path_astar(x1, to=x2, heuristics=generate_heurestic(community_distances, membership), mode="OUT")

In [None]:
# # 10000 losowych
# N = 100
# for _ in range(N):
#     x1, x2 = random.choice(all_nodes), random.choice(all_nodes)
#     graph.get_shortest_path_astar(x1, to=x2, heuristics=generate_heurestic(community_distances, membership), mode="OUT")