In [81]:
import graph_tool.collection 
import graph_tool.search 
import graph_tool as gt
import random
import numpy as np
import zstandard
import networkx as nx
import copy

In [82]:
!pwd

/home/aferrara/FairShortPath


In [83]:
def load_graph_from_file(filename):
    graph = gt.Graph(directed=True)
    weight_prop = graph.new_edge_property("double")

    # Dictionary to store mapping from node IDs to graph vertex objects
    node_mapping = {}

    with open(filename, 'r') as file:
        for line in file:
            parts = line.strip().split()

            if parts[0] == 'p':
                # Parse the number of nodes and arcs
                num_nodes = int(parts[2])
                num_arcs = int(parts[3])
            elif parts[0] == 'a':
                # Parse the arc information
                source = int(parts[1]) - 1  # Subtract 1 to make it zero-indexed
                target = int(parts[2]) - 1
                weight = np.double(parts[3])

                # Add vertices and edge to the graph
                if source not in node_mapping:
                    node_mapping[source] = graph.add_vertex()
                if target not in node_mapping:
                    node_mapping[target] = graph.add_vertex()

                edge = graph.add_edge(node_mapping[source], node_mapping[target])
                weight_prop[edge] = weight
    
    graph.edge_properties["weight"] = weight_prop
    return graph

# Load the graph from file
filename = 'data/USA-road-d.NY.gr'
qqq = load_graph_from_file(filename)

print("Number of nodes:", g.num_vertices())
print("Number of edges:", g.num_edges())


Number of nodes: 369
Number of edges: 441


In [84]:
#openflights,chicago_road, london_transport

In [85]:
g = gt.collection.ns["openflights"]
print(g.edge_properties)

{'weight': <EdgePropertyMap object with value type 'int16_t', for Graph 0x7f8af8106e50, at 0x7f8a4af28d90>, 'layer': <EdgePropertyMap object with value type 'int16_t', for Graph 0x7f8af8106e50, at 0x7f8a4af28590>}


In [86]:
if not g.edge_properties:
    weights = g.new_edge_property("double")
    weights.set_value(1.0)  # Set all values to 1.0
    g.edge_properties["weight"] = weights
    print(g.edge_properties)

In [87]:
if "weight" in g.edge_properties:
    layer_property = g.edge_properties["weight"]
    for i,e in enumerate(g.edges()):
        if i >= 5:
            break
        print("Edge:", e, "weight:", layer_property[e])

Edge: (0, 1) weight: 1
Edge: (0, 352) weight: 1
Edge: (1, 77) weight: 2
Edge: (1, 106) weight: 1
Edge: (1, 219) weight: 1


In [88]:
def from_gt_to_nx(g, wgt = "distance"):
    g_nx = nx.DiGraph()
    
    for v in g.vertices():
        g_nx.add_node(int(v))
    
    for e in g.edges():
        source = int(e.source())
        target = int(e.target())
        weight = g.ep[wgt][e]  
        g_nx.add_edge(source, target, weight=weight)
        #g_nx.add_edge(target, source, weight=weight) #if undirected graph
    return(g_nx)

In [89]:
g_nx = from_gt_to_nx(g, wgt = "distance")

In [90]:
#average_shortest_path_length = nx.average_shortest_path_length(g_nx, weight='weight', method=None)
#print("Average shortest path length:", average_shortest_path_length)

In [91]:
#len([x for x in g.edge_properties["weight"]])

In [92]:
weights = g.edge_properties["distance"]

In [93]:
np.random.seed(42)

k = 20

nodes = list(g.vertices())

sampled_src = random.choices(nodes, k=k)
sampled_indices_src = [int(v) for v in sampled_src]

sampled_tar = random.choices(nodes, k=k)
sampled_indices_tar = [int(v) for v in sampled_tar]

def avg_short_path(g, weights, sampled_indices_src, sampled_indices_tar):
    lst = []
    num_nodes_sp = []
    inf_counter = 0
    for i in range(len(sampled_indices_src)):
        dist, pred = gt.search.dijkstra_search(g, source = sampled_indices_src[i], weight=weights)
        temp = dist[sampled_indices_tar[i]]

        if temp < np.inf:
            lst.append(temp)

            shortest_path = []
            current = sampled_indices_tar[i]
            while current != sampled_indices_src[i]:
                shortest_path.append(current)
                current = pred[current]
            shortest_path.append(sampled_indices_src[i])
            shortest_path.reverse()
            num_nodes_sp.append(len(shortest_path))
            print(len(shortest_path))
        else:
            inf_counter += 1 
            print(sampled_indices_src[i], sampled_indices_tar[i])
        
    print('unreachable targets:', inf_counter)
        
    return lst,num_nodes_sp



In [94]:
%%time
avg_sp,num_nodes_sp = avg_short_path(g, weights, sampled_indices_src, sampled_indices_tar)

10
8
17
24
12
11
18
14
8
11
13
22
13
27
7
16
17
7
24
11
unreachable targets: 0
CPU times: user 52.5 ms, sys: 5 µs, total: 52.5 ms
Wall time: 51.5 ms


In [95]:
print(np.mean(num_nodes_sp),np.std(num_nodes_sp))

14.5 5.852349955359813


In [96]:
print(np.mean(avg_sp),np.std(avg_sp))

14.35 6.035519861619212


In [97]:
dist_dict = nx.shortest_path_length(g_nx, source=None, target=10, weight='weight', method='dijkstra')

In [98]:
def remove_edges_based_on_scores(graph, dist_dict):
    edges_to_remove = []
    for node in graph.nodes():
        try:
            node_score = dist_dict[node]
            for neighbor in graph.neighbors(node):
                try:
                    neighbor_score = dist_dict[neighbor]
                except KeyError: 
                    neighbor_score = np.inf
                if neighbor_score >= node_score:
                    edges_to_remove.append((node, neighbor))
        except KeyError:
            continue
    for edge in edges_to_remove:
        graph.remove_edge(*edge)

G_copy = copy.deepcopy(g_nx)

remove_edges_based_on_scores(G_copy, dist_dict)

In [99]:
len(G_copy.edges()),len(g_nx.edges())

(398, 860)

In [100]:
#G_copy.edges(data=True)

In [101]:
def neg_edge_weights(graph):
    for source, target, data in graph.edges(data=True):
        if 'weight' not in data:
            raise ValueError(f"No weight attribute found for edge ({source}, {target})")
        data['weight'] = -data['weight']

neg_edge_weights(G_copy)
#G_copy.edges(data=True)

In [102]:
#nx.shortest_path_length(G_copy, source=2668, target=1074, weight='weight', method='dijkstra')

In [103]:
#nx.dijkstra_path_length(G_copy, source=2668, target=1074, weight='weight')

In [104]:
#nx.dijkstra_path(G_copy, source=2668, target=1074, weight='weight')

In [105]:
#nx.shortest_path(G_copy, source=2668, target=1074, weight='weight', method='dijkstra')

In [106]:
#nx.shortest_path(g_nx, source=2668, target=1074, weight='weight', method='dijkstra')

In [123]:
def remove_unreachable_nodes(G, source):
    # Get the set of nodes reachable from the source
    reachable_nodes = nx.descendants(G, source)

    # Find nodes that are not reachable
    non_reachable_nodes = set(G.nodes()) - reachable_nodes

    # Remove non-reachable nodes from the graph
    G.remove_nodes_from(non_reachable_nodes)
    
    return G

In [143]:
def avg_longest_forward_path(g_nx, weights, sampled_indices_src, sampled_indices_tar):
    lst = []
    num_nodes = []
    tot_nodes = []
    inf_counter = 0
    
    for i in range(len(sampled_indices_src)):
        #compute distances from the target
        dist_dict = nx.shortest_path_length(g_nx, source=None, target=sampled_indices_tar[i], weight='weight', method='dijkstra')
        #remove non-forward edges
        G_copy = copy.deepcopy(g_nx)
        remove_edges_based_on_scores(G_copy, dist_dict)
        #make edges negative
        neg_edge_weights(G_copy)
        G2 = copy.deepcopy(G_copy)
        #compute longest forward path
        try:
            #dist = nx.shortest_path_length(G_copy, source=sampled_indices_src[i], target=sampled_indices_tar[i], weight='weight', method='dijkstra')
            #dist1 = nx.dijkstra_path_length(G_copy, source=sampled_indices_src[i], target=sampled_indices_tar[i], weight='weight')
            dist,path = nx.single_source_dijkstra(G_copy, source=sampled_indices_src[i], target=sampled_indices_tar[i], weight='weight')
            lst.append(-dist)
            num_nodes.append(len(path))
            print(len(path))

            gg = remove_unreachable_nodes(G2,sampled_indices_src[i])
            print('here',len(gg.nodes()) + 1) # + 1 for the source
            tot_nodes.append(len(gg.nodes()) + 1)
            
        except:
            inf_counter +=1
            print(sampled_indices_src[i], sampled_indices_tar[i])
    print('unreachable targets:', inf_counter)
    
    return lst,num_nodes,tot_nodes



In [144]:
%%time
longest_forward_paths,num_nodes_fp,tot_nodes = avg_longest_forward_path(g_nx, weights, sampled_indices_src, sampled_indices_tar)

10
here 10
8
here 8
17
here 18
24
here 30
12
here 12
11
here 11
18
here 18
14
here 14
8
here 8
11
here 11
13
here 13
22
here 22
13
here 13
27
here 27
7
here 7
17
here 28
17
here 26
7
here 7
24
here 24
11
here 11
unreachable targets: 0
CPU times: user 145 ms, sys: 7 µs, total: 145 ms
Wall time: 144 ms


In [110]:
print(np.mean(num_nodes_fp),np.std(num_nodes_fp))

14.55 5.8691992639541555


In [111]:
print(np.mean(longest_forward_paths),np.std(longest_forward_paths))

14.35 6.035519861619212


In [122]:
len(G_copy.edges()),len(g_nx.edges())

(398, 860)

In [145]:
print('Data: FLA')
print('Nodes:', len(g_nx.nodes()), 'Edges:', len(g_nx.edges()), 'Weighted: Yes')
print('SP length: Mean:', np.round(np.mean(avg_sp),1),'Std:', np.round(np.std(avg_sp),1))
print('LFP length: Mean:', np.round(np.mean(longest_forward_paths),1), 'Std:', np.round(np.std(longest_forward_paths),1))
print('Visited nodes SP', np.round(np.mean(num_nodes_sp),1), 'Std:', np.round(np.std(num_nodes_sp),1))
print('Visited nodes LFP', np.round(np.mean(num_nodes_fp),1), 'Std:', np.round(np.std(num_nodes_fp),1))
print('Visited nodes Total FP', np.round(np.mean(tot_nodes),1), 'Std:', np.round(np.std(tot_nodes),1))





Data: FLA
Nodes: 369 Edges: 860 Weighted: Yes
SP length: Mean: 14.4 Std: 6.0
LFP length: Mean: 14.4 Std: 6.0
Visited nodes SP 14.5 Std: 5.9
Visited nodes LFP 14.6 Std: 5.9
Visited nodes Total FP 15.9 Std: 7.4


In [113]:
np.array(longest_forward_paths)/np.array(avg_sp)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1.])

In [114]:
avg_sp

[10, 7, 16, 24, 11, 10, 18, 13, 7, 15, 16, 22, 12, 26, 6, 16, 19, 6, 23, 10]

In [115]:

weights = g.edge_properties["weight"]
shortest_path = gt.shortest_path(g, source=0, target=33, weights=weights)


AttributeError: module 'graph_tool' has no attribute 'shortest_path'

In [None]:
np.inf ==np.inf