In [1]:
import networkx as nx
from pyvis.network import Network
import random
import os

In [41]:

def load_random_graph(directory):
    """Load one randomly chosen GraphML file from ``directory``.

    Args:
        directory: Folder that is searched (non-recursively) for entries whose
            name ends with ``.graphml``.

    Returns:
        The graph returned by ``nx.read_graphml`` for the chosen file.

    Raises:
        FileNotFoundError: If ``directory`` holds no ``.graphml`` entry.
    """
    # os.listdir() gives no ordering guarantee; sorting makes the pick
    # reproducible whenever the caller seeds the ``random`` module.
    files = sorted(file for file in os.listdir(directory) if file.endswith('.graphml'))  # or other format
    if not files:
        # random.choice([]) would otherwise fail with an opaque IndexError.
        raise FileNotFoundError(f'No .graphml files found in {directory}')
    random_file = random.choice(files)
    path = os.path.join(directory, random_file)
    print(f'Loading graph from {path}')
    return nx.read_graphml(path)  # Change this according to your graph format

def get_root_nodes(G):
    """Return the nodes of ``G`` whose in-degree is zero, in ``G.in_degree()`` order."""
    roots = []
    for candidate, incoming in G.in_degree():
        if incoming == 0:
            roots.append(candidate)
    return roots

def plot_graph_with_sccs(G, sccs, output_path='graph_viz_sccs.html'):
    """Render ``G`` with pyvis, giving each strongly connected component its own colour.

    Args:
        G: Graph exposing ``nodes`` and ``edges``; each edge must be indexable
            so that items 0 and 1 are its endpoints.
        sccs: Iterable of node collections. Every node of one collection gets
            the same random colour; all of them must be nodes of ``G``.
        output_path: HTML file handed to ``net.show``. Defaults to the file
            name this function has always written.

    Returns:
        The value returned by ``net.show(output_path)``, so that a notebook
        cell ending with this call can display it.
        NOTE(review): presumably an IFrame in notebook mode - confirm against
        the installed pyvis version.
    """
    net = Network(height='750px', width='100%', bgcolor='#222222', font_color='white', notebook=True)

    # Add nodes and edges
    for node in G.nodes:
        net.add_node(node, label=str(node), title=str(node))

    for edge in G.edges:
        net.add_edge(edge[0], edge[1])

    # One random colour per SCC, applied to every node of that SCC.
    for scc in sccs:
        # Zero-pad to six hex digits: hex() drops leading zeros, which yielded
        # malformed colours such as '#abc12' for small random values.
        color = f'#{random.randint(0, 0xFFFFFF):06x}'
        for node in scc:
            net.get_node(node)['color'] = color

    # The former net.from_nx(G) call was dropped: every node and edge of G has
    # already been added above.
    # NOTE(review): from_nx also appears to write default attributes back into
    # G - confirm against the installed pyvis version.
    return net.show(output_path)

def plot_graph_with_roots(G, roots, output_path='graph_viz_roots.html'):
    """Render ``G`` with pyvis and paint the given root nodes red.

    Args:
        G: Graph exposing ``nodes`` and ``edges``; each edge must be indexable
            so that items 0 and 1 are its endpoints.
        roots: Iterable of nodes of ``G`` to highlight.
        output_path: HTML file handed to ``net.show``. The default is distinct
            from the one used by the SCC plot; previously both plots wrote to
            'graph_viz_sccs.html', so this one overwrote the SCC rendering.

    Returns:
        The value returned by ``net.show(output_path)``, so that a notebook
        cell ending with this call can display it.
        NOTE(review): presumably an IFrame in notebook mode - confirm against
        the installed pyvis version.
    """
    net = Network(height='750px', width='100%', bgcolor='#222222', font_color='white', notebook=True)

    # Add nodes and edges
    for node in G.nodes:
        net.add_node(node, label=str(node), title=str(node))

    for edge in G.edges:
        net.add_edge(edge[0], edge[1])

    for node in roots:
        net.get_node(node)['color'] = 'red'

    # The former net.from_nx(G) call was dropped: every node and edge of G has
    # already been added above.
    # NOTE(review): from_nx also appears to write default attributes back into
    # G - confirm against the installed pyvis version.
    return net.show(output_path)

def get_target_nodes(root, G):
    """Return the nodes of ``G`` whose ``'cat'`` attribute equals the string ``'1'``.

    Args:
        root: Unused; kept only so existing calls keep working.
        G: Graph whose ``nodes(data=True)`` yields ``(node, attributes)`` pairs.

    Returns:
        List of matching nodes, in ``G.nodes(data=True)`` order. Nodes without
        a ``'cat'`` attribute are treated as non-targets instead of raising
        ``KeyError``.
    """
    # NOTE(review): the comparison is against the string '1'; presumably the
    # attribute is loaded as text from the GraphML file - confirm.
    target_nodes = [node for node, attributes in G.nodes(data=True) if attributes.get('cat') == '1']
    return target_nodes





In [53]:
# Folder searched for .graphml files; load_random_graph picks one of them at random.
directory = '/home/cyril/ssh-rlkex/Generated_Graphs/output/basic/V_6_8_P1/24'
G = load_random_graph(directory=directory)

# Strongly connected components, stored as a list because later cells both
# call len() on it and iterate over it.
sccs = [component for component in nx.strongly_connected_components(G)]



Loading graph from /home/cyril/ssh-rlkex/Generated_Graphs/output/basic/V_6_8_P1/24/26398-1643890740.graphml


In [54]:
#Detect if there are any cycles
# A directed graph is acyclic only if every SCC is a single node AND no node
# has a self-loop: a self-looping node still forms a one-node SCC, so the SCC
# count alone would miss that cycle.
if len(sccs) == len(G.nodes) and nx.number_of_selfloops(G) == 0:
    print("No cycles detected")
else:
    print("Cycles detected")

Cycles detected


In [55]:
# Sanity check: flattening all SCCs should give back exactly as many nodes as
# the graph contains.
scc_nodes = [member for component in sccs for member in component]
print(
    f'Number of nodes in the graph: {len(G.nodes)}',
    f'Number of nodes in the sccs: {len(scc_nodes)}',
    f'Number of SCCs: {len(sccs)}',
    sep='\n',
)

Number of nodes in the graph: 2770
Number of nodes in the sccs: 2770
Number of SCCs: 2659


In [56]:
#For each scc, check whether any edge enters it from outside the scc.
#An scc without such an edge is a root scc; one of its nodes represents it.

def get_root_nodes_from_scc(sccs, G):
    """Return one representative node per SCC that has no incoming edge from outside.

    Treating each SCC as a single node, these are the SCCs with in-degree zero.

    Args:
        sccs: Iterable of node collections, one per strongly connected component.
        G: Directed graph exposing ``predecessors(node)``.

    Returns:
        List with the first node (in iteration order) of every SCC none of
        whose members has a predecessor outside that SCC. Empty SCCs
        contribute nothing.
    """
    root_nodes = []
    for scc in sccs:
        members = list(scc)
        if not members:
            continue
        member_set = set(members)
        # Every member must be checked: a single member without external
        # predecessors does not make the SCC a root when another member is
        # entered from outside.
        has_incoming_edge = any(
            predecessor not in member_set
            for node in members
            for predecessor in G.predecessors(node)
        )
        if not has_incoming_edge:
            # All members of an SCC reach each other, so any of them can
            # stand for the component; the first is used.
            root_nodes.append(members[0])
    return root_nodes

# One representative node per root SCC, printed and then highlighted in the plot.
root_nodes = get_root_nodes_from_scc(sccs=sccs, G=G)
print(root_nodes)
plot_graph_with_roots(G=G, roots=root_nodes)

['n50', 'n1336', 'n1338', 'n1339', 'n1342', 'n1345', 'n1347', 'n1349', 'n1352', 'n1354', 'n1357', 'n1360', 'n1367', 'n1370', 'n1373', 'n1375', 'n1378', 'n1380', 'n1382', 'n1385', 'n1389', 'n1393', 'n1401', 'n1404', 'n1407', 'n1409', 'n1411', 'n1861', 'n1923', 'n1925', 'n1926', 'n1928', 'n1931', 'n1934', 'n1942', 'n1963', 'n2014', 'n2021', 'n2027', 'n2037', 'n2067', 'n2069', 'n2072', 'n2073', 'n2074', 'n2076', 'n2130', 'n2131', 'n2132', 'n2133', 'n2134', 'n2135', 'n2136', 'n2137', 'n2213', 'n2214', 'n2221', 'n2414', 'n2449', 'n2450', 'n2451', 'n2452', 'n2453', 'n2611']
graph_viz_sccs.html


In [58]:
# Sanity check: every node of the graph (not only the targets) should be
# reachable from at least one root node. If the two numbers printed below
# differ, some node cannot be reached from any root.

# Union of the BFS trees of all roots; the dict is used as a set of visited nodes.
visited_nodes = {}
for root in root_nodes:
    for node in [root, *nx.bfs_tree(G, root).nodes]:
        visited_nodes.setdefault(node, True)

nb_of_reachable_nodes = len(visited_nodes)
print(
    f'Number of reachable nodes: {nb_of_reachable_nodes}',
    f'Number of nodes in the graph: {len(G.nodes)}',
    sep='\n',
)


Number of reachable nodes: 2770
Number of nodes in the graph: 2770


In [40]:
# Colour every strongly connected component of the loaded graph.
plot_graph_with_sccs(G=G, sccs=sccs)

graph_viz_sccs.html


In [None]:
import json
G = load_random_graph(directory)
nb_nodes_before = len(G.nodes)
# Take all root nodes and all targets, and keep only the nodes that lie on a
# shortest path from some root to some target.
root_nodes = get_root_nodes(G)
target_nodes = get_target_nodes(root_nodes, G)
print('Root nodes: ', root_nodes)
print('Target nodes: ', target_nodes)
nodes_to_keep = []
for root in root_nodes:
    for target in target_nodes:
        print(f'Computing shortest path between {root} and {target}')
        try:
            path = nx.shortest_path(G, root, target)
        except nx.NetworkXNoPath:
            # Not every root has to reach every target; skip the pair instead
            # of aborting the whole cell.
            print(f'No path between {root} and {target}')
            continue
        nodes_to_keep.extend(path)
# Paths overlap, so drop duplicates (keeping first-seen order); otherwise the
# count printed below would exceed the number of nodes actually kept.
nodes_to_keep = list(dict.fromkeys(nodes_to_keep))
G = G.subgraph(nodes_to_keep)

#print number of kept nodes over total number of nodes
print('Kept nodes: ', len(nodes_to_keep))
print('Total nodes: ', nb_nodes_before)


# Convert the graph to a format that Cytoscape.js can understand
cytoscape_json = nx.cytoscape_data(G)['elements']

# Save the graph data to a JSON file
with open('graph_data.json', 'w') as f:
    json.dump(cytoscape_json, f)