In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
from networkx.algorithms.community import greedy_modularity_communities
from pyvis.network import Network

In [None]:
# %pip install pygraphviz
# %pip install pyvis

In [8]:
output_dir = 'D:/Thesis/files_output_dir/output_utilization/'

In [5]:
# Load data from files
network_df = pd.read_csv('D:/Thesis/dataset/Road/Roadnetwork.txt',delimiter='\s+', header=None, index_col=False)
# road feature
feature_df = pd.read_csv('D:/Thesis/dataset/Road/Roadfeature.txt',delimiter='\s+', header=None, index_col=False) 

  network_df = pd.read_csv('D:/Thesis/dataset/Road/Roadnetwork.txt',delimiter='\s+', header=None, index_col=False)
  feature_df = pd.read_csv('D:/Thesis/dataset/Road/Roadfeature.txt',delimiter='\s+', header=None, index_col=False)


In [6]:
network_df.columns =['road id', 'start_node', 'end_node']
# network_df.head()
feature_df.columns =['road id','lng','lanes','max_spd','dir','lev','tor','n_cn','school', 'ofc', 'bnk', 'mall', 'rest', 'gas_st', 'spot', 'hotel',
       'trans', 'living_ser', 'sum_poi']
# feature_df.head()

In [3]:
def create_road_network(network_df):
    # Create a new directed graph
    G = nx.DiGraph()
    
    # Add edges from the DataFrame
    # for _, row in network_df.iterrows():
    #     G.add_edge(row['start_node'], 
    #               row['end_node'], 
    #               road_id=row['road id'])
        
    ## Graph for level-1 and 2
    for _, row in feature_df.iterrows():
        road_id = row['road id']
        length = row['lng']
        max_speed = row['max_spd']
        lanes = row['lanes']
        poi_sum =  row['sum_poi']
        level = row['lev']
        bi_dirrection = row['dir']
        connections = row['n_cn']

        if level<=2: 
            # Find the corresponding road
            feature = network_df[network_df['road id'] == road_id]
            road_id = feature['road id'].values[0]
            start_node = feature['start_node'].values[0]
            end_node = feature['end_node'].values[0]

            G.add_edge(start_node, end_node, road_id=road_id, weight=np.nan)
    
    connected_components_wk_cg = sorted(nx.weakly_connected_components(G), key=len, reverse=True)
    print(len(connected_components_wk_cg[0]))
    # select the larger subgraph
    sub_graph = G.subgraph(connected_components_wk_cg[0]).copy()
    return sub_graph

def analyze_network_properties(G):
  
    print("\nDetailed Network Analysis:")
    
    # Analyze connectivity
    strongly_connected = list(nx.strongly_connected_components(G))
    print(f"\nStrongly connected components: {len(strongly_connected)}")
    if len(strongly_connected) > 0:
        print(f"Largest strongly connected component size: {len(strongly_connected[0])}")
    
    weakly_connected = list(nx.weakly_connected_components(G))
    print(f"Weakly connected components: {len(weakly_connected)}")
    if len(weakly_connected) > 0:
        print(f"Largest weakly connected component size: {len(weakly_connected[0])}")
    
    # Analyze node degrees
    in_degrees = dict(G.in_degree())
    out_degrees = dict(G.out_degree())
    
    print("\nDegree Statistics:")
    print(f"Maximum in-degree: {max(in_degrees.values())}")
    print(f"Maximum out-degree: {max(out_degrees.values())}")
    
    # Find important nodes
    print("\nImportant Nodes:")
    
    # Nodes with highest in-degree (destination hubs)
    high_in_degree = sorted(in_degrees.items(), key=lambda x: x[1], reverse=True)[:5]
    print("\nTop 5 destination nodes (highest in-degree):")
    for node, degree in high_in_degree:
        print(f"Node {node}: {degree} incoming roads")
    
    # Nodes with highest out-degree (source hubs)
    high_out_degree = sorted(out_degrees.items(), key=lambda x: x[1], reverse=True)[:5]
    print("\nTop 5 source nodes (highest out-degree):")
    for node, degree in high_out_degree:
        print(f"Node {node}: {degree} outgoing roads")
    
    # Calculate betweenness centrality for the largest weakly connected component
    if len(weakly_connected) > 0:
        largest_component = weakly_connected[0]
        subgraph = G.subgraph(largest_component)
        betweenness = nx.betweenness_centrality(subgraph)
        
        print("\nTop 5 nodes by betweenness centrality (most important for traffic flow):")
        top_betweenness = sorted(betweenness.items(), key=lambda x: x[1], reverse=True)[:5]
        for node, centrality in top_betweenness:
            print(f"Node {node}: centrality = {centrality:.3f}")

#

In [None]:
def show_graph_stat(G, output_dir=None):
    # Print basic network statistics
    print("\nNetwork Statistics:")
    print(f"Number of nodes: {G.number_of_nodes()}")
    print(f"Number of edges: {G.number_of_edges()}")
    
    # Calculate and print some network metrics
    print("\nNetwork Metrics:")
    print(f"Average in-degree: {sum(dict(G.in_degree()).values()) / G.number_of_nodes():.2f}")
    print(f"Average out-degree: {sum(dict(G.out_degree()).values()) / G.number_of_nodes():.2f}")
    
    try:
        print(f"Average shortest path length: {nx.average_shortest_path_length(G):.2f}")
    except:
        print("Graph is not strongly connected - cannot compute average shortest path length")

    # Find communities
    communities = list(greedy_modularity_communities(G))
    color_map = []
    for node in G:
        for i, community in enumerate(communities):
            if node in community:
                color_map.append(i)

    # Plot graph with community-based coloring
    pos = nx.spring_layout(G, k=0.1)
    nx.draw(G, pos, node_size=10, node_color=color_map, edge_color="gray", with_labels=False, cmap=plt.cm.tab20)
    plt.show()
    
    ###
    # Create Pyvis network
    net = Network(notebook=True)
    net.from_nx(G)
    net.show("graph.html")


    # pos = nx.nx_agraph.graphviz_layout(G, prog='neato')
    # nx.draw(G, pos, node_size=1, edge_color="gray", with_labels=False)
    # plt.show()
    
    # # Visualize the network
    # plt.figure(figsize=(15, 10))
    # # Use spring layout for node positioning
    # pos = nx.spring_layout(G, k=1, iterations=50)
    
    # # Draw nodes
    # nx.draw_networkx_nodes(G, pos, 
    #                       node_color='lightblue',
    #                       node_size=500,
    #                       alpha=0.6)
    
    # # Draw edges with arrows
    # nx.draw_networkx_edges(G, pos,
    #                       edge_color='gray',
    #                       arrows=True,
    #                       arrowsize=20)
    
    # # Add node labels
    # nx.draw_networkx_labels(G, pos)
    
    # # Add edge labels (road IDs)
    # edge_labels = nx.get_edge_attributes(G, 'road_id')
    # nx.draw_networkx_edge_labels(G, pos,
    #                             edge_labels=edge_labels,
    #                             font_size=8)
    
    # plt.title("Road Network Visualization")
    # plt.axis('off')
    
    # # Save figure if output directory is provided
    # if output_dir:
    #     try:
    #         plt.savefig(f'{output_dir}/road_network.png', 
    #                    dpi=300, bbox_inches='tight', format='png')
    #         print(f"\nPlot saved to {output_dir}/road_network.png")
    #     except Exception as e:
    #         print(f"Error saving figure: {str(e)}")
    
    # plt.show()

In [7]:
G = create_road_network(network_df)

21143


In [9]:
nx.write_graphml(G, output_dir+"graph_1.graphml")

In [None]:
show_graph_stat(G)

In [26]:
analyze_network_properties(G)


Detailed Network Analysis:

Strongly connected components: 7161
Largest strongly connected component size: 1
Weakly connected components: 1
Largest weakly connected component size: 21143

Degree Statistics:
Maximum in-degree: 4
Maximum out-degree: 4

Important Nodes:

Top 5 destination nodes (highest in-degree):
Node 36649: 4 incoming roads
Node 2579: 3 incoming roads
Node 3622: 3 incoming roads
Node 3987: 3 incoming roads
Node 3993: 3 incoming roads

Top 5 source nodes (highest out-degree):
Node 20687: 4 outgoing roads
Node 2517: 3 outgoing roads
Node 2518: 3 outgoing roads
Node 2600: 3 outgoing roads
Node 2890: 3 outgoing roads

Top 5 nodes by betweenness centrality (most important for traffic flow):
Node 68489: centrality = 0.044
Node 68488: centrality = 0.044
Node 5018: centrality = 0.042
Node 20620: centrality = 0.041
Node 20621: centrality = 0.041
