In [9]:
import pandas as pd
import numpy as np
import networkx as nx
import pickle
from sklearn.neighbors import NearestNeighbors
import random

### **Step 7: Load DownSampled Data**

In [3]:
RoadNetwork = pd.read_csv('derived_data/downsampled_road_network.csv')
airports = pd.read_csv('derived_data/processed_airports.csv')

### **Step 8: Graph Creation**

In [4]:
GRAPH_FILE = 'derived_data/road_airport_graph.pkl'

k_road = 10  # Number of nearest neighbors for road-to-road connections
k_airport = 5  # Number of nearest neighbors for road-to-airport connections

# Build graph if no saved file exists
print("Creating graph from scratch...")
G = nx.Graph()

# Add road network nodes
for i, row in RoadNetwork.iterrows():
    G.add_node(i, pos=(row['X'], row['Y']), population=row['population_density'])

Creating graph from scratch...


#### --- Connect road-to-road nodes using k-nearest neighbors ---

In [5]:
road_coords = RoadNetwork[['X', 'Y']].values
nbrs_road = NearestNeighbors(n_neighbors=k_road + 1, algorithm='ball_tree').fit(road_coords)
distances, indices = nbrs_road.kneighbors(road_coords)

print("Connecting road nodes using k-nearest neighbors...")
for i, neighbors in enumerate(indices):
    for j in neighbors[1:]:  # Exclude the node itself
        G.add_edge(i, j, weight=0.5)

Connecting road nodes using k-nearest neighbors...


#### --- Connect road-to-airport nodes using k-nearest neighbors ---

In [6]:
# Add airport nodes
airport_start = len(RoadNetwork)
for i, airport in airports.iterrows():
    G.add_node(airport_start + i, pos=(airport['lon'], airport['lat']), population=airport['population_density'])

# --- Connect road-to-airport nodes using k-nearest neighbors ---
print("Connecting road-to-airport nodes...")
all_coords = np.vstack((road_coords, airports[['lon', 'lat']].values))
road_airport_indices = range(len(road_coords), len(all_coords))
nbrs_airport = NearestNeighbors(n_neighbors=k_airport, algorithm='ball_tree').fit(all_coords)

for i in road_airport_indices:  # Iterate over airport indices
    distances, neighbors = nbrs_airport.kneighbors([all_coords[i]])
    for j in neighbors[0]:
        if j < len(road_coords):  # Only connect airports to road nodes
            G.add_edge(i, j, weight=1)

Connecting road-to-airport nodes...


#### --- Connect airport-to-airport nodes using random sparse creation ---

In [7]:
print("Connecting airport nodes (fully connected, removing 80% of edges)...")
airport_coords = airports[['lon', 'lat']].values
num_airports = len(airport_coords)

# Step 1: Fully connect all airport nodes
airport_indices = range(airport_start, airport_start + num_airports)
for i in airport_indices:
    for j in airport_indices:
        if i != j:  # Avoid self-loops
            G.add_edge(i, j, weight=1)

# Step 2: Remove 80% of the edges randomly
all_airport_edges = list(G.edges(airport_indices))  # Get all edges involving airports
edges_to_remove = random.sample(all_airport_edges, int(0.98 * len(all_airport_edges)))

print(f"Removing {len(edges_to_remove)} edges out of {len(all_airport_edges)} total edges.")
G.remove_edges_from(edges_to_remove)

# Save graph for future runs
with open(GRAPH_FILE, 'wb') as f:
    pickle.dump(G, f)
print("Graph created and saved.")

Connecting airport nodes (fully connected, removing 80% of edges)...
Removing 29464 edges out of 30066 total edges.
Graph created and saved.


### **Step 9: Graph Evaluation**

In [8]:
def measure_graph_connectivity(G):
    # Average Degree
    degrees = [degree for _, degree in G.degree()]
    avg_degree = sum(degrees) / len(degrees)
    
    # Graph Density
    density = nx.density(G)
    
    # Clustering Coefficient
    avg_clustering = nx.average_clustering(G)
    
    # Average Shortest Path Length (only for connected graphs)
    if nx.is_connected(G):
        avg_shortest_path = nx.average_shortest_path_length(G)
    else:
        avg_shortest_path = None  # Not connected
    
    # Number of Connected Components
    num_connected_components = nx.number_connected_components(G)
    
    # Print results
    print("Graph Connectivity Metrics:")
    print(f"Average Degree: {avg_degree:.2f}")
    print(f"Density: {density:.6f}")
    print(f"Average Clustering Coefficient: {avg_clustering:.4f}")
    if avg_shortest_path:
        print(f"Average Shortest Path Length: {avg_shortest_path:.2f}")
    else:
        print("Graph is not fully connected.")
    print(f"Number of Connected Components: {num_connected_components}")

# Call the function
measure_graph_connectivity(G)

Graph Connectivity Metrics:
Average Degree: 10.63
Density: 0.003873
Average Clustering Coefficient: 0.4991
Graph is not fully connected.
Number of Connected Components: 3
