### IMPORTS

In [None]:
# IMPORTS=
import networkx as nx
import matplotlib.pyplot as plt
import folium

# MY_IMPORTS=
from src.Models import Graph as G
from src.Models import NXGraph as NXG

### GRAPH AND NXGRAPH OBJECTS INITIALIZATION + METRICS EXTRACTION

In [None]:
# Build the project's Graph model from the railway CSV, then print the
# attribute and metric reports the model exposes.
railway_csv_path = 'resources/data/input/railway.csv'
graph = G.Graph(railway_csv_path)
graph.print_attributes()
graph.print_metrics()

In [None]:
# Turn the Graph model into `gnx` through the project's NXGraph loader, then
# print the metrics the NXGraph module reports for it.
# NOTE(review): `gnx` is later passed to networkx functions, so the loader
# presumably returns a networkx graph - confirm in src.Models.NXGraph.
gnx = NXG.nxgraph_loader(graph)
NXG.print_metrics(gnx)

### GRAPH CONVERSION

In [None]:
# Write the Graph model to a JSON file under the output directory.
graph_json_path = 'resources/data/output/graph.json'
graph.toJSON(graph_json_path)

# Loading the graph back from a JSON file is left as a placeholder in this
# notebook; no code exists for it here yet.

In [None]:
# Round-trip the NetworkX graph through GML using the project's NXGraph
# helpers: once as a string, once as a file, then reload it from that file.
gml_output_path = 'resources/data/output/nxgraph.gml'

# Convert the NetworkX graph to a GML string (kept in `gml_string`).
gml_string = NXG.nx_to_gml_string(gnx)

# Save the NetworkX graph as a GML file.
NXG.nx_to_gml_file(gnx, gml_output_path)

# Load the NetworkX graph back from the GML FILE written above; this rebinds
# `gnx` to the reloaded graph.
gnx = NXG.gml_file_to_nx(gml_output_path)
# Alternative, loading from the string instead:
# gnx = NXG.gml_string_to_nx(gml_string)

### METRICS VISUALIZATION

In [None]:
# Split the graph into connected components and report how many there are.
components = list(nx.connected_components(gnx))
amount_of_components = len(components)
print(f'Amount of components in the NetworkX graph: {amount_of_components}')

# Show the components three per figure, one component per subplot. Nodes are
# placed at their (lon, lat) attributes. A final figure with fewer than three
# components simply leaves the remaining subplots empty.
for first_index in range(0, amount_of_components, 3):
    fig, ax = plt.subplots(1, 3, figsize=(20, 5))
    for column, component in enumerate(components[first_index:first_index + 3]):
        subgraph = gnx.subgraph(component)
        pos = {
            node_id: (attrs['lon'], attrs['lat'])
            for node_id, attrs in subgraph.nodes(data=True)
        }
        nx.draw_networkx_nodes(subgraph, pos, node_size=1, node_color='red', ax=ax[column])
        nx.draw_networkx_edges(subgraph, pos, edge_color='black', width=0.2, ax=ax[column])
        ax[column].set_title(f'Component {first_index + column}')
    plt.show()

In [None]:
# Degree distribution: count how many nodes have each degree and show the
# counts as a bar chart.
from collections import Counter

# Degrees of all nodes, highest first.
degree_sequence = sorted((d for n, d in gnx.degree()), reverse=True)
# Counter is a dict subclass, so `degreeCount[degree]` still works as before.
degreeCount = Counter(degree_sequence)

fig, ax = plt.subplots()
if degreeCount:
    deg, cnt = zip(*degreeCount.items())
    ax.bar(deg, cnt, width=0.80, color='b')
    # Bars are centred on their x value by default, so the ticks go exactly on
    # the degree values (the former `d + 0.4` offset pushed every label to the
    # right edge of its bar).
    ax.set_xticks(deg)
    ax.set_xticklabels(deg)
else:
    # An empty graph has no degrees; keep the names defined and draw an empty
    # chart instead of failing on the tuple unpacking.
    deg, cnt = (), ()
ax.set_title("Degree Histogram")
ax.set_ylabel("Count")
ax.set_xlabel("Degree")
plt.show()

### VISUALIZATION


In [None]:
# Draw the whole network geographically: every node is placed at its
# (lon, lat) attribute pair, nodes as small red dots and edges as thin
# black lines.
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}
node_style = {'node_size': 1, 'node_color': 'red'}
edge_style = {'edge_color': 'black', 'width': 0.2}
nx.draw_networkx_nodes(gnx, pos, **node_style)
nx.draw_networkx_edges(gnx, pos, **edge_style)
# No explicit plt.show() call is made in this cell.

In [None]:
# Overlay the railway graph on an outline map read with geopandas.
import geopandas as gpd

china_json_path = 'resources/data/input/china.json'
china_map = gpd.read_file(china_json_path)

# Plot the base map first. The networkx calls below are given no `ax`, so
# they draw on the current Matplotlib axes.
# NOTE(review): this presumably is the axes the map plot just created - confirm.
china_map.plot()

# Node positions are (lon, lat) pairs taken from the node attributes.
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}
nx.draw_networkx_nodes(gnx, pos, node_size=1, node_color='red')
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)
plt.show()

In [None]:
# Rebuild `gnx` from scratch out of the Graph model: one NetworkX node per
# model node (carrying lat/lon) and one edge per model edge (carrying the
# coordinates of both endpoints).
gnx = nx.Graph()
for node in graph.nodes:
    gnx.add_node(node.id, lat=node.position.lat, lon=node.position.lon)
for edge in graph.edges:
    src, dst = edge.fromNode, edge.destNode
    gnx.add_edge(
        src.id,
        dst.id,
        fromLat=src.position.lat,
        fromLon=src.position.lon,
        destLat=dst.position.lat,
        destLon=dst.position.lon,
    )

# Draw the full network as a faint grey/black background layer.
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}
nx.draw_networkx_nodes(gnx, pos, node_size=0.2, node_color='grey')
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)


# Build, for each of the 20 selected trains, the list of edges joining its
# consecutive stops. The result is `top20paths`: one list of G.Edge per train.

# Sort the df by st_no, highest first, and keep the first 20 distinct trains
# met in that order (unique() preserves order of first appearance).
graph.df.sort_values(by=['st_no'], inplace=True, ascending=False)
top20trains = list(graph.df['train'].unique()[:20])

# Re-sort so that each train's rows are contiguous and ordered by st_no.
graph.df.sort_values(by=['train', 'st_no'], inplace=True, ascending=True)

top20paths = []
top20_rows = graph.df[graph.df['train'].isin(top20trains)]
# sort=False keeps the groups in the order the sorted frame already has.
for current_train, train_rows in top20_rows.groupby('train', sort=False):
    edges = []
    # The previous stop is tracked explicitly. Looking it up with
    # `iloc[index - 1]` is wrong here: after sort_values the row label `index`
    # is no longer guaranteed to equal the row's position.
    previous_row = None
    for index, row in train_rows.iterrows():
        # Skip the pair when both rows have '-' as stay_time
        # (presumably stations the train does not stop at - TODO confirm).
        if previous_row is not None and not (row['stay_time'] == '-' and previous_row['stay_time'] == '-'):
            fromNode = G.Node(previous_row['st_id'], G.Position(previous_row['lat'], previous_row['lon']), [])
            destNode = G.Node(row['st_id'], G.Position(row['lat'], row['lon']), [])
            edge = G.Edge(index, fromNode, destNode, row['mileage'], 0)
            edges.append(edge)
        previous_row = row
    # Append inside the loop so every train, including the last one, gets its
    # path stored (the former append-on-train-change logic dropped the last).
    top20paths.append(edges)

# Draw every selected train path as its own small graph, cycling through a
# fixed palette so neighbouring paths get different colours.
networkx_colors = ['yellow', 'green', 'cyan', 'red']
for path_number, path_edges in enumerate(top20paths):
    color = networkx_colors[path_number % len(networkx_colors)]
    gnx2 = nx.Graph()
    for edge in path_edges:
        src, dst = edge.fromNode, edge.destNode
        gnx2.add_node(src.id, lat=src.position.lat, lon=src.position.lon)
        gnx2.add_node(dst.id, lat=dst.position.lat, lon=dst.position.lon)
        gnx2.add_edge(
            src.id,
            dst.id,
            fromLat=src.position.lat,
            fromLon=src.position.lon,
            destLat=dst.position.lat,
            destLon=dst.position.lon,
        )
    # Positions are (lon, lat) pairs read back from the node attributes.
    pos2 = {
        node_id: (attrs['lon'], attrs['lat'])
        for node_id, attrs in gnx2.nodes(data=True)
    }
    nx.draw_networkx_nodes(gnx2, pos2, node_size=2, node_color=color)
    nx.draw_networkx_edges(gnx2, pos2, edge_color=color, width=1)
plt.show()

# FOLIUM

In [None]:
# Build an interactive folium map of the railway network (centred on
# [35, 105], zoom 6) with one layer for nodes and one for edges, then save it
# as an HTML file.
# The map object is named `railway_map` rather than `map` so the builtin
# map() is not shadowed for the rest of the session.
# NOTE(review): recent folium releases may no longer bundle the
# 'Stamen Terrain' tile set - confirm against the installed version.
railway_map = folium.Map(tiles='Stamen Terrain', width='100%', height='100%', zoom_start=6, location=[35, 105])
nodes = folium.FeatureGroup(name='Nodes')
edges = folium.FeatureGroup(name='Edges')

# ADD NODES TO FOLIUM:
for node in graph.nodes:
    position = node.position
    # Skip nodes without a position or with a missing coordinate, mirroring
    # the check applied to edges below.
    if position is None or position.lat is None or position.lon is None:
        continue
    # Add a marker for each node, with its id in the popup:
    popup = folium.Popup('<b>Node ID:</b> ' + str(node.id), max_width=200)
    nodes.add_child(folium.CircleMarker(location=[position.lat, position.lon], radius=7, popup=popup, fill_color='red', color='black', fill_opacity=0.7))

# ADD EDGES TO FOLIUM:
for edge in graph.edges:
    from_position = edge.fromNode.position
    dest_position = edge.destNode.position
    # Test the position objects BEFORE reading .lat/.lon: a node whose
    # position is None would otherwise raise AttributeError here.
    if from_position is None or dest_position is None:
        continue
    ptA = [from_position.lat, from_position.lon]
    ptB = [dest_position.lat, dest_position.lon]
    # If either endpoint has a missing coordinate, do not add the edge:
    if None in ptA or None in ptB:
        continue
    # Add a line for each edge:
    edges.add_child(folium.PolyLine(locations=[ptA, ptB], weight=1, color='black', opacity=0.3))

# Add the feature groups to the map (edges first, then nodes):
railway_map.add_child(edges)
railway_map.add_child(nodes)
# Add a layer control to the map:
folium.LayerControl().add_to(railway_map)
# Save the map to an HTML file:
railway_map.save('resources/data/output/map.html')

# CLUSTERING

In [None]:
# Rebuild `gnx` from the Graph model, then colour its nodes by KMeans cluster.
from sklearn.cluster import KMeans

gnx = nx.Graph()
for node in graph.nodes:
    gnx.add_node(node.id, lat=node.position.lat, lon=node.position.lon)
for edge in graph.edges:
    src, dst = edge.fromNode, edge.destNode
    gnx.add_edge(
        src.id,
        dst.id,
        fromLat=src.position.lat,
        fromLon=src.position.lon,
        destLat=dst.position.lat,
        destLon=dst.position.lon,
    )
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}

# Cluster the (lon, lat) pairs into 10 groups. `pos` follows the graph's node
# order, so the i-th label colours the i-th drawn node.
coordinates = list(pos.values())
kmeans = KMeans(n_clusters=10, random_state=0).fit(coordinates)
clusters = kmeans.predict(coordinates)
nx.draw_networkx_nodes(gnx, pos, node_size=1, node_color=clusters, cmap=plt.cm.jet)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)
plt.show()

In [None]:
# Colour the graph's nodes by DBSCAN cluster, reusing the `pos` mapping of
# node id -> (lon, lat) computed earlier in the notebook.
from sklearn.cluster import DBSCAN

coordinates = list(pos.values())
dbscan = DBSCAN(eps=0.5, min_samples=10).fit(coordinates)
# One label per input point, in the same order as `coordinates`.
clusters = dbscan.labels_
nx.draw_networkx_nodes(gnx, pos, node_size=1, node_color=clusters, cmap=plt.cm.jet)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)
plt.show()

In [None]:
# Colour the graph's nodes by AgglomerativeClustering cluster (10 clusters),
# reusing the `pos` mapping of node id -> (lon, lat) computed earlier.
from sklearn.cluster import AgglomerativeClustering

coordinates = list(pos.values())
agg = AgglomerativeClustering(n_clusters=10).fit(coordinates)
# One label per input point, in the same order as `coordinates`.
clusters = agg.labels_
nx.draw_networkx_nodes(gnx, pos, node_size=1, node_color=clusters, cmap=plt.cm.jet)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)
plt.show()

In [None]:
# Colour the graph's nodes by SpectralClustering cluster (10 clusters, with a
# nearest-neighbours affinity), reusing the `pos` mapping of
# node id -> (lon, lat) computed earlier.
from sklearn.cluster import SpectralClustering

coordinates = list(pos.values())
spec = SpectralClustering(n_clusters=10, affinity='nearest_neighbors').fit(coordinates)
# One label per input point, in the same order as `coordinates`.
clusters = spec.labels_
nx.draw_networkx_nodes(gnx, pos, node_size=1, node_color=clusters, cmap=plt.cm.jet)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)
plt.show()

# DEGREE CENTRALITY

In [None]:
# Find the node with the highest degree centrality. On ties, the first node
# in the centrality mapping's order wins.
degree_centrality = nx.degree_centrality(gnx)
max_degree_centrality = max(degree_centrality.values())
max_degree_centrality_node = max(degree_centrality, key=degree_centrality.get)
print('Node with highest degree centrality:', max_degree_centrality_node)

# Remove that node from a copy of the graph, reporting the node count before
# and after; `gnx` itself is left untouched.
gnx2 = gnx.copy()
print(f'Amount of nodes in the NetworkX graph: {gnx2.number_of_nodes()}, before removing the node with the highest degree centrality.')
gnx2.remove_node(max_degree_centrality_node)
print(f'Amount of nodes in the NetworkX graph: {gnx2.number_of_nodes()}, after removing the node with the highest degree centrality.')

# Plot all connected components of the reduced graph, three per figure.
components = list(nx.connected_components(gnx2))
amount_of_components = len(components)
print(f'Amount of components in the NetworkX graph: {amount_of_components}')
for first_index in range(0, amount_of_components, 3):
    fig, ax = plt.subplots(1, 3, figsize=(20, 5))
    for column, component in enumerate(components[first_index:first_index + 3]):
        subgraph = gnx2.subgraph(component)
        pos = {
            node_id: (attrs['lon'], attrs['lat'])
            for node_id, attrs in subgraph.nodes(data=True)
        }
        nx.draw_networkx_nodes(subgraph, pos, node_size=1, node_color='red', ax=ax[column])
        nx.draw_networkx_edges(subgraph, pos, edge_color='black', width=0.2, ax=ax[column])
        ax[column].set_title(f'Component {first_index + column}')
    plt.show()

In [None]:
# Plot the full graph in grey and mark the node with the highest degree
# centrality with a large red star outlined in white.
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}
highlight_style = {
    'node_size': 300,
    'node_color': 'red',
    'node_shape': '*',
    'edgecolors': 'white',
    'linewidths': 1,
    'label': 'Node with highest degree centrality',
}
nx.draw_networkx_nodes(gnx, pos, nodelist=gnx.nodes, node_size=1, node_color='grey')
nx.draw_networkx_nodes(gnx, pos, nodelist=[max_degree_centrality_node], **highlight_style)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)

# BETWEENNESS CENTRALITY

In [None]:
# Find the node with the highest betweenness centrality. On ties, the first
# node in the centrality mapping's order wins.
betweenness_centrality = nx.betweenness_centrality(gnx)
max_betweenness_centrality = max(betweenness_centrality.values())
max_betweenness_centrality_node = max(betweenness_centrality, key=betweenness_centrality.get)
print('Node with highest betweenness centrality:', max_betweenness_centrality_node)

In [None]:
# Remove the node with the highest betweenness centrality from a copy of the
# graph, reporting the node count before and after; `gnx` is left untouched.
gnx3 = gnx.copy()
print(f'Amount of nodes in the NetworkX graph: {gnx3.number_of_nodes()}, before removing the node with the highest betweenness centrality.')
gnx3.remove_node(max_betweenness_centrality_node)
print(f'Amount of nodes in the NetworkX graph: {gnx3.number_of_nodes()}, after removing the node with the highest betweenness centrality.')

# Plot all connected components of the reduced graph, three per figure.
components = list(nx.connected_components(gnx3))
amount_of_components = len(components)
print(f'Amount of components in the NetworkX graph: {amount_of_components}')
for first_index in range(0, amount_of_components, 3):
    fig, ax = plt.subplots(1, 3, figsize=(20, 5))
    for column, component in enumerate(components[first_index:first_index + 3]):
        subgraph = gnx3.subgraph(component)
        pos = {
            node_id: (attrs['lon'], attrs['lat'])
            for node_id, attrs in subgraph.nodes(data=True)
        }
        nx.draw_networkx_nodes(subgraph, pos, node_size=1, node_color='red', ax=ax[column])
        nx.draw_networkx_edges(subgraph, pos, edge_color='black', width=0.2, ax=ax[column])
        ax[column].set_title(f'Component {first_index + column}')
    plt.show()

In [None]:
# Plot the full graph in grey and mark the node with the highest betweenness
# centrality with a large red star outlined in white.
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}
highlight_style = {
    'node_size': 300,
    'node_color': 'red',
    'node_shape': '*',
    'edgecolors': 'white',
    'linewidths': 1,
    'label': 'Node with highest betweenness centrality',
}
nx.draw_networkx_nodes(gnx, pos, nodelist=gnx.nodes, node_size=1, node_color='grey')
nx.draw_networkx_nodes(gnx, pos, nodelist=[max_betweenness_centrality_node], **highlight_style)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)

In [None]:
# Find the node with the highest closeness centrality. On ties, the first
# node in the centrality mapping's order wins.
closeness_centrality = nx.closeness_centrality(gnx)
max_closeness_centrality = max(closeness_centrality.values())
max_closeness_centrality_node = max(closeness_centrality, key=closeness_centrality.get)
print('Node with highest closeness centrality:', max_closeness_centrality_node)

In [None]:
# Remove the node with the highest closeness centrality from a copy of the
# graph, reporting the node count before and after; `gnx` is left untouched.
gnx4 = gnx.copy()
print(f'Amount of nodes in the NetworkX graph: {gnx4.number_of_nodes()}, before removing the node with the highest closeness centrality.')
gnx4.remove_node(max_closeness_centrality_node)
print(f'Amount of nodes in the NetworkX graph: {gnx4.number_of_nodes()}, after removing the node with the highest closeness centrality.')

# Plot all connected components of the reduced graph, three per figure.
components = list(nx.connected_components(gnx4))
amount_of_components = len(components)
print(f'Amount of components in the NetworkX graph: {amount_of_components}')
for first_index in range(0, amount_of_components, 3):
    fig, ax = plt.subplots(1, 3, figsize=(20, 5))
    for column, component in enumerate(components[first_index:first_index + 3]):
        subgraph = gnx4.subgraph(component)
        pos = {
            node_id: (attrs['lon'], attrs['lat'])
            for node_id, attrs in subgraph.nodes(data=True)
        }
        nx.draw_networkx_nodes(subgraph, pos, node_size=1, node_color='red', ax=ax[column])
        nx.draw_networkx_edges(subgraph, pos, edge_color='black', width=0.2, ax=ax[column])
        ax[column].set_title(f'Component {first_index + column}')
    plt.show()

In [None]:
# Plot the full graph in grey and mark the node with the highest closeness
# centrality with a large red star outlined in white.
pos = {
    node_id: (attrs['lon'], attrs['lat'])
    for node_id, attrs in gnx.nodes(data=True)
}
highlight_style = {
    'node_size': 300,
    'node_color': 'red',
    'node_shape': '*',
    'edgecolors': 'white',
    'linewidths': 1,
    'label': 'Node with highest closeness centrality',
}
nx.draw_networkx_nodes(gnx, pos, nodelist=gnx.nodes, node_size=1, node_color='grey')
nx.draw_networkx_nodes(gnx, pos, nodelist=[max_closeness_centrality_node], **highlight_style)
nx.draw_networkx_edges(gnx, pos, edge_color='black', width=0.2)

We notice that the node with the highest closeness centrality is the same as the node with the highest betweenness centrality.

In [None]:
# Sum up the centrality measures: print the top node found for each one.
centrality_winners = (
    ('Node with highest degree centrality:', max_degree_centrality_node),
    ('Node with highest betweenness centrality:', max_betweenness_centrality_node),
    ('Node with highest closeness centrality:', max_closeness_centrality_node),
)
for summary_label, winning_node in centrality_winners:
    print(summary_label, winning_node)