In [41]:
import networkx as nx
import numpy as np

edge_path = '../data/sub_graph_5.csv'
data = np.genfromtxt(edge_path, delimiter=',', dtype=str)
data=[list( map(int,i) ) for i in data]
G = nx.DiGraph()
for row in data:
    source_node, target_node = row
    G.add_edge(source_node, target_node)

# Centrality
* find major hubs
* degree centrality: network is directly connected to many others
* closeness centrality: close to many others indirectly
* betweenness centrality: key broker between many other nodes

* highly centralized network is one in which a small number of people or organizations have a larger than proportional share of the connections
    * e.g. Do a few organizations serve as a central hub, with outsized influence? Is the network decentralized, with more equal access to the network’s resources and information? 

* reference link: https://visiblenetworklabs.com/2021/04/16/understanding-network-centrality/
* reference link: https://bootcamp.uxdesign.cc/uncovering-hidden-patterns-in-network-data-an-introduction-to-graph-analytics-1d371680928b

## Degree Centrality
* number of connections each node has in the network
* most central node is directly connected; least central node is connected to one other node
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.degree_centrality.html

In [42]:
import math

In [43]:
def degree_centrality_node(G):
    degree_centrality = nx.degree_centrality(G)
    indegree_centrality = nx.in_degree_centrality(G)
    outdegree_centrality = nx.out_degree_centrality(G)

    max_degree_centrality = -math.inf
    max_degree_node = 0

    max_indegree_centrality = -math.inf
    max_indegree_node = 0

    max_outdegree_centrality = -math.inf
    max_outdegree_node = 0

    for i,(k,v) in enumerate(degree_centrality.items()):
        if max_degree_centrality<v:
            max_degree_centrality = v
            max_degree_node = k

    for i,(k,v) in enumerate(indegree_centrality.items()):
        if max_indegree_centrality<v:
            max_indegree_centrality = v
            max_indegree_node = k 
    
    for i,(k,v) in enumerate(outdegree_centrality.items()):
        if max_outdegree_centrality<v:
            max_outdegree_centrality = v
            max_outdegree_node = k 
    
    print(f'Max Degree Centrality Node:',max_degree_node,'Degree Centrality:',degree_centrality[max_degree_node])
    print(f'Max Indegree Centrality Node:',max_indegree_node,'Indegree Centrality:',indegree_centrality[max_indegree_node])
    print(f'Max Outdegree Centrality Node:',max_outdegree_node,'Max Outdegree Centrality:',outdegree_centrality[max_outdegree_node])

In [44]:
degree_centrality_node(G)


Max Degree Centrality Node: 291898 Degree Centrality: 0.25725725725725723
Max Indegree Centrality Node: 9107 Indegree Centrality: 0.013013013013013013
Max Outdegree Centrality Node: 291898 Max Outdegree Centrality: 0.25625625625625625


## Closeness Centrality
* closeness or distance to others in the network
* more central nodes can communicate more quickly and easily with others
* more central nodes have low closeness centrality scores and do not have to travel as far along paths 
* nodes with high closeness centrality scores are less central and have to travel farther along paths
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.closeness_centrality.html#networkx.algorithms.centrality.closeness_centrality

In [45]:
def closeness_centrality_node(G):
    closeness_centrality = nx.closeness_centrality(G)
    
    min_closeness_centrality = math.inf
    min_closeness_node = 0

    for i,(k,v) in enumerate(closeness_centrality.items()):
        if min_closeness_centrality>v:
            min_closeness_centrality = v
            min_closeness_node = k
    
    print(f'Min Closeness Centrality Node:',min_closeness_node,'Closeness Centrality:',closeness_centrality[min_closeness_node])

In [46]:
closeness_centrality_node(G)

Min Closeness Centrality Node: 4135 Closeness Centrality: 0.0


## Betweeness Centrality
* how often a node in the shortest path between two other nodes in the network
* high betweeness score: gatekeepers of information and resources

In [47]:
def betweeness_centrality_node(G):
    betweeness_centrality = nx.betweenness_centrality(G)
    
    max_closeness_centrality = -math.inf
    max_closeness_node = 0

    for i,(k,v) in enumerate(betweeness_centrality.items()):
        if max_closeness_centrality>v:
            max_closeness_centrality = v
            max_closeness_node = k
    if max_closeness_centrality == -math.inf:
        print(f'Max Betweeness Centrality Node Does Not Exist')
    else:
        print(f'Max Betweeness Centrality Node:',max_closeness_node,'Betweeness Centrality:',betweeness_centrality[max_closeness_centrality])

In [48]:
betweeness_centrality_node(G)


Max Betweeness Centrality Node Does Not Exist


## Eigen Vector
* computes the centrality for a node based on the centrality of its neighbors
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality.html


# Vote Rank
* Select a list of influential nodes in a graph using VoteRank algorithm
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.voterank.html#networkx.algorithms.centrality.voterank


In [49]:
vote_rank = nx.voterank(G)
vote_rank

[291898,
 106436,
 33404,
 212376,
 52737,
 46179,
 64874,
 14602,
 42685,
 59157,
 4135,
 102646,
 27272,
 35479,
 151332,
 151460]