In [1]:
import networkx as nx
import numpy as np

edge_path = 'data/soc-twitter-follows-mun.csv'
data = np.genfromtxt(edge_path, delimiter=' ', dtype=str, skip_header=1)
data=[list( map(int,i) ) for i in data]
G = nx.DiGraph()
for row in data:
    source_node, target_node = row
    G.add_edge(source_node, target_node)

In [2]:
degrees = dict(G.degree())
print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())
print("\nMinimum degree:", min(degrees.values()))
print("Maximum degree:", max(degrees.values()))
print("Mean Degree:", sum(degrees.values()) / len(G))
degree_sequence = sorted((d for n, d in G.degree()), reverse=True)
node_counts = np.unique(degree_sequence, return_counts=True)
print("Median Degree:", degree_sequence[len(degree_sequence)//2])
print("Mode Degree:", node_counts[0][0])

Number of nodes: 465016
Number of edges: 834797

Minimum degree: 1
Maximum degree: 678
Mean Degree: 3.5904011904966713
Median Degree: 1
Mode Degree: 1


In [3]:
indegrees = dict(G.in_degree())
outdegrees = dict(G.out_degree())
print("Minimum indegree:", min(indegrees.values()))
print("Maximum indegree:", max(indegrees.values()))
print("Mean indegree:", sum(indegrees.values()) / len(G))
indegree_sequence = sorted((d for n, d in G.in_degree()), reverse=True)
node_counts = np.unique(indegree_sequence, return_counts=True)
print("Median indegree:", degree_sequence[len(degree_sequence)//2])
print("Mode indegree:", node_counts[0][0])

print("\nMinimum outdegrees:", min(outdegrees.values()))
print("Maximum outdegrees:", max(outdegrees.values()))
print("Mean outdegree:", sum(outdegrees.values()) / len(G))
outdegree_sequence = sorted((d for n, d in G.out_degree()), reverse=True)
node_counts = np.unique(outdegree_sequence, return_counts=True)
print("Median outdegree:", degree_sequence[len(degree_sequence)//2])
print("Mode outdegree:", node_counts[0][0])

Minimum indegree: 0
Maximum indegree: 199
Mean indegree: 1.7952005952483356
Median indegree: 1
Mode indegree: 0

Minimum outdegrees: 0
Maximum outdegrees: 500
Mean outdegree: 1.7952005952483356
Median outdegree: 1
Mode outdegree: 0


# Centrality
* find major hubs
* degree centrality: network is directly connected to many others
* closeness centrality: close to many others indirectly
* betweenness centrality: key broker between many other nodes

* highly centralized network is one in which a small number of people or organizations have a larger than proportional share of the connections
    * e.g. Do a few organizations serve as a central hub, with outsized influence? Is the network decentralized, with more equal access to the network’s resources and information? 

* reference link: https://visiblenetworklabs.com/2021/04/16/understanding-network-centrality/
* reference link: https://bootcamp.uxdesign.cc/uncovering-hidden-patterns-in-network-data-an-introduction-to-graph-analytics-1d371680928b

## Degree Centrality
* number of connections each node has in the network
* most central node is directly connected; least central node is connected to one other node
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.degree_centrality.html

In [4]:
import math

In [5]:
def degree_centrality_node(G):
    degree_centrality = nx.degree_centrality(G)
    indegree_centrality = nx.in_degree_centrality(G)
    outdegree_centrality = nx.out_degree_centrality(G)

    max_degree_centrality = -math.inf
    max_degree_node = 0

    max_indegree_centrality = -math.inf
    max_indegree_node = 0

    max_outdegree_centrality = -math.inf
    max_outdegree_node = 0

    for i,(k,v) in enumerate(degree_centrality.items()):
        if max_degree_centrality<v:
            max_degree_centrality = v
            max_degree_node = k

    for i,(k,v) in enumerate(indegree_centrality.items()):
        if max_indegree_centrality<v:
            max_indegree_centrality = v
            max_indegree_node = k 
    
    for i,(k,v) in enumerate(outdegree_centrality.items()):
        if max_outdegree_centrality<v:
            max_outdegree_centrality = v
            max_outdegree_node = k 
    
    print(f'Max Degree Centrality Node:',max_degree_node,'Degree Centrality:',degree_centrality[max_degree_node])
    print(f'Max Indegree Centrality Node:',max_indegree_node,'Indegree Centrality:',indegree_centrality[max_indegree_node])
    print(f'Max Outdegree Centrality Node:',max_outdegree_node,'Max Outdegree Centrality:',outdegree_centrality[max_outdegree_node])

In [6]:
degree_centrality_node(G)


Max Degree Centrality Node: 643 Degree Centrality: 0.00145801748330699
Max Indegree Centrality Node: 643 Indegree Centrality: 0.00042794318462845284
Max Outdegree Centrality Node: 3418 Max Outdegree Centrality: 0.0010752341322322935


## Closeness Centrality
* closeness or distance to others in the network
* more central nodes can communicate more quickly and easily with others
* more central nodes have low closeness centrality scores and do not have to travel as far along paths 
* nodes with high closeness centrality scores are less central and have to travel farther along paths
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.closeness_centrality.html#networkx.algorithms.centrality.closeness_centrality

In [7]:
def closeness_centrality_node(G):
    closeness_centrality = nx.closeness_centrality(G)
    
    min_closeness_centrality = math.inf
    min_closeness_node = 0

    for i,(k,v) in enumerate(closeness_centrality.items()):
        if min_closeness_centrality>v:
            min_closeness_centrality = v
            min_closeness_node = k
    
    print(f'Min Closeness Centrality Node:',min_closeness_node,'Closeness Centrality:',closeness_centrality[min_closeness_node])

In [8]:
closeness_centrality_node(G)

Min Closeness Centrality Node: 2555 Closeness Centrality: 0.0


## Betweeness Centrality
* how often a node in the shortest path between two other nodes in the network
* high betweeness score: gatekeepers of information and resources

In [9]:
def betweeness_centrality_node(G):
    betweeness_centrality = nx.betweenness_centrality(G)
    
    max_closeness_centrality = -math.inf
    max_closeness_node = 0

    for i,(k,v) in enumerate(betweeness_centrality.items()):
        if max_closeness_centrality>v:
            max_closeness_centrality = v
            max_closeness_node = k
    
    print(f'Max Betweeness Centrality Node:',max_closeness_node,'Betweeness Centrality:',betweeness_centrality[max_closeness_centrality])

In [10]:
betweeness_centrality_node(G)


KeyboardInterrupt: 

## Eigen Vector
* 

# Vote Rank
* Select a list of influential nodes in a graph using VoteRank algorithm
* reference link: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.voterank.html#networkx.algorithms.centrality.voterank


In [11]:
vote_rank = nx.voterank(G)
vote_rank

[3418,
 5656,
 6389,
 13854,
 20596,
 30720,
 31542,
 41752,
 64482,
 76541,
 91651,
 114920,
 171825,
 179375,
 13913,
 13922,
 30741,
 35762,
 35842,
 59057,
 78929,
 63054,
 142511,
 209051,
 288580,
 17314,
 29794,
 56900,
 62892,
 249227,
 12942,
 18158,
 51898,
 71200,
 73722,
 174609,
 45999,
 55935,
 108078,
 47977,
 30767,
 24134,
 34313,
 83775,
 100771,
 322641,
 124663,
 52635,
 82392,
 191317,
 207299,
 10021,
 19902,
 49519,
 71331,
 6992,
 329636,
 31919,
 54830,
 216880,
 221923,
 207085,
 196206,
 19691,
 46961,
 204422,
 68082,
 40874,
 97980,
 28719,
 35502,
 81088,
 21023,
 28399,
 222887,
 45065,
 39359,
 25647,
 10269,
 99133,
 44541,
 113159,
 50173,
 69037,
 40851,
 23451,
 66402,
 66690,
 181692,
 50097,
 72332,
 81596,
 3580,
 265080,
 67116,
 126357,
 68135,
 316184,
 32256,
 17076,
 27708,
 60398,
 108250,
 50268,
 66624,
 79415,
 272725,
 276766,
 49072,
 55,
 78318,
 59374,
 50136,
 8828,
 51889,
 47680,
 15149,
 25387,
 37355,
 39975,
 8781,
 30450,
 6592