In [1]:
import csv
import os

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

%matplotlib inline

In [2]:
# Import Reddit Hyperlink network.
# The first line of the CSV is skipped (skiprows=1) and the remaining rows are
# parsed as integers, one edge per row.
# ndmin=2 keeps the array two-dimensional even when the file holds a single
# data row; without it loadtxt returns a 1-D array and add_edges_from would
# try to unpack bare scalars as edges.
edges = np.loadtxt('data/edges.csv', delimiter=',', dtype=int, skiprows=1, ndmin=2)
G = nx.Graph()
# Hand the edges over as tuples of plain Python ints so node ids are not numpy
# scalars (they hash and compare the same, but print and serialise cleanly
# regardless of the numpy version).
G.add_edges_from(map(tuple, edges.tolist()))

In [3]:
# Graph size summary: node count on the first line, edge count on the second
print(G.number_of_nodes(), G.number_of_edges(), sep='\n')

35776
124330


In [4]:
%%time
# Find degree centrality of each node (dict: node id -> score)
degrees = nx.degree_centrality(G)

# Convert to list of (node, score) tuples, sorted by key (node id), so the
# CSV rows come out in a stable order
degrees_list = sorted(degrees.items(), key=lambda x: x[0])

# Save to csv. The output folder is created first so that a fresh checkout
# (where data/task2 does not exist yet) does not fail with FileNotFoundError.
os.makedirs('data/task2', exist_ok=True)
with open('data/task2/degree_centrality.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['node', 'degree'])
    writer.writerows(degrees_list)

# Print 10 nodes with highest and lowest degree centralities.
# The list is ranked by score, descending; sorted() is stable, so tied nodes
# keep ascending node-id order. The reversed slice walks the tail backwards,
# i.e. it starts from the very last (lowest-ranked) entry.
sorted_degrees = sorted(degrees_list, key=lambda x: x[1], reverse=True)
print(sorted_degrees[:10])
print(sorted_degrees[-1:-11:-1])

[(59, 0.06529699510831585), (41, 0.051404612159329136), (122, 0.0446680642907058), (57, 0.029154437456324248), (225, 0.027561146051712088), (36, 0.026722571628232005), (166, 0.024626135569531794), (42, 0.02280922431865828), (55, 0.02188679245283019), (224, 0.020097833682739343)]
[(35775, 2.795248078266946e-05), (35773, 2.795248078266946e-05), (35772, 2.795248078266946e-05), (35770, 2.795248078266946e-05), (35769, 2.795248078266946e-05), (35768, 2.795248078266946e-05), (35767, 2.795248078266946e-05), (35766, 2.795248078266946e-05), (35765, 2.795248078266946e-05), (35764, 2.795248078266946e-05)]
CPU times: total: 109 ms
Wall time: 136 ms


In [5]:
%%time
# Find eigenvector centrality of each node (dict: node id -> score).
# NOTE(review): the lowest scores in the recorded output are ~1e-20, which
# presumably means some nodes sit in small components detached from the main
# one -- confirm this is expected before interpreting the bottom of the ranking.
eigenvectors = nx.eigenvector_centrality(G)

# Convert to list of (node, score) tuples, sorted by key (node id), so the
# CSV rows come out in a stable order
eigenvectors_list = sorted(eigenvectors.items(), key=lambda x: x[0])

# Save to csv. The output folder is created first so that a fresh checkout
# (where data/task2 does not exist yet) does not fail with FileNotFoundError.
os.makedirs('data/task2', exist_ok=True)
with open('data/task2/eigenvector_centrality.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['node', 'eigenvector'])
    writer.writerows(eigenvectors_list)

# Print 10 nodes with highest and lowest eigenvector centralities.
# The list is ranked by score, descending; sorted() is stable, so tied nodes
# keep ascending node-id order. The reversed slice walks the tail backwards,
# i.e. it starts from the very last (lowest-ranked) entry.
sorted_eigenvectors = sorted(eigenvectors_list, key=lambda x: x[1], reverse=True)
print(sorted_eigenvectors[:10])
print(sorted_eigenvectors[-1:-11:-1])

[(59, 0.19980016082723698), (122, 0.17251286662845217), (41, 0.16843359927641854), (225, 0.1284885713009235), (36, 0.11509786622734267), (166, 0.11367799569072382), (42, 0.10991787698584359), (156, 0.1058157374989249), (55, 0.10344128462091134), (233, 0.1013885860167151)]
[(35773, 1.5684687372311454e-20), (35772, 1.5684687372311454e-20), (35751, 1.5684687372311454e-20), (35750, 1.5684687372311454e-20), (35720, 1.5684687372311454e-20), (35719, 1.5684687372311454e-20), (35664, 1.5684687372311454e-20), (35663, 1.5684687372311454e-20), (35573, 1.5684687372311454e-20), (35572, 1.5684687372311454e-20)]
CPU times: total: 1.17 s
Wall time: 1.29 s


In [6]:
%%time
# Make sure the output folder exists *before* the expensive computation, so a
# missing data/task2 directory cannot cause a FileNotFoundError only after the
# long wait.
os.makedirs('data/task2', exist_ok=True)

# Find closeness centrality of each node (dict: node id -> score).
# This is slow on this graph: the recorded run took roughly 35 minutes.
closenesses = nx.closeness_centrality(G)

# Convert to list of (node, score) tuples, sorted by key (node id), so the
# CSV rows come out in a stable order
closenesses_list = sorted(closenesses.items(), key=lambda x: x[0])

# Save to csv
with open('data/task2/closeness_centrality.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['node', 'closeness'])
    writer.writerows(closenesses_list)

# Print 10 nodes with highest and lowest closeness centralities.
# The list is ranked by score, descending; sorted() is stable, so tied nodes
# keep ascending node-id order. The reversed slice walks the tail backwards,
# i.e. it starts from the very last (lowest-ranked) entry.
sorted_closenesses = sorted(closenesses_list, key=lambda x: x[1], reverse=True)
print(sorted_closenesses[:10])
print(sorted_closenesses[-1:-11:-1])

[(59, 0.426222639576908), (41, 0.41633578694267376), (122, 0.41617076668872666), (225, 0.3995045382731404), (166, 0.38703771040360846), (224, 0.38280446477592434), (36, 0.3806447413910621), (42, 0.3798400411260701), (134, 0.3797842258627051), (55, 0.37934257641069025)]
[(35773, 2.795248078266946e-05), (35772, 2.795248078266946e-05), (35751, 2.795248078266946e-05), (35750, 2.795248078266946e-05), (35720, 2.795248078266946e-05), (35719, 2.795248078266946e-05), (35664, 2.795248078266946e-05), (35663, 2.795248078266946e-05), (35573, 2.795248078266946e-05), (35572, 2.795248078266946e-05)]
CPU times: total: 35min 27s
Wall time: 35min 50s


In [7]:
%%time
# Make sure the output folder exists *before* the expensive computation, so a
# missing data/task2 directory cannot cause a FileNotFoundError only after the
# long wait.
os.makedirs('data/task2', exist_ok=True)

# Find betweenness centrality of each node (dict: node id -> score).
# This is by far the slowest cell: the recorded run took about 2 hours.
betweennesses = nx.betweenness_centrality(G)

# Convert to list of (node, score) tuples, sorted by key (node id), so the
# CSV rows come out in a stable order
betweennesses_list = sorted(betweennesses.items(), key=lambda x: x[0])

# Save to csv
with open('data/task2/betweenness_centrality.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['node', 'betweenness'])
    writer.writerows(betweennesses_list)

# Print 10 nodes with highest and lowest betweenness centralities.
# The list is ranked by score, descending; sorted() is stable, so tied nodes
# keep ascending node-id order. The reversed slice walks the tail backwards,
# i.e. it starts from the very last (lowest-ranked) entry.
sorted_betweennesses = sorted(betweennesses_list, key=lambda x: x[1], reverse=True)
print(sorted_betweennesses[:10])
print(sorted_betweennesses[-1:-11:-1])

[(59, 0.11404735670207757), (41, 0.08346184105614243), (122, 0.07006046080416123), (57, 0.03947829573368361), (225, 0.034916176682116246), (166, 0.027187257029303344), (0, 0.025493520057664188), (224, 0.023911825838878423), (36, 0.022284293740961147), (556, 0.021842083951689285)]
[(35775, 0.0), (35773, 0.0), (35772, 0.0), (35770, 0.0), (35769, 0.0), (35768, 0.0), (35767, 0.0), (35766, 0.0), (35765, 0.0), (35764, 0.0)]
CPU times: total: 2h 3min 4s
Wall time: 2h 4min 15s
