## Problem 1

Calculate the betweenness, closeness, eigenvector and PageRank centralities of all nodes in the example graph shown below.


<img src="ex.png"> 

In [None]:
import networkx as nx

In [None]:
# Undirected edge list of the example graph; its endpoints are the labels 1..10.
edges = [
    (1, 2), (2, 3), (1, 3),
    (3, 6), (3, 5), (3, 4),
    (4, 5), (5, 6), (5, 7),
    (6, 7), (7, 8), (8, 9), (8, 10),
]

# Start from an empty undirected graph and insert all edges in one call;
# the endpoints are created as nodes on the fly.
G = nx.Graph()
G.add_edges_from(edges)

In [None]:
# Betweenness centrality of every node of G, left unnormalized (normalized=False).
betw = nx.betweenness_centrality(G, normalized=False)
# Bare expression: shown as the cell output when run in a notebook.
betw

In [None]:
# Node count; (n - 1) is the divisor applied to every closeness score below.
n = G.number_of_nodes()

# (node, score / (n - 1)) pairs, in the order networkx returns the nodes.
cl = [
    (node, score / (n - 1))
    for node, score in nx.closeness_centrality(G).items()
]
cl

In [None]:
# Eigenvector centrality of G, computed with networkx's default settings.
eig = nx.eigenvector_centrality(G)
# Bare expression: shown as the cell output when run in a notebook.
eig

In [None]:
# PageRank of G with the alpha parameter set explicitly to 0.85.
pr = nx.pagerank(G, alpha=0.85)
# Bare expression: shown as the cell output when run in a notebook.
pr

## Problem 2

Analyze the closeness centrality of the line (path) graph on 50 nodes

In [None]:
# Path ("line") graph on n nodes labelled 0 .. n-1.
n = 50
G = nx.Graph()
# Node 0 is inserted explicitly; the remaining nodes appear with their edges.
G.add_node(0)
# Consecutive labels are joined: (0, 1), (1, 2), ..., (n-2, n-1).
G.add_edges_from((i - 1, i) for i in range(1, n))

In [None]:
# (node, closeness / (n - 1)) for every node of the path graph.
c = [
    (node, score / (n - 1))
    for node, score in nx.closeness_centrality(G).items()
]
# Ten entries with the largest rescaled closeness, best first.
top10cl = sorted(c, key=lambda entry: entry[1], reverse=True)[:10]
top10cl

In [None]:
# For every position i, pair the value stored in c with the closed-form
# 1 / (sum of distances) of the j-th node (j = i + 1) of an n-node path.
cc = {}
for i in range(n):
    j = i + 1  # 1-based position, since i starts from 0
    # Distances to the left sum to 1 + ... + (j - 1) = (j - 1) * j / 2 and
    # distances to the right to 1 + ... + (n - j) = (n - j) * (n - j + 1) / 2.
    sum_dist = ((j - 1) * j + (n - j) * (n - j + 1)) / 2
    cc[i] = (c[i][1], 1 / sum_dist)

cc

In [None]:
# Path on nodes 0 .. n-1 whose edge (i-1, i) carries the attribute dist = i**2.
G_w = nx.Graph()
G_w.add_node(0)
G_w.add_edges_from((i - 1, i, {"dist": i**2}) for i in range(1, n))

In [None]:
# Closeness on the weighted path, using the "dist" edge attribute as the
# distance, with every score divided by (n - 1).
c_w = [
    (node, score / (n - 1))
    for node, score in nx.closeness_centrality(G_w, distance="dist").items()
]
# Ten entries with the largest rescaled closeness, best first.
top10cl_w = sorted(c_w, key=lambda entry: entry[1], reverse=True)[:10]
top10cl_w

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Rescaled weighted closeness (y) against node label (x).
plt.plot([node for node, _ in c_w], [score for _, score in c_w])

## Problem 4

Analyze the part of ru.wikipedia.org devoted to Russian cities. Each line in links.txt describes one edge (hyperlink): two node ids separated by \t. pages.txt contains the names of the web pages, indexed by the ids used in links.txt. Find the top 10 nodes by different centrality measures.

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Build the directed hyperlink graph from links.txt. Every non-empty line is
# "<id>\t<id>" and becomes an edge from the first id to the second; ids are
# kept as strings.
# The encoding is given explicitly (as for pages.txt) so that decoding does
# not depend on the platform's default locale.
with open("links.txt", "r", encoding="utf8") as f:
    G = nx.DiGraph()
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. an extra newline at the end of the
            # file) instead of failing on the tuple unpacking below.
            continue
        u, v = line.split('\t')
        G.add_edge(u, v)

In [None]:
# Lookup table built from pages.txt: first tab-separated field -> second field.
# It is later indexed with graph node ids to obtain page names.
page2link = {}
with open("pages.txt", "r", encoding="utf8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. an extra newline at the end of the
            # file) instead of failing on the tuple unpacking below.
            continue
        page, link = line.split('\t')
        page2link[page] = link

In [None]:
def top_names(node_to_rank, names, top = 10):
    """Return the names of the `top` highest-scoring nodes, best first.

    node_to_rank maps a node to its score and names maps a node to its
    display name. Nodes with equal scores keep the order in which
    node_to_rank iterates over them. A KeyError is raised if a selected
    node has no entry in names.
    """
    ranked_nodes = sorted(node_to_rank, key=node_to_rank.get, reverse=True)
    return [names[node] for node in ranked_nodes[:top]]

In [None]:
# Number of nodes (not edges): (n - 1) is the divisor applied to the closeness
# scores further down, the same rescaling used in Problems 1 and 2.
n = G.number_of_nodes()

In [None]:
# Betweenness centrality of every page in the directed graph, left
# unnormalized (normalized=False).
bc = nx.betweenness_centrality(G, normalized=False)

In [None]:
# node -> closeness score divided by (n - 1), with n as set in an earlier cell.
cl = {
    node: score / (n - 1)
    for node, score in nx.closeness_centrality(G).items()
}

In [None]:
# Eigenvector centrality of the directed graph, with networkx's default settings.
eig = nx.eigenvector_centrality(G)

In [None]:
# PageRank of the directed graph with the alpha parameter set explicitly to 0.85.
pr = nx.pagerank(G, alpha=0.85)

In [None]:
# Names of the 10 pages (the default `top`) with the highest betweenness.
top_names(bc,page2link)

In [None]:
# Names of the 10 pages (the default `top`) with the highest closeness.
top_names(cl,page2link)

In [None]:
# Names of the 10 pages (the default `top`) with the highest eigenvector centrality.
top_names(eig,page2link)

In [None]:
# Names of the 10 pages (the default `top`) with the highest PageRank.
top_names(pr, page2link)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Betweenness (x) against PageRank (y), markers only, both axes logarithmic.
# The two value sequences are paired by position, so this relies on both
# dicts iterating over the nodes in the same order.
plt.loglog(list(bc.values()), list(pr.values()), linestyle='None', marker='o')

In [None]:
# Node degree (x) against PageRank (y), markers only, both axes logarithmic.
# Values are paired by position, so this relies on G.degree() and pr listing
# the nodes in the same order.
plt.loglog(
    [degree for _, degree in G.degree()],
    list(pr.values()),
    linestyle='None',
    marker='o',
)

In [None]:
# Eigenvector centrality (x) against PageRank (y), markers only, log-log axes.
# Values are paired by position, so both dicts must iterate over the nodes in
# the same order.
plt.loglog(list(eig.values()), list(pr.values()), linestyle='None', marker='o')

In [None]:
# Rescaled closeness (x) against PageRank (y), markers only, log-log axes.
# Values are paired by position, so both dicts must iterate over the nodes in
# the same order.
plt.loglog(list(cl.values()), list(pr.values()), linestyle='None', marker='o')

In [None]:
# Density-normalised, 100-bin step histogram of the betweenness values.
# NOTE: this rebinds the name n to the per-bin values returned by plt.hist;
# bins receives the bin edges.
n, bins, _ = plt.hist(list(bc.values()), bins=100, histtype='step', density=True)
# Show both axes on a logarithmic scale.
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Betweenness, log')
plt.ylabel('Density')

In [None]:
# Bin centres: midpoint of every pair of consecutive bin edges. Uses the n and
# bins values produced by the preceding histogram cell.
mids = [(left + right) / 2 for left, right in zip(bins[:-1], bins[1:])]
# Density per bin against bin centre, markers only, log-log axes.
plt.loglog(mids, n, linestyle='None', marker='o')
plt.xlabel('Betweenness, log')
plt.ylabel('Density')

In [None]:
# Density-normalised, 100-bin step histogram of the PageRank values, drawn on
# the current figure. n receives the per-bin values, bins the bin edges.
n, bins, _ = plt.hist(list(pr.values()), bins=100, histtype='step', density=True)
# Bin centres: midpoint of every pair of consecutive bin edges.
mids = [(left + right) / 2 for left, right in zip(bins[:-1], bins[1:])]
# New figure: density per bin against bin centre, markers only, log-log axes.
plt.figure()
plt.loglog(mids, n, linestyle='None', marker='o')
plt.xlabel('PageRank, log')
plt.ylabel('Density')

In [None]:
# Density-normalised, 100-bin step histogram of the rescaled closeness values,
# drawn on the current figure. n receives the per-bin values, bins the edges.
n, bins, _ = plt.hist(list(cl.values()), bins=100, histtype='step', density=True)
# Bin centres: midpoint of every pair of consecutive bin edges.
mids = [(left + right) / 2 for left, right in zip(bins[:-1], bins[1:])]
# New figure: density per bin against bin centre, markers only, linear axes.
plt.figure()
plt.plot(mids, n, linestyle='None', marker='o')
plt.xlabel('Closeness')
plt.ylabel('Density')

In [None]:
# Density-normalised, 100-bin step histogram of the eigenvector centralities,
# drawn on the current figure. n receives the per-bin values, bins the edges.
n, bins, _ = plt.hist(list(eig.values()), bins=100, histtype='step', density=True)
# Bin centres: midpoint of every pair of consecutive bin edges.
mids = [(left + right) / 2 for left, right in zip(bins[:-1], bins[1:])]
# New figure: density per bin against bin centre, markers only, log-log axes.
plt.figure()
plt.loglog(mids, n, linestyle='None', marker='o')
plt.xlabel('Eigenvector, log')
plt.ylabel('Density')

In [None]:
# Weakly and strongly connected components of G, largest first. sorted()
# consumes the generators directly, so no intermediate list is needed.
wcc_list = sorted(nx.weakly_connected_components(G), key=len, reverse=True)
scc_list = sorted(nx.strongly_connected_components(G), key=len, reverse=True)

# A graph with a single component has no "second" one; report its size as 0
# instead of failing with an IndexError.
second_wcc_size = len(wcc_list[1]) if len(wcc_list) > 1 else 0
second_scc_size = len(scc_list[1]) if len(scc_list) > 1 else 0

# Component sizes are printed as fractions of the total number of nodes.
print(f'number of weakly connected componets = {len(wcc_list)}')
print(f'size of GWCC = {len(wcc_list[0])/len(G.nodes)}')
print(f'size of second WCC = {second_wcc_size/len(G.nodes)}')
print(f'number of strongly connected componets = {len(scc_list)}')
print(f'size of GSCC = {len(scc_list[0])/len(G.nodes)}')
print(f'size of second SCC = {second_scc_size/len(G.nodes)}')

In [None]:
# Split the giant weakly connected component around the giant strongly
# connected component (GSCC):
#   in_nodes  - nodes outside the GSCC that have a path into it,
#   out_nodes - nodes outside the GSCC that can be reached from it.
n = G.number_of_nodes()
nodes_in_gwcc = list(wcc_list[0])
nodes_in_gscc = list(scc_list[0])

# Inside a strongly connected component every node reaches every other one,
# so a single member can stand in for the whole component.
gscc_anchor = nodes_in_gscc[0]
gscc_members = set(nodes_in_gscc)  # O(1) membership tests

# One traversal in each direction replaces a separate path search per node:
# ancestors = every node with a path to the anchor,
# descendants = every node reachable from the anchor.
can_reach_gscc = nx.ancestors(G, gscc_anchor)
reached_from_gscc = nx.descendants(G, gscc_anchor)

in_nodes = []
out_nodes = []

for node in nodes_in_gwcc:
    if node in gscc_members:
        continue
    if node in can_reach_gscc:
        in_nodes.append(node)
    elif node in reached_from_gscc:
        out_nodes.append(node)

print(f'In component size = {len(in_nodes)/n}')
print(f'Out component size = {len(out_nodes)/n}')
print(f'In + Out + SCC size to WCC = {(len(out_nodes) + len(in_nodes) + len(nodes_in_gscc))/len(nodes_in_gwcc)}')

In [None]:
# Largest eigenvector centrality among the nodes collected in in_nodes.
max(eig[node] for node in in_nodes)