## Problem 1
Analyze the betweenness centrality of the path graph (a simple chain of nodes, sometimes called a "line") on 50 nodes

In [None]:
import networkx as nx

In [None]:
# Number of nodes in the path graph analysed in Problem 1.
n = 50

In [None]:
# Build the path graph 0 - 1 - ... - (n-1): start from node 0 and link every
# node id to its successor.
G = nx.Graph()
G.add_node(0)
G.add_edges_from((prev, prev + 1) for prev in range(n - 1))

In [None]:
# Unnormalized (raw shortest-path count) betweenness centrality of every node
# in G; the result is indexed by node id.
b = nx.betweenness_centrality(G, normalized=False)
b  # last expression of the cell: shows the computed values

In [None]:
# Put the computed betweenness of each node next to the closed-form value for
# a path graph. With 1-based position j the formula is (j - 1) * (n - j);
# node ids are 0-based (id = j - 1), which turns it into id * (n - 1 - id).
bb = {node: (b[node], node * (n - 1 - node)) for node in range(n)}

bb

## Problem 3 
Calculate betweenness of all nodes in the graph of the example

In [None]:
# Edge list of the example graph; node ids run from 1 to 10.
edges = [
    (1, 2), (2, 3), (1, 3),
    (3, 6), (3, 5), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 7),
    (7, 8), (8, 9), (8, 10),
]

In [None]:
# Start from an empty undirected graph (rebinds G from Problem 1).
G = nx.Graph()

In [None]:
# Insert every edge of the list; the endpoint nodes are created on the fly.
G.add_edges_from(edges)

In [None]:
# Unnormalized betweenness centrality of every node of the example graph.
betw = nx.betweenness_centrality(G, normalized=False)

In [None]:
# Show the betweenness values computed for the example graph.
betw

## Problem 4
Analyze the part of ru.wikipedia.org devoted to Russian cities. 
Each line in links.txt describes one edge (hyperlink): two node ids separated by \t. pages.txt contains the names of the web pages, indexed by the ids used in links.txt. Find the top 10 nodes by betweenness centrality.

In [None]:
# Load the hyperlink graph: each non-empty line of links.txt holds one
# directed edge as "source_id<TAB>target_id". Node ids are kept as strings.
# The encoding is given explicitly (as for pages.txt) so that reading does not
# depend on the platform's default locale.
G = nx.DiGraph()
with open("links.txt", "r", encoding="utf8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. extra newlines at the end of the
            # file) instead of failing on the tuple unpacking below.
            continue
        u, v = line.split('\t')
        G.add_edge(u, v)

In [None]:
# Lookup table built from pages.txt: the first tab-separated field of each
# line is the key, the second field is the value.
page2link = {}
with open("pages.txt", "r", encoding="utf8") as pages_file:
    page2link.update(row.strip().split('\t') for row in pages_file)

In [None]:
import time
import numpy as np

In [None]:
# Time the exact (unnormalized) betweenness computation on the link graph.
# perf_counter() is the clock intended for measuring intervals: it is
# monotonic and high-resolution, whereas time.time() follows the wall clock
# and can jump if the system time is adjusted during the run.
start = time.perf_counter()
bc = nx.betweenness_centrality(G, normalized=False)
end = time.perf_counter()
print(f'elapsed time {end - start}')

In [None]:
# Average betweenness over all nodes.
bc_values = list(bc.values())
print(f'mean bc = {np.mean(bc_values)}')

In [None]:
# Number of nodes in the link graph.
G.number_of_nodes()

In [None]:
# (node, betweenness) pairs ordered from the highest betweenness down.
bc_sorted = list(bc.items())
bc_sorted.sort(key=lambda pair: pair[1], reverse=True)

In [None]:
# Ten entries with the highest betweenness, as (node id, value) pairs.
bc_sorted[:10]

In [None]:
# Print "rank:entry" for the ten nodes with the highest betweenness, where the
# entry is what pages.txt stores for the node id. enumerate(..., start=1)
# supplies the 1-based rank instead of a hand-maintained counter.
for i, (k, _score) in enumerate(bc_sorted[:10], start=1):
    print(f'{i}:{page2link[k]}')

In [None]:
# Approximate betweenness: k=100 asks networkx to estimate the values from a
# sample of 100 nodes instead of using every node. No seed is passed, so the
# estimate can differ between runs.
# perf_counter() is used for the timing because it is monotonic, unlike the
# wall-clock time.time().
start = time.perf_counter()
bc_approx = nx.betweenness_centrality(G, normalized=False, k = 100)
end = time.perf_counter()
print(f'elapsed time {end - start}')
print(f'mean bc (approx) = {np.mean(list(bc_approx.values()))}')

# Rank the nodes by estimated betweenness and print the top ten as
# "rank:entry", with the rank supplied by enumerate.
bc_approx_sorted = sorted(bc_approx.items(), key = lambda x:x[1], reverse=True)
for i, (k, _score) in enumerate(bc_approx_sorted[:10], start=1):
    print(f'{i}:{page2link[k]}')

In [None]:
# Degree of every node of G. NOTE(review): for the directed link graph this
# should be the total (in + out) degree, matching the axis label used in the
# plot further down — confirm.
degrees = G.degree()

In [None]:
# (node, degree) pairs ordered from the highest degree down. Iterating the
# degree view yields these pairs directly.
degrees_sorted = sorted(degrees, key=lambda pair: pair[1], reverse=True)

In [None]:
# Ten nodes with the highest degree, as (node id, degree) pairs.
degrees_sorted[:10]

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Log-log scatter (markers only, no connecting line) of betweenness against
# degree. The two value sequences are paired positionally, so both mappings
# must list the nodes in the same order.
betweenness_values = bc.values()
degree_values = dict(degrees).values()
plt.loglog(betweenness_values, degree_values, linestyle='None', marker='o')
plt.xlabel('Betweenness, log')
plt.ylabel('Total degree, log')

In [None]:
# Undirected copy of the link graph: constructing nx.Graph from G drops the
# edge directions.
G_simple = nx.Graph(G)

In [None]:
# Local clustering coefficient of every node of the undirected graph.
clust_coeffs = nx.clustering(G_simple)
# NOTE: this rebinds `bc`. From here on it holds the betweenness of the
# undirected graph computed with networkx's default normalization, not the
# normalized=False values of the directed graph computed earlier.
bc = nx.betweenness_centrality(G_simple)

In [None]:
# Log-log scatter (markers only) of betweenness against the local clustering
# coefficient. Values are paired positionally, so both mappings must list the
# nodes in the same order.
betweenness_values = bc.values()
clustering_values = dict(clust_coeffs).values()
plt.loglog(betweenness_values, clustering_values, linestyle='None', marker='o')
plt.xlabel('Betweenness, log')
plt.ylabel('Local clustering coefficient, log')

In [None]:
# Density histogram of the betweenness values (100 equal-width bins, outline
# only), shown on log-log axes.
# NOTE: `n` is rebound here to the per-bin densities; it no longer holds the
# node count set in Problem 1. `n` and `bins` are read again by the next cell.
n, bins, _patches = plt.hist(bc.values(), bins=100, histtype='step', density=True)
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Betweenness, log')
plt.ylabel('Density')

In [None]:
# Replot the histogram as points: one marker per bin, placed at the midpoint
# between the bin's left and right edge.
mids = [(left + right) / 2 for left, right in zip(bins[:-1], bins[1:])]
plt.loglog(mids, n, linestyle='None', marker='o')
plt.xlabel('Betweenness, log')
plt.ylabel('Density')

In [None]:
import igraph as ig

In [None]:
# Re-read links.txt into plain Python containers for igraph:
#   edges - one (source_id, target_id) tuple per non-empty line, in file order
#   nodes - every distinct id that occurs in an edge, sorted so that the
#           vertex order is the same on every run
# Ids are collected in a set straight away instead of a list holding two
# entries per edge that is deduplicated afterwards.
edges = []
node_ids = set()
with open("links.txt", "r", encoding="utf8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines rather than failing on the unpacking below.
            continue
        u, v = line.split('\t')
        edges.append((u, v))
        node_ids.update((u, v))
nodes = sorted(node_ids)

In [None]:
# Build the same directed graph in igraph. Order matters: the vertices must
# exist before edges that refer to them are added.
graph = ig.Graph(directed=True)
# One vertex per id; the id strings are later read back via graph.vs['name'].
graph.add_vertices(nodes)
# Edges are given as (source, target) pairs of those id strings.
graph.add_edges(edges)

In [None]:
# Time igraph's betweenness computation on the directed graph.
# perf_counter() is monotonic and meant for interval timing; time.time() is
# the wall clock and may jump if the system time changes.
start = time.perf_counter()
betw = graph.betweenness(directed=True)
end = time.perf_counter()
print(f'elapsed time {end - start}')

In [None]:
# Map each vertex name to its betweenness. The i-th name is paired with the
# i-th value, so zip expresses the pairing directly without index arithmetic.
nodes_names = graph.vs['name']
betw_dict = dict(zip(nodes_names, betw))

In [None]:
# (vertex name, betweenness) pairs from igraph, highest betweenness first.
bc_sorted = list(betw_dict.items())
bc_sorted.sort(key=lambda pair: pair[1], reverse=True)

In [None]:
# Ten entries with the highest igraph betweenness, as (name, value) pairs.
bc_sorted[:10]

## Problem 5
Build a graph consisting of two disconnected Erdős–Rényi graphs on 100 nodes each with $p=0.2$. Add three bridges between these disconnected graphs. Which nodes have the highest betweenness in the graph with bridges?

In [None]:
# First random graph: 100 nodes, each possible edge present with p = 0.2.
G1 = nx.erdos_renyi_graph(100, p=0.2)
# Number of connected components (1 means the graph is connected).
nx.number_connected_components(G1)

In [None]:
# Second random graph: 100 nodes, each possible edge present with p = 0.2.
G2 = nx.erdos_renyi_graph(100, p=0.2)
# Number of connected components (1 means the graph is connected).
nx.number_connected_components(G2)

In [None]:
# Disjoint union of G1 and G2 in one graph: G1 keeps its node ids and every
# G2 node id is shifted by the size of G1 (100 here), so the two halves cannot
# collide.
# Nodes are added explicitly, not only through their edges: copying edges
# alone would silently drop any node that happens to have no neighbours.
offset = G1.number_of_nodes()
G = nx.Graph()
G.add_nodes_from(G1.nodes)
G.add_nodes_from(offset + node for node in G2.nodes)
G.add_edges_from(G1.edges)
G.add_edges_from((offset + u, offset + v) for u, v in G2.edges)

In [None]:
# Node sets of the connected components of the combined graph, before any
# bridge has been added.
list(nx.connected_components(G))

In [None]:
# Three bridge edges: node i is linked to node 100 + i for i = 0, 1, 2.
bridges = [(i, 100 + i) for i in range(3)]

In [None]:
# Insert all bridge edges into the combined graph in one call.
G.add_edges_from(bridges)

In [None]:
# Number of connected components once the bridges are in place.
nx.number_connected_components(G)

In [None]:
# Unnormalized betweenness of the bridged graph as (node, value) pairs,
# ordered from the highest value down; the cell shows the first ten.
bc = list(nx.betweenness_centrality(G, normalized=False).items())
bc.sort(key=lambda pair: pair[1], reverse=True)
bc[:10]

In [None]:
# Histogram (100 bins) of the betweenness values of the bridged graph.
plt.hist([score for _node, score in bc], bins=100)