# Problem 1

Analyse the results returned by the functions networkx.transitivity and networkx.average_clustering.

Implement a function that calculates the local clustering coefficient for a directed network without loops and multiple edges. Generate the sequence of random graphs $G_3^n$ in the Bollobas-Riordan model with $n = 100, 200, ..., 10000$ and plot the global and mean local clustering coefficients in the following two cases:

- graphs are considered not to have directions, loops and multiple edges,

- graphs are considered not to have loops and multiple edges, but directions are taken into account

Is it true that in these graphs the local clustering coefficient is higher than the global one by a constant factor?

Compare the results of the two functions networkx.approximation.average_clustering and networkx.average_clustering.

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import pylab
%matplotlib inline
import random


In [None]:
def generateSimpleBollobasRiordanGraph(n):
    """Grow a random multigraph G_1^n by preferential attachment.

    The graph starts as node 0 carrying a self-loop.  Each later node sends
    exactly one edge to a target drawn uniformly from a list that holds every
    node once per edge endpoint it already owns, plus the new node itself
    once (so a new node may attach to itself).

    Args:
        n: Target number of nodes.  The seed node is always created, so
            values below 1 still yield the one-node graph.

    Returns:
        A ``networkx.MultiDiGraph`` whose nodes are labelled ``0, 1, ...`` in
        order of arrival.
    """
    graph = nx.MultiDiGraph()
    graph.add_edge(0, 0)  # seed: node 0 with a loop
    # One entry per edge endpoint; the loop contributes node 0 twice.
    endpoints = [0, 0]
    while len(graph) < n:
        newcomer = len(graph)
        graph.add_node(newcomer)
        # The newcomer's own outgoing endpoint takes part in the draw.
        endpoints.append(newcomer)
        target = random.choice(endpoints)
        endpoints.append(target)
        graph.add_edge(newcomer, target)
    return graph

def generateBollobasRiordanGraph(n, m):
    """Build G_m^n by collapsing a G_1^{mn} graph in groups of ``m`` nodes.

    Node ``u`` of the underlying graph is mapped to ``u // m``; every edge is
    carried over, so parallel edges and loops are preserved.

    Args:
        n: Number of nodes of the resulting graph.
        m: Number of consecutive underlying nodes merged into one.

    Returns:
        A ``networkx.MultiDiGraph`` containing the merged edges.
    """
    base = generateSimpleBollobasRiordanGraph(m * n)
    merged = nx.MultiDiGraph()
    merged.add_edges_from((src // m, dst // m) for src, dst in base.edges())
    return merged

In [None]:
# Toy directed graph with edges 1 -> 2, 1 -> 0 and 2 -> 0, used to compare
# what the networkx clustering functions report on directed input.
H = nx.DiGraph()
H.add_edges_from([(1, 2), (1, 0), (2, 0)])
# Optionally also add the reverse edge: H.add_edge(0, 2)
print(nx.transitivity(H))
print(nx.average_clustering(H))
print(nx.clustering(H))
# print(nx.triangles(H)) # not defined for directed graphs

The functions networkx.average_clustering and networkx.transitivity are defined differently for directed graphs. Therefore, we implement our own average clustering based on the same definition that is used in the transitivity case.

In this case the local clustering coefficient of a node $v$ with neighbour set $N_v$ and $n_v = |N_v|$ can be defined as follows:
$$
C_v = \frac{|\{(x,y)\in E: x,y \in N_v\}|}{n_v(n_v-1)}
$$

In [None]:
from itertools import permutations

def local_clustering_directed(G, v):
    """Return the local clustering coefficient of node ``v`` in a directed graph.

    The coefficient is the number of directed edges between distinct
    neighbours of ``v`` divided by ``n_v * (n_v - 1)``, where ``n_v`` is the
    number of neighbours.  Neighbours are whatever ``G.neighbors(v)`` yields
    (for a ``networkx.DiGraph`` these are the successors of ``v``).

    A self-loop on ``v`` is ignored: the coefficient is defined for graphs
    without loops, and treating ``v`` as its own neighbour would inflate both
    the neighbourhood size and the edge count.

    Args:
        G: Graph object providing ``neighbors(node)`` and ``has_edge(u, w)``.
        v: Node whose coefficient is computed.

    Returns:
        The fraction of ordered neighbour pairs joined by an edge.  Nodes
        with fewer than two neighbours return 1 by this function's convention
        (``networkx.clustering`` reports 0 for such nodes).
    """
    # Materialise once and drop v itself so a self-loop cannot count.
    neighbours = [u for u in G.neighbors(v) if u != v]
    nv = len(neighbours)
    if nv <= 1:
        return 1
    # Every ordered pair is visited, so checking x -> y covers both directions.
    # has_edge is a direct lookup rather than a scan over G.neighbors(x).
    number_of_edges = sum(
        1 for x, y in permutations(neighbours, 2) if G.has_edge(x, y)
    )
    return number_of_edges / nv / (nv - 1)

In [None]:
def average_local_clustering_directed(G):
    """Return the mean of ``local_clustering_directed`` over all nodes of ``G``.

    Args:
        G: Graph providing ``nodes()``, ``number_of_nodes()`` and whatever
            ``local_clustering_directed`` needs.

    Raises:
        ZeroDivisionError: If ``G`` has no nodes.
    """
    total = sum(local_clustering_directed(G, node) for node in G.nodes())
    return total / G.number_of_nodes()

In [None]:
# Evaluate the custom directed average on the toy graph H for comparison
# with the networkx values printed earlier.
average_local_clustering_directed(H)

In [None]:
# Sample one merged Bollobas-Riordan multigraph with n = 10000 and m = 3.
G = generateBollobasRiordanGraph(10000, 3)

In [None]:
# Build each simple view of the multigraph once and reuse it for all metrics.
G_directed = nx.DiGraph(G)
G_undirected = nx.Graph(G)

# Global clustering coefficient: directed view first, then undirected.
# (nx.transitivity(G) itself is not defined for multigraphs.)
print(nx.transitivity(G_directed))
print(nx.transitivity(G_undirected))
# networkx mean local clustering: undirected view first, then directed.
print(nx.average_clustering(G_undirected))
print(nx.average_clustering(G_directed))
# Custom mean local clustering on the directed view.
print(average_local_clustering_directed(G_directed))

In [None]:
from networkx.algorithms import approximation
# Sampling-based estimate of the average clustering of the undirected view,
# repeated with a growing number of trials.
for trials in (100, 1000, 10000, 100000):
    print(approximation.average_clustering(nx.Graph(G), trials=trials))

In [None]:
di_graph_t = []   # global clustering coefficient (with directions)
di_graph_ac = []  # mean local clustering coefficient (with directions)
t = []            # global clustering coefficient (without directions)
ac = []           # mean local clustering coefficient (without directions)
ns = []           # numbers of nodes
# The stop value is 10001 so that n = 10000 itself is part of the sequence
# n = 100, 200, ..., 10000.
for N in range(100, 10001, 100):
    print(N)  # progress indicator
    G = generateBollobasRiordanGraph(N, 3)
    # Collapse the multigraph once per view instead of once per metric.
    undirected = nx.Graph(G)
    directed = nx.DiGraph(G)
    ns.append(N)
    t.append(nx.transitivity(undirected))
    ac.append(nx.average_clustering(undirected))
    di_graph_t.append(nx.transitivity(directed))
    di_graph_ac.append(average_local_clustering_directed(directed))

In [None]:
# undirected case
# Log-log scatter of both coefficients against n; the labels and legend make
# the two series distinguishable in the figure itself.
plt.loglog(ns, t, ls='None', marker='o', color='c',
           label='global clustering coefficient')
plt.loglog(ns, ac, ls='None', marker='o', color='r',
           label='mean local clustering coefficient')
plt.title('Undirected case')
plt.xlabel('n')
plt.ylabel('clustering coefficient')
plt.legend()
plt.show()

In [None]:
# directed case
# Log-log scatter of both coefficients against n; the labels and legend make
# the two series distinguishable in the figure itself.
plt.loglog(ns, di_graph_t, ls='None', marker='o', color='c',
           label='global clustering coefficient')
plt.loglog(ns, di_graph_ac, ls='None', marker='o', color='r',
           label='mean local clustering coefficient')
plt.title('Directed case')
plt.xlabel('n')
plt.ylabel('clustering coefficient')
plt.legend()
plt.show()

# Problem 2

Generate the sequence of random graphs $G_3^n$ in the Bollobas-Riordan model with $n = 100, 200, ..., 10000$. Find the number of triangles in the graphs of this sequence. Compare the results with the estimate from the Ryabchenko-Samosvat theorem.

In [None]:
from itertools import permutations
# number of triangles around v
def triangles_in_vertex(G, v):
    """Count triangles at ``v`` whose other two corners are neighbours of ``v``.

    For every ordered pair ``(a, b)`` of neighbours of ``v`` with ``v``,
    ``a`` and ``b`` pairwise different, the product of the edge
    multiplicities ``v -> a``, ``v -> b`` and ``a -> b`` is added, so
    parallel edges yield several triangles.

    Args:
        G: Graph providing ``neighbors(node)`` and ``number_of_edges(u, w)``.
        v: Node at which triangles are counted.

    Returns:
        The multiplicity-weighted triangle count (0 if there are none).
    """
    return sum(
        G.number_of_edges(v, a) * G.number_of_edges(v, b) * G.number_of_edges(a, b)
        for a, b in permutations(G.neighbors(v), 2)
        if a != b and a != v and b != v
    )

# number of triangles in the graphs
def get_triangles_count(G):
    """Return the sum of ``triangles_in_vertex(G, v)`` over all nodes of ``G``.

    Args:
        G: Graph providing ``nodes()`` plus what ``triangles_in_vertex`` needs.

    Returns:
        The accumulated triangle count (0 for a graph without nodes).
    """
    return sum(triangles_in_vertex(G, node) for node in G.nodes())

In [None]:
import numpy as np
triangles = []  # triangle count of each sampled graph
ns = []         # matching graph sizes (rebinds the list from Problem 1)
m = 3           # merge factor passed to the generator

# n = 100, 200, ..., 10000 (the stop value 10001 keeps 10000 in the range).
for n in range(100, 10001, 100):
    print(n)  # progress indicator
    G = generateBollobasRiordanGraph(n, m)
    # Counted on the multigraph itself, so parallel edges contribute.
    triangles_count = get_triangles_count(G)
    triangles.append(triangles_count)
    ns.append(n)

Theorem (Ryabchenko, Samosvat). 

Let $m \ge 1,\ \ G^n_m$ be the random graph in the Bollobas-Riordan model and $H$ be some fixed graph.
	Then
	\begin{equation*}
		\mathbb{E}\left(\#(H,G^n_m)\right)\asymp n^{\#(d_i = 0)}(\sqrt{n})^{\#(d_i = 1)}(\ln n)^{\#(d_i = 2)},
	\end{equation*} 
	where $\#(d_i=k)$ is the number of nodes with degree $k$ in $H$.

In [None]:
pylab.rcParams['figure.figsize'] = (10, 10)

# Measured counts as red dots; the (ln n)^3 growth rate that the theorem
# gives for a triangle (three nodes of degree 2) as a green line.
plt.plot(ns, triangles, ls='None', marker='o', color='r', label="#triangles")
plt.plot(ns, np.log(ns) ** 3, color='g', label="(ln n)^3")
plt.xlabel("n", fontsize=20)
plt.ylabel("#triangles", fontsize=20)
plt.title("#triangles (m = 3)", fontsize=20)
plt.legend(fontsize=20)
plt.show()

# Problem 3

Analyse how the local clustering coefficient, averaged over the nodes with the same degree, depends on that degree (in the undirected case) in the Buckley-Osthus model with $n=1000, m=2, a=0.27$.


In [None]:
from collections import defaultdict

In [None]:
def generateSimpleBuckleyOsthusGraph(a,n):
    """Grow a random multigraph in which each new node adds one edge.

    The graph starts as node 0 with a self-loop.  Each later node picks the
    target of its single outgoing edge with probability proportional to
    ``in-degree + a``; the new node itself takes part with weight ``a``, so
    it may attach to itself.

    Args:
        a: Additive weight every node carries on top of its in-degree.
        n: Target number of nodes.  The seed node is always created.

    Returns:
        A ``networkx.MultiDiGraph`` whose nodes are labelled ``0, 1, ...`` in
        order of arrival.
    """
    G = nx.MultiDiGraph()
    G.add_node(0)
    G.add_edge(0,0)
    # weights[i] is the in-degree of node i plus ``a``; node 0 owns the loop.
    weights = [a + 1.0]
    while G.number_of_nodes() < n:
        new_node = G.number_of_nodes()
        G.add_node(new_node)
        weights.append(a)
        # Normalise by the actual sum (equal to (a + 1) * new_node + a) so the
        # vector adds up to 1 regardless of accumulated rounding.
        probabilities = np.array(weights, dtype=float)
        probabilities /= probabilities.sum()
        # Nodes are labelled 0..new_node, so draw the label itself rather than
        # handing the networkx node view to NumPy, which expects a 1-D
        # array-like or an int.
        destination = int(np.random.choice(new_node + 1, p=probabilities))
        G.add_edge(new_node, destination)
        weights[destination] += 1
    return G

In [None]:
def generateBuckleyOsthusGraph(a, n, m):
    """Build an ``n``-node graph by merging a simple Buckley-Osthus graph.

    A graph on ``m * n`` nodes is generated first; node ``u`` is then mapped
    to ``u // m`` and every edge is carried over, so parallel edges and loops
    are preserved.

    Args:
        a: Additive attachment weight forwarded to the simple generator.
        n: Number of nodes of the resulting graph.
        m: Number of consecutive underlying nodes merged into one.

    Returns:
        A ``networkx.MultiDiGraph`` containing the merged edges.
    """
    base = generateSimpleBuckleyOsthusGraph(a, m * n)
    merged = nx.MultiDiGraph()
    for src, dst in base.edges():
        merged.add_edge(src // m, dst // m)
    return merged

In [None]:
# Parameters of the Problem 3 experiment.
n = 1000  # number of nodes of the merged graph
m = 2  # number of underlying nodes merged into each node
a = 0.27  # additive weight in the attachment probability

In [None]:
# Sample the multigraph, then view it as a simple undirected graph.
# NOTE(review): self-loops appear to survive the nx.Graph conversion and
# would count towards nx.degree -- confirm this is intended.
G = generateBuckleyOsthusGraph(a,n,m)
G_simple = nx.Graph(G) 

In [None]:
# Per-node local clustering coefficients and degrees of the undirected view.
clust_coeffs = nx.clustering(G_simple)  # indexed by node further below
degrees = nx.degree(G_simple)  # iterated as (node, degree) pairs further below

In [None]:
# Inspect the raw per-node clustering coefficients.
print(clust_coeffs)

In [None]:
# Inspect the raw per-node degrees.
print(degrees)

In [None]:
# Log-log scatter: one point per node, degree on x and local clustering on y.
# Both sequences come from G_simple, so they are paired position by position.
plt.loglog([value for _, value in degrees],clust_coeffs.values(),ls='None',marker='o')

In [None]:
unique_degrees = np.unique([value for _, value in degrees])
# Bucket the coefficients by degree in a single pass over the nodes instead
# of re-filtering the whole degree view once per distinct degree.
coeffs_by_degree = defaultdict(list)
for node, deg in degrees:
    coeffs_by_degree[deg].append(clust_coeffs[node])
# Iterate unique_degrees (sorted by np.unique) so that clust_dict keeps
# ascending-degree order and its values line up with unique_degrees.
clust_dict = {
    deg: np.mean(coeffs_by_degree[int(deg)]) for deg in unique_degrees
}

In [None]:
# Display the mapping degree -> mean local clustering coefficient.
clust_dict

In [None]:
# Mean local clustering per degree (dots) against the reference curve 1/d.
# The dict view is materialised so matplotlib receives a plain sequence, and
# the labels make the two series distinguishable in the figure.
plt.loglog(unique_degrees, list(clust_dict.values()), ls='None', marker='o',
           label='mean local clustering coefficient')
plt.loglog(unique_degrees, [1/x for x in unique_degrees], label='1/d')
plt.xlabel('degree d')
plt.ylabel('clustering coefficient')
plt.legend()
plt.show()