# Growing random graph model

<b>Click on the title and rename the file with your name!</b>

In this homework you will implement a growing random graph model and test how fast its degree distribution converges.

In [1]:
import numpy as np

In [2]:
def GrowingRandomGraphs(N, gamma, M):
    """Sample a growing random graph on the vertices 0, 1, ..., N-1.

    The process starts with the single vertex 0. Given the graph G_k on the
    vertices 0, 1, ..., k-1, a new vertex k is added and, for each i in
    {0, 1, ..., k-1}, the edge {k, i} is created with probability
        min(1, M * ((degree_{G_k}(i) + 1) / (2 e(G_k) + k)) ** gamma),
    where e(G_k) is the number of edges of G_k. One fresh uniform random
    number is drawn per candidate edge.

    N     -- number of vertices of the final graph (for N <= 1 the result is
             just the single vertex 0)
    gamma -- exponent applied to the degree-based ratio
    M     -- multiplier applied to the powered ratio before capping at 1

    Returns the generated graph; every edge carries as its label the
    probability with which it was created.
    """
    # It starts with a single vertex 0.
    G = Graph()
    G.add_vertices([0])

    for k in range(1, N):
        # Snapshot of G_k (vertices 0..k-1) taken before vertex k is inserted.
        # labels=True yields a {vertex: degree} dict, so no assumption about
        # the relative ordering of two separate lists is needed.
        d_Gk = G.degree(labels=True)
        # number of edges of G_k
        e_Gk = G.size()
        # Collect only the edges incident to the new vertex k. Starting from
        # an empty list each round avoids re-submitting every earlier edge.
        new_edges = []
        for i in G.vertices():
            p = min(1, M*((1+d_Gk.get(i, 0))/(2*e_Gk+k))**gamma)
            if p >= np.random.rand():
                new_edges.append((k, i, p))
        # add vertex k and the edges created for it
        G.add_vertices([k])
        G.add_edges(new_edges)
    return G

$$p = \min (1, M \left(\frac{d_{G_{k}}(i)+1}{2e_{G_k}+k}\right)^{\gamma})$$

Next you have to define the degree distribution and the total variation distance between probability distributions. The degree distribution is practically the same as the degree histogram, except that you have to divide each element by the number of vertices to get a probability distribution. The total variation distance of probability distributions P and Q is defined as
$$d_{\mathrm{TV}}(P,Q)=\frac{1}{2}\sum_{i=0}^{\infty}|P(i)-Q(i)|.$$
P and Q are given as lists. If they are not of the same length, append $0$'s to the shorter list so that both have the same length.

In [3]:
def DegreeDistribution(G):
    """Return the degree distribution of G as a numpy array.

    Entry i of the result is the fraction of vertices of G having degree i,
    i.e. the probability that a uniformly chosen vertex has degree i.
    """
    # Entry i of the histogram counts the vertices of degree i.
    counts = np.array(G.degree_histogram())
    # Dividing by the number of vertices turns the counts into probabilities.
    n_vertices = G.order()
    return counts / n_vertices

In [4]:
def TotalVariation(P,Q):
    """Return the total variation distance between the distributions P and Q.

    P and Q are sequences of probabilities; the shorter one is extended with
    trailing zeros so that both have the same length, and the result is half
    of the sum of the entrywise absolute differences.
    """
    p = np.array(P)
    q = np.array(Q)
    # Bring both vectors to the common length by zero-padding on the right.
    target = max(len(p), len(q))
    p = np.pad(p, (0, target - len(p)), mode='constant', constant_values=0)
    q = np.pad(q, (0, target - len(q)), mode='constant', constant_values=0)
    # Half of the L1 distance between the two padded vectors.
    return np.abs(p - q).sum() / 2

Later you will also need an induced subgraph to compare two moments of the same graph process. More precisely you will compare the final graph with an intermediate graph. 

In [5]:
def InducedSubgraph(G,n):
    """Return the subgraph of G induced on the vertices 0, 1, ..., n-1.

    G is a graph on N vertices and n is expected to satisfy n < N.
    """
    first_vertices = range(n)
    return G.subgraph(first_vertices)

In [6]:
# Two runs of the process with identical parameters (gamma=1, M=2).
G1, G2 = (GrowingRandomGraphs(N=1000, gamma=1, M=2) for _ in range(2))
# Early stage of the first run: the graph induced on its first 100 vertices.
H = InducedSubgraph(G1, n=100)
# Degree distributions of the two full graphs and of the early stage.
p1, p2, q1 = (DegreeDistribution(g) for g in (G1, G2, H))
print(f'p1:\n{p1}')
print(f'\np2:\n{p2}')
print(f'\nq1:\n{q1}')
# Total variation of the degree distributions of two different copies.
print(f'\nTotalVariation(p1,p2)=\n{TotalVariation(p1,p2)}')
# Total variation of the degree distributions of a graph and a section of it.
print(f'\nTotalVariation(p1,q1)=\n{TotalVariation(p1,q1)}')

p1:
[0.094 0.182 0.176 0.141 0.123 0.074 0.054 0.035 0.024 0.021 0.011 0.007
 0.009 0.004 0.008 0.009 0.006 0.002 0.001 0.001 0.001 0.    0.003 0.
 0.002 0.003 0.002 0.    0.    0.    0.001 0.001 0.001 0.    0.001 0.
 0.    0.    0.001 0.    0.    0.    0.    0.    0.001 0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.001]

p2:
[0.097 0.173 0.19  0.145 0.112 0.063 0.054 0.035 0.032 0.015 0.011 0.019
 0.015 0.008 0.004 0.003 0.002 0.002 0.002 0.004 0.003 0.002 0.    0.
 0.    0.    0.001 0.001 0.001 0.    0.    0.001 0.    0.    0.    0.
 0.001 0.    0.001 0.002 0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.001]

q1:
[0.13 0.12 0.15 0.1  0.15 0.13 0.04 0.03 0.02 0.05 0.   0.01 0.03 0.01
 0.   0.02 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0

In [7]:
# Two runs of the process with identical parameters (gamma=2, M=2).
G1, G2 = (GrowingRandomGraphs(N=1000, gamma=2, M=2) for _ in range(2))
# Early stage of the first run: the graph induced on its first 100 vertices.
H = InducedSubgraph(G1, n=100)
# Degree distributions of the two full graphs and of the early stage.
p1, p2, q1 = (DegreeDistribution(g) for g in (G1, G2, H))
print(f'p1:\n{p1}')
print(f'\np2:\n{p2}')
print(f'\nq1:\n{q1}')
# Total variation of the degree distributions of two different copies.
print(f'\nTotalVariation(p1,p2)=\n{TotalVariation(p1,p2)}')
# Total variation of the degree distributions of a graph and a section of it.
print(f'\nTotalVariation(p1,q1)=\n{TotalVariation(p1,q1)}')

p1:
[0.975 0.019 0.005 0.    0.    0.    0.    0.001]

p2:
[0.975 0.018 0.004 0.001 0.    0.001 0.    0.    0.001]

q1:
[0.85 0.11 0.03 0.   0.   0.   0.   0.01]

TotalVariation(p1,p2)=
0.0030000000000000005

TotalVariation(p1,q1)=
0.125


In [8]:
# Two runs of the process with identical parameters (gamma=0.5, M=2).
G1, G2 = (GrowingRandomGraphs(N=1000, gamma=0.5, M=2) for _ in range(2))
# Early stage of the first run: the graph induced on its first 100 vertices.
H = InducedSubgraph(G1, n=100)
# Degree distributions of the two full graphs and of the early stage.
p1, p2, q1 = (DegreeDistribution(g) for g in (G1, G2, H))
print(f'p1:\n{p1}')
print(f'\np2:\n{p2}')
print(f'\nq1:\n{q1}')
# Total variation of the degree distributions of two different copies.
print(f'\nTotalVariation(p1,p2)=\n{TotalVariation(p1,p2)}')
# Total variation of the degree distributions of a graph and a section of it.
print(f'\nTotalVariation(p1,q1)=\n{TotalVariation(p1,q1)}')

p1:
[0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.001 0.    0.    0.001
 0.001 0.001 0.003 0.003 0.003 0.004 0.01  0.015 0.009 0.009 0.007 0.011
 0.021 0.014 0.018 0.021 0.02  0.022 0.025 0.028 0.032 0.022 0.02  0.03
 0.025 0.024 0.025 0.029 0.028 0.014 0.024 0.018 0.021 0.014 0.014 0.019
 0.019 0.014 0.011 0.017 0.018 0.021 0.011 0.014 0.009 0.013 0.006 0.011
 0.007 0.008 0.009 0.008 0.012 0.014 0.008 0.009 0.007 0.006 0.01  0.005
 0.006 0.006 0.004 0.008 0.004 0.005 0.002 0.009 0.006 0.005 0.002 0.005
 0.002 0.002 0.005 0.005 0.006 0.002 0.002 0.002 0.002 0.005 0.001 0.001
 0.002 0.002 0.002 0.    0.    0.001 0.001 0.001 0.001 0.003 0.001 0.003
 0.    0.001 0.    0.001 0.    0.    0.001 0.    0.    0.    0.001 0.
 0.    0.001 0.    0.    0.001 0.002 0.001 0.001 0.001 0.   

Do you have any intuition what happened for the different choices of gamma? Run more experiments to gain more insights.

The denominator $2e_{G_k}+k$ grows with $k$, so the quotient shrinks. The rate of this shrinkage can be influenced by setting $M\geq1$ or $\gamma\geq1$ to a higher value. However, when $\gamma\leq1$ (or $M$ is big enough), the formula yields a number bigger than one, so an edge is still created with probability one at the later stages of the iteration as well. For this reason, when $M$ or $\gamma$ is big, it does not really matter whether we look at the graph at an early or a later stage, since at later stages edges appear only with decreasingly small probabilities. This can be seen by comparing the case $\gamma=2$ with the case $\gamma=1$, ceteris paribus. In contrast, when we choose $\gamma=0.5$, the total variation between the fully evolved graph and its early induced subgraph is almost 1. We also experienced a significant increase in running time in this case.