# 10-02: Stationary States of Random Walks

*January 11 2023*  

In the second practice session we investigate the stationary state of random walks in networks. With this we will lay the foundation for the analytical study of diffusion speed in the upcoming lecture (and in exercise sheet 8).

In [1]:
import pathpy as pp
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

plt.style.use('default')
sns.set_style("whitegrid")

from numpy import linalg as npl
import scipy as sp

In [2]:
def transition_matrix(network, weight=True):
    A = network.adjacency_matrix(weight=weight)
    D = A.sum(axis=1)
    T = sp.sparse.csr_matrix((network.number_of_nodes(), network.number_of_nodes()))
    for i in range(network.number_of_nodes()):
        T[i,:] = A[i,:]/D[i]
    return T.todense()

def visitation_probabilities(network, initial_dist, t):
    T = transition_matrix(network)
    p_t = np.dot(initial_dist, npl.matrix_power(T,t))
    return p_t

def stationary_distribution(network):
    T =  transition_matrix(network)
    eigenvalues, eigenvectors = sp.linalg.eig(T, left=True, right=False)
    # With the following line we take the **leading eigenvector**, i.e. the eigenvector
    # corresponding to the **largest eigenvalue** of one
    # numpy.argsort returns an array of indices that would sort an array
    # note the multi-dimensional slicing :,x, which returns all entries 
    # in column x
    pi = eigenvectors[:,np.argsort(-eigenvalues)][:,0]

    # We make sure that the eigenvector is normalised
    pi = pi/sum(pi)
    return pi

## Periodicity and Existence of Stationary Distribution

In [3]:
n = pp.Network(directed=True)
for i in range(7):
    n.add_edge(str(i), str((i+1)%7))
n.plot(edge_color='gray')

In [4]:
initial_dist = [0]*7
initial_dist[0] = 1

for i in range(20):
    pi = visitation_probabilities(n, initial_dist, i)
    print("visitation probabilities after {0} steps = {1}".format(i, pi))

visitation probabilities after 0 steps = [[1. 0. 0. 0. 0. 0. 0.]]
visitation probabilities after 1 steps = [[0. 1. 0. 0. 0. 0. 0.]]
visitation probabilities after 2 steps = [[0. 0. 1. 0. 0. 0. 0.]]
visitation probabilities after 3 steps = [[0. 0. 0. 1. 0. 0. 0.]]
visitation probabilities after 4 steps = [[0. 0. 0. 0. 1. 0. 0.]]
visitation probabilities after 5 steps = [[0. 0. 0. 0. 0. 1. 0.]]
visitation probabilities after 6 steps = [[0. 0. 0. 0. 0. 0. 1.]]
visitation probabilities after 7 steps = [[1. 0. 0. 0. 0. 0. 0.]]
visitation probabilities after 8 steps = [[0. 1. 0. 0. 0. 0. 0.]]
visitation probabilities after 9 steps = [[0. 0. 1. 0. 0. 0. 0.]]
visitation probabilities after 10 steps = [[0. 0. 0. 1. 0. 0. 0.]]
visitation probabilities after 11 steps = [[0. 0. 0. 0. 1. 0. 0.]]
visitation probabilities after 12 steps = [[0. 0. 0. 0. 0. 1. 0.]]
visitation probabilities after 13 steps = [[0. 0. 0. 0. 0. 0. 1.]]
visitation probabilities after 14 steps = [[1. 0. 0. 0. 0. 0. 0.]]
visit

  self._set_arrayXarray(i, j, x)


If we calculate the eigenvalues and associated eigenvectors for the transition matrix, we find that - different from the example in notebook 10-01 - we get complex eigenvalues and eigenvectors. Moreover, the (real) eigenvalue one is not the largest eigenvalue of the transition matrix (note that we sort the eigenvalues based on their absolute value, i.e. their distance from the origin).

In [5]:
T =  transition_matrix(n)
eigenvalues, eigenvectors = sp.linalg.eig(T, left=True, right=False)
eigenvectors = eigenvectors[:,np.argsort(-eigenvalues)]
print(eigenvalues)
print('---')
print(eigenvectors)

[-0.90096887+0.43388374j -0.90096887-0.43388374j -0.22252093+0.97492791j
 -0.22252093-0.97492791j  1.        +0.j          0.6234898 +0.78183148j
  0.6234898 -0.78183148j]
---
[[ 0.37796447+0.j         -0.08410501-0.36848811j -0.08410501+0.36848811j
   0.23565699+0.29550452j  0.23565699-0.29550452j  0.37796447+0.j
   0.37796447-0.j        ]
 [ 0.37796447+0.j          0.23565699-0.29550452j  0.23565699+0.29550452j
  -0.34053422+0.16399264j -0.34053422-0.16399264j -0.34053422+0.16399264j
  -0.34053422-0.16399264j]
 [ 0.37796447+0.j          0.37796447+0.j          0.37796447-0.j
  -0.08410501-0.36848811j -0.08410501+0.36848811j  0.23565699-0.29550452j
   0.23565699+0.29550452j]
 [ 0.37796447+0.j          0.23565699+0.29550452j  0.23565699-0.29550452j
   0.37796447+0.j          0.37796447-0.j         -0.08410501+0.36848811j
  -0.08410501-0.36848811j]
 [ 0.37796447+0.j         -0.08410501+0.36848811j -0.08410501-0.36848811j
  -0.08410501+0.36848811j -0.08410501-0.36848811j -0.08410501-0.36

What happens if we add a single self-loop, e.g. by connecting node 0 to itself? We still get some complex values but the real eigenvalue one is the largest eigenvalue of the transition matrix.

In [6]:
n.add_edge('0','0')

T =  transition_matrix(n)
eigenvalues, eigenvectors = sp.linalg.eig(T, left=True, right=False)
eigenvectors = eigenvectors[:,np.argsort(-eigenvalues)]
print(eigenvalues)
print('---')
print(eigenvectors)

[ 1.        +0.j          0.64438276+0.69026752j  0.64438276-0.69026752j
 -0.13703613+0.86913433j -0.13703613-0.86913433j -0.75734663+0.38822447j
 -0.75734663-0.38822447j]
---
[[-0.63245553+0.j         -0.55185636+0.j         -0.55185636-0.j
  -0.26345667+0.35944466j -0.26345667-0.35944466j -0.37846194+0.11685575j
  -0.37846194-0.11685575j]
 [-0.31622777+0.j         -0.19939855-0.21359718j -0.19939855+0.21359718j
  -0.17845009-0.17969886j -0.17845009+0.17969886j  0.16654924-0.16252316j
   0.16654924+0.16252316j]
 [-0.31622777+0.j          0.02125181-0.30871052j  0.02125181+0.30871052j
   0.23332829-0.16853054j  0.23332829+0.16853054j -0.08703675+0.2592115j
  -0.08703675-0.2592115j ]
 [-0.31622777+0.j          0.25433168-0.20663748j  0.25433168+0.20663748j
   0.14790133+0.29178012j  0.14790133-0.29178012j -0.04792991-0.31769328j
  -0.04792991+0.31769328j]
 [-0.31622777+0.j          0.34375086+0.04755338j  0.34375086-0.04755338j
  -0.35375014+0.11439518j -0.35375014-0.11439518j  0.220402

If we calculate the visitation probabilities of a random walker, we find that the visitation probabilities converge to a stationary state.

In [7]:
for i in range(20):
    pi = visitation_probabilities(n, initial_dist, i)
    print("visitation probabilities after {0} steps = {1}".format(i, pi))

visitation probabilities after 0 steps = [[1. 0. 0. 0. 0. 0. 0.]]
visitation probabilities after 1 steps = [[0.5 0.5 0.  0.  0.  0.  0. ]]
visitation probabilities after 2 steps = [[0.25 0.25 0.5  0.   0.   0.   0.  ]]
visitation probabilities after 3 steps = [[0.125 0.125 0.25  0.5   0.    0.    0.   ]]
visitation probabilities after 4 steps = [[0.0625 0.0625 0.125  0.25   0.5    0.     0.    ]]
visitation probabilities after 5 steps = [[0.03125 0.03125 0.0625  0.125   0.25    0.5     0.     ]]
visitation probabilities after 6 steps = [[0.015625 0.015625 0.03125  0.0625   0.125    0.25     0.5     ]]
visitation probabilities after 7 steps = [[0.5078125 0.0078125 0.015625  0.03125   0.0625    0.125     0.25     ]]
visitation probabilities after 8 steps = [[0.50390625 0.25390625 0.0078125  0.015625   0.03125    0.0625
  0.125     ]]
visitation probabilities after 9 steps = [[0.37695312 0.25195312 0.25390625 0.0078125  0.015625   0.03125
  0.0625    ]]
visitation probabilities after 10 s

## Uniqueness of the Stationary Distribution

In the lecture we have seen that the uniqueness of the stationary distribution for a random walk in a network is connected to the notion of strong connectivity in the network.

An undirected network that is connected is necessarily strongly connected, so here the stationary distribution is unique, i.e. it does not depend on the initial distribution.

Let us test this in a simple connected, undirected example network with $10$ nodes and $15$ links. We compute the visitation probabilities after $t=10$ and $t=100$ steps for different initial distributions.

Do the visitation probabilities converge to a stationary distribution? How do the visitation probabilities after $100$ steps differ across different initial distributions?

In [9]:
er_net = pp.generators.random_graphs.ER_nm(n = 10, m = 15)
pp.algorithms.components.largest_connected_component(er_net)

n = er_net.number_of_nodes()

for i in range(n):
    print('starting in node {0}'.format(i))
    initial_dist = [0]*n
    initial_dist[i] = 1
    pi = visitation_probabilities(er_net, initial_dist, 10)
    print("visitation probabilities after 10 steps = {0}".format(pi))
    pi = visitation_probabilities(er_net, initial_dist, 500)
    print("visitation probabilities after 500 steps = {0}".format(pi))
    print('------')

starting in node 0
visitation probabilities after 10 steps = [[0.0472863  0.11922356 0.1294533  0.11688773 0.0472863  0.03637687
  0.1447661  0.15123647 0.08660607 0.1208773 ]]
visitation probabilities after 500 steps = [[0.03333333 0.1        0.13333333 0.13333333 0.03333333 0.03333333
  0.13333333 0.13333333 0.1        0.16666667]]
------
starting in node 1
visitation probabilities after 10 steps = [[0.03974119 0.1095474  0.13188616 0.12650302 0.03974119 0.0350869
  0.13917455 0.14221006 0.09376375 0.14234579]]
visitation probabilities after 500 steps = [[0.03333333 0.1        0.13333333 0.13333333 0.03333333 0.03333333
  0.13333333 0.13333333 0.1        0.16666667]]
------
starting in node 2
visitation probabilities after 10 steps = [[0.03236332 0.09891462 0.13677995 0.13065943 0.03236332 0.0349349
  0.13230512 0.13532564 0.09882453 0.16752917]]
visitation probabilities after 500 steps = [[0.03333333 0.1        0.13333333 0.13333333 0.03333333 0.03333333
  0.13333333 0.13333333 0.1 

In [10]:
T =  transition_matrix(er_net)
eigenvalues, eigenvectors = sp.linalg.eig(T, left=True, right=False)
print(eigenvalues)

[ 1.00000000e+00+0.j  6.18821989e-01+0.j  4.60961892e-01+0.j
 -7.90317692e-01+0.j -6.53933470e-01+0.j -2.90973576e-01+0.j
 -4.79583879e-01+0.j  1.35024737e-01+0.j  1.67104475e-18+0.j
 -1.60170306e-17+0.j]


Here we find that the stationary distribution is unique, i.e. if the visitation probabilities converge to a stationary state, this stationary state does not depend on the initial state (e.g. where we started the random walk). We also say that, due to the Markov property, the underlying Markov chain forgets whether it started.

Clearly, if we have a network with two connected components, the stationary distribution depends on the initial distribution, since the random walker cannot escape the component that it started in:

In [11]:
er_1 = pp.generators.random_graphs.ER_nm(n = 5, m = 7, node_uids=[str(i) for i in range(5)])
er_2 = pp.generators.random_graphs.ER_nm(n = 5, m = 8, node_uids=[str(i) for i in range(5, 10)])
er_net = er_1 + er_2
pp.plot(er_net, edge_color='gray')
n = 10

for i in range(n):
    print('starting in node {0}'.format(i))
    initial_dist = [0]*n
    initial_dist[i] = 1
    pi = visitation_probabilities(er_net, initial_dist, 10)
    print("visitation probabilities after 10 steps = {0}".format(pi))
    pi = visitation_probabilities(er_net, initial_dist, 500)
    print("visitation probabilities after 500 steps = {0}".format(pi))
    print('------')

starting in node 0
visitation probabilities after 10 steps = [[0.14416899 0.28522601 0.14203358 0.21622981 0.21234162 0.
  0.         0.         0.         0.        ]]
visitation probabilities after 500 steps = [[0.14285714 0.28571429 0.14285714 0.21428571 0.21428571 0.
  0.         0.         0.         0.        ]]
------
starting in node 1
visitation probabilities after 10 steps = [[0.142613   0.28620257 0.142613   0.21428571 0.21428571 0.
  0.         0.         0.         0.        ]]
visitation probabilities after 500 steps = [[0.14285714 0.28571429 0.14285714 0.21428571 0.21428571 0.
  0.         0.         0.         0.        ]]
------
starting in node 2
visitation probabilities after 10 steps = [[0.14203358 0.28522601 0.14416899 0.21234162 0.21622981 0.
  0.         0.         0.         0.        ]]
visitation probabilities after 500 steps = [[0.14285714 0.28571429 0.14285714 0.21428571 0.21428571 0.
  0.         0.         0.         0.        ]]
------
starting in node 3


The fact that there are two different stationary distributions, that depend on the initial distribution, is reflected by the fact that there are two ones in the eigenvalue sequence.

In [12]:
T =  transition_matrix(er_net)
eigenvalues, eigenvectors = sp.linalg.eig(T, left=True, right=False)
print(eigenvalues)

[ 1.        +0.j  0.27429189+0.j -0.16666667+0.j -0.5       +0.j
 -0.60762522+0.j  1.        +0.j -0.56433349+0.j -0.25      +0.j
  0.14766682+0.j -0.33333333+0.j]


For directed networks that are weakly but not strongly connected we may find the same behaviour, i.e. there can be cases where no unique stationary distribution exists.

In [16]:
er_1 = pp.generators.random_graphs.ER_nm(n = 5, m = 10, directed=True, node_uids=[str(i) for i in range(5)])
er_2 = pp.generators.random_graphs.ER_nm(n = 5, m = 10, directed=True, node_uids=[str(i) for i in range(5, 10)])
er_3 = pp.generators.random_graphs.ER_nm(n = 5, m = 10, directed=True, node_uids=[str(i) for i in range(10, 15)])
er_net = er_1 + er_2 + er_3
er_net.add_edge('4', '5')
er_net.add_edge('4', '10')
pp.plot(er_net, edge_color='gray')
n = 15

for i in range(n):
    print('starting in node {0}'.format(i))
    initial_dist = [0]*n
    initial_dist[i] = 1
    pi = visitation_probabilities(er_net, initial_dist, 10)
    print("visitation probabilities after 10 steps = {0}".format(pi))
    pi = visitation_probabilities(er_net, initial_dist, 500)
    print("visitation probabilities after 500 steps = {0}".format(pi))
    print('------')

starting in node 0
visitation probabilities after 10 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
visitation probabilities after 500 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
------
starting in node 1
visitation probabilities after 10 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
visitation probabilities after 500 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
------
starting in node 2
visitation probabilities after 10 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
visitation probabilities after 500 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
------
starting in node 3
visitation probabilities after 10 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
visitation probabilities after 500 steps = [[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]]
------
starting in node 4
visitation probabilities 

In [17]:
T =  transition_matrix(er_net)
eigenvalues, eigenvectors = sp.linalg.eig(T, left=True, right=False)
print(eigenvalues)

ValueError: array must not contain infs or NaNs