# Random walks in 1 d

In [None]:
# load useful modules
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline
%config  InlineBackend.figure_format="svg"

## Pure Python version

In [None]:
def sample(N, p):
    """
        Function that generates a sample of a 1-d random walk
        Parameters: N = length of the walk (number of steps)
                    p = probability of right step (+1); a left step (-1)
                        is taken otherwise

        Returns :   Path of the random walk: a list with the N+1 integer
                    positions visited, starting from pos[0] = 0
    """

    # `range` (not the Python-2-only `xrange`) so that the function runs
    # unchanged on both Python 2 and Python 3
    pos = [0] * (N + 1)
    for i in range(1, N + 1):
        # one uniform draw in [0, 1) decides the direction   # pure python
        step = 1 if random.random() < p else -1
        pos[i] = pos[i - 1] + step

    return pos
        

In [None]:
# useful: display the docstring of `sample`
help(sample)

In [None]:
def averages(N, iterations, p):
    """
        Function that computes averages over many iterations of the random walk
        Parameters: N = length of the walk
                    iterations = Number of iterations to average
                    p = probability of right step

        Returns :   mean = mean position as a function of time
                    var = variance of position as a function of time
                    (both are lists of N+1 floats, one entry per time step)
    """

    aver = [0.0] * (N + 1)     # running sum of the position at each time
    aver2 = [0.0] * (N + 1)    # running sum of the squared position

    # `range` (not the Python-2-only `xrange`) keeps this runnable on Python 3
    for _ in range(iterations):
        pos = sample(N, p)
        for j, x in enumerate(pos):
            aver[j] += x
            aver2[j] += x * x

    norm = float(iterations)
    mean = [total / norm for total in aver]
    # variance = <x^2> - <x>^2
    var = [total2 / norm - m * m for total2, m in zip(aver2, mean)]

    return mean, var

In [None]:
def main(N, iterations, p):
    """
        Main function: simulated average position and variance as a function
        of time, together with the theoretical prediction for each
        Parameters: N = length of the walk
                    iterations = Number of iterations to average
                    p = probability of right step
        Returns: time, mean, mean theoretical, variance, variance theoretical
    """

    # simulated statistics
    mean, var = averages(N, iterations, p)

    # time axis 0, 1, ..., N as a numpy array
    t = np.arange(N + 1)

    # theoretical values: mean grows as (p - q) per step, variance as 4 p q
    q = 1.0 - p
    drift = p - q
    spread = 4.0*p*q
    mean_theo = drift*t
    var_theo = spread*t

    return t, mean, mean_theo, var, var_theo
  

In [None]:
def plot_walk(skip, *arg):
    """
        Plots results in two side-by-side panels (average position, variance)
        Trick: Variable number of arguments, from expanding a tuple
        Parameters:  skip = number of points in real data to skip (clearer figure);
                            it is used as the slice step of the simulated data
        Assumes arguments in the order t, mean, mean_theo, var, var_theo
    """
    t, mean, mean_theo, var, var_theo = arg
    plt.figure(figsize=(8, 4))

    # (title, y-axis label, simulated data, theoretical curve) for each panel
    panels = (
        ("Average position", "Average", mean, mean_theo),
        ("Variance", "Variance", var, var_theo),
    )

    for index, (title, ylabel, measured, theory) in enumerate(panels, 1):
        plt.subplot(1, 2, index)
        plt.title(title)
        # red dots: every skip-th simulated point; black line: theory
        plt.plot(t[::skip], measured[::skip], "or", t, theory, "k")
        plt.xlabel("Time")
        plt.ylabel(ylabel)

In [None]:
# Run simulations (pure Python version): walks of N = 10000 steps,
# averaged over 100 iterations, with right-step probability p = 0.55.
# t = time axis, m / v = simulated mean / variance, mt / vt = theoretical values
t, m, mt, v, vt = main(10000, 100, 0.55)

In [None]:
# plot with skip = 1: the slice step is 1, so every simulated point is drawn
skip = 1
plot_walk(skip, t, m, mt, v, vt)

In [None]:
# plot with skip = 500: only every 500th simulated point is drawn (clearer figure)
skip = 500
plot_walk(skip, t, m, mt, v, vt)

In [None]:
# The variance is quite noisy. Increase the number of iterations from 100 to 500
t, m, mt, v, vt = main(10000, 500, 0.55)

In [None]:
# plot the 500-iteration results, drawing only every 500th simulated point
skip = 500
plot_walk(skip, t, m, mt, v, vt)

Much better. The algorithm works. Now let us benchmark it using the simple `%timeit`

In [None]:
%timeit main(10000, 500, 0.55)

Could be OK, but it is still of the order of seconds.

Let us try to improve it, first with a simple numpy version

## Numpy version

In [None]:
def sample_np(N, p):
    """
        Numpy version of the random walk sampler
        Parameters: N = length of the walk
                    p = probability of right step

        Returns :   Path of the random walk (numpy array with N+1 positions,
                    the first one being 0)
    """
    # boolean mask: True where the uniform draw falls below p (right step)
    going_right = np.random.rand(N + 1) < p

    # map True -> +1 and False -> -1
    steps = going_right * 2 - 1

    # the walk starts at the origin, so the first entry is not a real step
    steps[0] = 0

    # positions are the running total of the steps
    return np.cumsum(steps)

In [None]:
def averages_np(N, iterations, p):
    """
        Function that computes averages over many iterations of the random walk
        (numpy version: whole paths are accumulated at once)
        Parameters: N = length of the walk
                    iterations = Number of iterations to average
                    p = probability of right step

        Returns :   mean = mean position as a function of time
                    var = variance of position as a function of time
                    (both are numpy arrays of length N+1)
    """

    aver = np.zeros(N+1)     # running sum of the position at each time
    aver2 = np.zeros(N+1)    # running sum of the squared position

    # `range` (not the Python-2-only `xrange`) keeps this runnable on Python 3
    for _ in range(iterations):
        pos = sample_np(N, p)
        aver += pos
        aver2 += pos*pos

    norm = float(iterations)
    mean = aver/norm
    # variance = <x^2> - <x>^2
    var = aver2/norm - mean*mean

    return mean, var

In [None]:
def main_np(N, iterations, p):
    """
        Main function (numpy version): simulated average position and variance
        as a function of time, together with the theoretical prediction for each
        Parameters: N = length of the walk
                    iterations = Number of iterations to average
                    p = probability of right step
        Returns: time, mean, mean theoretical, variance, variance theoretical
    """

    # simulated statistics
    mean, var = averages_np(N, iterations, p)

    # time axis 0, 1, ..., N as a numpy array
    t = np.arange(N + 1)

    # theoretical values: mean grows as (p - q) per step, variance as 4 p q
    q = 1.0 - p
    drift = p - q
    spread = 4.0*p*q
    mean_theo = drift*t
    var_theo = spread*t

    return t, mean, mean_theo, var, var_theo

In [None]:
# Run simulations (numpy version): walks of N = 10000 steps,
# averaged over 1000 iterations, with right-step probability p = 0.55
t, m, mt, v, vt = main_np(10000, 1000, 0.55)

In [None]:
# plot the numpy results, drawing only every 500th simulated point
skip = 500
plot_walk(skip, t, m, mt, v, vt)

Seems ok. Let us benchmark it

In [None]:
%timeit main_np(10000, 1000, 0.55)

Notice how it is much faster: I used 1000 iterations here, versus 500 for the pure Python version!!

## Profiling the pure python code

### `cProfile`

In [None]:
%%prun -s cumulative -q -l 10 -T Prof1

main(10000, 500, 0.55)

In [None]:
!more Prof1

In [None]:
%%prun -s cumulative -q -l 10 -T Prof1

main(50000, 500, 0.55)

In [None]:
!more Prof1

### `line_profiler`

Remember: you must install line profiler using

    pip install line_profiler

In [None]:
%load_ext line_profiler

In [None]:
%lprun -T lprof0 -f main main(10000, 1000, 0.55)

`averages` is costly: look inside

In [None]:
%lprun -T lprof0 -f averages main(10000, 1000, 0.55)

The function `sample` appears to be very costly. We should try to improve it

## Simple numba optimization

In [None]:
# import jit from numba
from numba import jit

In [None]:
@jit
def sample_nb(N, p):
    """
        Function that generates a sample of a random walk
        (same algorithm as the pure Python version, decorated with numba's jit)
        Parameters: N = length of the walk
                    p = probability of right step

        Returns :   Path of the random walk: list with the N+1 positions
                    visited, starting from pos[0] = 0
    """

    # `range` (not the Python-2-only `xrange`) keeps this runnable on Python 3
    pos = [0 for i in range(N+1)]
    for i in range(1, N+1):
        # one uniform draw in [0, 1) decides the direction of the step
        if random.random() < p:
            step = 1
        else:
            step = -1
        pos[i] = pos[i-1] + step

    return pos
        
    
@jit
def averages_nb(N, iterations, p):
    """
        Function that computes averages over many iterations of the random walk
        (same algorithm as the pure Python version, decorated with numba's jit)
        Parameters: N = length of the walk
                    iterations = Number of iterations to average
                    p = probability of right step

        Returns :   mean = mean position as a function of time
                    var = variance of position as a function of time
    """

    # `range` (not the Python-2-only `xrange`) keeps this runnable on Python 3
    aver = [0.0 for i in range(N+1)]     # running sum of the position
    aver2 = [0.0 for i in range(N+1)]    # running sum of the squared position

    for i in range(iterations):
        pos = sample_nb(N, p)
        for j in range(0, N+1):
            aver[j] += pos[j]
            aver2[j] += (pos[j]*pos[j])

    mean = [x/float(iterations) for x in aver]
    # variance = <x^2> - <x>^2
    var = [aver2[i]/float(iterations) - mean[i]*mean[i] for i in range(0, N+1)]

    return mean, var

@jit
def main_nb(N, iterations, p):
    """
        Main function (numba version): simulated average position and variance
        as a function of time, together with the theoretical prediction for each
        Parameters: N = length of the walk
                    iterations = Number of iterations to average
                    p = probability of right step
        Returns: time, mean, mean theoretical, variance, variance theoretical
    """

    # simulated statistics
    mean, var = averages_nb(N, iterations, p)

    # time axis 0, 1, ..., N as a numpy array
    t = np.arange(0, N + 1)

    # theoretical values: mean grows as (p - q) per step, variance as 4 p q
    q = 1.0 - p
    drift = p - q
    spread = 4.0*p*q
    mean_theo = drift*t
    var_theo = spread*t

    return t, mean, mean_theo, var, var_theo
  

In [None]:
# try the numba-decorated sampler: one walk of 10000 steps with p = 0.55
sample_nb(10000, 0.55)