In [1]:
import arviz
import numpy as np
import numba
import matplotlib.pyplot as plt
from scipy.spatial.distance import hamming
from time import time

#Import the helper functions and all the samplers
from BasicFunctions import *
from TabuSampler import tabu_sampler
from ZanellaSampler import zanella_sampler
from RandomWalkSampler import rw_sampler
from PointwiseSampler import pointwise_sampler 

#Autocorrelation function as implemented by Power and Goldman
@numba.njit()
def autocorr(x, lags):
    """Sample autocorrelation of ``x`` evaluated at every lag in ``lags``.

    Parameters
    ----------
    x : array
        The series whose autocorrelation is computed.
    lags : iterable of int
        The lags to evaluate. A lag of 0 yields exactly 1.0 without touching
        the data.

    Returns
    -------
    numpy array with one autocorrelation value per entry of ``lags``, in the
    same order.
    """
    n = len(x)
    centred = x - np.mean(x)
    variance = np.var(x)

    coeffs = []
    for lag in lags:
        if lag == 0:
            #Special-cased: centred[:-0] would be an empty slice
            coeffs.append(1.)
        else:
            #Normalised by the full series length n, not by n - lag
            coeffs.append(np.sum(centred[lag:] * centred[:-lag]) / n / variance)
    return np.array(coeffs)

In [2]:
#Barker balancing function; handed to the pointwise, Zanella and Tabu samplers
def g(t):
    """Barker balancing function: map ``t`` to ``t / (1 + t)``."""
    return t / (1 + t)

In [3]:
## First plot ESS of all samplers compared for different beta values
#
#For every beta in beta_array, num_avg databases are generated and each of the
#four samplers is run from the same random starting state. For every run the
#effective sample size of the returned Hamming trace is divided by the runtime
#the sampler reports, and the per-beta mean over the runs is stored in
#ESS_time_array (one row per sampler, one column per beta).
#
#NOTE(review): no random seed is set in this cell, so the numbers differ from
#run to run -- confirm how the samplers draw their random numbers before seeding.

#Some global parameters used
lmbd = 50
l = 15 #The number of categories for each record
#NOTE(review): p_cat has 5 entries while l is 15 -- confirm against
#create_databases which of the two is really the number of categories.
p_cat = np.array([0.05, 0.15, 0.4, 0.2, 0.2]) #The pval vector for each category

#Parameters for the simulation
beta_array = np.array([0.01, 0.05, 0.10, 0.20, 0.30])
num_beta = len(beta_array) #Derived, so it cannot drift from beta_array
num_avg = 5 #Number of repetitions averaged for each beta value
num_samplers = 4 #Rows: 0 = random walk, 1 = pointwise, 2 = Zanella, 3 = Tabu
ESS_time_array = np.zeros((num_samplers, num_beta))

#Simulation parameters
T_z = 30
thin_rate_z = 0.015
T_t = 60
thin_rate_t = 0.03
N = 2000


def ess_per_runtime(samples, runtime):
    """Return arviz.ess(samples) divided by the runtime reported by a sampler."""
    return arviz.ess(samples)/runtime


for b, beta in enumerate(beta_array):
    #The placeholder array for each sampler, sized by num_avg so that the mean
    #below never includes unused zero entries
    results_iter = np.zeros((num_samplers, num_avg))

    for run in range(num_avg):
        #Creating the sampling arrays
        p_match, x, y, M_truth, M_reverse_truth = create_databases(lmbd, l, p_cat, beta)
        N_1 = len(x)
        N_2 = len(y)
        num_gens = N_1*N_2

        #The starting state, shared by all four samplers in this run
        M_initial, M_reverse_initial = random_state(N_1, N_2)

        #Running each of the simulators and save the results.
        #The trace is stored as hamming_trace so that the imported
        #scipy.spatial.distance.hamming function is not overwritten.

        #Random walk
        trace, energy, hamming_trace, num_acc, runtime = rw_sampler(N, N_1, N_2, lmbd, p_match, M_truth, M_initial, M_reverse_initial,
                                                                    p_cat, l, beta, x, y, print_rate=10)
        results_iter[0, run] = ess_per_runtime(hamming_trace, runtime)

        #Pointwise sampler
        trace, energy, hamming_trace, num_acc, runtime = pointwise_sampler(N, N_1, N_2, lmbd, p_match, g, M_truth, M_initial,
                                                                           M_reverse_initial, p_cat, l, beta, x, y, print_rate=10)
        results_iter[1, run] = ess_per_runtime(hamming_trace, runtime)

        #Zanella sampler
        trace, energy, hamming_trace, num_iter, runtime = zanella_sampler(N_1, N_2, num_gens, M_initial, M_reverse_initial, g, T_z, M_truth, thin_rate_z, 10, lmbd,
                                                                          p_match, l, p_cat, beta, x, y)
        results_iter[2, run] = ess_per_runtime(hamming_trace, runtime)

        #Tabu sampler
        trace, energy, hamming_trace, alpha, num_iter, runtime = tabu_sampler(N_1, N_2, num_gens, M_initial, M_reverse_initial, g, T_t, M_truth, thin_rate_t,
                                                                              10, lmbd, p_match, l, p_cat, beta, x, y)
        results_iter[3, run] = ess_per_runtime(hamming_trace, runtime)

    #Taking the average over the num_avg runs and put that into the result array
    ESS_time_array[:, b] = results_iter.mean(axis=1)

#Printing the final result
print("---------------------------------------------------")
print("The final array:")
print(ESS_time_array)
print("---------------------------------------------------")
        
        
        




Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.073
Runtime:  7.18
Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.9995
Runtime:  70.82
Runtime:  6.51
Average excursion length:  1.0
Runtime:  6.18
Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.038
Runtime:  5.62
Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.5015
Runtime:  60.49
Runtime:  8.0
Average excursion length:  1.0
Runtime:  8.03
Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.04
Runtime:  5.2
Percent: [--------------------------------------->] 100%
Acceptance ratio:  1.0
Runtime:  77.92
Runtime:  10.68
Percent: [--------------------------------------> ] 97%Average excursion length:  1.0
Runtime:  10.34
Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.032
Runtime:  6.26
Percent: [--------------------------------------->] 100%
Acceptance ratio:  0.999