In [1]:
import time

import numpy as np

import pylab as plt

# Diverging colormap shared by the figures in this notebook.
# Use the pyplot-level `get_cmap`: `plt.cm.get_cmap` resolves to
# `matplotlib.cm.get_cmap`, which was deprecated in Matplotlib 3.7 and removed
# in 3.9, whereas `pyplot.get_cmap` is still supported.
cm = plt.get_cmap('PuOr')


def color(c, unique_c):
    """Map the value(s) ``c`` to RGBA colour(s) of the module-level colormap ``cm``.

    ``c`` is normalised linearly with ``plt.Normalize`` using the minimum and
    maximum of ``unique_c`` as the limits, and the result is passed to ``cm``.

    Parameters
    ----------
    c : scalar or array-like
        Value(s) to colour.
    unique_c : array-like
        Values whose min and max define the normalisation range.

    Returns
    -------
    Whatever ``cm`` returns for the normalised value(s) (RGBA).
    """
    return cm(plt.Normalize(np.min(unique_c), np.max(unique_c))(c))



2023-12-12 11:05:53,023	INFO worker.py:1489 -- Connecting to existing Ray cluster at address: 192.168.178.154:6379...
2023-12-12 11:05:53,032	INFO worker.py:1664 -- Connected to Ray cluster. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


0,1
Python version:,3.11.5
Ray version:,2.8.1
Dashboard:,http://127.0.0.1:8265


In [None]:
# Load results into arrays of same names.
# Every per-run array is reduced over its last axis (presumably the axis of
# repeated runs -- TODO confirm against the script that wrote the file).
save_file = "experiment_results.npz"

npzfile = np.load(save_file)
run_time_array = npzfile['run_time_array'].mean(-1)
log_Z_mean_array = npzfile['log_Z_mean_array'].mean(-1)
# Root-mean-square of the per-run uncertainties: sqrt(mean(sigma ** 2)).
# The previous expression took sqrt(mean(sqrt(sigma))), which is not an RMS.
# NOTE(review): assumes the stored values are standard deviations; if they are
# variances, use np.sqrt(npzfile['log_Z_uncert_array'].mean(-1)) instead.
log_Z_uncert_array = np.sqrt(np.square(npzfile['log_Z_uncert_array']).mean(-1))
num_likelihood_evals_array = npzfile['num_likelihood_evals_array'].mean(-1)
total_num_samples_array = npzfile['total_num_samples_array'].mean(-1)
total_num_phantom_samples_array = npzfile['total_num_phantom_samples_array'].mean(-1)
s_array = npzfile['s_array']
k_array = npzfile['k_array']
c_array = npzfile['c_array']
true_logZ = npzfile['true_logZ']


# Per-run ratio of samples to likelihood evaluations, then averaged over the last axis.
sample_efficiency_array = (npzfile['total_num_samples_array'] / npzfile['num_likelihood_evals_array']).mean(-1)
# Both ratios below are relative to the first column (index 0 of axis 1), which
# is kept as a length-1 axis (0:1) so that it broadcasts across all columns.
run_time_speed_up_array = run_time_array[:, 0:1] / run_time_array
efficiency_improvement_array = sample_efficiency_array / sample_efficiency_array[:, 0:1]


# After a threshold number of slices, the bias is independent of the number of phantom samples
A crucial component of nested sampling is generating i.i.d. uniform samples from the likelihood constrained prior distribution. When using Markov chain likelihood samplers, such as slice sampling, this is accomplished by sequentially drawing samples from an ergodic Markov chain. A well known problem is that if the number of proposals between acceptances is too low, the samples will exhibit auto-correlation.

We directly observe this below by looking at the bias in the resulting log-evidence estimate as a function of the number of proposal steps between acceptances. Crucially, this property is independent of the number of phantom samples, which forms the crux of our discovery. Each point below corresponds to a particular fraction of phantom samples and number of slices. In general, the bias decreases with increasing number of slices, and increasing phantom fraction. However, after a threshold number of slices, the bias is independent of the phantom fraction. This is a crucial result, as it means that we can use a smaller number of likelihood evaluations to generate a larger number of i.i.d. samples from the likelihood constrained prior distribution, and thus achieve a high sample efficiency.

In [None]:
# Plot bias (estimated log Z minus true log Z, with error bars) vs num slices,
# color coded by phantom fraction.
plt.figure()
# The axis is labelled "Bias", so plot the bias itself rather than raw log Z;
# the dashed reference line then sits at zero.
bias_array = log_Z_mean_array - true_logZ
unique_c = np.unique(k_array / (k_array + 1))
for i, k in enumerate(k_array):
    phantom_fraction = k / (k + 1)
    plt.errorbar(s_array, bias_array[:, i], yerr=log_Z_uncert_array[:, i], fmt='o',
                 c=color(phantom_fraction, unique_c),
                 label=f"Phantom Fraction: {phantom_fraction * 100:.0f}%")
plt.xlabel("Num Slices")
plt.ylabel("Bias (nats)")
plt.gca().axhline(0, color='k', linestyle='--')

# Put a red box around the region of interest: all points whose number of
# slices is >= num_slice_factor_threshold. The box is made a little bigger than
# the y error bars (1.2x) and slightly wider than the x range.
num_slice_factor_threshold = 4

mask = (s_array >= num_slice_factor_threshold)
# np.min / np.max raise on empty input, so only draw the box when at least one
# slice count passes the threshold.
if mask.any():
    x = s_array[mask]
    y = bias_array[mask]
    yerr = log_Z_uncert_array[mask]

    lower_left = [0.97 * np.min(x), np.min(y - 1.2 * yerr)]
    upper_right = [1.01 * np.max(x), np.max(y + 1.2 * yerr)]
    plt.gca().add_patch(plt.Rectangle(lower_left, upper_right[0] - lower_left[0], upper_right[1] - lower_left[1],
                                      fill=False, edgecolor='r', lw=2))

plt.legend(loc='lower right')
# plt.savefig("bias_vs_num_slices.png", dpi=300)
# plt.savefig("bias_vs_num_slices.pdf", dpi=300)
plt.show()

In [None]:
import pylab as plt

num_slice_factor_threshold = 4

# For all runs in the consistent region (number of slices >=
# num_slice_factor_threshold), plot bias vs num likelihood evals, color coded
# by number of slices.
plt.figure()
# Bias = estimated log Z minus true log Z, matching the "Bias (nats)" axis label.
bias_array = log_Z_mean_array - true_logZ
colors = s_array
# Normalise colours over every slice count (not just the plotted ones) so a
# given slice count is mapped with the same limits as when all are plotted.
unique_c = np.unique(colors)
for j, s in enumerate(s_array):
    # The threshold was previously defined but never applied, so runs outside
    # the consistent region were plotted too.
    if s < num_slice_factor_threshold:
        continue
    plt.errorbar(num_likelihood_evals_array[j, :], bias_array[j, :], yerr=log_Z_uncert_array[j, :], fmt='o',
                 c=color(s, unique_c),
                 label=f"Num Slices: {s}")
plt.xlabel("Num Likelihood Evaluations")
plt.ylabel("Bias (nats)")
plt.gca().axhline(0, color='k', linestyle='--')
plt.legend(loc='lower right')
plt.show()

# Using phantom samples to improve sample efficiency
Using the above result, the sample efficiency can be significantly boosted by using a large enough number of slices and a larger phantom fraction. We can easily see this by looking at bias vs run time speed up. The run time speed up is defined as the ratio of the run time with no phantom samples to the run time with phantom samples. We see a speed up of almost 4x with a phantom fraction of 0.8, and slice factor >= 3. This is a significant improvement in sample efficiency, and is the key to achieving high sample efficiency with nested sampling in high dimensions.

In [None]:
# Show the bias as an image over k (x axis, columns) and s (y axis, rows).

plt.figure()
bias_array = log_Z_mean_array - true_logZ
# 'PuOr' is a diverging colormap: use colour limits symmetric about zero so
# that zero bias falls on the neutral midpoint and the sign of the bias can be
# read directly from the hue. Non-finite entries are ignored when choosing the
# limit; if there are no finite entries, fall back to automatic limits.
finite_abs_bias = np.abs(bias_array[np.isfinite(bias_array)])
bias_limit = finite_abs_bias.max() if finite_abs_bias.size else None
plt.imshow(bias_array, origin='lower',
           extent=[np.min(k_array), np.max(k_array), np.min(s_array), np.max(s_array)],
           aspect='auto', cmap='PuOr',
           vmin=None if bias_limit is None else -bias_limit, vmax=bias_limit)
plt.xlabel("Num Phantom Samples")
plt.ylabel("Num Slices")
plt.colorbar(label="Bias (nats)")
plt.show()


In [None]:
# log Z estimate (with y error bars) against phantom fraction,
# one series per entry of s_array, coloured by number of slices.
fig, ax = plt.subplots()
unique_c = np.unique(s_array)
phantom_fraction = k_array / (k_array + 1)
for i, s in enumerate(s_array):
    ax.errorbar(
        phantom_fraction,
        log_Z_mean_array[i, :],
        yerr=log_Z_uncert_array[i, :],
        fmt='o',
        c=color(s, unique_c),
        label=f"Num Slices: {s}",
    )
ax.set_xlabel("Phantom Fraction")
ax.set_ylabel(r" $\log Z$ (nats)")
# Dashed horizontal reference line at true_logZ.
ax.axhline(true_logZ, color='k', linestyle='--')
ax.legend(loc='lower right')
plt.show()


In [None]:
# log Z estimate (with y error bars) against sample efficiency,
# one series per entry of s_array, coloured by number of slices.
fig, ax = plt.subplots()
colors = s_array
unique_c = np.unique(colors)
for j, s in enumerate(s_array):
    ax.errorbar(
        sample_efficiency_array[j, :],
        log_Z_mean_array[j, :],
        yerr=log_Z_uncert_array[j, :],
        fmt='o',
        c=color(s, unique_c),
        label=f"Num Slices: {s}",
    )
ax.set_xlabel("Sample Efficiency")
ax.set_ylabel(r" $\log Z$ (nats)")
# Dashed horizontal reference line at true_logZ.
ax.axhline(true_logZ, color='k', linestyle='--')
ax.legend(loc='lower right')
plt.show()

In [None]:
# log Z estimate (with y error bars) against run time,
# one series per entry of k_array, coloured by number of phantom samples.
fig, ax = plt.subplots()
colors = k_array
unique_c = np.unique(colors)
for i, k in enumerate(k_array):
    ax.errorbar(
        run_time_array[:, i],
        log_Z_mean_array[:, i],
        yerr=log_Z_uncert_array[:, i],
        fmt='o',
        c=color(k, unique_c),
        label=f"Num Phantom Samples: {k}",
    )
ax.set_xlabel("Run Time (s)")
ax.set_ylabel(r" $\log Z$ (nats)")
# Dashed horizontal reference line at true_logZ.
ax.axhline(true_logZ, color='k', linestyle='--')
ax.legend(loc='lower right')
plt.show()

In [None]:
# Run time speed up against phantom fraction, one series per entry of s_array,
# coloured by number of slices.
# NOTE(review): every slice count in s_array is plotted here; no slice-factor
# cut (e.g. ">= 3") is applied in this cell -- confirm whether one was intended.

fig, ax = plt.subplots()
colors = s_array
unique_c = np.unique(colors)
phantom_fraction = k_array / (k_array + 1)
for j, s in enumerate(s_array):
    # No yerr is passed, so only the markers are drawn.
    ax.errorbar(
        phantom_fraction,
        run_time_speed_up_array[j, :],
        fmt='o',
        c=color(s, unique_c),
        label=f"Num Slices: {s}",
    )
ax.set_xlabel("Phantom Fraction")
ax.set_ylabel("Run Time Speed Up")
# Dashed horizontal reference line at a speed up of 1.
ax.axhline(1, color='k', linestyle='--')
ax.legend(loc='lower right')
plt.show()

# Ablation study, large values of phantom fraction should introduce autocorrelation

We explore the impact of large values of phantom fraction on the resulting bias. We see that for large values of phantom fraction, the bias increases significantly. This is due to the fact that the phantom samples are no longer i.i.d. and thus the resulting log-evidence estimate is biased.

We restrict ourselves to `slice_factor=6` as this is the largest value of slice factor we consider, and thus the bias is independent of the number of slices. We explore `num_phantom` from `{s, 2*s, ..., s * (D-1) - 1, s * D - 1}`.


In [None]:
# Load the ablation results into arrays of same names (this rebinds the names
# used for the main experiment above).
# Every per-run array is reduced over its last axis (presumably the axis of
# repeated runs -- TODO confirm against the script that wrote the file).
save_file = "ablation_results.npz"

npzfile = np.load(save_file)
run_time_array = npzfile['run_time_array'].mean(-1)
log_Z_mean_array = npzfile['log_Z_mean_array'].mean(-1)
# Root-mean-square of the per-run uncertainties: sqrt(mean(sigma ** 2)).
# The previous expression took sqrt(mean(sqrt(sigma))), which is not an RMS.
# NOTE(review): assumes the stored values are standard deviations; if they are
# variances, use np.sqrt(npzfile['log_Z_uncert_array'].mean(-1)) instead.
log_Z_uncert_array = np.sqrt(np.square(npzfile['log_Z_uncert_array']).mean(-1))
num_likelihood_evals_array = npzfile['num_likelihood_evals_array'].mean(-1)
total_num_samples_array = npzfile['total_num_samples_array'].mean(-1)
total_num_phantom_samples_array = npzfile['total_num_phantom_samples_array'].mean(-1)
s_array = npzfile['s_array']
k_array = npzfile['k_array']
c_array = npzfile['c_array']
true_logZ = npzfile['true_logZ']


# Per-run ratio of samples to likelihood evaluations, then averaged over the last axis.
sample_efficiency_array = (npzfile['total_num_samples_array'] / npzfile['num_likelihood_evals_array']).mean(-1)
# Both ratios below are relative to the first column (index 0 of axis 1), which
# is kept as a length-1 axis (0:1) so that it broadcasts across all columns.
run_time_speed_up_array = run_time_array[:, 0:1] / run_time_array
efficiency_improvement_array = sample_efficiency_array / sample_efficiency_array[:, 0:1]


In [None]:
# Plot bias with rms y error bars vs phantom fraction, colored by number of likelihood evals (with color bar not labels)
