In [None]:
import tensorflow as tf
from tensorflow_probability import distributions as tfd
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

import helper

# Marginal likelihoods on diagonal DB
We generate synthetic datasets with a diagonal decision boundary ($\alpha=45^{\circ}$), and compare the log marginal likelihood (mllh) of the 2D model against those of the horizontal and vertical 1D models.

In [None]:
# Build one synthetic dataset with a 45-degree decision boundary and a very
# small sigma_reward, then plot it. T is the first argument to the generator
# (presumably the number of trials -- confirm in helper.generate_data).
T = 8
data = helper.generate_data(
    T,
    alpha=45,
    sigma_reward=0.001,
)
helper.plot_data(data, labels=True)

In [None]:
# Compare the log marginal likelihoods of three candidate models on `data`.
# Each entry of `a` is the set of boundary angles one model is evaluated on:
#   - N angles sampled from a Uniform(0, 360) prior (legend 'O'),
#   - the single angle 90 (legend '|'),
#   - the single angle 0 (legend '--').
# NOTE(review): which of 90 / 0 is the "vertical" vs. "horizontal" 1D model
# depends on helper's angle convention -- confirm there.
sigma_reward_model = 0.3
alpha_prior = tfd.Uniform(0,360)
N = 100  # number of prior samples drawn for the first model

a = [alpha_prior.sample(N),[90],[0]]
labels = ('O','|','--')  # legend entries, same order as `a`

mllhs = helper.compute_log_mllhs(data['z'],data['r'],a,sigma_reward_model,verbose=True)

# np.array(mllhs) is plotted as a 2D array: one line per column, labelled
# in order by `labels`.
plt.plot(np.array(mllhs),'.-')
plt.legend(labels)
plt.title('model change at t = '+str(helper.index_of_model_change(mllhs, model_id = 0)))

plt.xlabel('t')
plt.ylabel('log p(D)')

# The summary figure at the end of the notebook is saved as
# 't model change.pdf'; a distinct filename here keeps that later save from
# silently overwriting this figure when the notebook is run top to bottom.
plt.savefig('log mllh diagonal DB.pdf')

## Variance of mllh estimate

How much does the mllh estimate vary across independent draws of the prior samples?

In [None]:
# Repeat the mllh computation on the same `data` with N_simulations
# independent draws of prior samples, to see how much the estimate varies.
sigma_reward_model = 0.3
alpha_prior = tfd.Uniform(0,360)
N_prior_samples = 100
N_simulations = 5

labels = ('O','|','--')

# `mllhs` becomes a list with one compute_log_mllhs result per simulation.
# The angle sets are rebuilt inside the loop so that every simulation draws
# fresh prior samples; no draw outside the loop is needed (the previous
# pre-loop draw used `N` from another cell and was immediately overwritten).
mllhs = []
for i_sim in range(N_simulations):
    a = [alpha_prior.sample(N_prior_samples),[90],[0]]
    mllhs.append(helper.compute_log_mllhs(data['z'],data['r'],a,sigma_reward_model,verbose=True))


In [None]:
# Plot mean +/- std of the log mllh across simulations for every model, and
# overlay the individual simulation traces of the first model in light blue.
# Assumes np.array(mllhs) has shape (simulations, time steps, models)
# -- TODO confirm against helper.compute_log_mllhs.
mllhs_arr = np.array(mllhs)

# Take the x-axis length from the data itself rather than from the global
# `T`, which a later cell rebinds to a different value.
t_axis = np.arange(mllhs_arr.shape[1])

# Transposed so that the first index selects the model.
mllh_mean = np.mean(mllhs_arr, 0).T
mllh_std = np.std(mllhs_arr, 0).T

for i_model in range(3):
    plt.errorbar(t_axis, mllh_mean[i_model], yerr=mllh_std[i_model])

# One faint trace per simulation for model 0.
for sim_mllhs in mllhs_arr:
    plt.plot(sim_mllhs.T[0], c='lightblue')

plt.xlabel('t')
plt.ylabel('log p(D)');

## t of first model change
We want to see how quickly an ideal learner would realise that it needs to use a 2D decision boundary, as a function of $\sigma_r$. We generate 100 different datasets for each $\sigma_r$.

In [None]:
# For every model reward noise level, simulate N_datasets fresh datasets and
# record the index returned by helper.index_of_model_change for model 0.
N_prior_samples = 30
T = 30
sigma_reward_models = [.1,.3,.5,.7]
N_datasets = 100  # datasets per sigma; also sizes the progress bar

# The prior does not change between iterations, so build it once.
alpha_prior = tfd.Uniform(0,360)

# One array of length N_datasets per entry of sigma_reward_models.
id_changes_per_sigma = []
for sigma_reward_model in sigma_reward_models:
    pbar = tf.keras.utils.Progbar(N_datasets)
    id_changes = []
    for i in range(N_datasets):
        data = helper.generate_data(T,alpha=45,sigma_reward=0.001)
        a = [alpha_prior.sample(N_prior_samples),[90],[0]]
        mllhs = helper.compute_log_mllhs(data['z'],data['r'],a,sigma_reward_model)
        # NOTE(review): never_result=32 is presumably the value reported when
        # no change is found within T=30 steps. The plotting cell uses
        # nanmean/nanstd, which will not exclude 32 -- confirm this is the
        # intended treatment of "never" cases.
        id_changes.append(helper.index_of_model_change(mllhs, model_id = 0, never_result=32))
        pbar.add(1)

    id_changes_per_sigma.append(np.array(id_changes))

In [None]:
# Summary figure: mean +/- std of the model-change index per sigma, with the
# individual per-dataset values scattered underneath in gray.
change_mean = np.nanmean(id_changes_per_sigma, 1)
change_std = np.nanstd(id_changes_per_sigma, 1)
plt.errorbar(sigma_reward_models, change_mean, yerr=change_std)
plt.xlabel('sigma reward')
plt.ylabel('t of model change to 2D')

for sigma_value, sigma_changes in zip(sigma_reward_models, id_changes_per_sigma):
    n_points = len(sigma_changes)
    # Small horizontal jitter so overlapping points remain visible.
    x_jittered = np.repeat(sigma_value, n_points) + np.random.normal(0, 0.01, n_points)
    plt.scatter(x_jittered, sigma_changes, c='gray', s=1)

plt.savefig('t model change.pdf')