In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd

import helper

# Marginal likelihoods on diagonal DB
We generate synthetic datasets with a diagonal decision boundary ($\alpha=45^{\circ}$) and compare the marginal likelihood (mllh) of the 2D model with those of the horizontal and vertical 1D models.

In [None]:
# Dataset size handed to helper.generate_data; also reused by later cells.
T = 20

# alpha=45 is the diagonal decision boundary described in the text above.
data = helper.generate_data(T, alpha=45, sigma_reward=0.001)
helper.plot_data(data, labels=True)

In [None]:
# Reward noise assumed by the models, and a uniform prior over the angle alpha.
sigma_reward_model = 0.5
alpha_prior = tfd.Uniform(0,360)
N_prior_samples = 100

# One entry per candidate model, in the same order as `labels`:
# prior samples of alpha, then the fixed angles 90 and 0.
a = [alpha_prior.sample(N_prior_samples), [90], [0]]
labels = ('O', '|', '--')

mllhs = helper.compute_log_mllhs(data['z'], data['r'], a, sigma_reward_model, verbose=True)

plt.plot(np.array(mllhs), '.-')
plt.legend(labels)
t_change = helper.index_of_model_change(mllhs, model_id=0)
plt.title('model change at t = ' + str(t_change))

plt.xlabel('t')
plt.ylabel('log p(D)')

# The last cell of this notebook saves to 't model change.pdf'; writing this
# figure to the same path meant it was silently overwritten on Run All.
plt.savefig('mllh diagonal DB.pdf')

## Variance of mllh estimate

How variable is the mllh estimate as a function of the number of prior samples?

In [None]:
N_prior_samples = 400
N_simulations = 10

labels = ('O', '|', '--')

# Same dataset every time; only the prior samples are redrawn per simulation.
mllhs = []
for i_sim in range(N_simulations):
    a = [alpha_prior.sample(N_prior_samples), [90], [0]]
    sim_mllhs = helper.compute_log_mllhs(
        data['z'], data['r'], a, sigma_reward_model, verbose=True
    )
    mllhs.append(sim_mllhs)


In [None]:
# Stack the simulations once; axis 0 indexes the simulation run.
mllhs_arr = np.array(mllhs)
mllh_mean = np.mean(mllhs_arr, 0)
mllh_std = np.std(mllhs_arr, 0)

# Derive the x-axis from the results themselves: the globals T and
# N_simulations are reassigned by later cells, so relying on them made this
# cell fail when re-run after those cells.
t_axis = np.arange(mllh_mean.shape[0])

# Mean +/- std across simulations, one curve per model (same order as `labels`).
for i_model in range(len(labels)):
    plt.errorbar(t_axis, mllh_mean.T[i_model], yerr=mllh_std.T[i_model], linewidth=.5)
plt.legend(labels)

# Individual simulation traces of model 0, drawn after the legend so they
# do not get legend entries.
for sim_trace in mllhs_arr:
    plt.plot(sim_trace.T[0], c='lightblue', linewidth=.5)

plt.xlabel('t')
plt.ylabel('log p(D)')
plt.title('N prior samples = ' + str(N_prior_samples))
plt.savefig('mllh variance.pdf')

In [None]:
t_list = [1, 10, 20, 30, 40]
N_prior_samples_list = [50, 100, 200, 400]
N_simulations = 30

T = 40
data = helper.generate_data(T, alpha=45, sigma_reward=0.001)

# Nested result: mllhs_nsample[i_n_samples][i_simulation][i_t].
mllhs_nsample = []
for N_prior_samples in N_prior_samples_list:
    pbar = tf.keras.utils.Progbar(N_simulations)
    mllhs = []
    for i in range(N_simulations):
        # Estimate on the first t observations, with fresh prior samples per estimate.
        mllhs_t = [
            helper.compute_log_mllh(
                data['z'][:t],
                data['r'][:t],
                alpha_prior.sample(N_prior_samples),
                sigma_reward_model,
            )
            for t in t_list
        ]
        mllhs.append(mllhs_t)
        pbar.add(1)
    mllhs_nsample.append(mllhs)

In [None]:
# Std across simulations (axis 0 of each entry); one row per entry of
# N_prior_samples_list, one column per entry of t_list.
std_grid = np.array([np.std(sims, 0) for sims in mllhs_nsample])

plt.imshow(std_grid)
plt.colorbar()
plt.xlabel('t')
plt.ylabel('N prior samples')

# Pin ticks to the integer cell positions before relabelling them, so each
# label lines up with its row/column.
ax = plt.gca()
ax.xaxis.set_major_locator(ticker.FixedLocator(range(len(t_list))))
ax.set_xticklabels(t_list)
ax.yaxis.set_major_locator(ticker.FixedLocator(range(len(N_prior_samples_list))))
ax.set_yticklabels(N_prior_samples_list);

In [None]:
# One curve per prior-sample count: std of the estimate across simulations.
for i_n in range(len(N_prior_samples_list)):
    std_over_sims = np.std(mllhs_nsample[i_n], 0)
    plt.plot(std_over_sims)

# x positions are indices into t_list; relabel them with the actual t values.
ax = plt.gca()
ax.xaxis.set_major_locator(ticker.FixedLocator(range(len(t_list))))
ax.set_xticklabels(t_list)

plt.legend(N_prior_samples_list)
plt.xlabel('t')
plt.ylabel('log mllh estimator standard deviation')
plt.savefig('estimator std t.pdf')

In [None]:
sigma_reward_model_list = [0.1, 0.3, 0.5, 0.7]
N_prior_samples_list = [50, 100, 200, 400]
N_simulations = 30

T = 20
data = helper.generate_data(T, alpha=45, sigma_reward=0.001)

# The observations are the same for every estimate, so slice them once.
z_obs, r_obs = data['z'][:T], data['r'][:T]

# Nested result: mllhs_nsample[i_n_samples][i_simulation][i_sigma].
mllhs_nsample = []
for N_prior_samples in N_prior_samples_list:
    mllhs = []
    pbar = tf.keras.utils.Progbar(N_simulations)
    for i in range(N_simulations):
        # The sweep variable is deliberately NOT called `sigma_reward_model`:
        # looping over that name overwrote the notebook-level setting, so
        # re-running earlier cells silently used the last sweep value.
        mllhs_sigma = [
            helper.compute_log_mllh(
                z_obs,
                r_obs,
                alpha_prior.sample(N_prior_samples),
                sigma_model,
            )
            for sigma_model in sigma_reward_model_list
        ]
        mllhs.append(mllhs_sigma)
        pbar.add(1)
    mllhs_nsample.append(mllhs)

In [None]:
# One curve per prior-sample count: std of the estimate across simulations.
for i_n in range(len(N_prior_samples_list)):
    std_over_sims = np.std(mllhs_nsample[i_n], 0)
    plt.plot(std_over_sims)

# x positions are indices into sigma_reward_model_list; relabel with the sigma values.
ax = plt.gca()
ax.xaxis.set_major_locator(ticker.FixedLocator(range(len(sigma_reward_model_list))))
ax.set_xticklabels(sigma_reward_model_list)

plt.legend(N_prior_samples_list)
plt.xlabel('sigma reward of model')
plt.ylabel('log mllh estimator standard deviation')
plt.savefig('estimator std sigma.pdf')

## t of first model change
We want to see how quickly an ideal learner would realise that it needs to use a 2D decision boundary, as a function of $\sigma_r$. We generate 100 different datasets for each $\sigma_r$.

In [None]:
N_prior_samples = 30
T = 30
sigma_reward_models = [.1, .3, .5]

# Datasets generated per sigma; one name drives both the loop and the progress bar.
N_datasets = 100

# Passed through to helper.index_of_model_change as `never_result`.
# NOTE(review): presumably the value returned when no change is detected; it
# lies beyond T and is a finite number, so the nanmean/nanstd in the plotting
# cell will include it rather than skip it -- confirm this is intended.
never_result = 32

id_changes_per_sigma = []
for sigma_reward_model in sigma_reward_models:
    pbar = tf.keras.utils.Progbar(N_datasets)
    id_changes = []
    for i in range(N_datasets):
        # Fresh dataset and fresh prior samples for every repetition.
        data = helper.generate_data(T, alpha=45, sigma_reward=0.001)
        # Reuse the notebook's alpha_prior (Uniform(0, 360)) instead of
        # constructing an identical distribution on every iteration.
        a = [alpha_prior.sample(N_prior_samples), [90], [0]]
        mllhs = helper.compute_log_mllhs(data['z'], data['r'], a, sigma_reward_model)
        id_changes.append(
            helper.index_of_model_change(mllhs, model_id=0, never_result=never_result)
        )
        pbar.add(1)

    id_changes_per_sigma.append(np.array(id_changes))

In [None]:
# Mean +/- std of the change index across datasets, one point per sigma.
plt.errorbar(
    sigma_reward_models,
    np.nanmean(id_changes_per_sigma, 1),
    yerr=np.nanstd(id_changes_per_sigma, 1),
)
plt.xlabel('sigma reward')
plt.ylabel('t of model change to 2D')

# Overlay the individual datasets, with small horizontal jitter so that
# points sharing a sigma value do not sit on top of each other.
for i in range(len(id_changes_per_sigma)):
    changes = id_changes_per_sigma[i]
    n_points = len(changes)
    jittered_sigma = np.repeat(sigma_reward_models[i], n_points) + np.random.normal(0, 0.01, n_points)
    plt.scatter(jittered_sigma, changes, c='gray', s=1)

plt.savefig('t model change.pdf')