In [None]:
import tensorflow as tf
from tensorflow_probability import distributions as tfd
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

import helper

In [None]:
def compute_log_mllh(z, r, alpha_samples, sigma_reward):
    '''Return the log of the likelihood averaged over the supplied alpha samples.

    Every entry of `alpha_samples` is passed, together with `z`, `r` and
    `sigma_reward`, to `helper.model_llh_by_alpha` (with method='np'). The
    resulting values are averaged with `np.mean` and the natural log of that
    average is returned. Passing a single sample therefore yields the log of
    that one evaluation, which is how a fixed-alpha model can be emulated.
    '''
    per_sample_llh = []
    for alpha_sample in alpha_samples:
        llh = helper.model_llh_by_alpha(
            z,
            r,
            alpha=alpha_sample,
            sigma_reward=sigma_reward,
            method='np',
        )
        per_sample_llh.append(llh)
    averaged_llh = np.mean(np.array(per_sample_llh))
    return np.log(averaged_llh)

def compute_log_mllhs(z, r, a, sigma_reward, verbose=False):
    '''Evaluate compute_log_mllh for every model on every growing prefix of the data.

    `a` is a list with one collection of alpha samples per model. For each
    time index t the first t+1 entries of `z` and `r` are used, and
    `compute_log_mllh` is called once per entry of `a`.

    Returns a list with one entry per time step; each entry is a list with
    one value per model, in the order given by `a`. When `verbose` is true a
    Keras progress bar is advanced once per time step.
    '''
    n_steps = len(z)
    progress = tf.keras.utils.Progbar(n_steps) if verbose else None
    mllhs = []
    for t_end in range(1, n_steps + 1):
        z_prefix = z[:t_end]
        r_prefix = r[:t_end]
        row = []
        for alpha_samples in a:
            row.append(compute_log_mllh(z_prefix, r_prefix, alpha_samples, sigma_reward))
        mllhs.append(row)
        if progress is not None:
            progress.add(1)
    return mllhs

def index_of_model_change(mllhs, model_id=0, never_result=np.nan):
    '''Return the first time index at which model `model_id` has the highest mllh.

    Parameters
    ----------
    mllhs : sequence of sequences
        One row per time step, one value per model (as returned by
        compute_log_mllhs).
    model_id : int
        Column index of the model of interest.
    never_result : any
        Value returned when `model_id` is never the best model, or when
        `mllhs` is empty.

    Returns
    -------
    The first row index whose argmax equals `model_id`, or `never_result`.
    '''
    scores = np.asarray(mllhs)
    # An empty input has no time step at which any model can win.
    if scores.size == 0:
        return never_result
    best_model_per_step = np.argmax(scores, axis=1)
    # Compare against the requested model rather than a hard-coded 0.
    steps_where_best = np.flatnonzero(best_model_per_step == model_id)
    if steps_where_best.size == 0:
        return never_result
    return steps_where_best[0]

In [None]:
# Simulate one data set and plot it (with labels) as a visual sanity check.
data = helper.generate_data(
    8,  # first positional argument; presumably the number of trials - TODO confirm in helper
    alpha=45,
    sigma_reward=0.001,
)
helper.plot_data(data, labels=True)

In [None]:
# Compare three candidate models on the data set generated above.
N = 10                     # number of alpha samples drawn for the sampled model
sigma_reward_model = 0.3   # sigma_reward passed to the likelihood computation

# One entry per model: N uniform draws between 0 and 180, a fixed alpha of 90,
# and a fixed alpha of 0. The order must match `labels`.
a = [
    tfd.Uniform(0, 180).sample(N),
    [90],
    [0],
]
labels = ('O', '|', '--')

mllhs = compute_log_mllhs(data['z'], data['r'], a, sigma_reward_model, verbose=True)

# One line per model, one point per time step.
plt.plot(np.array(mllhs), '.-')
plt.legend(labels)
plt.title('model change at t = ' + str(index_of_model_change(mllhs, model_id=0)))

In [None]:
# Repeat the model-change experiment on freshly simulated data for several
# values of the model's sigma_reward.
N = 10               # number of alpha samples drawn for the sampled model
N_RUNS = 100         # simulated data sets per sigma value (progress bar target and loop length)
NEVER_RESULT = 10    # recorded when model 0 is never the best model in a run
sigma_reward_models = [.1, .2, .3, .4, .5]

# NOTE: the loops below rebind the notebook-level names `sigma_reward_model`,
# `data`, `a` and `mllhs`; re-running an earlier cell afterwards will see the
# values from the last iteration.
# NOTE: because "never" is recorded as NEVER_RESULT rather than NaN, those runs
# are included in any mean/std computed over id_changes_per_sigma.
id_changes_per_sigma = []
for sigma_reward_model in sigma_reward_models:
    pbar = tf.keras.utils.Progbar(N_RUNS)
    id_changes = []
    for i in range(N_RUNS):
        data = helper.generate_data(8, alpha=45, sigma_reward=0.001)
        a = [tfd.Uniform(0, 180).sample(N), [90], [0]]
        mllhs = compute_log_mllhs(data['z'], data['r'], a, sigma_reward_model)
        id_changes.append(index_of_model_change(mllhs, model_id=0, never_result=NEVER_RESULT))
        pbar.add(1)

    # One array of N_RUNS change indices per sigma value.
    id_changes_per_sigma.append(np.array(id_changes))

In [None]:
# Mean and standard deviation of the change index per sigma value, with the
# individual runs overlaid as small gray points.
plt.errorbar(
    sigma_reward_models,
    np.nanmean(id_changes_per_sigma, 1),
    yerr=np.nanstd(id_changes_per_sigma, 1),
)
plt.xlabel('sigma reward')
plt.ylabel('t of model change to 2D')
for idx, changes in enumerate(id_changes_per_sigma):
    n_points = len(changes)
    # Small horizontal jitter so coinciding points remain distinguishable.
    jittered_x = np.repeat(sigma_reward_models[idx], n_points) + np.random.normal(0, 0.01, n_points)
    plt.scatter(jittered_x, changes, c='gray', s=1)