In [None]:
import tensorflow as tf
from tensorflow_probability import distributions as tfd
import tensorflow_probability as tfp
import tensorflow.keras.backend as K
import math as m
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

import helper

In [None]:
# Visual reference for the meaning of alpha: eight panels side by side,
# one per angle in 45-degree steps (0, 45, ..., 315). Each panel scatters
# freshly generated points in z, coloured by their reward r.
import matplotlib.gridspec as gridspec
subplots = list(map(plt.subplot, gridspec.GridSpec(1, 8)))
for i, sp in enumerate(subplots):
    angle = i * 45
    data = helper.generate_data(200, alpha=angle, sigma_reward=0.001)
    sp.scatter(*data['z'].T, c=data['r'])
    sp.set_aspect('equal')
    sp.axis('off')
    sp.set_title(str(angle))

In [None]:
# example data: generated with alpha=90 and sigma_reward=0.001, then plotted
# with labels enabled. `data` is reused by the cells below.
# NOTE(review): the first positional argument (20) is presumably the number
# of samples -- confirm against helper.generate_data.
data = helper.generate_data(20,alpha=90,sigma_reward=0.001)
helper.plot_data(data, labels=True)

In [None]:
# Relevant dimension of the data points for alpha = 90: the second row of
# z transposed, drawn in sample order as a dotted line.
relevant_dim = data['z'].T[1]
plt.plot(relevant_dim, '.-')

In [None]:
def iterative_model_log_llhs(z, r, alpha, sigma_reward):
    """Log-likelihood of growing prefixes of the data under one model.

    Calls ``helper.model_log_llh`` on ``z[:t]`` and ``r[:t]`` for every
    ``t`` in ``range(z.shape[0])`` and collects the results in order.

    NOTE(review): the prefixes run from the empty one (``t = 0``) up to all
    but the last sample, so the complete data set is never evaluated --
    confirm this is intended.

    Args:
        z: data points; must expose ``.shape[0]`` and support slicing.
        r: rewards, sliced in step with ``z``.
        alpha: forwarded to ``helper.model_log_llh``.
        sigma_reward: forwarded to ``helper.model_log_llh``.

    Returns:
        ``np.ndarray`` built from the per-prefix results.
    """
    n_samples = z.shape[0]
    prefix_llhs = []
    for t in range(n_samples):
        prefix_llhs.append(
            helper.model_log_llh(z[:t], r[:t], alpha=alpha, sigma_reward=sigma_reward)
        )
    return np.array(prefix_llhs)

In [None]:
# Running log-likelihood of the example data for each candidate alpha
# (0 to 315 in 45-degree steps), evaluated with sigma_reward = 0.1.
# The loop variable stays named `alpha`; it remains bound to the last
# value after the loop.
alpha_grid = np.arange(0, 360, 45)
for alpha in alpha_grid:
    running_llhs = iterative_model_log_llhs(
        data['z'], data['r'], alpha=alpha, sigma_reward=0.1)
    plt.plot(running_llhs, '.-')
plt.legend([str(candidate) for candidate in alpha_grid])

In [None]:
# Same comparison across candidate alphas (0 to 315 in 45-degree steps),
# this time with a wider reward noise, sigma_reward = 0.5.
# The loop variable stays named `alpha`; it remains bound to the last
# value (315) after the loop.
alpha_grid = np.arange(0, 360, 45)
for alpha in alpha_grid:
    running_llhs = iterative_model_log_llhs(
        data['z'], data['r'], alpha=alpha, sigma_reward=0.5)
    plt.plot(running_llhs, '.-')
plt.legend([str(candidate) for candidate in alpha_grid])

## Underflow problem

In [None]:
%%time
# Time a single log-likelihood evaluation on the full example data.
# NOTE(review): `alpha` is not assigned in this cell. It is whatever value
# the most recently executed `for alpha in ...` loop left behind (315 when
# the notebook is run top to bottom) -- confirm that is the intended angle.
helper.model_log_llh(data['z'], data['r'], alpha=alpha, sigma_reward=0.1)

In [None]:
# Draw 1000 alphas from a uniform distribution between 0 and 180, evaluate
# the log-likelihood of the full example data (sigma_reward = 0.5) for each
# one, and histogram the results.
alpha_samples = tfd.Uniform(0, 180).sample(1000)
sampled_log_llhs = np.array([
    helper.model_log_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.5)
    for alpha_sample in alpha_samples
])
plt.hist(sampled_log_llhs)

In [None]:
# Scratch check: reciprocal of a very small float (evaluates to 1e+100).
1/1e-100

In [None]:
# Evaluate helper.model_llh_by_alpha on the example data at alpha = 90 with
# sigma_reward = 0.3 and display the result.
# NOTE(review): presumably the non-log likelihood counterpart of
# helper.model_log_llh -- confirm against the helper module.
helper.model_llh_by_alpha(data['z'], data['r'], alpha=90, sigma_reward=0.3)

In [None]:
def llh_under(z, r, alpha, sigma_reward, method='np'):
    """Per-sample Gaussian likelihood of the rewards ``r`` for angle ``alpha``.

    The predicted reward of each sample is ``gamma * z`` summed over axis 1,
    with ``gamma = helper.gamma_from_alpha(alpha)``. Each observed reward is
    scored under a normal density centred on its prediction with standard
    deviation ``sigma_reward``. The values are returned per sample; they are
    not multiplied together.

    Args:
        z: sample coordinates; multiplied by gamma and reduced over axis 1
            (presumably shape ``(n_samples, n_dims)`` -- TODO confirm).
        r: observed rewards, combined elementwise with the predictions.
        alpha: angle passed to ``helper.gamma_from_alpha``.
        sigma_reward: standard deviation of the reward noise.
        method: ``'tf'`` evaluates ``tfp.distributions.Normal(...).prob``;
            ``'np'`` evaluates the same density from its closed form.

    Returns:
        The per-sample likelihoods as produced by the TensorFlow ops.

    Raises:
        ValueError: if ``method`` is neither ``'tf'`` nor ``'np'``.
    """
    if method not in ('tf', 'np'):
        # Previously an unknown method fell through both branches and failed
        # with an UnboundLocalError on the return statement.
        raise ValueError("method must be 'tf' or 'np', got %r" % (method,))

    gamma = helper.gamma_from_alpha(alpha)
    predicted_reward = tf.reduce_sum(tf.multiply(gamma, z), 1)

    if method == 'tf':
        return tfp.distributions.Normal(loc=predicted_reward, scale=sigma_reward).prob(r)

    # Closed-form normal density: exp(-(x - mu)^2 / (2 sigma^2)) / sqrt(2 pi sigma^2).
    # The normaliser is a square root, i.e. exponent 0.5 (the old exponent 0.1
    # made this branch disagree with the 'tf' branch).
    squared_error = (predicted_reward - r)**2
    normaliser = (2*np.pi*sigma_reward**2)**0.5
    return tf.exp(-0.5 * squared_error / sigma_reward**2) / normaliser

In [None]:
# Histogram of the per-sample likelihoods of the example data at
# alpha = 220 with sigma_reward = 0.9 (closed-form 'np' branch).
per_sample_llh = np.array(
    llh_under(data['z'], data['r'], alpha=220, sigma_reward=.9, method='np'))
plt.hist(per_sample_llh)

In [None]:
# Multiply the per-sample likelihoods (alpha = 220, sigma_reward = 0.5)
# into a single joint likelihood value for the example data.
per_sample_llh = np.array(
    llh_under(data['z'], data['r'], alpha=220, sigma_reward=.5, method='np'))
np.prod(per_sample_llh)