In [None]:
import tensorflow as tf
from tensorflow_probability import distributions as tfd
import tensorflow_probability as tfp
import tensorflow.keras.backend as K
import math as m
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

import helper

In [None]:
# Visual reference for the meaning of alpha: eight side-by-side panels,
# one for each alpha in 0, 45, ..., 315, each showing 200 generated points
# coloured by data['r'].
import matplotlib.gridspec as gridspec
subplots = []
for i, grid_loc in enumerate(gridspec.GridSpec(1, 8)):
    sp = plt.subplot(grid_loc)
    subplots.append(sp)
    panel_alpha = i * 45
    data = helper.generate_data(200, alpha=panel_alpha, sigma_reward=0.001)
    sp.scatter(*data['z'].T, c=data['r'])
    sp.set_aspect('equal')
    sp.axis('off')  # hide ticks/frames; only the point pattern matters here
    sp.set_title(str(panel_alpha))


In [None]:
# Example data: 10 points generated with alpha=90, coloured by data['r']
data = helper.generate_data(10, alpha=90, sigma_reward=0.001)
example_points = plt.scatter(*data['z'].T, c=data['r'])
example_points.axes.set_aspect('equal')
plt.colorbar(example_points);

# Annotate every point with its row index in data['z'], so the points can be
# matched to positions along the x-axis of per-sample plots.
labels = ['{0}'.format(i) for i in range(data['z'].shape[0])]
x_coords, y_coords = data['z'][:, 0], data['z'][:, 1]
for label, x, y in zip(labels, x_coords, y_coords):
    plt.annotate(
        label,
        xy=(x, y),
        xytext=(-20, 20),  # label offset, in points, relative to the data point
        textcoords='offset points',
        ha='right',
        va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
        arrowprops=dict(arrowstyle = '->', connectionstyle='arc3,rad=0'))

In [None]:
# For alpha = 90 the relevant dimension is the second coordinate of z:
# plot it against the sample index.
plt.plot(data['z'][:, 1], '.-')

In [None]:
def iterative_model_log_llhs(z, r, alpha, sigma_reward):
    """Evaluate helper.model_log_llh on growing prefixes of the data.

    Entry t of the result is the log-likelihood of the first t samples
    (z[:t], r[:t]) for the given alpha and sigma_reward. Note that t runs
    over 0 .. z.shape[0] - 1, so the first entry is computed on empty slices
    and the complete data set (all z.shape[0] samples) is never evaluated.

    Args:
        z: sample inputs; sliced along the first axis.
        r: sample rewards; sliced along the first axis in step with z.
        alpha: forwarded to helper.model_log_llh.
        sigma_reward: forwarded to helper.model_log_llh.

    Returns:
        np.ndarray with one entry per prefix length (z.shape[0] entries).
    """
    n_samples = z.shape[0]
    prefix_llhs = []
    for n_seen in range(n_samples):
        prefix_llhs.append(
            helper.model_log_llh(z[:n_seen], r[:n_seen], alpha=alpha, sigma_reward=sigma_reward))
    return np.array(prefix_llhs)

In [None]:
# Prefix log-likelihood curves for alpha = 0, 45, ..., 315 with sigma_reward = 0.1;
# one line per alpha, legend entries in the same order.
alpha_grid = np.arange(0, 360, 45)
for alpha in alpha_grid:
    llh_curve = iterative_model_log_llhs(data['z'], data['r'], alpha=alpha, sigma_reward=0.1)
    plt.plot(llh_curve, '.-')
plt.legend([str(grid_alpha) for grid_alpha in alpha_grid])

In [None]:
# Same sweep over alpha = 0, 45, ..., 315, now with a wider noise level
# (sigma_reward = 0.5).
alpha_grid = np.arange(0, 360, 45)
for alpha in alpha_grid:
    llh_curve = iterative_model_log_llhs(data['z'], data['r'], alpha=alpha, sigma_reward=0.5)
    plt.plot(llh_curve, '.-')
plt.legend([str(grid_alpha) for grid_alpha in alpha_grid])

In [None]:
%%time
helper.model_log_llh(data['z'], data['r'], alpha=alpha, sigma_reward=0.1)

In [None]:
# Histogram of the full-data log-likelihood over 1000 alphas drawn from
# Uniform(0, 180), with sigma_reward = 0.5. The draw is not seeded, so the
# histogram varies between runs.
alpha_samples = tfd.Uniform(0, 180).sample(1000)
sampled_log_llhs = np.array([
    helper.model_log_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.5)
    for alpha_sample in alpha_samples
])
plt.hist(sampled_log_llhs)

In [None]:
# Scratch check: dividing by a very small float still yields a finite value (about 1e100).
1/1e-100

In [None]:
def model_llh(z, r, alpha, sigma_reward):
    """Joint likelihood of the rewards under the Gaussian reward model.

    Each reward r_i is modelled as Normal(sum(gamma * z_i), sigma_reward)
    with gamma = helper.gamma_from_alpha(alpha).

    Args:
        z: sample inputs; multiplied element-wise with gamma and summed over
            axis 1 (so assumed 2-D with one row per sample - TODO confirm
            against helper.generate_data).
        r: observed rewards, one per row of z.
        alpha: passed to helper.gamma_from_alpha.
        sigma_reward: standard deviation of the reward noise.

    Returns:
        Scalar tensor: prod_i p(r_i | z_i, alpha), i.e. the likelihood of all
        samples treated as independent.
    """
    gamma = helper.gamma_from_alpha(alpha)
    predicted_reward = tf.reduce_sum(tf.multiply(gamma, z), 1)
    log_prob_per_sample = tfp.distributions.Normal(
        loc=predicted_reward, scale=sigma_reward).log_prob(r)
    # The likelihood of independent samples is the PRODUCT of the per-sample
    # densities (the previous version summed them). It is evaluated as
    # exp(sum(log p)) so intermediate products cannot over-/underflow.
    return tf.exp(tf.reduce_sum(log_prob_per_sample))

def model_llh2(z, r, alpha, sigma_reward):
    """Same joint likelihood as model_llh, with the Normal density written out.

    Uses the closed-form density exp(-0.5 * e**2 / sigma**2) / sqrt(2*pi*sigma**2)
    with e = sum(gamma * z_i) - r_i instead of a tfp distribution object.

    Args:
        z: sample inputs (see model_llh).
        r: observed rewards, one per row of z.
        alpha: passed to helper.gamma_from_alpha.
        sigma_reward: standard deviation of the reward noise.

    Returns:
        Scalar tensor: product of the per-sample densities.
    """
    gamma = helper.gamma_from_alpha(alpha)
    residual = tf.reduce_sum(tf.multiply(gamma, z), 1) - r
    prob_per_sample = tf.exp(-0.5 * residual**2 / sigma_reward**2) / (2*np.pi*sigma_reward**2)**0.5
    # Product, not sum: independent samples multiply (kept consistent with model_llh).
    return tf.reduce_prod(prob_per_sample)

In [None]:
# Evaluate model_llh once on the current example data (alpha=90, sigma_reward=0.5).
model_llh(
    data['z'],
    data['r'],
    alpha=90,
    sigma_reward=0.5,
)

In [None]:
# New example data: 30 points generated with alpha=45, coloured by data['r'].
# This rebinds `data`, so the cells that follow work on this set.
data = helper.generate_data(30, alpha=45, sigma_reward=0.001)
example_points = plt.scatter(*data['z'].T, c=data['r'])
example_points.axes.set_aspect('equal')
plt.colorbar(example_points);


In [None]:
# Number of alpha samples used in each of the averaging cells that follow.
N = 10000

In [None]:
%%time
# 2D model
alpha_samples = tfd.Uniform(0,180).sample(N)
np.mean(np.array([model_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1) for alpha_sample in alpha_samples]))

In [None]:
%%time
# 2D model
alpha_samples = tfd.Uniform(0,180).sample(N)
np.mean(np.array([model_llh2(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1) for alpha_sample in alpha_samples]))

In [None]:
%%time
# horizontal DB 1D model
alpha_samples = np.repeat(90,N)
np.mean(np.array([model_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1) for alpha_sample in alpha_samples]))

In [None]:
%%time
# horizontal DB 1D model
alpha_samples = np.repeat(90,N)
np.mean(np.array([model_llh2(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1) for alpha_sample in alpha_samples]))

In [None]:
# vertical DB 1D model: alpha fixed at 0 for all N evaluations of model_llh,
# then averaged.
alpha_samples = np.repeat(0, N)
llh_per_alpha = [
    model_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1)
    for alpha_sample in alpha_samples
]
np.mean(np.array(llh_per_alpha))

In [None]:
# worst possible 1D model: alpha fixed at 225 for all N evaluations of
# model_llh, then averaged.
alpha_samples = np.repeat(225, N)
llh_per_alpha = [
    model_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1)
    for alpha_sample in alpha_samples
]
np.mean(np.array(llh_per_alpha))

In [None]:
# ground truth 1D model: alpha fixed at 45, the value the current example
# data was generated with; model_llh is evaluated N times and averaged.
alpha_samples = np.repeat(45, N)
llh_per_alpha = [
    model_llh(data['z'], data['r'], alpha=alpha_sample, sigma_reward=0.1)
    for alpha_sample in alpha_samples
]
np.mean(np.array(llh_per_alpha))