In [147]:
import numpy as np
from typing import List, Tuple
import seaborn as sns
sns.set_style('darkgrid')
import scipy.stats as sts
import numba

In [148]:
from env import AssortmentEnvironment
from base_agents import RandomAgent
from run_utils import run_episode

In [149]:
def gather_data(n_items: int, assortment_size: int, preferences: np.ndarray, n_steps: int) -> List[Tuple[np.ndarray, int]]:
    """
    returns: obs. Each tuple in obs is a ndarray of the indices of the items proposed and the index of the item picked
    """
    env = AssortmentEnvironment(n=n_items, v=preferences)
    rd_agent = RandomAgent(n=n_items, k=assortment_size)
    obs, _ = run_episode(envnmt=env, actor=rd_agent, n_steps=n_steps, verbose=False)
    return obs

In [150]:
N = 10
K = 3
V = np.random.rand(N+1)
V[-1] = 1.
T = 1000

In [151]:
observations = gather_data(n_items=N,
                            assortment_size=K,
                            preferences=V,
                            n_steps=T)

100%|██████████| 1000/1000 [00:00<00:00, 2557.80it/s]


In [152]:
actions, items_picked = list(zip(*observations))

### Initialization of the parameters

In [192]:
xi = - np.ones(N) * 0.66
# 0.5 std prior for the beta weights
# omega = 0.25, ...
# omega_inv = 4, 4, 4, ...
# omega_det = (0.25) ** N

In [193]:
@numba.jit(nopython=True)
def elbo(xi, mu, L, prior_std, picks, assortments):
    n = L.shape[0]
    k = assortments.shape[1]
    timesteps = picks.shape[0]
    sigma = L ** 2
    mu_gradient = - (mu - xi) / (prior_std ** 2)
    L_gradient = np.ones(n) / (prior_std ** 2)
    
    q_entropy = 0
    for i in range(n):
        q_entropy += np.log(np.abs(L[i]))
    
    cross_entropy = - 0.5 * np.sum( (sigma + (mu - xi) **2) / (prior_std ** 2))
    
    log_likelihood = 0
    for t in range(timesteps):
        item_picked = picks[t]

        if item_picked < n:
            log_likelihood += mu[item_picked]
            mu_gradient[item_picked] += 1

        assortment_weight = 1.
        for j in range(k):
            item_j = assortments[t, j]
            assortment_weight += np.exp(mu[item_j] + 0.5 * sigma[item_j])
    
        for j in range(k):
            item_j = assortments[t, j]
            w_tj = np.exp(mu[item_j] + 0.5 * sigma[item_j]) / assortment_weight
            mu_gradient[item_j] -= w_tj
            L_gradient[item_j] += w_tj
        
        log_likelihood -= np.log(assortment_weight)
    
    L_gradient = - L * L_gradient
    for i in range(n):
        L_gradient[i] += 1 / L[i]

#     print(cross_entropy, q_entropy, log_likelihood)
#     print(mu_gradient, L_gradient)
    return cross_entropy + q_entropy + log_likelihood, mu_gradient, L_gradient

In [194]:
def variational_approximation


SyntaxError: invalid syntax (<ipython-input-194-260ff787a0d9>, line 1)

In [196]:
step_size = 1e-3
mu = np.zeros(N)
L = np.ones(N)
for _ in range(250):
    objective, mu_g, l_g = elbo(xi=xi,
             mu=mu,
             L=L,
             prior_std=0.5,
             picks=np.array(items_picked),
             assortments=np.array(actions))
    mu = mu + step_size * mu_g
    L = L + step_size * l_g

-28.712000000000003 0.0 -1782.7462741628815
-24.63049244837719 -0.9013883208630007 -1695.5674092110464
-21.269577077990743 -1.7824335991744469 -1624.257983151054
-18.49065251606075 -2.6445929408333595 -1565.366907850693
-16.185392572999493 -3.4891358394749514 -1516.3339044480442
-14.268092889211358 -4.317119050506419 -1475.2186126522072
-12.670279281711608 -5.129372714205224 -1440.525860415787
-11.336788396393892 -5.926494209629735 -1411.0874010280486
-10.222860665038741 -6.708846336280781 -1385.9793150061453
-9.291951966208794 -7.476557386888393 -1364.4630392684371
-8.514065793617483 -8.229521691082335 -1345.9425457561608
-7.86446768711179 -8.967400094529495 -1329.932796177271
-7.322683609832653 -9.689620610816073 -1316.0361943404594
-6.8717114744181975 -10.395380209029783 -1303.9247827866732
-6.4973943654010755 -11.083649428708018 -1293.3266118116794
-6.187917760455648 -11.753182248349042 -1284.015172214775
-5.933402904974347 -12.40253430165793 -1275.801102957655
-5.725575586650713 -

In [197]:
np.exp(mu + 0.5 * L ** 2)

array([0.98330237, 0.2935034 , 0.26449986, 0.96802103, 0.37211242,
       0.35520835, 0.13294187, 0.11623803, 0.53169784, 0.56837141])

In [198]:
V[:-1]

array([0.93506449, 0.28455946, 0.22434769, 0.98784058, 0.47824273,
       0.30604718, 0.10032761, 0.07152905, 0.53446213, 0.6595919 ])