In [1]:
import numpy as np

from misc.matrix_geometric_resampling import matrix_geometric_resampling

# Fixed seed so that a fresh "Restart Kernel -> Run All" reproduces the same
# random draws in every cell that uses `rng`. Change SEED to explore other runs.
SEED = 0
rng = np.random.default_rng(SEED)

In [2]:
# Side length of the square matrices used in this toy check.
n = 10

# Scaled identity I / n. This equals the expected value of the one-hot
# diagonal sample produced by `unbiased_estimator` in this cell.
matrix = np.eye(n) / n

def unbiased_estimator(rng: np.random.Generator) -> np.ndarray:
    """Draw one random n-by-n matrix with a single 1 on its diagonal.

    The diagonal position is drawn uniformly from ``range(n)`` (``n`` is the
    module-level size), so the expected value of the returned matrix is
    ``np.identity(n) / n``.

    Args:
        rng: NumPy random generator used to draw the diagonal position.

    Returns:
        Float array of shape ``(n, n)`` that is zero everywhere except for
        one diagonal entry equal to 1.
    """
    position = rng.integers(n)

    # Build the one-hot diagonal first, then expand it to a full matrix.
    one_hot = np.zeros(n)
    one_hot[position] = 1
    return np.diag(one_hot)
    
# Feed the one-hot diagonal sampler to matrix geometric resampling.
# NOTE(review): 10000 and 0.05 sit in the argument positions that a later cell
# fills with `algo.M` and `algo.beta`; presumably `result` approximates the
# inverse of `matrix` -- confirm against misc.matrix_geometric_resampling.
result = matrix_geometric_resampling(rng, 10000, 0.05, unbiased_estimator)


In [3]:
from distributions.distribution_by_sequence import DistributionBySequence
from distributions.distribution import Distribution
from distributions.sequence import Sequence
from algorithms.semi_bandit_exp3 import SemiBanditExp3
from algorithms.full_bandit_exp3 import FullBanditExp3
from algorithms.semi_bandit_ftrl import SemiBanditFTRL
from algorithms.uniform_random import UniformRandom
from algorithms.non_contextual_exp3 import NonContextualExp3
from experiment_manager.experiment_manager import ExperimentManager

from misc.tensor_helpers import *

from distributions.actionsets.msets import MSets

from distributions.contexts.binary_context import BinaryContext
from distributions.thetas.single_hole import SingleHole
from distributions.thetas.independent_bernoulli import IndependentBernoulli

In [7]:
algo = FullBanditExp3()

# Experiment sizes: `length` is handed to `generate`, `d` to the context and
# theta distributions, and `K` to the action set and theta distribution.
length = 10000
d = 3
K = 3
actionset = MSets(K, 2)

# Offset subtracted from the first column of the Bernoulli parameter table:
# a quarter of min(sqrt(K / length), 1).
epsilon = 0.25 * np.minimum(np.sqrt(K / length), 1)
print("epsilon: ", epsilon)

# Every entry starts at 0.5; column 0 is lowered by epsilon in all d rows.
p = np.full((d, K), 0.5)
p[:, 0] -= epsilon

# Two candidate environments sharing the same contexts and action set.
# Only `dist_lower_bound` is sampled from in this cell.
dist_lower_bound = Distribution(BinaryContext(d), IndependentBernoulli(d, K, p), actionset)
dist_holes = Distribution(BinaryContext(d), SingleHole(d, K, np.array([0.7, 0.3])), actionset)

seq = dist_lower_bound.generate(length, rng, rng)
algo.set_constants(rng, seq)
# NOTE(review): assigned after set_constants, so this presumably overrides
# the value chosen there -- confirm.
algo.M = 10
print(seq.sigma, seq.m, algo.beta)
algo.run_on_sequence(rng, seq)
# A trailing None keeps the notebook from echoing the last call's return value.
None

epsilon:  0.004330127018922193
1.0 2 0.25


In [5]:
import time

# NOTE(review): sets the `theta_position` attribute on the algorithm object
# before sampling; its meaning is not visible in this notebook -- confirm why
# the value is 2.
algo.theta_position = 2

def unbiased_estimator(rng: np.random.Generator) -> np.ndarray:
    """Sample one context/action outer-product tensor and flatten it.

    A context is drawn with ``seq.context_unbiased_estimator``, an action
    index is drawn from the probabilities ``algo.get_policy`` returns for
    that context, and the rank-4 tensor
    ``context[a] * context[b] * action[c] * action[d]`` is passed through
    ``tensor_to_matrix``.

    Args:
        rng: NumPy random generator used for both draws.

    Returns:
        Whatever ``tensor_to_matrix`` produces for the sampled tensor.
    """
    sampled_context = seq.context_unbiased_estimator(rng)
    policy = algo.get_policy(sampled_context)

    # Draw an action index according to the policy, then look the action up once.
    action_ids = np.arange(algo.actionset.number_of_actions)
    chosen_action = algo.actionset[rng.choice(action_ids, p=policy)]

    outer_tensor = np.einsum(
        "a,b,c,d->abcd",
        sampled_context,
        sampled_context,
        chosen_action,
        chosen_action,
    )
    return tensor_to_matrix(outer_tensor)


# Sampling route: time one matrix_geometric_resampling call driven by the
# `unbiased_estimator` defined in this cell. perf_counter is used instead of
# time.time() because it is monotonic and intended for measuring short intervals.
start = time.perf_counter()
inverse = matrix_geometric_resampling(algo.rng, algo.M, algo.beta, unbiased_estimator)
end = time.perf_counter()
print(end - start, inverse)

print("\nStart:")
start = time.perf_counter()

# Direct route: accumulate a (K, K) action matrix from the policy evaluated at
# each of the d one-hot contexts, every context weighted by 1 / d.
action_matrix = np.zeros((K,K))
for i in range(d):
    context = np.zeros(d)
    context[i] = 1

    probabilities = algo.get_policy(context)
    # Probability-weighted sum of the rows of the action set.
    weighted_action = np.einsum("ab,a->b", algo.actionset.actionset, probabilities)
    print(probabilities, weighted_action)
    # NOTE(review): this adds the outer product of the policy-weighted mean
    # action, E[a] E[a]^T, whereas `unbiased_estimator` forms the outer product
    # of the sampled action itself, a a^T. The two differ unless the policy is
    # deterministic -- confirm which second-moment matrix is intended here.
    action_matrix += np.outer(weighted_action, weighted_action) / d

print("\naction_matrix:")
print(action_matrix)

# `tensor` is only used for its shape in the printout below; `kron` is the
# matrix that actually gets inverted.
tensor = np.einsum("ab,cd->abcd", np.identity(d)/d,  action_matrix)
kron = np.kron(np.identity(d)/d,  action_matrix)
print("\ntensor:", tensor.shape, "\n", kron)
# Disabled round-trip checks for the tensor <-> matrix helpers:
# print(matrix_to_tensor(tensor_to_matrix(tensor), d, K))
# print(tensor)
# print(tensor_to_matrix(matrix_to_tensor(kron, d, K)))
# print("\n")
# A small multiple of the identity (1e-5) is added before inverting.
# Note that this overwrites the `inverse` obtained from the sampling route above.
inverse = np.linalg.inv(kron + np.identity(d * K) * 1e-5)
end = time.perf_counter()
print(end - start, inverse)


0.019003629684448242 [[ 3.97350264  0.          0.          0.          0.          0.        ]
 [ 0.          6.44167483  0.          0.          0.          0.        ]
 [ 0.          0.          5.48950106  0.          0.          0.        ]
 [ 0.          0.          0.          3.29663034  0.          0.        ]
 [ 0.          0.          0.          0.         14.35939407  0.        ]
 [ 0.          0.          0.          0.          0.          2.99915168]]

Start:
[0.41106732 0.58893268] [0.41106732 0.58893268]
[0.62161589 0.37838411] [0.62161589 0.37838411]
[0.53296776 0.46703224] [0.53296776 0.46703224]

action_matrix:
[[0.27981243 0.24207123]
 [0.24207123 0.23604512]]

tensor: (3, 3, 2, 2) 
 [[0.09327081 0.08069041 0.         0.         0.         0.        ]
 [0.08069041 0.07868171 0.         0.         0.         0.        ]
 [0.         0.         0.09327081 0.08069041 0.         0.        ]
 [0.         0.         0.08069041 0.07868171 0.         0.        ]
 [0.     