In [1]:
import numpy as np

from misc.matrix_geometric_resampling import matrix_geometric_resampling

# Fixed seed so that Restart Kernel -> Run All reproduces the same random draws.
# NOTE: the outputs recorded in this notebook were produced with an unseeded
# generator, so the numbers will differ after re-running with this seed.
SEED = 0
rng = np.random.default_rng(SEED)

In [2]:
# Side length of the toy matrices used in this sanity check.
n = 10

# Matrix the sampler below is unbiased for: the identity scaled by 1/n.
matrix = np.identity(n) / n


def unbiased_estimator(rng: np.random.Generator) -> np.ndarray:
    """Return one random sample whose expectation is ``matrix``.

    A single index ``i`` is drawn with ``rng.integers(n)``; the sample is the
    (n, n) matrix that is 1 at position (i, i) and 0 everywhere else.  Since
    ``i`` is uniform on 0..n-1, averaging such samples gives identity / n.

    Args:
        rng: Generator used to draw the diagonal position.

    Returns:
        An (n, n) float array with exactly one non-zero entry, on the diagonal.
    """
    sample = np.zeros((n, n))
    hot = rng.integers(n)
    sample[hot, hot] = 1
    return sample
    
# Pair axis 0 of `result` with axis 1 of `matrix` and axis 1 with axis 0,
# i.e. sum over result[i, j] * matrix[j, i].
contraction_axes = ([0, 1], [1, 0])

# Run the resampling routine on the toy sampler with the arguments (1, 0.05).
result = matrix_geometric_resampling(rng, 1, 0.05, unbiased_estimator)
print(result)

# Last expression of the cell: displayed as the cell's output.
np.tensordot(result, matrix, axes=contraction_axes)

[[0.1    0.     0.     0.     0.     0.     0.     0.     0.     0.    ]
 [0.     0.1    0.     0.     0.     0.     0.     0.     0.     0.    ]
 [0.     0.     0.1    0.     0.     0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.1    0.     0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.1    0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.     0.1    0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.     0.     0.1    0.     0.     0.    ]
 [0.     0.     0.     0.     0.     0.     0.     0.0975 0.     0.    ]
 [0.     0.     0.     0.     0.     0.     0.     0.     0.1    0.    ]
 [0.     0.     0.     0.     0.     0.     0.     0.     0.     0.1   ]]


array(0.09975)

In [3]:
from distributions.distribution_by_sequence import DistributionBySequence
from distributions.distribution import Distribution
from distributions.sequence import Sequence
from algorithms.semi_bandit_exp3 import SemiBanditExp3
from algorithms.full_bandit_exp3 import FullBanditExp3
from algorithms.semi_bandit_ftrl import SemiBanditFTRL
from algorithms.uniform_random import UniformRandom
from algorithms.non_contextual_exp3 import NonContextualExp3
from experiment_manager.experiment_manager import ExperimentManager

from distributions.actionsets.msets import MSets

from distributions.contexts.binary_context import BinaryContext
from distributions.thetas.single_hole import SingleHole
from distributions.thetas.independent_bernoulli import IndependentBernoulli

In [4]:
# Algorithm instance exercised in this and the following cell.
algo = FullBanditExp3()

# Sizes handed to the sequence generator and to the context / theta /
# action-set objects constructed below.
length = 1000
d = 3
K = 3
actionset = MSets(K, 2)

# epsilon = 0.25 * min(sqrt(K / length), 1)
gap_scale = np.min([np.sqrt(K / length), 1])
epsilon = 0.25 * gap_scale
print("epsilon: ", epsilon)

# (d, K) table of Bernoulli parameters: 0.5 everywhere, with the first
# column lowered by epsilon in every row.
p = np.full((d, K), 0.5)
p[:, 0] -= epsilon

dist_lower_bound = Distribution(BinaryContext(d), IndependentBernoulli(d, K, p), actionset)
dist_holes = Distribution(BinaryContext(d), SingleHole(d, K, np.array([0.7, 0.3])), actionset)

# Generate one sequence from the lower-bound distribution, configure the
# algorithm on it, report a few of the resulting constants, then run.
seq = dist_lower_bound.generate(length, rng, rng)
algo.set_constants(rng, seq)
print(seq.sigma, seq.m, algo.beta)
algo.run_on_sequence(rng, seq)
None  # final expression is None so the cell displays no return value

epsilon:  0.013693063937629153
1.0 2 0.25


In [5]:
import time

#algo.theta_position = 20

def unbiased_estimator(rng: np.random.Generator) -> np.ndarray:
    """Sample one context/action product matrix for geometric resampling.

    Note: this redefines (and so shadows) the ``unbiased_estimator`` defined
    in an earlier cell of this notebook.  It reads the module-level ``seq``
    and ``algo`` objects.

    Steps:
      1. draw a context sample from ``seq.context_unbiased_estimator``;
      2. obtain action probabilities for it from ``algo.get_policy``;
      3. draw an action index according to those probabilities;
      4. build T[c, a, d, b] = action[c] * context[a] * action[d] * context[b]
         and flatten it to a square matrix.

    Args:
        rng: Generator used for both the context draw and the action draw.

    Returns:
        A square 2-D array of side ``len(context) * len(action)``.
    """
    context = seq.context_unbiased_estimator(rng)
    policy = algo.get_policy(context)

    candidate_indices = np.arange(algo.actionset.number_of_actions)
    chosen = rng.choice(candidate_indices, p=policy)
    action = algo.actionset[chosen]

    product = np.einsum("a,b,c,d->cadb", context, context, action, action)
    side = len(context) * len(action)
    return product.reshape(side, side)


# --- Inverse estimate via matrix geometric resampling, timed -----------------
# time.perf_counter() is the clock intended for measuring short durations;
# time.time() is wall-clock time and can jump when the system clock is adjusted.
start = time.perf_counter()
inverse_mgr = matrix_geometric_resampling(algo.rng, algo.M, algo.beta, unbiased_estimator)
end = time.perf_counter()
print(end - start, inverse_mgr)

# --- Reference inverse computed directly, timed ------------------------------
start = time.perf_counter()

# Average, over the d one-hot contexts, of outer(mean_action, mean_action),
# where mean_action is the policy-weighted sum of the rows of the action set.
# NOTE(review): this uses outer(E[a], E[a]) and one action matrix shared by all
# contexts, whereas the sampler passed to matrix_geometric_resampling draws
# outer(a, a) for a single sampled action under a context-dependent policy.
# Confirm this is the intended reference for the comparison.
action_matrix = np.zeros((K, K))
for i in range(d):
    context = np.zeros(d)
    context[i] = 1

    probabilities = algo.get_policy(context)
    weighted_action = np.einsum("ab,a->b", algo.actionset.actionset, probabilities)
    action_matrix += np.outer(weighted_action, weighted_action) / d

print(action_matrix)

# Combine identity(d) / d with the action matrix using the same "cadb" index
# layout as the sampler, flatten to (d*K, d*K), and invert.
tensor = np.einsum("ab,cd->cadb", np.identity(d) / d, action_matrix).reshape((d * K, d * K))
inverse = np.linalg.inv(tensor)
end = time.perf_counter()
print(end - start, inverse)


0.11011528968811035 [[ 6.40478872  0.          0.         -3.90576338  0.          0.
  -2.42341785  0.          0.        ]
 [ 0.          8.79742664  0.          0.          0.19227039  0.
   0.         -5.61493004  0.        ]
 [ 0.          0.          5.72354187  0.          0.         -0.12645221
   0.          0.         -3.38018915]
 [-2.67159436  0.          0.          4.98284855  0.          0.
  -0.42882779  0.          0.        ]
 [ 0.         -1.49656575  0.          0.          9.69976298  0.
   0.         -4.48204748  0.        ]
 [ 0.          0.         -0.60299018  0.          0.          6.45919873
   0.          0.         -1.46564225]
 [-1.6785429   0.          0.         -1.81270489  0.          0.
   4.64531783  0.          0.        ]
 [ 0.         -2.58240046  0.          0.         -3.64177845  0.
   0.          7.79438645  0.        ]
 [ 0.          0.         -3.86979221  0.          0.         -1.79434759
   0.          0.          6.24094769]]
[[0.448150