In [1]:
import numpy as np

from misc.matrix_geometric_resampling import matrix_geometric_resampling

rng = np.random.default_rng()

In [2]:
n = 10

matrix = np.identity(n) / n

def unbiased_estimator(rng: np.random.Generator) -> np.ndarray:
    index = rng.integers(n)

    result = np.zeros((n, n))
    result[index, index] = 1

    return result
    
result = matrix_geometric_resampling(rng, 1, 0.05, unbiased_estimator)
print(result)
np.tensordot(result, matrix, axes=(([0,1], [1,0])))

[[0.1    0.     0.     0.     0.     0.     0.     0.     0.     0.    ]
 [0.     0.1    0.     0.     0.     0.     0.     0.     0.     0.    ]
 [0.     0.     0.1    0.     0.     0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.1    0.     0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.1    0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.     0.1    0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.     0.     0.1    0.     0.     0.    ]
 [0.     0.     0.     0.     0.     0.     0.     0.1    0.     0.    ]
 [0.     0.     0.     0.     0.     0.     0.     0.     0.0975 0.    ]
 [0.     0.     0.     0.     0.     0.     0.     0.     0.     0.1   ]]


array(0.09975)

In [3]:
from distributions.distribution_by_sequence import DistributionBySequence
from distributions.distribution import Distribution
from distributions.sequence import Sequence
from algorithms.semi_bandit_exp3 import SemiBanditExp3
from algorithms.full_bandit_exp3 import FullBanditExp3
from algorithms.semi_bandit_ftrl import SemiBanditFTRL
from algorithms.uniform_random import UniformRandom
from algorithms.non_contextual_exp3 import NonContextualExp3
from experiment_manager.experiment_manager import ExperimentManager

from distributions.actionsets.msets import MSets

from distributions.contexts.binary_context import BinaryContext
from distributions.thetas.single_hole import SingleHole
from distributions.thetas.independent_bernoulli import IndependentBernoulli

In [4]:
algo = FullBanditExp3()

length = 1000
d = 3
K = 2
actionset = MSets(K, 1)

epsilon = 0.25 * np.min([np.sqrt(K / length), 1])
print("epsilon: ", epsilon)
p = np.zeros((d, K)) + 0.5
for i in range(d):
    p[i, 0] -= epsilon

dist_lower_bound = Distribution(BinaryContext(d), IndependentBernoulli(d, K, p), actionset)
dist_holes = Distribution(BinaryContext(d), SingleHole(d, K, np.array([0.7, 0.3])), actionset)

seq = dist_lower_bound.generate(length, rng, rng)
algo.set_constants(rng, seq)
print(seq.sigma, seq.m, algo.beta)
algo.run_on_sequence(rng, seq)
None

epsilon:  0.011180339887498949
1.0 1 0.5
breaking at 129/338
breaking at 138/338
breaking at 156/338
breaking at 142/338
breaking at 126/338
breaking at 118/338
breaking at 128/338
breaking at 116/338
breaking at 148/338
breaking at 132/338
breaking at 128/338
breaking at 153/338
breaking at 119/338
breaking at 148/338
breaking at 132/338
breaking at 118/338
breaking at 119/338
breaking at 128/338
breaking at 123/338
breaking at 132/338
breaking at 118/338
breaking at 163/338
breaking at 139/338
breaking at 115/338
breaking at 136/338
breaking at 132/338
breaking at 113/338
breaking at 142/338
breaking at 149/338
breaking at 111/338
breaking at 167/338
breaking at 112/338
breaking at 143/338
breaking at 140/338
breaking at 141/338
breaking at 149/338
breaking at 133/338
breaking at 125/338
breaking at 124/338
breaking at 163/338
breaking at 143/338
breaking at 123/338
breaking at 127/338
breaking at 122/338
breaking at 113/338
breaking at 143/338
breaking at 127/338
breaking at 132/338

In [5]:
import time

#algo.theta_position = 20

def unbiased_estimator(rng: np.random.Generator) -> np.ndarray:
    context_sample = seq.context_unbiased_estimator(rng)
    probabilities = algo.get_policy(context_sample)
    action_sample_index = rng.choice(np.arange(algo.actionset.number_of_actions), p=probabilities)

    tensor = np.einsum("a,b,c,d->abcd", context_sample, context_sample,  algo.actionset[action_sample_index],  algo.actionset[action_sample_index])
    output_matrix_length = len(context_sample) * len(algo.actionset[action_sample_index])
    return tensor.reshape(output_matrix_length, output_matrix_length)


start = time.time()
inverse = matrix_geometric_resampling(algo.rng, algo.M, algo.beta, unbiased_estimator)
end = time.time()
print(end - start, inverse)

start = time.time()

action_matrix = np.zeros((K,K))
for i in range(d):
    context = np.zeros(d)
    context[i] = 1

    probabilities = algo.get_policy(context)
    print(probabilities)
    weighted_action = np.einsum("ab,a->b", algo.actionset.actionset, probabilities)
    action_matrix += np.outer(weighted_action, weighted_action) / d

print(action_matrix)
tensor = np.einsum("ab,cd->abcd", np.identity(d)/d,  action_matrix).reshape((d*K, d*K))
inverse = np.linalg.inv(tensor)
end = time.time()
print(end - start, inverse)


0.10340404510498047 [[ 2.10729240e+00  3.01239934e+02  0.00000000e+00 -2.11120870e+01
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  1.69500000e+02  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  1.69500000e+02  0.00000000e+00
  -2.63950000e+03  0.00000000e+00]
 [ 0.00000000e+00 -2.42750000e+03  0.00000000e+00  1.69500000e+02
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   1.69500000e+02  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00 -2.47370892e+02  0.00000000e+00
   3.68904181e+03  8.75874092e+00]]
[0.62640246 0.37359754]
[0.40784657 0.59215343]
[0.54391642 0.45608358]
[[0.28485464 0.2412005 ]
 [0.2412005  0.23274435]]


LinAlgError: Singular matrix