In [21]:
import numpy as np
import xbrl.envs as bandits
from xbrl.algs.generalized_linear import UCBGLM
from xbrl.envs.hlsutils import is_hls, derank_hls, hls_rank, random_transform
import matplotlib.pyplot as plt

In [22]:
# Experiment-wide settings.
n_runs = 5   # number of repetitions; one environment seed and one agent seed each
tol = 1e-6   # tolerance handed to hls_rank when generated instances are validated
# Shared generator from which the seed lists below are drawn. Its state is
# advanced by every draw, so reproducibility relies on running cells in order.
rng = np.random.RandomState(1)

In [23]:
# One environment seed per run, each an integer in [0, 9999) taken from the
# shared generator `rng`. The draws advance `rng`, so the values depend on the
# cells being executed in order.
env_seeds = list(rng.randint(9999) for _ in range(n_runs))
env_seeds

[235, 5192, 905, 7813, 2895]

In [24]:
# One agent seed per run, drawn from the same generator `rng` right after the
# environment seeds (so the two lists come from consecutive draws).
agent_seeds = list(rng.randint(9999) for _ in range(n_runs))
agent_seeds

[5056, 144, 4225, 7751, 3462]

In [25]:
def make_agent(seed, env, algo=UCBGLM):
    """Instantiate an agent for ``env`` using this notebook's fixed settings.

    Parameters
    ----------
    seed
        Seed forwarded to the agent constructor.
    env
        Environment the agent is built for.
    algo
        Callable that builds the agent; defaults to ``UCBGLM``.

    Returns
    -------
    Whatever ``algo(...)`` returns.
    """
    # Hyper-parameters shared by every agent created through this helper.
    settings = dict(
        update_every_n_steps=1,
        delta=0.01,
        bonus_scale=1.,
        opt_tolerance=1e-8,
    )
    return algo(env=env, seed=seed, **settings)

## Motivating Example

In [26]:
# Earlier variant, kept disabled: sample the features instead of loading them.
#   hls_features = rng.uniform(low=-1., high=1., size=(20, 4, 5))
horizon = 100000  # passed to agent.run(horizon=...) for every run below

# Base problem stored on disk: feature array and parameter vector.
hls_features = np.load("../problem_data/basic_features.npy")
hls_param = np.load("../problem_data/basic_param.npy")
dim = hls_features.shape[-1]  # size of the last axis; must match hls_param for the product below

# The sampled vector itself is not used (hls_param comes from disk), but the
# draw advances `rng`; dropping it would change the seeds drawn from `rng`
# in the following cells.
rng.uniform(low=-1., high=1., size=dim)

# Sanity check on the loaded problem before anything is run.
assert is_hls(hls_features, hls_features @ hls_param)
hls_features.shape

(20, 4, 5)

In [27]:
# One seed per possible target rank (dim of them), drawn from the shared
# generator `rng`. make_env looks them up as instance_seeds[rank - 1].
instance_seeds = list(rng.randint(9999) for _ in range(dim))
instance_seeds

[4764, 8093, 6542, 562, 8151]

In [28]:
# Load the base problem again from the same files used earlier, so the
# make_env helper defined in this cell works from freshly loaded arrays.
hls_param = np.load("../problem_data/basic_param.npy")
hls_features = np.load("../problem_data/basic_features.npy")
dim = hls_features.shape[-1]
# Same sanity check as before on the reloaded data.
assert is_hls(hls_features, hls_features @ hls_param)

def make_env(rank, seed):
    """Build a ``bandits.CBFinite`` environment derived from the base problem.

    The base problem (``hls_features``, ``hls_param``) is passed through
    ``derank_hls`` with ``newrank=rank``, ``transform=True`` and
    ``normalize=True``. The seed for that step is fixed per rank through
    ``instance_seeds``, so every call with the same ``rank`` passes the same
    seed to ``derank_hls``; ``seed`` is only forwarded to the environment.

    Parameters
    ----------
    rank : int
        Target rank, between 1 and ``len(instance_seeds)`` inclusive.
    seed : int
        Seed forwarded to ``bandits.CBFinite``.

    Returns
    -------
    The ``bandits.CBFinite`` instance built with ``noise="bernoulli"``.

    Raises
    ------
    ValueError
        If ``rank`` is outside ``1..len(instance_seeds)``.
    AssertionError
        If ``hls_rank`` of the generated instance differs from ``rank``.
    """
    # Without this guard, rank=0 would index instance_seeds[-1] and silently
    # reuse the seed of the highest rank instead of failing.
    if not 1 <= rank <= len(instance_seeds):
        raise ValueError(
            "rank must be in 1..%d, got %r" % (len(instance_seeds), rank)
        )

    features, param = derank_hls(hls_features, hls_param, newrank=rank,
                                 transform=True,
                                 normalize=True,
                                 seed=instance_seeds[rank - 1])
    # Computed once and reused for both the rank check and the environment.
    rewards = features @ param
    assert hls_rank(features, rewards, tol=tol) == rank, \
        "derank_hls did not produce an instance of rank %d" % rank
    return bandits.CBFinite(feature_matrix=features,
                            rewards=rewards,
                            noise="bernoulli",
                            seed=seed)

In [29]:
# Run the agent n_runs times for each target rank and plot the expected-regret
# curve averaged over runs. results[k][i] keeps the raw output of run i for
# ranks[k] so it can be inspected after the cell has finished.
results = []
lines = []
ranks = [5, 4, 3, 2, 1]
fig, ax = plt.subplots()
for k, rank in enumerate(ranks):
    regrets = []
    results.append([])
    for i in range(n_runs):
        # Run i uses the same (environment seed, agent seed) pair for every rank.
        env = make_env(rank, env_seeds[i])
        agent = make_agent(agent_seeds[i], env)
        agent.reset()
        result = agent.run(horizon=horizon)
        results[k].append(result)
        regrets.append(result['expected_regret'])
    # Stack the per-run curves and average across runs (axis 0).
    regrets = np.array(regrets)
    avg_regret = np.mean(regrets, axis=0)
    line, = ax.plot(avg_regret, label="hls_rank=%d" % rank)
    lines.append(line)
# Label the figure so it can be read without the surrounding code.
ax.set_xlabel("step")
ax.set_ylabel("expected regret (mean over %d runs)" % n_runs)
ax.set_title("Expected regret by hls_rank")
ax.legend(handles=lines)
plt.show()

  1%|█                                                                                    | 1200/100000 [00:03<04:48, 342.29it/s, % optimal arm (last 100 steps)=24.75%, train loss=0, expected regret=330]


KeyboardInterrupt: 