In [None]:
%matplotlib inline
import math
import numpy as np

from tdfs.routines import tdfs_routine, tdfs_log_upper_bound
from plots import regret_plt

# Single example TDFS
The next run simply illustrates the results obtained with TDFS on a single simulation.

In [None]:
# UNIVERSE PARAMETERS
# Number of users; also the number of top arm means kept in best_arms_mean.
n_users = 3
# Time horizon handed to the simulation routine.
t_horizon = 1000
# Mean of each arm.
arm_means = [0.2, 0.3, 0.5, 0.8, 0.9]
# Derived from arm_means so the arm count can never drift out of sync with it.
n_arms = len(arm_means)

# Fail early on an inconsistent setup: keeping the n_users best arms only
# makes sense when at least that many arms exist.
assert 1 <= n_users <= n_arms, "n_users must be between 1 and the number of arms"

# Means of the n_users best arms, in decreasing order
# (ascending sort -> reverse -> keep the first n_users entries).
best_arms_mean = np.sort(arm_means)[::-1][:n_users]

In [None]:
# Run one TDFS simulation with the universe parameters above, explicitly
# selecting alg='ucb'. The result is handed as-is to regret_plt in the next cell.
total_rewards = tdfs_routine(n_users, n_arms, t_horizon, arm_means, alg='ucb')

In [None]:
# Plot the outcome of the single run. best_arms_mean is presumably the reference
# the regret is measured against -- confirm in plots.regret_plt.
regret_plt(best_arms_mean, total_rewards)

# Expected regret curve
This next section shows the expected regret curve, estimated with a Monte-Carlo strategy. It also shows the upper bound computed from Theorem 2.

In [None]:
# MONTE-CARLO PARAMETERS
# Number of independent simulation runs that are averaged afterwards.
mc_horizon = 50
# Result buffer: one row per Monte-Carlo run, t_horizon columns per row.
rewards = np.zeros(shape=(mc_horizon, t_horizon), dtype=float)

In [None]:
# Repeat the simulation mc_horizon times, storing each run in its own row of `rewards`.
# NOTE(review): unlike the single-example call, `alg` is not passed here, so
# tdfs_routine uses whatever it does when `alg` is omitted -- confirm this is intended.
for mc_run in range(mc_horizon):
    run_rewards = tdfs_routine(n_users, n_arms, t_horizon, arm_means)
    # Only column 0 of the returned array is kept for this run.
    rewards[mc_run, :] = run_rewards[:, 0]

In [None]:
# Multiplicative constant of the logarithmic bound, as returned by tdfs_log_upper_bound.
c = tdfs_log_upper_bound(n_users, arm_means)
# Entry k holds c * log(k) for k >= 1; entry 0 is pinned to 0 since log(0) is
# undefined. The list therefore has t_horizon entries.
upper_bound = [0] + [c * math.log(step) for step in range(1, t_horizon)]

In [None]:
# Average across the Monte-Carlo runs (axis 0), leaving one value per column.
expected_rewards = rewards.mean(axis=0)
# Plot the averaged result together with the logarithmic upper bound.
regret_plt(best_arms_mean, expected_rewards, upper_bound=upper_bound)