In [17]:
import matplotlib.pyplot as plt 
import numpy as np 
import math 
from tqdm import tqdm
np.random.seed(2025)

In [18]:
def uniform_sampling(T, K, means):
    rewards = np.zeros(K)
    for k in range(K):
        rewards[k] = np.sum(np.random.binomial(1, means[k], T // K))
    empirical_means = rewards / (T//K)
    return rewards, empirical_means

In [19]:
def successive_rejects(T, K, means):
    A1 = np.array(list(range(1, K+1)))
    log_bar_K = 1/2 + np.sum([1/k for k in range(2, K+1)])
    n0 = 0
    for j in range(1, K):
        nj = math.floor((T-K) / (log_bar_K * (K + 1 - j)))

In [20]:
def run_experience(algorithm, T, K, means, n_iter=10**4):
    counter = np.zeros(K)
    for n in tqdm(range(n_iter)):
        if algorithm=="uniform_sampling":
            rewards, empirical_means = uniform_sampling(T, K, means)
            best_arm = np.argmax(empirical_means) 
        else:
            rewards, empirical_means = successive_rejects(T, K, means)
            best_arm = np.argmax(empirical_means) 
        counter[best_arm] += 1
    counter /= n_iter
    p = 1 - np.sum(counter[1:])
    return p
        

In [22]:
K = 20
means = [0.6] + [0.5] * (K-1)
Ts = [100, 500, 2000]

for T in Ts:
    p_error = run_experience(algorithm="uniform_sampling", T=T, K=K, means=means, n_iter=10**4)
    print(f"\nT = {T}")
    print(f"Probability of error for uniform sampling is: {p_error}")

100%|██████████| 10000/10000 [00:00<00:00, 28588.26it/s]



T = 100
Probability of error for uniform sampling is: 0.22360000000000002


100%|██████████| 10000/10000 [00:00<00:00, 27149.57it/s]



T = 500
Probability of error for uniform sampling is: 0.2846000000000001


100%|██████████| 10000/10000 [00:00<00:00, 18101.45it/s]


T = 2000
Probability of error for uniform sampling is: 0.6051





In [16]:
arms = [0.6, 0.7, 0.7, 0.4]
np.argmax(arms)


1