In [5]:
import numpy as np
from tqdm import tqdm
from math import log, ceil, floor, log2, exp
from collections import defaultdict

# Simulate one case
def generate_case(n_arms=6, pulls_per_arm=1500):
    base = np.round(np.sort(np.random.uniform(0.1, 0.9, n_arms)), 2)
    while min(np.diff(base)) < 0.1:
        base = np.round(np.sort(np.random.uniform(0.1, 0.9, n_arms)), 2)

    costs = {i: np.round(np.random.uniform(0.05, 0.2), 3) for i in range(n_arms)}
    arm_pulls = defaultdict(list)

    for arm in range(n_arms):
        rewards = np.random.normal(loc=base[arm], scale=0.05, size=pulls_per_arm)
        rewards = np.clip(rewards, 0, 1)
        arm_pulls[arm] = [(r, costs[arm]) for r in rewards]

    return arm_pulls, np.argmax(base)

# Hoeffding confidence
def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta ** 2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

# EGE algorithm
def exponential_gap_elimination(arm_pulls, delta=0.05):
    S = list(arm_pulls.keys())
    r = 1
    total_cost = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg = np.mean(rewards)
            sample_means[arm] = avg
            round_means[arm] = avg
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            break

        best_arm = max(round_means, key=round_means.get)
        p_hat_star = round_means[best_arm]
        S = [arm for arm in valid_arms if round_means[arm] >= p_hat_star - ε_r]
        r += 1

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence

# SH algorithm
def sequential_halving(arm_pulls, total_budget):
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    for r in range(R):
        if len(S) <= 1:
            break

        avg_costs = {arm: arm_pulls[arm][0][1] for arm in S}
        mean_cost_per_arm = sum(avg_costs.values()) / len(S)
        max_total_pulls = floor((total_budget - budget_used) / mean_cost_per_arm)
        t_r = max(floor(max_total_pulls / len(S)), 1)

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)

            if not rewards or (budget_used + cost_sum > total_budget):
                continue

            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            break

        sorted_arms = sorted(round_means.items(), key=lambda x: x[1], reverse=True)
        S = [arm for arm, _ in sorted_arms[:ceil(len(valid_arms) / 2)]]

        if budget_used >= total_budget:
            break

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, budget_used, confidence

# Run 5000 simulations
def run_simulations(n_cases=5000, delta=0.05, budget=1000):
    ege_stats = []
    sh_stats = []

    for _ in tqdm(range(n_cases), desc="Running simulations"):
        arm_pulls, true_best = generate_case()

        # EGE
        ege_arm, ege_pulls, ege_cost, ege_conf = exponential_gap_elimination(arm_pulls, delta=delta)
        ege_stats.append((ege_pulls, ege_cost, ege_conf, ege_arm == true_best))

        # SH
        sh_arm, sh_pulls, sh_cost, sh_conf = sequential_halving(arm_pulls, total_budget=budget)
        sh_stats.append((sh_pulls, sh_cost, sh_conf, sh_arm == true_best))

    def summarize(stats):
        pulls, costs, confs, correct = zip(*stats)
        return {
            'avg_pulls': np.mean(pulls),
            'avg_cost': np.mean(costs),
            'avg_confidence': np.mean(confs),
            'accuracy_%': 100 * np.mean(correct),
            'std_error_pulls': np.std(pulls) / np.sqrt(len(pulls)),
            'std_error_cost': np.std(costs) / np.sqrt(len(costs)),
            'std_error_conf': np.std(confs) / np.sqrt(len(confs)),
        }

    return summarize(ege_stats), summarize(sh_stats)

# Run it
if __name__ == "__main__":
    ege_results, sh_results = run_simulations()
    print("\nEGE Results:")
    for k, v in ege_results.items():
        print(f"  {k}: {v:.4f}")

    print("\nSH Results:")
    for k, v in sh_results.items():
        print(f"  {k}: {v:.4f}")


Running simulations: 100%|██████████| 5000/5000 [01:01<00:00, 81.42it/s]



EGE Results:
  avg_pulls: 4846.9706
  avg_cost: 602.8437
  avg_confidence: 0.0694
  accuracy_%: 6.9400
  std_error_pulls: 7.2442
  std_error_cost: 1.7742
  std_error_conf: 0.0036

SH Results:
  avg_pulls: 7960.5284
  avg_cost: 979.8483
  avg_confidence: 0.9028
  accuracy_%: 92.5600
  std_error_pulls: 12.0831
  std_error_cost: 0.7157
  std_error_conf: 0.0039


In [4]:
from tqdm import tqdm

def run_simulations(n_cases=5000, delta=0.05, budget=1000):
    ege_stats = []
    sh_stats = []

    for _ in tqdm(range(n_cases), desc="Running simulations"):
        arm_pulls, true_best = generate_case()

        ege_arm, ege_pulls, ege_cost, ege_conf = exponential_gap_elimination(arm_pulls, delta=delta)
        ege_stats.append((ege_pulls, ege_cost, ege_conf, ege_arm == true_best))

        sh_arm, sh_pulls, sh_cost, sh_conf = sequential_halving(arm_pulls, total_budget=budget)
        sh_stats.append((sh_pulls, sh_cost, sh_conf, sh_arm == true_best))

    def summarize(stats):
        pulls, costs, confs, correct = zip(*stats)
        return {
            'avg_pulls': np.mean(pulls),
            'avg_cost': np.mean(costs),
            'avg_confidence': np.mean(confs),
            'accuracy_%': 100 * np.mean(correct),
            'std_error_pulls': np.std(pulls) / np.sqrt(len(pulls)),
            'std_error_cost': np.std(costs) / np.sqrt(len(costs)),
            'std_error_conf': np.std(confs) / np.sqrt(len(confs)),
        }

    return summarize(ege_stats), summarize(sh_stats)

# Run and print
if __name__ == "__main__":
    ege_results, sh_results = run_simulations()
    print("\nEGE Results:")
    for k, v in ege_results.items():
        print(f"  {k}: {v:.4f}")

    print("\nSH Results:")
    for k, v in sh_results.items():
        print(f"  {k}: {v:.4f}")


Running simulations: 100%|██████████| 5000/5000 [01:02<00:00, 80.46it/s]


EGE Results:
  avg_pulls: 4844.0894
  avg_cost: 606.9386
  avg_confidence: 0.0742
  accuracy_%: 7.4200
  std_error_pulls: 7.1480
  std_error_cost: 1.7644
  std_error_conf: 0.0037

SH Results:
  avg_pulls: 7933.6926
  avg_cost: 981.2214
  avg_confidence: 0.8995
  accuracy_%: 92.7600
  std_error_pulls: 12.2359
  std_error_cost: 0.6867
  std_error_conf: 0.0039



