In [1]:
import numpy as np
import pandas as pd
from math import log, ceil, floor, log2, exp
from collections import defaultdict
from tqdm import tqdm

# --- Algorithm Code Reused from Earlier (shortened for reuse) ---
def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

def exponential_gap_elimination(arm_pulls, delta=0.1):
    S = list(arm_pulls.keys())
    r = 1
    total_cost = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            pulls = arm_pulls[arm][start:start + t_r]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg = np.mean(rewards)
            sample_means[arm] = avg
            round_means[arm] = avg
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if not valid_arms:
            break

        best_arm = max(round_means, key=round_means.get)
        p_hat_star = round_means[best_arm]
        S = [arm for arm in valid_arms if round_means[arm] >= p_hat_star - ε_r]
        r += 1

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence

def sequential_halving(arm_pulls, total_budget):
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    for r in range(R):
        if len(S) <= 1:
            break

        avg_costs = {arm: np.mean([c for _, c in arm_pulls[arm]]) for arm in S}
        mean_cost_per_arm = sum(avg_costs.values()) / len(S)
        max_total_pulls = floor((total_budget - budget_used) / mean_cost_per_arm)
        t_r = max(floor(max_total_pulls / len(S)), 1)

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            pulls = arm_pulls[arm][start:start + t_r]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)

            if not rewards or (budget_used + cost_sum > total_budget):
                continue

            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if not valid_arms:
            break

        sorted_arms = sorted(round_means.items(), key=lambda x: x[1], reverse=True)
        S = [arm for arm, _ in sorted_arms[:ceil(len(valid_arms) / 2)]]

        if budget_used >= total_budget:
            break

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, budget_used, confidence

# --- Simulation Code ---
def simulate_run():
    arm_means = np.sort(np.random.uniform(0, 1, 6))
    for i in range(1, len(arm_means)):
        if arm_means[i] - arm_means[i-1] < 0.1:
            arm_means[i] = arm_means[i-1] + 0.1
    arm_means = np.clip(arm_means, 0, 1)

    arm_costs = np.random.uniform(0.05, 0.2, 6)
    arm_pulls = {
        arm: [(np.random.normal(loc=arm_means[arm], scale=0.05), arm_costs[arm]) for _ in range(1500)]
        for arm in range(6)
    }
    true_best_arm = int(np.argmax(arm_means))

    ege = exponential_gap_elimination(arm_pulls, delta=0.1)
    sh = sequential_halving(arm_pulls, total_budget=2000)

    return {
        'ege': (*ege, int(ege[0] == true_best_arm)),
        'sh': (*sh, int(sh[0] == true_best_arm))
    }

def aggregate_results(results):
    def summarize(alg):
        pulls = np.array([r[alg][1] for r in results])
        cost = np.array([r[alg][2] for r in results])
        conf = np.array([r[alg][3] for r in results])
        correct = np.array([r[alg][4] for r in results])
        return {
            'avg_pulls': pulls.mean(),
            'avg_cost': cost.mean(),
            'avg_confidence': conf.mean(),
            'accuracy_%': correct.mean() * 100,
            'std_error_pulls': pulls.std(ddof=1) / np.sqrt(len(pulls)),
            'std_error_cost': cost.std(ddof=1) / np.sqrt(len(cost)),
            'std_error_conf': conf.std(ddof=1) / np.sqrt(len(conf))
        }
    return {alg: summarize(alg) for alg in ['ege', 'sh']}

# --- Run the Simulation ---
if __name__ == '__main__':
    results = [simulate_run() for _ in tqdm(range(5000))]
    summary = aggregate_results(results)
    print("\nSummary Results:")
    for alg, stats in summary.items():
        print(f"\n{alg.upper()} Results:")
        for k, v in stats.items():
            print(f"  {k}: {v:.4f}")


100%|██████████| 5000/5000 [01:50<00:00, 45.41it/s]


Summary Results:

EGE Results:
  avg_pulls: 4829.3316
  avg_cost: 604.3638
  avg_confidence: 0.1622
  accuracy_%: 16.2200
  std_error_pulls: 18.0668
  std_error_cost: 2.7392
  std_error_conf: 0.0052

SH Results:
  avg_pulls: 9000.0000
  avg_cost: 1126.7393
  avg_confidence: 0.9270
  accuracy_%: 98.0400
  std_error_pulls: 0.0000
  std_error_cost: 2.2558
  std_error_conf: 0.0033



