In [1]:
import pandas as pd
import numpy as np
from math import log, ceil, floor, log2, exp
from collections import defaultdict
import random

def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

def exponential_gap_elimination(arm_pulls, delta=0.05):
    S = list(arm_pulls.keys())
    r = 1
    total_cost, total_pulls = 0, 0
    sample_means, num_pulls = {}, {}

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        t_r = min(t_r, min(len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S))
        if t_r <= 0:
            break

        round_means, valid_arms = {}, []
        for arm in S:
            start = num_pulls.get(arm, 0)
            pulls = arm_pulls[arm][start:start + t_r]
            if not pulls:
                continue
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if not valid_arms:
            break

        best_arm = max(round_means, key=round_means.get)
        S = [arm for arm in valid_arms if round_means[arm] >= round_means[best_arm] - ε_r]
        r += 1

    chosen = S[0] if S else None
    confidence = hoeffding_confidence(chosen, sample_means, num_pulls)
    return chosen, total_pulls, total_cost, confidence

def sequential_halving(arm_pulls, total_budget):
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used, total_pulls = 0, 0
    sample_means, num_pulls = {}, {}

    for r in range(R):
        if len(S) <= 1:
            break
        mean_cost = np.mean([np.mean([c for _, c in arm_pulls[arm]]) for arm in S])
        t_r = max(floor((total_budget - budget_used) / mean_cost / len(S)), 1)
        t_r = min(t_r, min(len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S))
        if t_r <= 0:
            break

        round_means, valid_arms = {}, []
        for arm in S:
            start = num_pulls.get(arm, 0)
            pulls = arm_pulls[arm][start:start + t_r]
            if not pulls:
                continue
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)
            if budget_used + cost_sum > total_budget:
                continue
            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if not valid_arms:
            break
        S = [arm for arm, _ in sorted(round_means.items(), key=lambda x: x[1], reverse=True)[:ceil(len(valid_arms) / 2)]]

    chosen = S[0] if S else None
    confidence = hoeffding_confidence(chosen, sample_means, num_pulls)
    return chosen, total_pulls, budget_used, confidence

def generate_dataset(num_arms=6, trials=800, reward_means=None, costs=None):
    data = []
    for trial in range(trials):
        for arm in range(num_arms):
            reward = np.clip(np.random.normal(loc=reward_means[arm], scale=0.05), 0, 1)
            cost = costs[arm]
            data.append((trial, arm, reward, cost))
    df = pd.DataFrame(data, columns=["trial", "arm", "reward", "cost"])
    return df

def run_scenario_2(num_cases=5000):
    ege_correct, ege_conf, ege_cost = 0, 0, 0
    sh_correct, sh_conf, sh_cost = 0, 0, 0
    num_arms, trials, delta = 6, 800, 0.05
    budget = random.choice([1000, 1500])  # You can randomize between 1000 or 1500 if desired

    for _ in range(num_cases):
        base = random.uniform(0.2, 0.3)
        gaps = sorted([random.uniform(0.01, 0.08) for _ in range(num_arms)])
        reward_means = [base + g for g in gaps]
        true_best = np.argmax(reward_means)
        costs = [round(random.uniform(0.04, 0.1), 3) for _ in range(num_arms)]
        df = generate_dataset(num_arms, trials, reward_means, costs)

        arm_pulls = defaultdict(list)
        for _, row in df.iterrows():
            arm_pulls[int(row["arm"])].append((row["reward"], row["cost"]))

        ege_arm, _, ecost, econf = exponential_gap_elimination(arm_pulls, delta=delta)
        sh_arm, _, scost, sconf = sequential_halving(arm_pulls, total_budget=budget)

        ege_correct += int(ege_arm == true_best)
        ege_conf += econf
        ege_cost += ecost

        sh_correct += int(sh_arm == true_best)
        sh_conf += sconf
        sh_cost += scost

    print("\n--- Scenario 2: Moderate Reward Gap ---")
    print("EGE: Accuracy = {:.2f}%, Avg Confidence = {:.3f}, Avg Cost = {:.2f}".format(
        100 * ege_correct / num_cases, ege_conf / num_cases, ege_cost / num_cases))
    print("SH : Accuracy = {:.2f}%, Avg Confidence = {:.3f}, Avg Cost = {:.2f}".format(
        100 * sh_correct / num_cases, sh_conf / num_cases, sh_cost / num_cases))

# Run this function to evaluate Scenario 2
run_scenario_2()



--- Scenario 2: Moderate Reward Gap ---
EGE: Accuracy = 0.00%, Avg Confidence = 0.000, Avg Cost = 335.58
SH : Accuracy = 92.02%, Avg Confidence = 0.062, Avg Cost = 335.58
