In [1]:
# ================================
# Algorithms and Utility Functions
# ================================

import numpy as np
from math import log, ceil, floor, log2, exp
from collections import defaultdict

def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    """
    Calculates Hoeffding-based lower bound confidence for the selected arm.
    """
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

def exponential_gap_elimination(arm_pulls, delta=0.05):
    """
    EGE algorithm for fixed-confidence best arm identification.
    """
    S = list(arm_pulls.keys())
    r = 1
    total_cost = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}
    stopping_early = False

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            stopping_early = True
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg = np.mean(rewards)
            sample_means[arm] = avg
            round_means[arm] = avg
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            stopping_early = True
            break

        best_arm = max(round_means, key=round_means.get)
        p_hat_star = round_means[best_arm]
        S = [arm for arm in valid_arms if round_means[arm] >= p_hat_star - ε_r]
        r += 1

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence, stopping_early

def sequential_halving(arm_pulls, total_budget):
    """
    SH algorithm for fixed-budget best arm identification.
    """
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}
    stopping_early = False

    for r in range(R):
        if len(S) <= 1:
            break

        avg_costs = {arm: np.mean([c for _, c in arm_pulls[arm]]) for arm in S}
        mean_cost_per_arm = sum(avg_costs.values()) / len(S)
        max_total_pulls = floor((total_budget - budget_used) / mean_cost_per_arm)
        t_r = max(floor(max_total_pulls / len(S)), 1)

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            stopping_early = True
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)

            if not rewards or (budget_used + cost_sum > total_budget):
                continue

            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            stopping_early = True
            break

        sorted_arms = sorted(round_means.items(), key=lambda x: x[1], reverse=True)
        S = [arm for arm, _ in sorted_arms[:ceil(len(valid_arms) / 2)]]

        if budget_used >= total_budget:
            break

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, budget_used, confidence, stopping_early


In [6]:
# ================================
# Experiment Runner and Formatter
# ================================

import random
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from collections import defaultdict

# Adjustable Parameters
NUM_CASES = 5000
NUM_ARMS = 6
NUM_PULLS_PER_ARM = 1500
COST_RANGE = (0.05, 0.2)
REWARD_GAP_MIN = 0.1
DELTA = 0.05
BUDGET = 1000

def generate_case(normal_gap=REWARD_GAP_MIN):
    """
    Generate one synthetic case with 6 arms having uniform rewards and random costs.
    Ensures only one best arm with reward gap ≥ 0.1.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    # Use a fixed width for uniform distribution range
    width = 0.2  # adjust this if needed
    lower_bounds = [max(0.0, m - width / 2) for m in all_means]
    upper_bounds = [min(1.0, m + width / 2) for m in all_means]

    pulls = defaultdict(list)
    for arm in range(NUM_ARMS):
        rewards = np.random.uniform(low=lower_bounds[arm], high=upper_bounds[arm], size=NUM_PULLS_PER_ARM)
        costs = np.random.uniform(*COST_RANGE, size=NUM_PULLS_PER_ARM)
        pulls[arm] = list(zip(rewards, costs))

    return pulls, lower_bounds, upper_bounds

def run_experiments(num_cases=5000):
    ege_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    sh_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_lows, all_highs = [], []

    for _ in tqdm(range(num_cases)):
        arm_pulls, lows, highs = generate_case()
        all_lows.extend(lows)
        all_highs.extend(highs)
        best_arm = int(np.argmax([(l + h) / 2 for l, h in zip(lows, highs)]))  # based on mean

        # Run EGE
        ege_arm, ep, ec, ecf, early_e = exponential_gap_elimination(arm_pulls, delta=DELTA)
        ege_results['pulls'].append(ep)
        ege_results['cost'].append(ec)
        ege_results['confidence'].append(ecf)
        ege_results['correct'] += (ege_arm == best_arm)
        ege_results['early'] += early_e

        # Run SH
        sh_arm, sp, sc, scf, early_s = sequential_halving(arm_pulls, total_budget=BUDGET)
        sh_results['pulls'].append(sp)
        sh_results['cost'].append(sc)
        sh_results['confidence'].append(scf)
        sh_results['correct'] += (sh_arm == best_arm)
        sh_results['early'] += early_s

    return ege_results, sh_results, all_lows, all_highs

def format_summary(results, num_cases):
    """
    Convert summary stats into a neat DataFrame.
    """
    def summarize(res):
        return {
            "Average Pulls": np.mean(res['pulls']),
            "Average Cost": np.mean(res['cost']),
            "Average Confidence": np.mean(res['confidence']),
            "Accuracy (%)": 100 * res['correct'] / num_cases,
            "Stopping Early (%)": 100 * res['early'] / num_cases
        }

    ege_stats = summarize(results[0])
    sh_stats = summarize(results[1])
    df_summary = pd.DataFrame([ege_stats, sh_stats], index=["EGE", "SH"])
    df_summary = df_summary.round(2)

    dist_summary = {
        "Avg Lower Bound": np.mean(results[2]),
        "Avg Upper Bound": np.mean(results[3]),
        "Avg Width": np.mean([h - l for l, h in zip(results[2], results[3])])
    }
    dist_df = pd.DataFrame([dist_summary], index=["Distribution Stats"]).round(3)

    return df_summary, dist_df

# Run Experiments
results = run_experiments(NUM_CASES)

# Display formatted summary
summary_df, dist_df = format_summary(results, NUM_CASES)

print("\n=== Algorithm Performance Summary ===")
display(summary_df)

print("\n=== Reward Distribution Summary ===")
display(dist_df)


  0%|          | 0/5000 [00:00<?, ?it/s]


=== Algorithm Performance Summary ===


Unnamed: 0,Average Pulls,Average Cost,Average Confidence,Accuracy (%),Stopping Early (%)
EGE,9000.0,1125.0,0.16,16.2,100.0
SH,7685.29,960.29,0.83,87.0,50.88



=== Reward Distribution Summary ===


Unnamed: 0,Avg Lower Bound,Avg Upper Bound,Avg Width
Distribution Stats,0.525,0.725,0.2


In [7]:
# ================================
# Experiment Runner and Formatter
# ================================

import random
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from collections import defaultdict

# Adjustable Parameters
NUM_CASES = 5000
NUM_ARMS = 6
NUM_PULLS_PER_ARM = 1500
COST_RANGE = (0.05, 0.2)
REWARD_GAP_MIN = 0.1
DELTA = 0.1
BUDGET = 2000

def generate_case(normal_gap=REWARD_GAP_MIN):
    """
    Generate one synthetic case with 6 arms having uniform rewards and random costs.
    Ensures only one best arm with reward gap ≥ 0.1.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    # Use a fixed width for uniform distribution range
    width = 0.2  # adjust this if needed
    lower_bounds = [max(0.0, m - width / 2) for m in all_means]
    upper_bounds = [min(1.0, m + width / 2) for m in all_means]

    pulls = defaultdict(list)
    for arm in range(NUM_ARMS):
        rewards = np.random.uniform(low=lower_bounds[arm], high=upper_bounds[arm], size=NUM_PULLS_PER_ARM)
        costs = np.random.uniform(*COST_RANGE, size=NUM_PULLS_PER_ARM)
        pulls[arm] = list(zip(rewards, costs))

    return pulls, lower_bounds, upper_bounds

def run_experiments(num_cases=5000):
    ege_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    sh_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_lows, all_highs = [], []

    for _ in tqdm(range(num_cases)):
        arm_pulls, lows, highs = generate_case()
        all_lows.extend(lows)
        all_highs.extend(highs)
        best_arm = int(np.argmax([(l + h) / 2 for l, h in zip(lows, highs)]))  # based on mean

        # Run EGE
        ege_arm, ep, ec, ecf, early_e = exponential_gap_elimination(arm_pulls, delta=DELTA)
        ege_results['pulls'].append(ep)
        ege_results['cost'].append(ec)
        ege_results['confidence'].append(ecf)
        ege_results['correct'] += (ege_arm == best_arm)
        ege_results['early'] += early_e

        # Run SH
        sh_arm, sp, sc, scf, early_s = sequential_halving(arm_pulls, total_budget=BUDGET)
        sh_results['pulls'].append(sp)
        sh_results['cost'].append(sc)
        sh_results['confidence'].append(scf)
        sh_results['correct'] += (sh_arm == best_arm)
        sh_results['early'] += early_s

    return ege_results, sh_results, all_lows, all_highs

def format_summary(results, num_cases):
    """
    Convert summary stats into a neat DataFrame.
    """
    def summarize(res):
        return {
            "Average Pulls": np.mean(res['pulls']),
            "Average Cost": np.mean(res['cost']),
            "Average Confidence": np.mean(res['confidence']),
            "Accuracy (%)": 100 * res['correct'] / num_cases,
            "Stopping Early (%)": 100 * res['early'] / num_cases
        }

    ege_stats = summarize(results[0])
    sh_stats = summarize(results[1])
    df_summary = pd.DataFrame([ege_stats, sh_stats], index=["EGE", "SH"])
    df_summary = df_summary.round(2)

    dist_summary = {
        "Avg Lower Bound": np.mean(results[2]),
        "Avg Upper Bound": np.mean(results[3]),
        "Avg Width": np.mean([h - l for l, h in zip(results[2], results[3])])
    }
    dist_df = pd.DataFrame([dist_summary], index=["Distribution Stats"]).round(3)

    return df_summary, dist_df

# Run Experiments
results = run_experiments(NUM_CASES)

# Display formatted summary
summary_df, dist_df = format_summary(results, NUM_CASES)

print("\n=== Algorithm Performance Summary ===")
display(summary_df)

print("\n=== Reward Distribution Summary ===")
display(dist_df)

  0%|          | 0/5000 [00:00<?, ?it/s]


=== Algorithm Performance Summary ===


Unnamed: 0,Average Pulls,Average Cost,Average Confidence,Accuracy (%),Stopping Early (%)
EGE,9000.0,1124.95,0.16,15.86,100.0
SH,9000.0,1124.95,1.0,100.0,100.0



=== Reward Distribution Summary ===


Unnamed: 0,Avg Lower Bound,Avg Upper Bound,Avg Width
Distribution Stats,0.526,0.726,0.2
