In [2]:
# ================================
# Algorithms and Utility Functions
# ================================

import numpy as np
from math import log, ceil, floor, log2, exp
from collections import defaultdict

def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    """
    Calculates Hoeffding-based lower bound confidence for the selected arm.
    """
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

def exponential_gap_elimination(arm_simulators, delta=0.05): # Changed parameter name for clarity
    """
    EGE algorithm for fixed-confidence best arm identification.
    """
    S = list(arm_simulators.keys()) # Changed variable name for clarity
    r = 1
    total_cost = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}
    stopping_early = False

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        # --- MODIFICATION START ---
        # Removed max_available check as pulls are infinite
        # And removed min(t_r, max_available) for t_r calculation
        # --- MODIFICATION END ---

        round_means = {}
        valid_arms = []

        for arm in S:
            # Current pulls for this arm, if any, for calculating current mean
            current_rewards = []
            current_costs = []

            # Pull 'arm_tr' times or until budget/condition met within the round
            # We explicitly pull t_r times here
            for _ in range(t_r):
                reward, cost = arm_simulators[arm].pull() # On-demand pull
                current_rewards.append(reward)
                current_costs.append(cost)

            if not current_rewards:
                # If no pulls were performed in this round for this arm (shouldn't happen with infinite pulls,
                # but retained the check structure for robustness)
                continue

            avg = np.mean(current_rewards)

            # Update overall sample_means and num_pulls for each arm
            # This is cumulative across rounds
            if arm not in sample_means:
                sample_means[arm] = 0.0
                num_pulls[arm] = 0

            # Weighted average for sample_means
            sample_means[arm] = (sample_means[arm] * num_pulls[arm] + sum(current_rewards)) / (num_pulls[arm] + len(current_rewards))
            num_pulls[arm] += len(current_rewards)

            round_means[arm] = avg # Mean for current round's pulls
            total_cost += sum(current_costs)
            total_pulls += len(current_rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            stopping_early = True
            break

        best_arm = max(round_means, key=round_means.get)
        p_hat_star = round_means[best_arm]
        S = [arm for arm in valid_arms if round_means[arm] >= p_hat_star - ε_r]
        r += 1

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence, stopping_early

def sequential_halving(arm_simulators, total_budget): # Changed parameter name for clarity
    """
    SH algorithm for fixed-budget best arm identification.
    """
    S = list(arm_simulators.keys()) # Changed variable name for clarity
    R = ceil(log2(len(S)))
    budget_used = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}
    stopping_early = False

    # Initialize sample_means and num_pulls for all arms to avoid key errors later
    for arm in S:
        sample_means[arm] = 0.0
        num_pulls[arm] = 0

    for r in range(R):
        if len(S) <= 1:
            break

        # --- MODIFICATION START ---
        # Avg cost per pull is now estimated from COST_RANGE as pulls are on-demand
        estimated_avg_cost_per_pull = (COST_RANGE[0] + COST_RANGE[1]) / 2

        # Calculate max_total_pulls based on remaining budget and current number of active arms
        # This assumes each pull costs the estimated average.
        remaining_budget = total_budget - budget_used

        if remaining_budget <= 0 or estimated_avg_cost_per_pull == 0:
            stopping_early = True
            break # No budget left or no cost means no pulls can be made

        # Calculate pulls per arm for this round
        # Distribute remaining budget as evenly as possible among current active arms
        t_r = floor(remaining_budget / (len(S) * estimated_avg_cost_per_pull))
        t_r = max(t_r, 1) # Ensure at least 1 pull per arm if budget allows

        # Removed max_available check as pulls are infinite
        # --- MODIFICATION END ---

        round_means = {}
        valid_arms = []

        # List to track actual costs incurred in this round to prevent over-budgeting for next pulls
        current_round_pulls_info = defaultdict(lambda: {'rewards': [], 'costs': []})

        for arm in S:

            # --- MODIFICATION START ---
            # Perform t_r pulls for the current arm
            # Keep track of rewards and costs for this arm in this round
            for _ in range(t_r):
                reward, cost = arm_simulators[arm].pull() # On-demand pull
                if budget_used + cost > total_budget:
                    stopping_early = True # Cannot make this pull due to budget
                    break # Stop pulling for this arm in this round

                current_round_pulls_info[arm]['rewards'].append(reward)
                current_round_pulls_info[arm]['costs'].append(cost)
                budget_used += cost
                total_pulls += 1
            # --- MODIFICATION END ---

            if not current_round_pulls_info[arm]['rewards']:
                continue # No pulls were actually made for this arm in this round (e.g., due to budget exhaustion)

            avg_rewards_this_round = np.mean(current_round_pulls_info[arm]['rewards'])

            # Update overall sample_means and num_pulls for each arm
            # This is cumulative across rounds
            sample_means[arm] = (sample_means[arm] * num_pulls[arm] + sum(current_round_pulls_info[arm]['rewards'])) / (num_pulls[arm] + len(current_round_pulls_info[arm]['rewards']))
            num_pulls[arm] += len(current_round_pulls_info[arm]['rewards'])

            round_means[arm] = avg_rewards_this_round # Mean for current round's pulls
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            stopping_early = True
            break

        sorted_arms = sorted(round_means.items(), key=lambda x: x[1], reverse=True)
        S = [arm for arm, _ in sorted_arms[:ceil(len(valid_arms) / 2)]]

        if budget_used >= total_budget:
            break

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, budget_used, confidence, stopping_early

In [None]:
# ================================
# Experiment Runner and Generator
# ================================

import random
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from collections import defaultdict


# Adjustable Parameters
NUM_CASES = 5000
NUM_ARMS = 6
NUM_PULLS_PER_ARM = 1500
COST_RANGE = (0.05, 0.2)
REWARD_GAP_MIN = 0.1
DELTA = 0.05
BUDGET = 1000

# --- MODIFICATION START ---
# NEW: ArmSimulator Class for on-demand pulls
class ArmSimulator:
    def __init__(self, mean, std_dev, cost_range):
        self.mean = mean
        self.std_dev = std_dev
        self.cost_range = cost_range
        self.current_pull_count = 0

    def pull(self):
        """Generates a new reward and cost for this arm."""
        reward = np.random.normal(loc=self.mean, scale=self.std_dev)
        cost = np.random.uniform(*self.cost_range)
        self.current_pull_count += 1
        return reward, cost

def generate_case(normal_gap=REWARD_GAP_MIN):
    """
    Generate one synthetic case with 6 arms having normal rewards and random costs.
    Ensures only one best arm with reward gap ≥ 0.1.
    Returns ArmSimulator instances for on-demand pulls.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    std_devs = [round(random.uniform(0.05, 0.15), 3) for _ in range(NUM_ARMS)]

    arm_simulators = {}
    for arm_id in range(NUM_ARMS):
        arm_simulators[arm_id] = ArmSimulator(all_means[arm_id], std_devs[arm_id], COST_RANGE)

    return arm_simulators, all_means, std_devs

def run_experiments(num_cases=5000):
    ege_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    sh_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_means, all_stds = [], []

    for _ in tqdm(range(num_cases)):
        arm_simulators, means, stds = generate_case()
        all_means.extend(means)
        all_stds.extend(stds)
        best_arm = int(np.argmax(means))

        ege_arm, ep, ec, ecf, early_e = exponential_gap_elimination(arm_simulators, delta=DELTA)
        ege_results['pulls'].append(ep)
        ege_results['cost'].append(ec)
        ege_results['confidence'].append(ecf)
        ege_results['correct'] += (ege_arm == best_arm)
        ege_results['early'] += early_e

        arm_simulators_for_sh, _, _ = generate_case()
        sh_arm, sp, sc, scf, early_s = sequential_halving(arm_simulators_for_sh, total_budget=BUDGET)
        sh_results['pulls'].append(sp)
        sh_results['cost'].append(sc)
        sh_results['confidence'].append(scf)
        sh_results['correct'] += (sh_arm == best_arm)
        sh_results['early'] += early_s

    return ege_results, sh_results, all_means, all_stds

def format_summary(results, num_cases):
    """
    Convert summary stats into a neat DataFrame and add Standard Error metrics.
    """
    def summarize(res):
        # Calculate means
        avg_pulls = np.mean(res['pulls'])
        avg_cost = np.mean(res['cost'])
        avg_confidence = np.mean(res['confidence'])

        # Calculate standard deviations
        std_dev_cost = np.std(res['cost'])
        std_dev_confidence = np.std(res['confidence'])

        # Calculate standard errors of the mean
        sem_cost = std_dev_cost / np.sqrt(num_cases)
        sem_confidence = std_dev_confidence / np.sqrt(num_cases)

        # Calculate standard error as a percentage of the mean
        # Handle division by zero for cases where mean is zero
        sem_cost_percent = (sem_cost / avg_cost * 100) if avg_cost != 0 else 0
        sem_confidence_percent = (sem_confidence / avg_confidence * 100) if avg_confidence != 0 else 0

        return {
            "Average Pulls": avg_pulls,
            "Average Cost": avg_cost,
            "Standard Error in Cost (%)": sem_cost_percent,
            "Average Confidence": avg_confidence,
            "Standard Error in Confidence (%)": sem_confidence_percent,
            "Accuracy (%)": 100 * res['correct'] / num_cases,
            "Stopping Early (%)": 100 * res['early'] / num_cases
        }

    ege_stats = summarize(results[0])
    sh_stats = summarize(results[1])
    df_summary = pd.DataFrame([ege_stats, sh_stats], index=["EGE", "SH"])
    df_summary = df_summary.round(2)

    dist_summary = {
        "Avg of Arm Means": np.mean(results[2]),
        "Avg of Arm StdDevs": np.mean(results[3])
    }
    dist_df = pd.DataFrame([dist_summary], index=["Distribution Stats"]).round(3)

    return df_summary, dist_df

# Run Experiments
results = run_experiments(NUM_CASES)

# Display formatted summary
summary_df, dist_df = format_summary(results, NUM_CASES)

print("\n=== Algorithm Performance Summary ===")
display(summary_df)

print("\n=== Reward Distribution Summary ===")
display(dist_df)

  0%|          | 0/5000 [00:00<?, ?it/s]


=== Algorithm Performance Summary ===


Unnamed: 0,Average Pulls,Average Cost,Standard Error in Cost (%),Average Confidence,Standard Error in Confidence (%),Accuracy (%),Stopping Early (%)
EGE,37897.27,4737.07,0.3,1.0,0.0,100.0,0.0
SH,8000.07,999.94,0.0,0.83,0.64,15.94,81.64



=== Reward Distribution Summary ===


Unnamed: 0,Avg of Arm Means,Avg of Arm StdDevs
Distribution Stats,0.626,0.1


In [3]:
# ================================
# Experiment Runner and Generator
# ================================

import random
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from collections import defaultdict


# Adjustable Parameters
NUM_CASES = 5000
NUM_ARMS = 6
NUM_PULLS_PER_ARM = 1500
COST_RANGE = (0.05, 0.2)
REWARD_GAP_MIN = 0.1
DELTA = 0.01
BUDGET = 2000

# --- MODIFICATION START ---
# NEW: ArmSimulator Class for on-demand pulls
class ArmSimulator:
    def __init__(self, mean, std_dev, cost_range):
        self.mean = mean
        self.std_dev = std_dev
        self.cost_range = cost_range
        self.current_pull_count = 0

    def pull(self):
        """Generates a new reward and cost for this arm."""
        reward = np.random.normal(loc=self.mean, scale=self.std_dev)
        cost = np.random.uniform(*self.cost_range)
        self.current_pull_count += 1
        return reward, cost

def generate_case(normal_gap=REWARD_GAP_MIN):
    """
    Generate one synthetic case with 6 arms having normal rewards and random costs.
    Ensures only one best arm with reward gap ≥ 0.1.
    Returns ArmSimulator instances for on-demand pulls.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    std_devs = [round(random.uniform(0.05, 0.15), 3) for _ in range(NUM_ARMS)]

    arm_simulators = {}
    for arm_id in range(NUM_ARMS):
        arm_simulators[arm_id] = ArmSimulator(all_means[arm_id], std_devs[arm_id], COST_RANGE)

    return arm_simulators, all_means, std_devs

def run_experiments(num_cases=5000):
    ege_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    sh_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_means, all_stds = [], []

    for _ in tqdm(range(num_cases)):
        arm_simulators, means, stds = generate_case()
        all_means.extend(means)
        all_stds.extend(stds)
        best_arm = int(np.argmax(means))

        ege_arm, ep, ec, ecf, early_e = exponential_gap_elimination(arm_simulators, delta=DELTA)
        ege_results['pulls'].append(ep)
        ege_results['cost'].append(ec)
        ege_results['confidence'].append(ecf)
        ege_results['correct'] += (ege_arm == best_arm)
        ege_results['early'] += early_e

        arm_simulators_for_sh, _, _ = generate_case()
        sh_arm, sp, sc, scf, early_s = sequential_halving(arm_simulators_for_sh, total_budget=BUDGET)
        sh_results['pulls'].append(sp)
        sh_results['cost'].append(sc)
        sh_results['confidence'].append(scf)
        sh_results['correct'] += (sh_arm == best_arm)
        sh_results['early'] += early_s

    return ege_results, sh_results, all_means, all_stds

def format_summary(results, num_cases):
    """
    Convert summary stats into a neat DataFrame and add Standard Error metrics.
    """
    def summarize(res):
        # Calculate means
        avg_pulls = np.mean(res['pulls'])
        avg_cost = np.mean(res['cost'])
        avg_confidence = np.mean(res['confidence'])

        # Calculate standard deviations
        std_dev_cost = np.std(res['cost'])
        std_dev_confidence = np.std(res['confidence'])

        # Calculate standard errors of the mean
        sem_cost = std_dev_cost / np.sqrt(num_cases)
        sem_confidence = std_dev_confidence / np.sqrt(num_cases)

        # Calculate standard error as a percentage of the mean
        # Handle division by zero for cases where mean is zero
        sem_cost_percent = (sem_cost / avg_cost * 100) if avg_cost != 0 else 0
        sem_confidence_percent = (sem_confidence / avg_confidence * 100) if avg_confidence != 0 else 0

        return {
            "Average Pulls": avg_pulls,
            "Average Cost": avg_cost,
            "Standard Error in Cost (%)": sem_cost_percent,
            "Average Confidence": avg_confidence,
            "Standard Error in Confidence (%)": sem_confidence_percent,
            "Accuracy (%)": 100 * res['correct'] / num_cases,
            "Stopping Early (%)": 100 * res['early'] / num_cases
        }

    ege_stats = summarize(results[0])
    sh_stats = summarize(results[1])
    df_summary = pd.DataFrame([ege_stats, sh_stats], index=["EGE", "SH"])
    df_summary = df_summary.round(2)

    dist_summary = {
        "Avg of Arm Means": np.mean(results[2]),
        "Avg of Arm StdDevs": np.mean(results[3])
    }
    dist_df = pd.DataFrame([dist_summary], index=["Distribution Stats"]).round(3)

    return df_summary, dist_df

# Run Experiments
results = run_experiments(NUM_CASES)

# Display formatted summary
summary_df, dist_df = format_summary(results, NUM_CASES)

print("\n=== Algorithm Performance Summary ===")
display(summary_df)

print("\n=== Reward Distribution Summary ===")
display(dist_df)

  0%|          | 0/5000 [00:00<?, ?it/s]


=== Algorithm Performance Summary ===


Unnamed: 0,Average Pulls,Average Cost,Standard Error in Cost (%),Average Confidence,Standard Error in Confidence (%),Accuracy (%),Stopping Early (%)
EGE,42164.06,5270.42,0.3,1.0,0.0,100.0,0.0
SH,15999.51,1999.94,0.0,0.84,0.62,16.52,81.58



=== Reward Distribution Summary ===


Unnamed: 0,Avg of Arm Means,Avg of Arm StdDevs
Distribution Stats,0.625,0.1
