In [1]:
# ==========================================
# Median Elimination Algorithm and Utilities
# ==========================================

import numpy as np
from math import log, ceil
from collections import defaultdict

def median_elimination(arm_pulls, epsilon=0.1, delta=0.05):
    """
    Median Elimination (PAC-style) for best-arm identification.

    Args:
        arm_pulls (dict): {arm_index: [(reward, cost), ...]}
        epsilon (float): Accuracy parameter (ε-optimal arm).
        delta (float): Confidence parameter (1 - δ success).

    Returns:
        chosen_arm (int): Selected ε-optimal arm.
        total_pulls (int): Total number of pulls used.
        total_cost (float): Total cost incurred.
        confidence (float): Lower bound on probability of correctness.
        stopping_early (bool): True if terminated due to data exhaustion.
    """
    S = list(arm_pulls.keys())
    ε_l = epsilon / 4
    δ_l = delta / 2
    total_pulls = 0
    total_cost = 0
    sample_means = {}
    num_pulls = defaultdict(int)
    stopping_early = False

    while len(S) > 1:
        t_l = ceil((4 / (ε_l ** 2)) * log(3 / δ_l))

        max_available = min([len(arm_pulls[arm]) - num_pulls[arm] for arm in S])
        if max_available <= 0:
            stopping_early = True
            break
        t_l = min(t_l, max_available)

        empirical_means = {}
        for arm in S:
            start = num_pulls[arm]
            pulls = arm_pulls[arm][start:start + t_l]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg_reward = np.mean(rewards)
            empirical_means[arm] = avg_reward
            sample_means[arm] = avg_reward
            num_pulls[arm] += len(rewards)
            total_pulls += len(rewards)
            total_cost += sum(costs)

        if not empirical_means:
            stopping_early = True
            break

        median_value = np.median(list(empirical_means.values()))
        S = [arm for arm in S if empirical_means.get(arm, 0) >= median_value]

        ε_l *= 0.75
        δ_l *= 0.5

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence, stopping_early


def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    """
    Returns a conservative lower bound on the confidence that chosen_arm is ε-optimal.
    """
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - np.exp(-0.5 * n * delta ** 2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0


In [3]:
# ====================================================
# Gamma-Distributed Synthetic Data + ME Experiment Run
# ====================================================

import random
import numpy as np
from collections import defaultdict
from tqdm.auto import tqdm

# Constants
NUM_CASES = 5000
NUM_ARMS = 6
NUM_PULLS_PER_ARM = 1500
COST_RANGE = (0.05, 0.2)
REWARD_GAP_MIN = 0.1
DELTA = 0.05

def generate_gamma_case(normal_gap=REWARD_GAP_MIN):
    """
    Generate one synthetic case with Gamma-distributed rewards.
    Ensures one best arm and others with a reward gap ≥ REWARD_GAP_MIN.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    # Fix shape; derive scale so that mean = shape × scale
    shape_params = [round(random.uniform(2.0, 5.0), 2) for _ in range(NUM_ARMS)]
    scale_params = [round(mu / shape, 3) for mu, shape in zip(all_means, shape_params)]

    pulls = defaultdict(list)
    for arm in range(NUM_ARMS):
        rewards = np.random.gamma(shape=shape_params[arm], scale=scale_params[arm], size=NUM_PULLS_PER_ARM)
        rewards = np.clip(rewards, 0, 1)  # Clamp to [0, 1] for Hoeffding applicability
        costs = np.random.uniform(*COST_RANGE, size=NUM_PULLS_PER_ARM)
        pulls[arm] = list(zip(rewards, costs))

    return pulls, shape_params, scale_params

def run_median_elimination_experiments(num_cases=NUM_CASES):
    """
    Run Median Elimination on gamma-distributed synthetic bandit tasks.
    """
    me_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_shapes, all_scales = [], []

    for _ in tqdm(range(num_cases)):
        arm_pulls, shapes, scales = generate_gamma_case()
        all_shapes.extend(shapes)
        all_scales.extend(scales)

        true_means = [s * scale for s, scale in zip(shapes, scales)]
        best_arm = int(np.argmax(true_means))

        me_arm, mp, mc, mcf, early_m = median_elimination(arm_pulls, epsilon=0.1, delta=DELTA)
        me_results['pulls'].append(mp)
        me_results['cost'].append(mc)
        me_results['confidence'].append(mcf)
        me_results['correct'] += (me_arm == best_arm)
        me_results['early'] += early_m

    return me_results, all_shapes, all_scales

def print_me_summary(results, num_cases):
    def summarize(res):
        return {
            'Avg Pulls': np.mean(res['pulls']),
            'Avg Cost': np.mean(res['cost']),
            'Avg Confidence': np.mean(res['confidence']),
            'Accuracy (%)': 100 * res['correct'] / num_cases,
            'Stopping Early (%)': 100 * res['early'] / num_cases
        }

    me_stats = summarize(results[0])
    print("\n=== Median Elimination (Gamma Rewards) ===")
    for k, v in me_stats.items():
        print(f"{k}: {v:.2f}")

    print("\n=== Gamma Distribution Summary ===")
    print(f"Average Shape Parameter: {np.mean(results[1]):.4f}")
    print(f"Average Scale Parameter: {np.mean(results[2]):.4f}")

# Run the experiments and print results
results = run_median_elimination_experiments()
print_me_summary(results, NUM_CASES)


  0%|          | 0/5000 [00:00<?, ?it/s]


=== Median Elimination (Gamma Rewards) ===
Avg Pulls: 9000.00
Avg Cost: 1125.03
Avg Confidence: 0.28
Accuracy (%): 32.60
Stopping Early (%): 100.00

=== Gamma Distribution Summary ===
Average Shape Parameter: 3.5020
Average Scale Parameter: 0.1907


In [4]:
# ====================================================
# Gamma-Distributed Synthetic Data + ME Experiment Run
# ====================================================

import random
import numpy as np
from collections import defaultdict
from tqdm.auto import tqdm

# Constants
NUM_CASES = 5000
NUM_ARMS = 6
NUM_PULLS_PER_ARM = 1500
COST_RANGE = (0.05, 0.2)
REWARD_GAP_MIN = 0.1
DELTA = 0.1

def generate_gamma_case(normal_gap=REWARD_GAP_MIN):
    """
    Generate one synthetic case with Gamma-distributed rewards.
    Ensures one best arm and others with a reward gap ≥ REWARD_GAP_MIN.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    # Fix shape; derive scale so that mean = shape × scale
    shape_params = [round(random.uniform(2.0, 5.0), 2) for _ in range(NUM_ARMS)]
    scale_params = [round(mu / shape, 3) for mu, shape in zip(all_means, shape_params)]

    pulls = defaultdict(list)
    for arm in range(NUM_ARMS):
        rewards = np.random.gamma(shape=shape_params[arm], scale=scale_params[arm], size=NUM_PULLS_PER_ARM)
        rewards = np.clip(rewards, 0, 1)  # Clamp to [0, 1] for Hoeffding applicability
        costs = np.random.uniform(*COST_RANGE, size=NUM_PULLS_PER_ARM)
        pulls[arm] = list(zip(rewards, costs))

    return pulls, shape_params, scale_params

def run_median_elimination_experiments(num_cases=NUM_CASES):
    """
    Run Median Elimination on gamma-distributed synthetic bandit tasks.
    """
    me_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_shapes, all_scales = [], []

    for _ in tqdm(range(num_cases)):
        arm_pulls, shapes, scales = generate_gamma_case()
        all_shapes.extend(shapes)
        all_scales.extend(scales)

        true_means = [s * scale for s, scale in zip(shapes, scales)]
        best_arm = int(np.argmax(true_means))

        me_arm, mp, mc, mcf, early_m = median_elimination(arm_pulls, epsilon=0.1, delta=DELTA)
        me_results['pulls'].append(mp)
        me_results['cost'].append(mc)
        me_results['confidence'].append(mcf)
        me_results['correct'] += (me_arm == best_arm)
        me_results['early'] += early_m

    return me_results, all_shapes, all_scales

def print_me_summary(results, num_cases):
    def summarize(res):
        return {
            'Avg Pulls': np.mean(res['pulls']),
            'Avg Cost': np.mean(res['cost']),
            'Avg Confidence': np.mean(res['confidence']),
            'Accuracy (%)': 100 * res['correct'] / num_cases,
            'Stopping Early (%)': 100 * res['early'] / num_cases
        }

    me_stats = summarize(results[0])
    print("\n=== Median Elimination (Gamma Rewards) ===")
    for k, v in me_stats.items():
        print(f"{k}: {v:.2f}")

    print("\n=== Gamma Distribution Summary ===")
    print(f"Average Shape Parameter: {np.mean(results[1]):.4f}")
    print(f"Average Scale Parameter: {np.mean(results[2]):.4f}")

# Run the experiments and print results
results = run_median_elimination_experiments()
print_me_summary(results, NUM_CASES)


  0%|          | 0/5000 [00:00<?, ?it/s]


=== Median Elimination (Gamma Rewards) ===
Avg Pulls: 9000.00
Avg Cost: 1124.95
Avg Confidence: 0.29
Accuracy (%): 33.30
Stopping Early (%): 100.00

=== Gamma Distribution Summary ===
Average Shape Parameter: 3.5036
Average Scale Parameter: 0.1900
