In [2]:
# ==========================================
# Median Elimination Algorithm and Utilities
# ==========================================

import numpy as np
from math import log, ceil
from collections import defaultdict

def median_elimination(arm_pulls, epsilon=0.1, delta=0.05):
    """
    Median Elimination (PAC-style) for best-arm identification.

    Every round reads the same number of not-yet-used pulls from each
    surviving arm, computes that round's empirical mean reward per arm and
    keeps only the arms whose mean is at least the median.  Rounds repeat
    until a single arm is left or some surviving arm has no unread pulls.

    Args:
        arm_pulls (dict): {arm_index: [(reward, cost), ...]}. Each value must
            support len() and slicing; pulls are consumed front to back.
        epsilon (float): Accuracy parameter (ε-optimal arm).
        delta (float): Confidence parameter (1 - δ success).

    Returns:
        chosen_arm: Selected arm, or None when arm_pulls is empty.  If the
            run stops early with several survivors, the survivor with the
            highest most-recent empirical mean is returned.
        total_pulls (int): Total number of pulls used.
        total_cost (float): Total cost incurred.
        confidence (float): Value returned by hoeffding_confidence for the
            chosen arm.
        stopping_early (bool): True if terminated due to data exhaustion.
    """
    survivors = list(arm_pulls.keys())
    eps_l = epsilon / 4
    delta_l = delta / 2
    total_pulls = 0
    total_cost = 0
    sample_means = {}
    num_pulls = defaultdict(int)
    stopping_early = False

    while len(survivors) > 1:
        # Per-arm sample size prescribed for this round.
        t_l = ceil((4 / (eps_l ** 2)) * log(3 / delta_l))

        # Every survivor must receive the same number of pulls, so the round
        # is capped by the arm with the fewest unread pulls.
        max_available = min(len(arm_pulls[arm]) - num_pulls[arm] for arm in survivors)
        if max_available <= 0:
            stopping_early = True
            break
        t_l = min(t_l, max_available)

        empirical_means = {}
        for arm in survivors:
            start = num_pulls[arm]
            pulls = arm_pulls[arm][start:start + t_l]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg_reward = np.mean(rewards)
            empirical_means[arm] = avg_reward
            # sample_means keeps only the latest round's mean for each arm.
            sample_means[arm] = avg_reward
            num_pulls[arm] += len(rewards)
            total_pulls += len(rewards)
            total_cost += sum(costs)

        if not empirical_means:
            stopping_early = True
            break

        median_value = np.median(list(empirical_means.values()))
        survivors = [arm for arm in survivors if empirical_means.get(arm, 0) >= median_value]

        eps_l *= 0.75
        delta_l *= 0.5

    if survivors:
        # Several survivors can remain after an early stop; pick the one with
        # the best observed mean instead of whichever happens to come first.
        # Arms that were never sampled rank last, and max() keeps the first
        # arm on ties, so an entirely unsampled run still yields survivors[0].
        chosen_arm = max(survivors, key=lambda arm: sample_means.get(arm, float("-inf")))
    else:
        chosen_arm = None

    # NOTE(review): num_pulls is cumulative over all rounds while sample_means
    # holds last-round means only, so the sample size handed to the bound can
    # exceed the number of pulls behind each mean — confirm this is intended.
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence, stopping_early


def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    """
    Conservative confidence score for the chosen arm against every other arm.

    For each rival arm with a recorded sample mean, a pairwise score is
    computed from the gap between the chosen arm's mean and the rival's mean
    and from the smaller of the two pull counts.  A rival whose mean is not
    strictly below the chosen arm's mean scores 0.0.  The result is the
    smallest pairwise score, or 1.0 when there is no rival.

    Args:
        chosen_arm: Key of the selected arm (a missing key is treated as mean 0).
        sample_means (dict): {arm: empirical mean reward}.
        num_pulls (dict): {arm: number of pulls}; a missing arm counts as 1 pull.

    Returns:
        float: Minimum pairwise score, or 1.0 if no other arm has a mean.
    """
    reference_mean = sample_means.get(chosen_arm, 0)
    reference_pulls = num_pulls.get(chosen_arm, 1)

    def pairwise_score(rival):
        gap = reference_mean - sample_means[rival]
        if gap <= 0:
            # Rival looks at least as good: no confidence at all.
            return 0.0
        shared_n = min(num_pulls.get(rival, 1), reference_pulls)
        return 1 - np.exp(-0.5 * shared_n * gap ** 2)

    scores = [pairwise_score(rival) for rival in sample_means if not rival == chosen_arm]
    if not scores:
        return 1.0
    return min(scores)

In [3]:
# ======================================
# Synthetic Data Generator & Experiment
# ======================================

import random
import numpy as np
import pandas as pd
from collections import defaultdict
from tqdm.auto import tqdm

# Constants
# Number of synthetic bandit instances run by run_median_elimination_experiments.
NUM_CASES = 5000
# Number of arms generate_case creates for each instance.
NUM_ARMS = 6
# NUM_PULLS_PER_ARM is now just a conceptual placeholder; it is no longer a hard limit
# (nothing in this cell reads it).
NUM_PULLS_PER_ARM = 1500
# (low, high) bounds passed to np.random.uniform when drawing the cost of a pull.
COST_RANGE = (0.05, 0.2)
# Default smallest gap between the best arm's mean and any other arm's mean.
REWARD_GAP_MIN = 0.1
# Value passed as `delta` to median_elimination.
DELTA = 0.05

# --- MODIFICATION START ---

class ArmSimulator:
    """
    Draws (reward, cost) pairs for a single arm, one pull at a time.

    Rewards come from np.random.normal(mean, std_dev); costs come from
    np.random.uniform called with the unpacked cost_range.
    """

    def __init__(self, mean, std_dev, cost_range):
        self.mean, self.std_dev = mean, std_dev
        self.cost_range = cost_range

    def pull(self):
        """Return one (reward, cost) tuple; the reward is drawn before the cost."""
        return (
            np.random.normal(self.mean, self.std_dev),
            np.random.uniform(*self.cost_range),
        )

class OnDemandPullList:
    """
    List-like view of one arm's pulls that draws new pulls lazily.

    Pulls are produced by calling ``arm_simulator.pull()`` and stored in an
    internal cache, so reading the same index or slice twice returns the same
    values.  Both slice and integer indexing grow the cache as far as needed.
    """

    def __init__(self, arm_simulator):
        self._simulator = arm_simulator
        self._cache = []

    def __len__(self):
        # Report a very large length so callers that check the remaining data
        # never see the arm as exhausted; __len__ must return an int.
        return 999999999

    def _fill_to(self, size):
        """Draw pulls until the cache holds at least ``size`` entries."""
        missing = size - len(self._cache)
        if missing > 0:
            self._cache.extend(self._simulator.pull() for _ in range(missing))

    def __getitem__(self, key):
        if isinstance(key, slice):
            start = key.start if key.start is not None else 0
            # An open-ended slice only guarantees one element past `start`.
            stop = key.stop if key.stop is not None else start + 1
            self._fill_to(stop)
            return self._cache[key]

        # Integer access: generate everything up to and including `key`.
        # Appending a single pull is not enough when `key` lies more than one
        # position past the end of the cache.  Negative indices refer to what
        # is already cached and never trigger generation.
        if key >= 0:
            self._fill_to(key + 1)
        return self._cache[key]

def generate_case(normal_gap=REWARD_GAP_MIN):
    """
    Build one synthetic bandit instance with NUM_ARMS arms.

    One arm gets the best mean, drawn from [0.6, 0.9] and rounded to three
    decimals; every other arm's mean is lower by a random amount in
    [normal_gap, normal_gap + 0.1].  The means are shuffled so the best arm's
    index is random.  Each arm is wrapped in an OnDemandPullList backed by an
    ArmSimulator, mimicking the {arm_index: [(reward, cost), ...]} structure.

    Args:
        normal_gap (float): Smallest gap between the best mean and any other
            arm's mean.

    Returns:
        tuple: (arm_pulls, all_means, std_devs) where arm_pulls is a dict
        {arm_id: OnDemandPullList} and the two lists are indexed by arm_id.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    std_devs = [round(random.uniform(0.05, 0.15), 3) for _ in range(NUM_ARMS)]

    # A plain dict is used on purpose: OnDemandPullList cannot be constructed
    # without a simulator, so it is not usable as a defaultdict factory and a
    # missing arm id should surface as a KeyError.
    arm_pulls_on_demand = {
        arm_id: OnDemandPullList(ArmSimulator(mean, std_dev, COST_RANGE))
        for arm_id, (mean, std_dev) in enumerate(zip(all_means, std_devs))
    }

    return arm_pulls_on_demand, all_means, std_devs

def run_median_elimination_experiments(num_cases=NUM_CASES, epsilon=0.1, delta=None):
    """
    Run Median Elimination across multiple synthetic bandit instances.

    Args:
        num_cases (int): Number of instances to generate and solve.
        epsilon (float): Accuracy parameter forwarded to median_elimination.
        delta (float | None): Confidence parameter forwarded to
            median_elimination.  None (the default) uses the module-level
            DELTA as it is at call time, so different settings can be
            compared without redefining the function.

    Returns:
        tuple: (me_results, all_means, all_stds).  me_results holds the
        per-case 'pulls', 'cost' and 'confidence' lists plus the 'correct'
        and 'early' counters; all_means and all_stds collect the arm
        parameters of every generated case.
    """
    if delta is None:
        delta = DELTA

    me_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_means, all_stds = [], []

    for _ in tqdm(range(num_cases)):
        arm_pulls, means, stds = generate_case()
        all_means.extend(means)
        all_stds.extend(stds)
        # Ground truth: index of the arm with the largest true mean.
        best_arm = int(np.argmax(means))

        me_arm, mp, mc, mcf, early_m = median_elimination(arm_pulls, epsilon=epsilon, delta=delta)
        me_results['pulls'].append(mp)
        me_results['cost'].append(mc)
        me_results['confidence'].append(mcf)
        # Booleans add as 0/1, turning these into running counts.
        me_results['correct'] += (me_arm == best_arm)
        me_results['early'] += early_m

    return me_results, all_means, all_stds

def format_summary(results, num_cases):
    """
    Turn raw experiment results into two single-row DataFrames.

    Args:
        results: Sequence whose item 0 is the per-run statistics dict (keys
            'pulls', 'cost', 'confidence', 'correct', 'early'), item 1 the
            collected arm means and item 2 the collected arm std-devs.
        num_cases (int): Number of runs; used for the standard errors and for
            the percentage columns.

    Returns:
        tuple: (df_summary, dist_df) — the algorithm summary rounded to two
        decimals and the reward-distribution summary rounded to three.
    """
    def relative_sem(values, centre):
        # Standard error of the mean, expressed as a percentage of the mean.
        sem = np.std(values) / np.sqrt(num_cases)
        return (sem / centre * 100) if centre != 0 else 0

    def summarize(res):
        mean_pulls = np.mean(res['pulls'])
        mean_cost = np.mean(res['cost'])
        mean_confidence = np.mean(res['confidence'])

        row = {}
        row["Average Pulls"] = mean_pulls
        row["Average Cost"] = mean_cost
        row["Standard Error in Cost (%)"] = relative_sem(res['cost'], mean_cost)
        row["Average Confidence"] = mean_confidence
        row["Standard Error in Confidence (%)"] = relative_sem(res['confidence'], mean_confidence)
        row["Accuracy (%)"] = 100 * res['correct'] / num_cases
        row["Stopping Early (%)"] = 100 * res['early'] / num_cases
        return row

    df_summary = pd.DataFrame([summarize(results[0])], index=["Median Elimination"]).round(2)

    dist_df = pd.DataFrame(
        [{
            "Avg of Arm Means": np.mean(results[1]),
            "Avg of Arm StdDevs": np.mean(results[2]),
        }],
        index=["Distribution Stats"],
    ).round(3)

    return df_summary, dist_df

# Run the full batch of NUM_CASES synthetic experiments.
# NOTE(review): no random seed is set in this cell, so the numbers shown below
# are presumably not reproducible run-to-run — confirm whether a seed is set elsewhere.
results = run_median_elimination_experiments(NUM_CASES)

# Display formatted summary
# `results` is the (me_results, all_means, all_stds) tuple returned above.
summary_df, dist_df = format_summary(results, NUM_CASES)

print("\n=== Algorithm Performance Summary ===")
display(summary_df)

print("\n=== Reward Distribution Summary ===")
display(dist_df)

  0%|          | 0/5000 [00:00<?, ?it/s]


=== Algorithm Performance Summary ===


Unnamed: 0,Average Pulls,Average Cost,Standard Error in Cost (%),Average Confidence,Standard Error in Confidence (%),Accuracy (%),Stopping Early (%)
Median Elimination,620672.0,77583.5,0.0,1.0,0.0,100.0,0.0



=== Reward Distribution Summary ===


Unnamed: 0,Avg of Arm Means,Avg of Arm StdDevs
Distribution Stats,0.624,0.1


In [3]:
# ======================================
# Synthetic Data Generator & Experiment
# ======================================

import random
import numpy as np
import pandas as pd
from collections import defaultdict
from tqdm.auto import tqdm

# Constants
# Number of synthetic bandit instances run by run_median_elimination_experiments.
NUM_CASES = 5000
# Number of arms generate_case creates for each instance.
NUM_ARMS = 6
# NUM_PULLS_PER_ARM is now just a conceptual placeholder; it is no longer a hard limit
# (nothing in this cell reads it).
NUM_PULLS_PER_ARM = 1500
# (low, high) bounds passed to np.random.uniform when drawing the cost of a pull.
COST_RANGE = (0.05, 0.2)
# Default smallest gap between the best arm's mean and any other arm's mean.
REWARD_GAP_MIN = 0.1
# Value passed as `delta` to median_elimination.
DELTA = 0.1

# --- MODIFICATION START ---

class ArmSimulator:
    """
    Draws (reward, cost) pairs for a single arm, one pull at a time.

    Rewards come from np.random.normal(mean, std_dev); costs come from
    np.random.uniform called with the unpacked cost_range.
    """

    def __init__(self, mean, std_dev, cost_range):
        self.mean, self.std_dev = mean, std_dev
        self.cost_range = cost_range

    def pull(self):
        """Return one (reward, cost) tuple; the reward is drawn before the cost."""
        return (
            np.random.normal(self.mean, self.std_dev),
            np.random.uniform(*self.cost_range),
        )

class OnDemandPullList:
    """
    List-like view of one arm's pulls that draws new pulls lazily.

    Pulls are produced by calling ``arm_simulator.pull()`` and stored in an
    internal cache, so reading the same index or slice twice returns the same
    values.  Both slice and integer indexing grow the cache as far as needed.
    """

    def __init__(self, arm_simulator):
        self._simulator = arm_simulator
        self._cache = []

    def __len__(self):
        # Report a very large length so callers that check the remaining data
        # never see the arm as exhausted; __len__ must return an int.
        return 999999999

    def _fill_to(self, size):
        """Draw pulls until the cache holds at least ``size`` entries."""
        missing = size - len(self._cache)
        if missing > 0:
            self._cache.extend(self._simulator.pull() for _ in range(missing))

    def __getitem__(self, key):
        if isinstance(key, slice):
            start = key.start if key.start is not None else 0
            # An open-ended slice only guarantees one element past `start`.
            stop = key.stop if key.stop is not None else start + 1
            self._fill_to(stop)
            return self._cache[key]

        # Integer access: generate everything up to and including `key`.
        # Appending a single pull is not enough when `key` lies more than one
        # position past the end of the cache.  Negative indices refer to what
        # is already cached and never trigger generation.
        if key >= 0:
            self._fill_to(key + 1)
        return self._cache[key]

def generate_case(normal_gap=REWARD_GAP_MIN):
    """
    Build one synthetic bandit instance with NUM_ARMS arms.

    One arm gets the best mean, drawn from [0.6, 0.9] and rounded to three
    decimals; every other arm's mean is lower by a random amount in
    [normal_gap, normal_gap + 0.1].  The means are shuffled so the best arm's
    index is random.  Each arm is wrapped in an OnDemandPullList backed by an
    ArmSimulator, mimicking the {arm_index: [(reward, cost), ...]} structure.

    Args:
        normal_gap (float): Smallest gap between the best mean and any other
            arm's mean.

    Returns:
        tuple: (arm_pulls, all_means, std_devs) where arm_pulls is a dict
        {arm_id: OnDemandPullList} and the two lists are indexed by arm_id.
    """
    best_mean = round(random.uniform(0.6, 0.9), 3)
    other_means = [best_mean - random.uniform(normal_gap, normal_gap + 0.1) for _ in range(NUM_ARMS - 1)]
    all_means = [best_mean] + other_means
    random.shuffle(all_means)

    std_devs = [round(random.uniform(0.05, 0.15), 3) for _ in range(NUM_ARMS)]

    # A plain dict is used on purpose: OnDemandPullList cannot be constructed
    # without a simulator, so it is not usable as a defaultdict factory and a
    # missing arm id should surface as a KeyError.
    arm_pulls_on_demand = {
        arm_id: OnDemandPullList(ArmSimulator(mean, std_dev, COST_RANGE))
        for arm_id, (mean, std_dev) in enumerate(zip(all_means, std_devs))
    }

    return arm_pulls_on_demand, all_means, std_devs

def run_median_elimination_experiments(num_cases=NUM_CASES, epsilon=0.1, delta=None):
    """
    Run Median Elimination across multiple synthetic bandit instances.

    Args:
        num_cases (int): Number of instances to generate and solve.
        epsilon (float): Accuracy parameter forwarded to median_elimination.
        delta (float | None): Confidence parameter forwarded to
            median_elimination.  None (the default) uses the module-level
            DELTA as it is at call time, so different settings can be
            compared without redefining the function.

    Returns:
        tuple: (me_results, all_means, all_stds).  me_results holds the
        per-case 'pulls', 'cost' and 'confidence' lists plus the 'correct'
        and 'early' counters; all_means and all_stds collect the arm
        parameters of every generated case.
    """
    if delta is None:
        delta = DELTA

    me_results = {'pulls': [], 'cost': [], 'confidence': [], 'correct': 0, 'early': 0}
    all_means, all_stds = [], []

    for _ in tqdm(range(num_cases)):
        arm_pulls, means, stds = generate_case()
        all_means.extend(means)
        all_stds.extend(stds)
        # Ground truth: index of the arm with the largest true mean.
        best_arm = int(np.argmax(means))

        me_arm, mp, mc, mcf, early_m = median_elimination(arm_pulls, epsilon=epsilon, delta=delta)
        me_results['pulls'].append(mp)
        me_results['cost'].append(mc)
        me_results['confidence'].append(mcf)
        # Booleans add as 0/1, turning these into running counts.
        me_results['correct'] += (me_arm == best_arm)
        me_results['early'] += early_m

    return me_results, all_means, all_stds

def format_summary(results, num_cases):
    """
    Turn raw experiment results into two single-row DataFrames.

    Args:
        results: Sequence whose item 0 is the per-run statistics dict (keys
            'pulls', 'cost', 'confidence', 'correct', 'early'), item 1 the
            collected arm means and item 2 the collected arm std-devs.
        num_cases (int): Number of runs; used for the standard errors and for
            the percentage columns.

    Returns:
        tuple: (df_summary, dist_df) — the algorithm summary rounded to two
        decimals and the reward-distribution summary rounded to three.
    """
    def relative_sem(values, centre):
        # Standard error of the mean, expressed as a percentage of the mean.
        sem = np.std(values) / np.sqrt(num_cases)
        return (sem / centre * 100) if centre != 0 else 0

    def summarize(res):
        mean_pulls = np.mean(res['pulls'])
        mean_cost = np.mean(res['cost'])
        mean_confidence = np.mean(res['confidence'])

        row = {}
        row["Average Pulls"] = mean_pulls
        row["Average Cost"] = mean_cost
        row["Standard Error in Cost (%)"] = relative_sem(res['cost'], mean_cost)
        row["Average Confidence"] = mean_confidence
        row["Standard Error in Confidence (%)"] = relative_sem(res['confidence'], mean_confidence)
        row["Accuracy (%)"] = 100 * res['correct'] / num_cases
        row["Stopping Early (%)"] = 100 * res['early'] / num_cases
        return row

    df_summary = pd.DataFrame([summarize(results[0])], index=["Median Elimination"]).round(2)

    dist_df = pd.DataFrame(
        [{
            "Avg of Arm Means": np.mean(results[1]),
            "Avg of Arm StdDevs": np.mean(results[2]),
        }],
        index=["Distribution Stats"],
    ).round(3)

    return df_summary, dist_df

# Run the full batch of NUM_CASES synthetic experiments.
# NOTE(review): no random seed is set in this cell, so the numbers shown below
# are presumably not reproducible run-to-run — confirm whether a seed is set elsewhere.
results = run_median_elimination_experiments(NUM_CASES)

# Display formatted summary
# `results` is the (me_results, all_means, all_stds) tuple returned above.
summary_df, dist_df = format_summary(results, NUM_CASES)

print("\n=== Algorithm Performance Summary ===")
display(summary_df)

print("\n=== Reward Distribution Summary ===")
display(dist_df)

  0%|          | 0/5000 [00:00<?, ?it/s]


=== Algorithm Performance Summary ===


Unnamed: 0,Average Pulls,Average Cost,Standard Error in Cost (%),Average Confidence,Standard Error in Confidence (%),Accuracy (%),Stopping Early (%)
Median Elimination,542356.0,67795.05,0.0,1.0,0.0,100.0,0.0



=== Reward Distribution Summary ===


Unnamed: 0,Avg of Arm Means,Avg of Arm StdDevs
Distribution Stats,0.624,0.1
