In [None]:
# ============================================
# Imports and Setup
# ============================================
import pandas as pd
import numpy as np
from math import log, ceil, floor, log2, exp
from collections import defaultdict

# ============================================
# Data Preprocessing Function
# ============================================
def load_and_prepare_data(filepath):
    """
    Load dataset and organize it into a dictionary:
    arm_id -> list of (reward, cost) tuples
    """
    df = pd.read_csv(filepath)
    df = df.sort_values(by="trial")

    arm_pulls = defaultdict(list)
    for _, row in df.iterrows():
        arm = int(row["arm"])
        arm_pulls[arm].append((row["reward"], row["cost"]))

    return df, arm_pulls

# ============================================
# Confidence Estimator
# ============================================
def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    """
    Calculate Hoeffding-based lower confidence bound for selected arm
    to estimate the probability that it is the best arm.
    """
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

# ============================================
# Exponential-Gap Elimination (Fixed Confidence)
# ============================================
def exponential_gap_elimination(arm_pulls, delta=0.05):
    """
    Implements the Exponential-Gap Elimination algorithm for best arm identification
    under a fixed confidence setting.
    """
    S = list(arm_pulls.keys())
    r = 1
    total_cost = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            print(f"Stopping early: no data left to sample in round {r}")
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg = np.mean(rewards)
            sample_means[arm] = avg
            round_means[arm] = avg
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            print(f"Stopping early: no arms with data in round {r}")
            break

        best_arm = max(round_means, key=round_means.get)
        p_hat_star = round_means[best_arm]
        S = [arm for arm in valid_arms if round_means[arm] >= p_hat_star - ε_r]
        r += 1

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence

# ============================================
# Sequential Halving (Fixed Budget)
# ============================================
def sequential_halving(arm_pulls, total_budget):
    """
    Implements the Sequential Halving algorithm for best arm identification
    under a fixed budget constraint.
    """
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    for r in range(R):
        if len(S) <= 1:
            break

        avg_costs = {arm: np.mean([c for _, c in arm_pulls[arm]]) for arm in S}
        mean_cost_per_arm = sum(avg_costs.values()) / len(S)
        max_total_pulls = floor((total_budget - budget_used) / mean_cost_per_arm)
        t_r = max(floor(max_total_pulls / len(S)), 1)

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            print(f"Stopping early: no data left to sample in round {r}")
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)

            if not rewards or (budget_used + cost_sum > total_budget):
                continue

            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            print(f"Stopping early: no arms with data in round {r}")
            break

        sorted_arms = sorted(round_means.items(), key=lambda x: x[1], reverse=True)
        S = [arm for arm, _ in sorted_arms[:ceil(len(valid_arms) / 2)]]

        if budget_used >= total_budget:
            break

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, budget_used, confidence

# ============================================
# Evaluation Function
# ============================================
def run_algorithms_on_dataset(filepath, delta=0.05, budget=500):
    """
    Run both EGE and SH algorithms on a dataset and print comparative results.
    """
    print(f"\n--- Running on dataset: {filepath.split('/')[-1]} ---")
    df, arm_pulls = load_and_prepare_data(filepath)

    print("Running Exponential-Gap Elimination...")
    ege_arm, ege_pulls, ege_cost, ege_conf = exponential_gap_elimination(arm_pulls, delta=delta)

    print("Running Sequential Halving...")
    sh_arm, sh_pulls, sh_cost, sh_conf = sequential_halving(arm_pulls, total_budget=budget)

    # True best arm
    true_means = df.groupby("arm")["reward"].mean().to_dict()
    best_true_arm = max(true_means, key=true_means.get)

    print("\n=== COMPARISON RESULTS ===")
    print(f"True Best Arm: {best_true_arm} (Mean = {true_means[best_true_arm]:.3f})")
    print(f"[EGE] Arm: {ege_arm} | Pulls: {ege_pulls} | Cost: {ege_cost:.2f} | Confidence: {ege_conf:.3f} | Correct? {ege_arm == best_true_arm}")
    print(f"[SH ] Arm: {sh_arm}  | Pulls: {sh_pulls} | Cost: {sh_cost:.2f} | Confidence: {sh_conf:.3f} | Correct? {sh_arm == best_true_arm}")

# ============================================


In [None]:
import random

def generate_dataset(num_arms, num_trials, reward_means, arm_costs):
    data = []
    for trial in range(num_trials):
        for arm in range(num_arms):
            reward = np.random.normal(loc=reward_means[arm], scale=0.05)
            reward = max(min(reward, 1), 0)  # Clamp reward between 0 and 1
            cost = arm_costs[arm]
            data.append({"trial": trial, "arm": arm, "reward": reward, "cost": cost})
    return pd.DataFrame(data)

def simulate_scenario(scenario_name, gap_range, delta, budget, num_cases=5000, num_arms=5, num_trials=800):
    print(f"\n--- {scenario_name} ---")
    correct_ege = correct_sh = 0
    total_conf_ege = total_conf_sh = 0.0
    total_cost_ege = total_cost_sh = 0.0

    for i in range(num_cases):
        best_arm = random.randint(0, num_arms - 1)
        max_reward = random.uniform(0.7, 0.9)
        reward_means = []
        for a in range(num_arms):
            if a == best_arm:
                reward_means.append(max_reward)
            else:
                reward_means.append(max_reward - random.uniform(*gap_range))

        arm_costs = [round(random.uniform(0.04, 0.1), 3) for _ in range(num_arms)]
        df = generate_dataset(num_arms, num_trials, reward_means, arm_costs)
        df = df.sort_values(by="trial")

        # Convert to arm_pulls
        arm_pulls = defaultdict(list)
        for _, row in df.iterrows():
            arm_pulls[int(row["arm"])].append((row["reward"], row["cost"]))

        # Run both algorithms
        ege_arm, _, ege_cost, ege_conf = exponential_gap_elimination(arm_pulls, delta=delta)
        sh_arm, _, sh_cost, sh_conf = sequential_halving(arm_pulls, total_budget=budget)

        # True best arm
        true_best = np.argmax(reward_means)

        correct_ege += int(ege_arm == true_best)
        correct_sh += int(sh_arm == true_best)
        total_conf_ege += ege_conf
        total_conf_sh += sh_conf
        total_cost_ege += ege_cost
        total_cost_sh += sh_cost

        if (i + 1) % 500 == 0:
            print(f"Completed {i+1}/{num_cases} cases")

    print("\n=== Results ===")
    print(f"EGE - Accuracy: {correct_ege / num_cases:.3f}, Avg Conf: {total_conf_ege / num_cases:.3f}, Avg Cost: {total_cost_ege / num_cases:.2f}")
    print(f"SH  - Accuracy: {correct_sh / num_cases:.3f}, Avg Conf: {total_conf_sh / num_cases:.3f}, Avg Cost: {total_cost_sh / num_cases:.2f}")

# Run all three scenarios
simulate_scenario(
    "Scenario 1: High Reward Gap",
    gap_range=(0.1, 0.2),
    delta=0.01,
    budget=500
)

simulate_scenario(
    "Scenario 2: Moderate Reward Gap",
    gap_range=(0.01, 0.08),
    delta=0.05,
    budget=1500
)

simulate_scenario(
    "Scenario 3: Low Reward Gap",
    gap_range=(0.05, 0.08),
    delta=0.1,
    budget=2000
)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sample in round 1
Stopping early: no data left to sample in round 8
Stopping early: no data left to sam

In [3]:
import pandas as pd
import numpy as np
from math import log, ceil, floor, log2, exp
from collections import defaultdict
import random

def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

def exponential_gap_elimination(arm_pulls, delta=0.01):
    S = list(arm_pulls.keys())
    r = 1
    total_cost, total_pulls = 0, 0
    sample_means, num_pulls = {}, {}

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        t_r = min(t_r, min(len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S))

        if t_r <= 0:
            break

        round_means, valid_arms = {}, []
        for arm in S:
            start = num_pulls.get(arm, 0)
            pulls = arm_pulls[arm][start:start + t_r]
            if not pulls:
                continue
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if not valid_arms:
            break

        best_arm = max(round_means, key=round_means.get)
        S = [arm for arm in valid_arms if round_means[arm] >= round_means[best_arm] - ε_r]
        r += 1

    chosen = S[0] if S else None
    confidence = hoeffding_confidence(chosen, sample_means, num_pulls)
    return chosen, total_pulls, total_cost, confidence

def sequential_halving(arm_pulls, total_budget):
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used, total_pulls = 0, 0
    sample_means, num_pulls = {}, {}

    for r in range(R):
        if len(S) <= 1:
            break
        mean_cost = np.mean([np.mean([c for _, c in arm_pulls[arm]]) for arm in S])
        t_r = max(floor((total_budget - budget_used) / mean_cost / len(S)), 1)
        t_r = min(t_r, min(len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S))

        if t_r <= 0:
            break

        round_means, valid_arms = {}, []
        for arm in S:
            start = num_pulls.get(arm, 0)
            pulls = arm_pulls[arm][start:start + t_r]
            if not pulls:
                continue
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)
            if budget_used + cost_sum > total_budget:
                continue
            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if not valid_arms:
            break
        S = [arm for arm, _ in sorted(round_means.items(), key=lambda x: x[1], reverse=True)[:ceil(len(valid_arms) / 2)]]

    chosen = S[0] if S else None
    confidence = hoeffding_confidence(chosen, sample_means, num_pulls)
    return chosen, total_pulls, budget_used, confidence

def generate_dataset(num_arms=6, trials=800, reward_means=None, costs=None):
    data = []
    for trial in range(trials):
        for arm in range(num_arms):
            reward = np.clip(np.random.normal(loc=reward_means[arm], scale=0.05), 0, 1)
            cost = costs[arm]
            data.append((trial, arm, reward, cost))
    df = pd.DataFrame(data, columns=["trial", "arm", "reward", "cost"])
    return df

def run_scenario_1(num_cases=5000):
    ege_correct, ege_conf, ege_cost = 0, 0, 0
    sh_correct, sh_conf, sh_cost = 0, 0, 0
    num_arms, trials, delta, budget = 6, 800, 0.01, 500

    for _ in range(num_cases):
        base = random.uniform(0.1, 0.3)
        reward_means = [base + i * random.uniform(0.1, 0.2) for i in range(num_arms)]
        true_best = np.argmax(reward_means)
        costs = [round(random.uniform(0.04, 0.1), 3) for _ in range(num_arms)]
        df = generate_dataset(num_arms, trials, reward_means, costs)

        arm_pulls = defaultdict(list)
        for _, row in df.iterrows():
            arm_pulls[int(row["arm"])].append((row["reward"], row["cost"]))

        ege_arm, _, ecost, econf = exponential_gap_elimination(arm_pulls, delta=delta)
        sh_arm, _, scost, sconf = sequential_halving(arm_pulls, total_budget=budget)

        ege_correct += int(ege_arm == true_best)
        ege_conf += econf
        ege_cost += ecost

        sh_correct += int(sh_arm == true_best)
        sh_conf += sconf
        sh_cost += scost

    print("\n--- Scenario 1: High Reward Gap ---")
    print("EGE: Accuracy = {:.2f}%, Avg Confidence = {:.3f}, Avg Cost = {:.2f}".format(
        100 * ege_correct / num_cases, ege_conf / num_cases, ege_cost / num_cases))
    print("SH : Accuracy = {:.2f}%, Avg Confidence = {:.3f}, Avg Cost = {:.2f}".format(
        100 * sh_correct / num_cases, sh_conf / num_cases, sh_cost / num_cases))

# Run this function to evaluate Scenario 1
run_scenario_1()



--- Scenario 1: High Reward Gap ---
EGE: Accuracy = 15.98%, Avg Confidence = 0.140, Avg Cost = 221.99
SH : Accuracy = 99.66%, Avg Confidence = 0.793, Avg Cost = 336.55
