# Multi Arm Bandit Algorithms

## Implemnetation of EGE and SH Algorithms

In [1]:
# ============================================
# Imports and Setup
# ============================================
import pandas as pd
import numpy as np
from math import log, ceil, floor, log2, exp
from collections import defaultdict

# ============================================
# Data Preprocessing Function
# ============================================
def load_and_prepare_data(filepath):
    """
    Load dataset and organize it into a dictionary:
    arm_id -> list of (reward, cost) tuples
    """
    df = pd.read_csv(filepath)
    df = df.sort_values(by="trial")

    arm_pulls = defaultdict(list)
    for _, row in df.iterrows():
        arm = int(row["arm"])
        arm_pulls[arm].append((row["reward"], row["cost"]))

    return df, arm_pulls

# ============================================
# Confidence Estimator
# ============================================
def hoeffding_confidence(chosen_arm, sample_means, num_pulls):
    """
    Calculate Hoeffding-based lower confidence bound for selected arm
    to estimate the probability that it is the best arm.
    """
    chosen_mean = sample_means.get(chosen_arm, 0)
    confidences = []
    for arm, mean in sample_means.items():
        if arm == chosen_arm:
            continue
        delta = chosen_mean - mean
        if delta <= 0:
            confidences.append(0.0)
        else:
            n = min(num_pulls.get(arm, 1), num_pulls.get(chosen_arm, 1))
            conf = 1 - exp(-0.5 * n * delta**2)
            confidences.append(conf)
    return min(confidences) if confidences else 1.0

# ============================================
# Exponential-Gap Elimination (Fixed Confidence)
# ============================================
def exponential_gap_elimination(arm_pulls, delta=0.05):
    """
    Implements the Exponential-Gap Elimination algorithm for best arm identification
    under a fixed confidence setting.
    """
    S = list(arm_pulls.keys())
    r = 1
    total_cost = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    while len(S) > 1:
        ε_r = 2 ** (-r / 4)
        δ_r = delta / (50 * r ** 3)
        t_r = ceil((2 / (ε_r ** 2)) * log(2 / δ_r))

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            print(f"Stopping early: no data left to sample in round {r}")
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]

            if not rewards:
                continue

            avg = np.mean(rewards)
            sample_means[arm] = avg
            round_means[arm] = avg
            num_pulls[arm] = start + len(rewards)
            total_cost += sum(costs)
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            print(f"Stopping early: no arms with data in round {r}")
            break

        best_arm = max(round_means, key=round_means.get)
        p_hat_star = round_means[best_arm]
        S = [arm for arm in valid_arms if round_means[arm] >= p_hat_star - ε_r]
        r += 1

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, total_cost, confidence

# ============================================
# Sequential Halving (Fixed Budget)
# ============================================
def sequential_halving(arm_pulls, total_budget):
    """
    Implements the Sequential Halving algorithm for best arm identification
    under a fixed budget constraint.
    """
    S = list(arm_pulls.keys())
    R = ceil(log2(len(S)))
    budget_used = 0
    total_pulls = 0
    sample_means = {}
    num_pulls = {}

    for r in range(R):
        if len(S) <= 1:
            break

        avg_costs = {arm: np.mean([c for _, c in arm_pulls[arm]]) for arm in S}
        mean_cost_per_arm = sum(avg_costs.values()) / len(S)
        max_total_pulls = floor((total_budget - budget_used) / mean_cost_per_arm)
        t_r = max(floor(max_total_pulls / len(S)), 1)

        max_available = min([len(arm_pulls[arm]) - num_pulls.get(arm, 0) for arm in S])
        if max_available == 0:
            print(f"Stopping early: no data left to sample in round {r}")
            break
        t_r = min(t_r, max_available)

        round_means = {}
        valid_arms = []

        for arm in S:
            start = num_pulls.get(arm, 0)
            available = len(arm_pulls[arm]) - start
            arm_tr = min(t_r, available)
            if arm_tr <= 0:
                continue

            pulls = arm_pulls[arm][start:start + arm_tr]
            rewards = [r for r, _ in pulls]
            costs = [c for _, c in pulls]
            cost_sum = sum(costs)

            if not rewards or (budget_used + cost_sum > total_budget):
                continue

            sample_means[arm] = np.mean(rewards)
            round_means[arm] = sample_means[arm]
            num_pulls[arm] = start + len(rewards)
            budget_used += cost_sum
            total_pulls += len(rewards)
            valid_arms.append(arm)

        if len(valid_arms) == 0:
            print(f"Stopping early: no arms with data in round {r}")
            break

        sorted_arms = sorted(round_means.items(), key=lambda x: x[1], reverse=True)
        S = [arm for arm, _ in sorted_arms[:ceil(len(valid_arms) / 2)]]

        if budget_used >= total_budget:
            break

    chosen_arm = S[0] if S else None
    confidence = hoeffding_confidence(chosen_arm, sample_means, num_pulls)
    return chosen_arm, total_pulls, budget_used, confidence

# ============================================
# Evaluation Function
# ============================================
def run_algorithms_on_dataset(filepath, delta=0.05, budget=500):
    """
    Run both EGE and SH algorithms on a dataset and print comparative results.
    """
    print(f"\n--- Running on dataset: {filepath.split('/')[-1]} ---")
    df, arm_pulls = load_and_prepare_data(filepath)

    print("Running Exponential-Gap Elimination...")
    ege_arm, ege_pulls, ege_cost, ege_conf = exponential_gap_elimination(arm_pulls, delta=delta)

    print("Running Sequential Halving...")
    sh_arm, sh_pulls, sh_cost, sh_conf = sequential_halving(arm_pulls, total_budget=budget)

    # True best arm
    true_means = df.groupby("arm")["reward"].mean().to_dict()
    best_true_arm = max(true_means, key=true_means.get)

    results = {
        'ege': {
            'pulls': ege_pulls,
            'cost': ege_cost,
            'confidence': ege_conf,
            'correct': ege_arm == best_true_arm
        },
        'sh': {
            'pulls': sh_pulls,
            'cost': sh_cost,
            'confidence': sh_conf,
            'correct': sh_arm == best_true_arm
        }
    }

    return results
# ============================================


## Testing

## 1. Normal Distribution

### Case1:
Arm0: 0.4000, Arm1: 0.4998, Arm2: 0.6000, Arm3: 0.7000, Arm4: 0.7988

In [3]:

dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Case1_Normal.csv"
results = run_algorithms_on_dataset(dataset, delta=0.05, budget=500)
print(results)



--- Running on dataset: Case1_Normal.csv ---
Running Exponential-Gap Elimination...
Running Sequential Halving...
Stopping early: no arms with data in round 2
{'ege': {'pulls': 1677, 'cost': np.float64(130.0329999999999), 'confidence': 0.9999995827882721, 'correct': True}, 'sh': {'pulls': 7203, 'cost': np.float64(499.9299999999994), 'confidence': 1.0, 'correct': True}}


### Case2:
Arm0: 0.1082, Arm1: 0.2999, Arm2: 0.7990, Arm3: 0.4001, Arm4: 0.2004

In [None]:
dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Case2_Normal.csv"
run_algorithms_on_dataset(dataset, delta=0.01, budget=100)


--- Running on dataset: Case2_Normal.csv ---
Running Exponential-Gap Elimination...
Running Sequential Halving...
Stopping early: no arms with data in round 1

=== COMPARISON RESULTS ===
True Best Arm: 2 (Mean = 0.799)
[EGE] Arm: 2 | Pulls: 1447 | Cost: 112.49 | Confidence: 1.000 | Correct? True
[SH ] Arm: 2  | Pulls: 1440 | Cost: 99.94 | Confidence: 1.000 | Correct? True


### Case 3:
Arm0: 0.7099, Arm1: 0.7497, Arm2: 0.6700, Arm3: 0.4401, Arm4: 0.5297

In [None]:
dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Case3_Normal.csv"
run_algorithms_on_dataset(dataset, delta=0.2, budget=1000)


--- Running on dataset: Case3_Normal.csv ---
Running Exponential-Gap Elimination...
Stopping early: no data left to sample in round 13
Running Sequential Halving...

=== COMPARISON RESULTS ===
True Best Arm: 1 (Mean = 0.750)
[EGE] Arm: 0 | Pulls: 17525 | Cost: 1297.49 | Confidence: 0.000 | Correct? False
[SH ] Arm: 0  | Pulls: 14409 | Cost: 1000.00 | Confidence: 0.600 | Correct? False


### Case 4:
Arm0: 0.7100, Arm1: 0.7507, Arm2: 0.6703, Arm3: 0.4406, Arm4: 0.5290

In [None]:
dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Case4_Normal.csv"
run_algorithms_on_dataset(dataset, delta=0.01, budget=100)


--- Running on dataset: Case4_Normal.csv ---
Running Exponential-Gap Elimination...
Stopping early: no data left to sample in round 15
Running Sequential Halving...

=== COMPARISON RESULTS ===
True Best Arm: 1 (Mean = 0.751)
[EGE] Arm: 1 | Pulls: 33181 | Cost: 2500.96 | Confidence: 0.963 | Correct? True
[SH ] Arm: 0  | Pulls: 1441 | Cost: 99.99 | Confidence: 0.994 | Correct? False


## Exponential Distribution

### Case 1:
Arm0: 0.2925, Arm1: 0.4262, Arm2: 0.5334, Arm3: 0.6441, Arm4: 0.6962

In [None]:
dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Exponential/Case1_Exponential"
run_algorithms_on_dataset(dataset, delta=0.05, budget=100)


--- Running on dataset: Case1_Exponential ---
Running Exponential-Gap Elimination...
Stopping early: no data left to sample in round 13
Running Sequential Halving...

=== COMPARISON RESULTS ===
True Best Arm: 4 (Mean = 0.696)
[EGE] Arm: 4 | Pulls: 15901 | Cost: 1055.05 | Confidence: 0.991 | Correct? True
[SH ] Arm: 4  | Pulls: 1441 | Cost: 99.98 | Confidence: 1.000 | Correct? True


###Case 2:
Arm0: 0.1005, Arm1: 0.1855, Arm2: 0.3293, Arm3: 0.5642, Arm4: 0.4517

In [None]:
dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Exponential/Case2_Exponential"
run_algorithms_on_dataset(dataset, delta=0.05, budget=500)


--- Running on dataset: Case2_Exponential ---
Running Exponential-Gap Elimination...
Running Sequential Halving...

=== COMPARISON RESULTS ===
True Best Arm: 3 (Mean = 0.564)
[EGE] Arm: 3 | Pulls: 20732 | Cost: 1242.11 | Confidence: 1.000 | Correct? True
[SH ] Arm: 3  | Pulls: 7205 | Cost: 499.99 | Confidence: 1.000 | Correct? True


###Case 3:
Arm0: 0.1008, Arm1: 0.5976, Arm2: 0.5334, Arm3: 0.4412, Arm4: 0.2869

In [None]:
dataset = "/content/drive/MyDrive/Business Analytics/Dataset/D4/Exponential/Case3_Exponential"
run_algorithms_on_dataset(dataset, delta=0.05, budget=500)


--- Running on dataset: Case3_Exponential ---
Running Exponential-Gap Elimination...
Stopping early: no data left to sample in round 13
Running Sequential Halving...
Stopping early: no arms with data in round 2

=== COMPARISON RESULTS ===
True Best Arm: 1 (Mean = 0.598)
[EGE] Arm: 1 | Pulls: 15317 | Cost: 1242.30 | Confidence: 1.000 | Correct? True
[SH ] Arm: 2  | Pulls: 7203 | Cost: 499.93 | Confidence: 0.988 | Correct? False


In [6]:
import pandas as pd
import numpy as np
from tqdm import tqdm

def generate_bandit_data(difference_range, base_reward=0.5, reward_noise=0.1, seed=None):
    """
    Generate 6-arm bandit data with ~800 trials each and controllable reward differences.
    """

    if seed is not None:
        np.random.seed(seed)

    # Fixed parameters
    n_arms = 6
    trials_per_arm = 800
    n_trials = trials_per_arm

    min_diff, max_diff = difference_range

    # Choose a random total difference within the specified range
    total_difference = np.random.uniform(min_diff, max_diff)

    # Generate arm means spread across the total difference
    spacing = total_difference / (n_arms - 1)
    arm_means = [base_reward - total_difference/2 + i * spacing for i in range(n_arms)]

    # Add some randomness to make it more realistic
    random_adjustments = np.random.normal(0, total_difference * 0.1, n_arms)
    random_adjustments = random_adjustments - np.mean(random_adjustments)
    arm_means = [mean + adj for mean, adj in zip(arm_means, random_adjustments)]

    # Ensure they stay within reasonable bounds [0, 1]
    arm_means = [max(0.01, min(0.99, mean)) for mean in arm_means]

    # Generate data
    data = []

    for trial in range(n_trials):
        for arm in range(n_arms):
            # Generate reward with noise around the arm's mean
            reward = np.random.normal(arm_means[arm], reward_noise)
            reward = max(0, reward)

            # Generate cost between 0.040 and 0.1 for each arm
            cost = np.random.uniform(0.040, 0.1)

            data.append({
                'trial': trial,
                'arm': arm,
                'reward': reward,
                'cost': cost
            })

    df = pd.DataFrame(data)
    return df

def run_batch_experiments(n_datasets=5000, delta=0.1, budget=1000):
    """
    Generate 5000 datasets and run algorithms on each without saving files.

    Parameters:
    -----------
    n_datasets : int, default=5000
        Number of datasets to generate and test
    delta : float, default=0.1
        Delta parameter for algorithms
    budget : int, default=1000
        Budget parameter for algorithms
    """

    # Store results for analysis
    results = []

    print(f"Running batch experiments on {n_datasets} datasets...")
    print(f"Parameters: delta={delta}, budget={budget}")
    print(f"Reward difference range: 0.1 to 0.2 units")

    for i in tqdm(range(n_datasets), desc="Processing datasets"):
        try:
            # Generate dataset with difference range 0.1-0.2
            df = generate_bandit_data(difference_range=(0.1, 0.2), seed=i)

            # Run algorithms directly on the dataframe
            result = run_algorithms_on_dataset(df, delta=delta, budget=budget)

            # Store result with dataset info
            result_info = {
                'dataset_id': i,
                'result': result
            }
            results.append(result_info)

        except Exception as e:
            print(f"Error processing dataset {i}: {str(e)}")
            continue

    print(f"\nCompleted processing {len(results)} out of {n_datasets} datasets")
    return results

def analyze_batch_results(results):
    """
    Analyze the results from batch experiments.

    Parameters:
    -----------
    results : list
        List of result dictionaries from run_batch_experiments
    """

    print("=== Batch Experiment Analysis ===")
    print(f"Total datasets processed: {len(results)}")

    # Initialize data storage for both algorithms
    ege_data = {'pulls': [], 'cost': [], 'confidence': [], 'correct': []}
    sh_data = {'pulls': [], 'cost': [], 'confidence': [], 'correct': []}

    # Extract data from results
    for result_info in results:
        result = result_info['result']

        # Parse EGE results
        if 'ege' in result:
            ege = result['ege']
            ege_data['pulls'].append(ege.get('pulls', 0))
            ege_data['cost'].append(ege.get('cost', 0))
            ege_data['confidence'].append(ege.get('confidence', 0))
            ege_data['correct'].append(ege.get('correct', False))

        # Parse SH results
        if 'sh' in result:
            sh = result['sh']
            sh_data['pulls'].append(sh.get('pulls', 0))
            sh_data['cost'].append(sh.get('cost', 0))
            sh_data['confidence'].append(sh.get('confidence', 0))
            sh_data['correct'].append(sh.get('correct', False))

    # Calculate statistics for EGE
    print("\n=== EXPONENTIAL-GAP ELIMINATION (EGE) ===")
    if ege_data['pulls']:
        print(f"Average Pulls: {np.mean(ege_data['pulls']):.2f}")
        print(f"Average Cost: {np.mean(ege_data['cost']):.2f}")
        print(f"Average Confidence: {np.mean(ege_data['confidence']):.3f}")
        print(f"Correct Percentage: {(sum(ege_data['correct']) / len(ege_data['correct']) * 100):.2f}%")
        print(f"Total Runs: {len(ege_data['pulls'])}")
    else:
        print("No EGE data found")

    # Calculate statistics for SH
    print("\n=== SEQUENTIAL HALVING (SH) ===")
    if sh_data['pulls']:
        print(f"Average Pulls: {np.mean(sh_data['pulls']):.2f}")
        print(f"Average Cost: {np.mean(sh_data['cost']):.2f}")
        print(f"Average Confidence: {np.mean(sh_data['confidence']):.3f}")
        print(f"Correct Percentage: {(sum(sh_data['correct']) / len(sh_data['correct']) * 100):.2f}%")
        print(f"Total Runs: {len(sh_data['pulls'])}")
    else:
        print("No SH data found")

    # Summary comparison
    print("\n=== ALGORITHM COMPARISON ===")
    if ege_data['pulls'] and sh_data['pulls']:
        print(f"EGE vs SH - Average Pulls: {np.mean(ege_data['pulls']):.2f} vs {np.mean(sh_data['pulls']):.2f}")
        print(f"EGE vs SH - Average Cost: {np.mean(ege_data['cost']):.2f} vs {np.mean(sh_data['cost']):.2f}")
        print(f"EGE vs SH - Average Confidence: {np.mean(ege_data['confidence']):.3f} vs {np.mean(sh_data['confidence']):.3f}")
        print(f"EGE vs SH - Correct %: {(sum(ege_data['correct']) / len(ege_data['correct']) * 100):.2f}% vs {(sum(sh_data['correct']) / len(sh_data['correct']) * 100):.2f}%")

    # Return detailed statistics
    analysis_results = {
        'ege': {
            'avg_pulls': np.mean(ege_data['pulls']) if ege_data['pulls'] else 0,
            'avg_cost': np.mean(ege_data['cost']) if ege_data['cost'] else 0,
            'avg_confidence': np.mean(ege_data['confidence']) if ege_data['confidence'] else 0,
            'correct_percentage': (sum(ege_data['correct']) / len(ege_data['correct']) * 100) if ege_data['correct'] else 0,
            'total_runs': len(ege_data['pulls'])
        },
        'sh': {
            'avg_pulls': np.mean(sh_data['pulls']) if sh_data['pulls'] else 0,
            'avg_cost': np.mean(sh_data['cost']) if sh_data['cost'] else 0,
            'avg_confidence': np.mean(sh_data['confidence']) if sh_data['confidence'] else 0,
            'correct_percentage': (sum(sh_data['correct']) / len(sh_data['correct']) * 100) if sh_data['correct'] else 0,
            'total_runs': len(sh_data['pulls'])
        }
    }

    return analysis_results

# Main execution function
def main():
    """
    Main function to run the complete batch experiment.
    """

    # Run batch experiments
    results = run_batch_experiments(
        n_datasets=5000,
        delta=0.1,
        budget=1000
    )

    # Analyze results
    analysis = analyze_batch_results(results)

    return results, analysis

# Execute the batch experiments
if __name__ == "__main__":
    results, analysis = main()

Running batch experiments on 5000 datasets...
Parameters: delta=0.1, budget=1000
Reward difference range: 0.1 to 0.2 units


Processing datasets:   0%|          | 2/5000 [00:00<04:24, 18.91it/s]

Error processing dataset 0: 'DataFrame' object has no attribute 'split'
Error processing dataset 1: 'DataFrame' object has no attribute 'split'
Error processing dataset 2: 'DataFrame' object has no attribute 'split'


Processing datasets:   0%|          | 4/5000 [00:00<04:16, 19.51it/s]

Error processing dataset 3: 'DataFrame' object has no attribute 'split'


Processing datasets:   0%|          | 6/5000 [00:00<04:46, 17.40it/s]

Error processing dataset 4: 'DataFrame' object has no attribute 'split'
Error processing dataset 5: 'DataFrame' object has no attribute 'split'
Error processing dataset 6: 'DataFrame' object has no attribute 'split'


Processing datasets:   0%|          | 8/5000 [00:00<04:42, 17.70it/s]

Error processing dataset 7: 'DataFrame' object has no attribute 'split'


Processing datasets:   0%|          | 10/5000 [00:00<04:30, 18.44it/s]

Error processing dataset 8: 'DataFrame' object has no attribute 'split'
Error processing dataset 9: 'DataFrame' object has no attribute 'split'
Error processing dataset 10: 'DataFrame' object has no attribute 'split'


Processing datasets:   0%|          | 12/5000 [00:00<04:36, 18.02it/s]

Error processing dataset 11: 'DataFrame' object has no attribute 'split'


Processing datasets:   0%|          | 13/5000 [00:00<04:48, 17.27it/s]

Error processing dataset 12: 'DataFrame' object has no attribute 'split'





KeyboardInterrupt: 