In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Task a) Initialize k=3 normal distributions with random parameters
def initialize_arms(k=3, mu_range=(-1, 1), sigma_range=(0.2, 1.2)):
    """Draw random normal-distribution parameters for ``k`` arms.

    Means are sampled uniformly from ``mu_range`` and standard deviations
    uniformly from ``sigma_range``; each range is read as ``(low, high)``.
    The means are drawn before the standard deviations.

    Returns:
        A tuple ``(mus, sigmas)`` holding two arrays of length ``k``.
    """
    mu_low, mu_high = mu_range[0], mu_range[1]
    sigma_low, sigma_high = sigma_range[0], sigma_range[1]

    arm_means = np.random.uniform(low=mu_low, high=mu_high, size=k)
    arm_sigmas = np.random.uniform(low=sigma_low, high=sigma_high, size=k)
    return arm_means, arm_sigmas

# Task b) & c) Algorithm for prediction-based bandit problem with strategy variants
def run_prediction_bandit(strategy, T=300, k=3, T1=100):
    """Simulate one prediction-bandit run and return its cumulative error.

    Fresh arm parameters are drawn with ``initialize_arms(k)`` and printed.
    In each of the ``T`` rounds an arm is chosen according to ``strategy``,
    a value is sampled from that arm's normal distribution, and the
    prediction (the arm's empirical mean so far, or 0 when the arm has no
    observations yet) is scored by its absolute error.

    Args:
        strategy: One of ``'phased'``, ``'epsilon_greedy'``, ``'ucb'`` or
            ``'random'``.
        T: Number of rounds to play.
        k: Number of arms.
        T1: Length of the uniform-exploration phase; only used by the
            ``'phased'`` strategy.

    Returns:
        The sum of the absolute prediction errors over all rounds.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    # Fail fast, before any arm is drawn or printed, so that a typo in the
    # strategy name is reported even when T == 0.
    if strategy not in ('phased', 'epsilon_greedy', 'ucb', 'random'):
        raise ValueError("Unknown strategy")

    mus, sigmas = initialize_arms(k)
    for i, (mu, sigma) in enumerate(zip(mus, sigmas)):
        print(f"Arm {i} has µ = {round(mu, 4)} and σ = {round(sigma, 4)}")
    break_line = "-"*100
    print(break_line)

    errors = []
    epsilon = 0.1  # exploration probability for 'epsilon_greedy'

    # Observed values per arm, used to estimate each arm's mean / spread
    observations = {i: [] for i in range(k)}

    for t in range(T):
        if strategy == 'phased':
            if t < T1:
                # Exploration: pick an arm uniformly at random
                action = np.random.choice(k)
            else:
                # Exploitation: choose the arm with the lowest observed
                # standard deviation; arms never observed are excluded.
                # NOTE: an arm observed exactly once has std 0 and so wins.
                spreads = [
                    np.std(observations[i]) if observations[i] else float('inf')
                    for i in range(k)
                ]
                action = np.argmin(spreads)

        elif strategy == 'epsilon_greedy':
            if np.random.rand() < epsilon:
                action = np.random.choice(k)
            else:
                means = [np.mean(observations[i]) if observations[i] else 0 for i in range(k)]
                action = np.argmin([abs(means[i]) for i in range(k)])

        elif strategy == 'ucb':
            means = [np.mean(observations[i]) if observations[i] else 0 for i in range(k)]
            counts = [len(observations[i]) for i in range(k)]
            # Arms that were never pulled get +inf so each one is tried once
            # before the confidence-based scores are compared. (Scoring them
            # -inf under argmax would lock the run onto arm 0 forever.)
            # NOTE(review): the score subtracts the confidence radius, i.e. it
            # is a lower bound despite the strategy name — confirm the
            # intended objective before changing the sign.
            ucb_values = [
                means[i] - np.sqrt(2 * np.log(t + 1) / counts[i]) if counts[i] > 0 else np.inf
                for i in range(k)
            ]
            action = np.argmax(ucb_values)

        else:
            # 'random' (the strategy name was validated above)
            action = np.random.choice(k)

        # Sample feedback x from the chosen arm
        x = np.random.normal(mus[action], sigmas[action])

        # Predict with the chosen arm's empirical mean *before* seeing x
        prediction = np.mean(observations[action]) if observations[action] else 0

        # Record the absolute prediction error, then store the observation
        errors.append(abs(x - prediction))
        observations[action].append(x)

    cumulative_error = sum(errors)

    return cumulative_error

# Task c) Experiment protocol to test strategy variants
def run_experiments(strategies, runs=30, T=300):
    """Run every strategy ``runs`` times and collect the cumulative errors.

    Strategies are processed in the given order; for each one,
    ``run_prediction_bandit(strategy=..., T=T)`` is called ``runs`` times.

    Returns:
        A dict mapping each strategy name to the list of its per-run
        cumulative errors.
    """
    errors_by_strategy = {name: [] for name in strategies}
    for name in strategies:
        errors_by_strategy[name].extend(
            run_prediction_bandit(strategy=name, T=T) for _ in range(runs)
        )
    return errors_by_strategy

# Plotting function to compare strategies
def plot_strategy_results(results):
    """Show a box plot comparing the error distribution of each strategy.

    Args:
        results: Mapping from strategy name to a sequence of numeric
            cumulative errors. Boxes are drawn in the mapping's iteration
            order, one per key.
    """
    labels = list(results.keys())
    data = [results[strategy] for strategy in labels]

    plt.figure(figsize=(10, 6))
    plt.boxplot(data)
    # Label the boxes via xticks instead of boxplot's `labels=` keyword:
    # that keyword is deprecated since Matplotlib 3.9 (renamed `tick_labels`),
    # whereas xticks works on old and new versions alike. Boxes sit at the
    # default positions 1..N.
    plt.xticks(list(range(1, len(labels) + 1)), labels)
    plt.ylabel("Cumulative Prediction Error")
    plt.title("Comparison of Prediction Strategies over Multiple Runs")
    plt.grid(True)
    plt.show()


In [None]:
# Entry point: run every strategy, plot the comparison, then summarise
strategies = ['phased', 'epsilon_greedy', 'ucb', 'random']
experiment_results = run_experiments(strategies)
plot_strategy_results(experiment_results)

# Report the mean and spread of the cumulative error for each strategy
for strategy in strategies:
    run_errors = np.asarray(experiment_results[strategy])
    avg_error = run_errors.mean()
    std_error = run_errors.std()
    print(f"Strategy: {strategy}, Mean Error: {avg_error:.2f}, Std Dev: {std_error:.2f}")