In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
import os

# Seed NumPy's legacy global RNG. Every stochastic call in this notebook goes
# through np.random.*, so a fresh "Restart Kernel -> Run All" replays the same
# random stream.
np.random.seed(42)

# Ensure the figures directory exists. The path is relative to the kernel's
# working directory; exist_ok=True makes this a no-op when it is already there.
# NOTE(review): presumably plots are saved here by later cells — confirm.
os.makedirs('../../figures', exist_ok=True)

- To do
    - Produce graphs for each learning rate
    - Candidate quantities to plot: the change in regret (?), total payoff, decisions (?)
    - The horizontal axis must be the number of rounds
    - The vertical axis must be one of the quantities above

In [3]:
# Experiment configuration: problem size and the learning rates to compare.
k, n = 10, 1000  # k = number of actions, n = number of rounds

# Learning rates under comparison
# (1) "Random": so small that the weights barely move, so play stays close
#     to uniformly random.
epsilon_random = 0.001

# (2) Theoretical optimum: sqrt(ln k / n)
epsilon_optimal = np.sqrt(np.log(k) / n)

# (3) FTL (follow the leader): so large that almost all probability mass
#     sits on the action with the highest cumulative payoff.
epsilon_ftl = 1000

# Echo the configuration, one setting per line.
print(
    f"Number of actions (k): {k}",
    f"Number of rounds (n): {n}",
    f"Random epsilon: {epsilon_random:.6f}",
    f"Theoretical optimal epsilon: {epsilon_optimal:.6f}",
    f"FTL epsilon: {epsilon_ftl}",
    sep="\n",
)

Number of actions (k): 10
Number of rounds (n): 1000
Random epsilon: 0.001000
Theoretical optimal epsilon: 0.047985
FTL epsilon: 1000


In [1]:
# Implement Exponential Weights Algorithm
class ExponentialWeights:
    """Exponential Weights learner for a k-action, full-information payoff game.

    Each round the learner samples an action with probability proportional to
    exp(epsilon * cumulative_payoff_of_that_action), then observes the payoff
    of every action.

    Parameters:
    - k: number of actions
    - epsilon: learning rate (0 means uniformly random play; very large values
      concentrate play on the current leader)
    - n: number of rounds played by run_algorithm

    Attributes:
    - weights: current unnormalized action weights, rescaled so the largest
      weight is 1 (only the ratios matter for sampling)
    - cumulative_payoffs: per-action sum of all observed payoffs
    - total_payoff: sum of the payoffs of the actions actually played
    - regret_history: after each round, best action's cumulative payoff minus
      total_payoff
    - action_history: the action chosen in each round
    """

    def __init__(self, k, epsilon, n):
        self.k = k
        self.epsilon = epsilon
        self.n = n
        self.weights = np.ones(k)  # Uniform start: every action equally likely
        self.cumulative_payoffs = np.zeros(k)  # Per-action payoff totals
        self.regret_history = []  # Regret after each round
        self.total_payoff = 0  # Payoff collected by the learner itself
        self.action_history = []  # Action chosen in each round

    def select_action(self):
        """Sample an action from the current weights, record it, and return it."""
        total_weight = np.sum(self.weights)
        if self.epsilon == 0 or not np.isfinite(total_weight) or total_weight <= 0:
            # epsilon == 0 means pure random play; the other two conditions are
            # a safety net for degenerate weights (all zero / non-finite).
            action = np.random.randint(0, self.k)
        else:
            # Normalize weights to get a probability distribution
            probabilities = self.weights / total_weight
            action = np.random.choice(self.k, p=probabilities)

        self.action_history.append(action)
        return action

    def update_weights(self, payoffs):
        """
        Record this round's payoffs, then refresh the weights and the regret.

        Must be called after select_action() for the same round, because the
        payoff credited to the learner is that of the last recorded action.

        Parameters:
        - payoffs: array of payoffs for each action in this round
        """
        payoffs = np.asarray(payoffs, dtype=float)

        # Update cumulative payoffs
        self.cumulative_payoffs += payoffs

        # Update total payoff (payoff of selected action)
        selected_action = self.action_history[-1]
        self.total_payoff += payoffs[selected_action]

        # w_i = exp(epsilon * cumulative_payoff_i). Multiplying the weights by
        # exp(epsilon * payoff_i) every round is mathematically the same, but
        # overflows to inf for large epsilon (e.g. an FTL-style epsilon of 1000)
        # and then produces NaN probabilities. Subtracting the maximum exponent
        # keeps every weight in [0, 1] without changing the distribution.
        if self.epsilon > 0:
            log_weights = self.epsilon * self.cumulative_payoffs
            self.weights = np.exp(log_weights - np.max(log_weights))

        # Regret: best fixed action in hindsight vs. what the learner earned.
        # (Not cumulative_payoffs[selected_action]: that is the running total
        # of whichever arm was picked this round, not the learner's earnings.)
        regret = np.max(self.cumulative_payoffs) - self.total_payoff
        self.regret_history.append(regret)

    def run_algorithm(self, payoff_generator):
        """
        Run the algorithm for n rounds

        State is not reset here, so calling this twice on the same instance
        continues from where the previous run stopped.

        Parameters:
        - payoff_generator: function that takes the round index and returns the
          array of payoffs for each action in that round

        Returns:
        - dict with keys 'regret_history', 'total_payoff', 'action_history',
          'cumulative_payoffs' (references to this object's attributes)
        """
        for round_num in range(self.n):
            # Commit to an action before the round's payoffs are generated
            self.select_action()

            # Generate payoffs for this round
            payoffs = payoff_generator(round_num)

            # Account for the payoffs and update the weights
            self.update_weights(payoffs)

        return {
            'regret_history': self.regret_history,
            'total_payoff': self.total_payoff,
            'action_history': self.action_history,
            'cumulative_payoffs': self.cumulative_payoffs
        }

# A. Adversarial Fair Payoffs

In [2]:
# A. Adversarial Fair Payoffs Implementation
class AdversarialFairPayoffs:
    """
    Payoff source for the "adversarial fair payoffs" setting.

    Every round a single value x ~ U[0,1] is drawn and handed to whichever
    action currently has the lowest running total; every other action
    receives 0 for that round. The running totals live on the instance, so
    the object is stateful across calls.
    """

    def __init__(self, k):
        self.k = k
        self.cumulative_payoffs = np.zeros(k)

    def generate_payoffs(self, round_num):
        """
        Produce the payoff vector for one round and update the running totals.

        Parameters:
        - round_num: index of the round (accepted for interface compatibility,
          not used in the computation)

        Returns:
        - payoffs: length-k array, zero everywhere except at the action that
          was trailing before this round
        """
        # The single U[0,1] reward handed out this round
        draw = np.random.uniform(0, 1)

        # Action that is currently furthest behind (first one on ties)
        laggard = np.argmin(self.cumulative_payoffs)

        # One-hot style vector carrying the draw at the laggard's position
        round_payoffs = np.zeros(self.k)
        round_payoffs[laggard] = draw

        # Only the laggard's running total changes
        self.cumulative_payoffs[laggard] += draw

        return round_payoffs

# Test Adversarial Fair Payoffs
# Needs k, n and the epsilon_* learning rates from the configuration cell,
# so that cell must have been run first.
print("Testing Adversarial Fair Payoffs Model...")
print("=" * 50)

# AdversarialFairPayoffs keeps its own running totals. Sharing one instance
# across the three runs would make the second and third learner face an
# adversary that already carries the earlier runs' history, so every learner
# gets its own fresh adversary.
adversarial_generators = {
    'random': AdversarialFairPayoffs(k),
    'optimal': AdversarialFairPayoffs(k),
    'ftl': AdversarialFairPayoffs(k),
}
# Original single-generator name stays bound in case another cell refers to it.
adversarial_generator = adversarial_generators['random']

# Test with different learning rates
ew_random_adv = ExponentialWeights(k, epsilon_random, n)
ew_optimal_adv = ExponentialWeights(k, epsilon_optimal, n)
ew_ftl_adv = ExponentialWeights(k, epsilon_ftl, n)

# Run algorithms, each against its own adversary
results_random_adv = ew_random_adv.run_algorithm(adversarial_generators['random'].generate_payoffs)
results_optimal_adv = ew_optimal_adv.run_algorithm(adversarial_generators['optimal'].generate_payoffs)
results_ftl_adv = ew_ftl_adv.run_algorithm(adversarial_generators['ftl'].generate_payoffs)

# Create results dictionary for adversarial payoffs
results_dict_adv = {
    f'Random (ε={epsilon_random:.6f})': results_random_adv,
    f'Optimal (ε={epsilon_optimal:.6f})': results_optimal_adv,
    f'FTL (ε={epsilon_ftl})': results_ftl_adv
}

# Plot comparison for adversarial payoffs
# NOTE(review): plot_learning_rate_comparison is not defined in the cells shown
# here — confirm it is defined (and run) before this cell.
plot_learning_rate_comparison(results_dict_adv, "Exponential Weights: Learning Rate Comparison (Adversarial Fair Payoffs)")

# Print summary statistics for adversarial payoffs
print("\nAdversarial Fair Payoffs - Summary Statistics:")
print("-" * 50)
for name, results in results_dict_adv.items():
    final_regret = results['regret_history'][-1]
    total_payoff = results['total_payoff']
    print(f"{name}:")
    print(f"  Final Regret: {final_regret:.4f}")
    print(f"  Total Payoff: {total_payoff:.4f}")
    print(f"  Best Action: Action {np.argmax(results['cumulative_payoffs'])}")
    print()


Testing Adversarial Fair Payoffs Model...


NameError: name 'k' is not defined

# B. Bernoulli Payoffs Implementation

In [3]:
class BernoulliPayoffs:
    """
    Payoff source for the stochastic (Bernoulli) setting.

    At construction each action j is assigned a fixed success probability
    p_j drawn uniformly from [0, 1/2]. In every round each action's payoff is
    an independent Bernoulli(p_j) draw, i.e. 0 or 1.
    """

    def __init__(self, k):
        self.k = k
        # One success probability per action, fixed for the object's lifetime
        self.probabilities = np.random.uniform(low=0, high=0.5, size=k)
        print(f"Bernoulli probabilities for each action: {self.probabilities}")

    def generate_payoffs(self, round_num):
        """
        Draw one round of payoffs.

        Parameters:
        - round_num: index of the round (accepted for interface compatibility,
          not used in the computation)

        Returns:
        - payoffs: array with one 0/1 payoff per action
        """
        # A binomial with a single trial is a Bernoulli draw per action
        return np.random.binomial(n=1, p=self.probabilities)

# Bernoulli payoffs experiment
# Needs k, n and the epsilon_* learning rates from the configuration cell.
print("\nTesting Bernoulli Payoffs Model...")
print("=" * 50)

# A single generator is shared on purpose: its only state is the fixed
# per-action probabilities, so every learner faces the same distribution.
bernoulli_generator = BernoulliPayoffs(k)

# Build each learner and run it right away, in the order random -> optimal -> FTL
ew_random_bern = ExponentialWeights(k, epsilon_random, n)
results_random_bern = ew_random_bern.run_algorithm(bernoulli_generator.generate_payoffs)

ew_optimal_bern = ExponentialWeights(k, epsilon_optimal, n)
results_optimal_bern = ew_optimal_bern.run_algorithm(bernoulli_generator.generate_payoffs)

ew_ftl_bern = ExponentialWeights(k, epsilon_ftl, n)
results_ftl_bern = ew_ftl_bern.run_algorithm(bernoulli_generator.generate_payoffs)

# Label -> results mapping consumed by the plot and the summary below
results_dict_bern = {}
results_dict_bern[f'Random (ε={epsilon_random:.6f})'] = results_random_bern
results_dict_bern[f'Optimal (ε={epsilon_optimal:.6f})'] = results_optimal_bern
results_dict_bern[f'FTL (ε={epsilon_ftl})'] = results_ftl_bern

# Side-by-side comparison plot
plot_learning_rate_comparison(results_dict_bern, "Exponential Weights: Learning Rate Comparison (Bernoulli Payoffs)")

# Per-learning-rate summary
print("\nBernoulli Payoffs - Summary Statistics:")
print("-" * 50)
for name, results in results_dict_bern.items():
    final_regret = results['regret_history'][-1]
    total_payoff = results['total_payoff']
    print(f"{name}:")
    print(f"  Final Regret: {final_regret:.4f}")
    print(f"  Total Payoff: {total_payoff:.4f}")
    print(f"  Best Action: Action {np.argmax(results['cumulative_payoffs'])}")
    print()

# Ground truth, for checking the learners against the actual best action
print("True Bernoulli probabilities for each action:")
for i, prob in enumerate(bernoulli_generator.probabilities):
    print(f"  Action {i}: {prob:.4f}")
print(f"Best action (highest probability): Action {np.argmax(bernoulli_generator.probabilities)}")


Testing Bernoulli Payoffs Model...


NameError: name 'k' is not defined

# Data in the wild

# Adversarial generative model