In [1]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
import os

# Set random seed for reproducibility
np.random.seed(42)

# Create figures directory if it doesn't exist
os.makedirs('../../figures', exist_ok=True)

- what to do
    - give raphs for each learning rate
    - the change of regret?, total payoff, decisions?
    - horizontal axis must be the number of rounds
    - vertical axis must be one of them.

In [2]:
# Define variables and learning rates
k = 10  # actions
n = 1000  #  rounds

# Three important learning rates to compare:
# 1. Random 
epsilon_random = 0.001  

# 2. Theoretical optimal
epsilon_optimal = np.sqrt(np.log(k) / n)

# 3. FTL
epsilon_ftl = 1000  

print(f"Number of actions (k): {k}")
print(f"Number of rounds (n): {n}")
print(f"Random epsilon: {epsilon_random:.6f}")
print(f"Theoretical optimal epsilon: {epsilon_optimal:.6f}")
print(f"FTL epsilon: {epsilon_ftl}")

Number of actions (k): 10
Number of rounds (n): 1000
Random epsilon: 0.001000
Theoretical optimal epsilon: 0.047985
FTL epsilon: 1000


- plot
    - regret for each round
    - total payoff for each round
    - action chosen for each round

In [1]:
# Implement Exponential Weights Algorithm
class ExponentialWeights:
    # Three Parameters:
    
    def __init__(self, k, epsilon, n):
        self.k = k
        self.epsilon = epsilon
        self.n = n
        self.weights = np.ones(k)  # Initialize weights to 1
        self.cumulative_payoffs = np.zeros(k)  # Track cumulative payoffs
        self.regret_history = []  # Track regret over time
        self.total_payoff = 0  # Track total payoff
        self.action_history = []  # Track actions taken
        
    def select_action(self):
        """Select action based on current weights"""
        if self.epsilon == 0 or np.all(self.weights == 0):
            # Random selection when epsilon is 0
            action = np.random.randint(0, self.k)
        else:
            # Normalize weights to get probabilities
            probabilities = self.weights / np.sum(self.weights)
            action = np.random.choice(self.k, p=probabilities)
        
        self.action_history.append(action)
        return action
    
    def update_weights(self, payoffs):
        """
        Update weights based on received payoffs
        
        Parameters:
        - payoffs: array of payoffs for each action in this round
        """
        # Update cumulative payoffs
        self.cumulative_payoffs += payoffs
        
        # Update total payoff (payoff of selected action)
        selected_action = self.action_history[-1]
        self.total_payoff += payoffs[selected_action]
        
        # Update weights: w_i = w_i * exp(epsilon * payoff_i)
        if self.epsilon > 0:
            self.weights *= np.exp(self.epsilon * payoffs)
        
        # Calculate regret: max cumulative payoff - our cumulative payoff
        max_cumulative = np.max(self.cumulative_payoffs)
        our_cumulative = self.cumulative_payoffs[selected_action]
        regret = max_cumulative - our_cumulative
        self.regret_history.append(regret)
    
    def run_algorithm(self, payoff_generator):
        """
        Run the algorithm for n rounds
        
        Parameters:
        - payoff_generator: function that generates payoffs for each round
        """
        for round_num in range(self.n):
            # Select action
            action = self.select_action()
            
            # Generate payoffs for this round
            payoffs = payoff_generator(round_num)
            
            # Update weights
            self.update_weights(payoffs)
        
        return {
            'regret_history': self.regret_history,
            'total_payoff': self.total_payoff,
            'action_history': self.action_history,
            'cumulative_payoffs': self.cumulative_payoffs
        }

# A. Adversarial Fair Payoffs

In [2]:
# A. Adversarial Fair Payoffs Implementation
class AdversarialFairPayoffs:
    """
    Adversarial Fair Payoffs model:
    - In each round, draw a payoff x ~ U[0,1]
    - Assign this payoff to the action with smallest total payoff so far
    - All other actions get 0 payoff
    """
    
    def __init__(self, k):
        self.k = k
        self.cumulative_payoffs = np.zeros(k)
    
    def generate_payoffs(self, round_num):
        """
        Generate payoffs for a given round
        
        Returns:
        - payoffs: array of payoffs for each action
        """
        # Draw a random payoff from uniform distribution [0,1]
        payoff = np.random.uniform(0, 1)
        
        # Find the action with smallest cumulative payoff
        min_action = np.argmin(self.cumulative_payoffs)
        
        # Create payoff vector: only the min action gets the payoff, others get 0
        payoffs = np.zeros(self.k)
        payoffs[min_action] = payoff
        
        # Update cumulative payoffs
        self.cumulative_payoffs += payoffs
        
        return payoffs

# Test Adversarial Fair Payoffs
print("Testing Adversarial Fair Payoffs Model...")
print("=" * 50)

# Create payoff generator
adversarial_generator = AdversarialFairPayoffs(k)

# Test with different learning rates
ew_random_adv = ExponentialWeights(k, epsilon_random, n)
ew_optimal_adv = ExponentialWeights(k, epsilon_optimal, n)
ew_ftl_adv = ExponentialWeights(k, epsilon_ftl, n)

# Run algorithms
results_random_adv = ew_random_adv.run_algorithm(adversarial_generator.generate_payoffs)
results_optimal_adv = ew_optimal_adv.run_algorithm(adversarial_generator.generate_payoffs)
results_ftl_adv = ew_ftl_adv.run_algorithm(adversarial_generator.generate_payoffs)

# Create results dictionary for adversarial payoffs
results_dict_adv = {
    f'Random (ε={epsilon_random:.6f})': results_random_adv,
    f'Optimal (ε={epsilon_optimal:.6f})': results_optimal_adv,
    f'FTL (ε={epsilon_ftl})': results_ftl_adv
}

# Plot comparison for adversarial payoffs
plot_learning_rate_comparison(results_dict_adv, "Exponential Weights: Learning Rate Comparison (Adversarial Fair Payoffs)")

# Print summary statistics for adversarial payoffs
print("\nAdversarial Fair Payoffs - Summary Statistics:")
print("-" * 50)
for name, results in results_dict_adv.items():
    final_regret = results['regret_history'][-1]
    total_payoff = results['total_payoff']
    print(f"{name}:")
    print(f"  Final Regret: {final_regret:.4f}")
    print(f"  Total Payoff: {total_payoff:.4f}")
    print(f"  Best Action: Action {np.argmax(results['cumulative_payoffs'])}")
    print()


Testing Adversarial Fair Payoffs Model...


NameError: name 'k' is not defined

# B. Bernoulli Payoffs Implementation

In [3]:
class BernoulliPayoffs:
    """
    Bernoulli Payoffs model:
    - Fix a probability p_j for each action j with p_j in [0, 1/2]
    - In each round, draw payoff for each action j as v_j ~ B(p_j)
    """
    
    def __init__(self, k):
        self.k = k
        # Generate probabilities for each action (all in [0, 1/2])
        self.probabilities = np.random.uniform(0, 0.5, k)
        print(f"Bernoulli probabilities for each action: {self.probabilities}")
    
    def generate_payoffs(self, round_num):
        """
        Generate payoffs for a given round
        
        Returns:
        - payoffs: array of payoffs for each action (0 or 1)
        """
        # Generate Bernoulli payoffs for each action
        payoffs = np.random.binomial(1, self.probabilities)
        return payoffs

# Test Bernoulli Payoffs
print("\nTesting Bernoulli Payoffs Model...")
print("=" * 50)

# Create payoff generator
bernoulli_generator = BernoulliPayoffs(k)

# Test with different learning rates
ew_random_bern = ExponentialWeights(k, epsilon_random, n)
ew_optimal_bern = ExponentialWeights(k, epsilon_optimal, n)
ew_ftl_bern = ExponentialWeights(k, epsilon_ftl, n)

# Run algorithms
results_random_bern = ew_random_bern.run_algorithm(bernoulli_generator.generate_payoffs)
results_optimal_bern = ew_optimal_bern.run_algorithm(bernoulli_generator.generate_payoffs)
results_ftl_bern = ew_ftl_bern.run_algorithm(bernoulli_generator.generate_payoffs)

# Create results dictionary for Bernoulli payoffs
results_dict_bern = {
    f'Random (ε={epsilon_random:.6f})': results_random_bern,
    f'Optimal (ε={epsilon_optimal:.6f})': results_optimal_bern,
    f'FTL (ε={epsilon_ftl})': results_ftl_bern
}

# Plot comparison for Bernoulli payoffs
plot_learning_rate_comparison(results_dict_bern, "Exponential Weights: Learning Rate Comparison (Bernoulli Payoffs)")

# Print summary statistics for Bernoulli payoffs
print("\nBernoulli Payoffs - Summary Statistics:")
print("-" * 50)
for name, results in results_dict_bern.items():
    final_regret = results['regret_history'][-1]
    total_payoff = results['total_payoff']
    print(f"{name}:")
    print(f"  Final Regret: {final_regret:.4f}")
    print(f"  Total Payoff: {total_payoff:.4f}")
    print(f"  Best Action: Action {np.argmax(results['cumulative_payoffs'])}")
    print()

# Show the true probabilities for reference
print("True Bernoulli probabilities for each action:")
for i, prob in enumerate(bernoulli_generator.probabilities):
    print(f"  Action {i}: {prob:.4f}")
print(f"Best action (highest probability): Action {np.argmax(bernoulli_generator.probabilities)}")


Testing Bernoulli Payoffs Model...


NameError: name 'k' is not defined

# Data in the wild

In [None]:
# まず, ノーマルとRUSHのどちらかのコンディションが状態移遷する.

# 以下が, それぞれについてデータによって決められている. i.i.d.である
# normal_hit_prob_per_spinは, ノーマルコンディションでの1回転当たりの勝ち確率.
# rush_hit_prob_per_spinは, RUSHコンディションでの1回転当たりの勝ち確率.
# rush_every_prov_given_hitは, ノーマルコンディションで当たりが出た後に, RUSHコンディションに移行する確率.
# rush_continue_probは, RUSHコンディションで当たりが出た後に, 次の当たりが出るまで継続する確率.
# rush_st_spinsは, RUSHコンディションで当たりが出た後に, 次の当たりが出るまでのスピン数.
# payouts_normal_balls は, ノーマルコンディションでの当たりの賞金.
# payouts_rush_balls は, RUSHコンディションでの当たりの賞金.

#　グラフを作成する.　一流誌に載せるようなグラフ(状態遷移とpayoffのグラフ)

# ここからExponential Weights Algorithmを適用する.

# Adversarial generative model

In [6]:
# Load Pachinko data
import pandas as pd

# Load the Pachinko data
pachinko_data = pd.read_csv('/Users/harashimakoshi/CS332/332Project2/data/data_pachinko.csv')

print("Pachinko Data Overview:")
print("=" * 50)
print(f"Number of machines: {len(pachinko_data)}")
print("\nData columns:")
for col in pachinko_data.columns:
    print(f"  - {col}")

print("\nFirst few rows:")
print(pachinko_data.head())

# Display machine names
print("\nMachine Names:")
for i, machine in enumerate(pachinko_data['machine']):
    print(f"  {i}: {machine}")


Pachinko Data Overview:
Number of machines: 5

Data columns:
  - machine
  - maker
  - normal_hit_prob_per_spin
  - rush_hit_prob_per_spin
  - rush_entry_prob_given_hit
  - rush_continuation_prob
  - rush_st_spins
  - payouts_normal_balls
  - payouts_rush_balls
  - source_urls

First few rows:
                                 machine        maker  \
0                 e Tokyo Ghoul W (スマパチ)        Bisty   
1  PF Gundam Unicorn Reappearance 129ver       SANKYO   
2                 P Madoka Magica 3 (LT)      KYORAKU   
3                e 新世紀エヴァンゲリオン 〜はじまりの記憶〜        Bisty   
4              e Re:ゼロから始める異世界生活 season2  DAITO GIKEN   

   normal_hit_prob_per_spin  rush_hit_prob_per_spin  \
0                  0.002501                0.010493   
1                  0.007704                0.024631   
2                  0.005003                0.016077   
3                  0.003333                0.010000   
4                  0.003125                0.009900   

   rush_entry_prob_given_hit  r

In [11]:
import pandas as pd
import numpy as np

In [12]:
# Pachinko Machine Payoff Generator
class PachinkoPayoffGenerator:
    """
    Pachinko machine payoff generator based on normal/rush state transitions
    """
    
    def __init__(self, machine_data, n_rounds=1000):
        self.machine_data = machine_data
        self.n_rounds = n_rounds
        
        # Extract parameters from machine data
        self.normal_hit_prob = machine_data['normal_hit_prob_per_spin']
        self.rush_hit_prob = machine_data['rush_hit_prob_per_spin']
        self.rush_entry_prob = machine_data['rush_entry_prob_given_hit']
        self.rush_continue_prob = machine_data['rush_continuation_prob']
        self.rush_st_spins = machine_data['rush_st_spins']
        
        # Parse payouts (assuming they are comma-separated strings)
        self.payouts_normal = [int(x) for x in machine_data['payouts_normal_balls'].split(',')]
        self.payouts_rush = [int(x) for x in machine_data['payouts_rush_balls'].split(',')]
        
        # Initialize state
        self.current_state = 'normal'  # 'normal' or 'rush'
        self.rush_spins_remaining = 0
        
        print(f"Machine: {machine_data['machine']}")
        print(f"Normal hit prob: {self.normal_hit_prob:.6f}")
        print(f"Rush hit prob: {self.rush_hit_prob:.6f}")
        print(f"Rush entry prob: {self.rush_entry_prob:.4f}")
        print(f"Rush continue prob: {self.rush_continue_prob:.4f}")
        print(f"Rush ST spins: {self.rush_st_spins}")
        print(f"Normal payouts: {self.payouts_normal}")
        print(f"Rush payouts: {self.payouts_rush}")
    
    def generate_payoffs(self, round_num):
        """
        Generate payoffs for a given round based on current state
        """
        payoffs = np.zeros(2)  # [normal_action, rush_action]
        
        if self.current_state == 'normal':
            # Normal state: can choose to play or not
            if np.random.random() < self.normal_hit_prob:
                # Hit in normal state
                payoff = np.random.choice(self.payouts_normal)
                payoffs[0] = payoff  # Normal action gets payoff
                
                # Check if we enter rush state
                if np.random.random() < self.rush_entry_prob:
                    self.current_state = 'rush'
                    self.rush_spins_remaining = self.rush_st_spins
                    print(f"Round {round_num}: Entered RUSH state! Spins remaining: {self.rush_spins_remaining}")
            else:
                # No hit in normal state
                payoffs[0] = 0
                
        elif self.current_state == 'rush':
            # Rush state: guaranteed hits with higher probability
            if self.rush_spins_remaining > 0:
                if np.random.random() < self.rush_hit_prob:
                    # Hit in rush state
                    payoff = np.random.choice(self.payouts_rush)
                    payoffs[1] = payoff  # Rush action gets payoff
                    
                    # Check if rush continues
                    if np.random.random() < self.rush_continue_prob:
                        self.rush_spins_remaining -= 1
                    else:
                        self.current_state = 'normal'
                        self.rush_spins_remaining = 0
                        print(f"Round {round_num}: Exited RUSH state")
                else:
                    # No hit in rush state
                    payoffs[1] = 0
                    self.rush_spins_remaining -= 1
            else:
                # Rush state ended
                self.current_state = 'normal'
                payoffs[1] = 0
        
        return payoffs


In [13]:
# Enhanced Exponential Weights Algorithm for Pachinko
class PachinkoExponentialWeights:
    """
    Exponential Weights Algorithm adapted for Pachinko machines
    with 2 actions: Normal play vs Rush play
    """
    
    def __init__(self, epsilon, n_rounds=1000):
        self.epsilon = epsilon
        self.n_rounds = n_rounds
        self.k = 2  # Two actions: normal and rush
        
        # Initialize weights
        self.weights = np.ones(self.k)
        self.cumulative_payoffs = np.zeros(self.k)
        self.regret_history = []
        self.total_payoff = 0
        self.action_history = []
        self.state_history = []  # Track state transitions
        self.payoff_history = []  # Track payoffs over time
        
    def select_action(self):
        """Select action based on current weights"""
        if self.epsilon == 0 or np.all(self.weights == 0):
            action = np.random.randint(0, self.k)
        else:
            probabilities = self.weights / np.sum(self.weights)
            action = np.random.choice(self.k, p=probabilities)
        
        self.action_history.append(action)
        return action
    
    def update_weights(self, payoffs, current_state):
        """Update weights based on received payoffs"""
        # Update cumulative payoffs
        self.cumulative_payoffs += payoffs
        
        # Update total payoff (payoff of selected action)
        selected_action = self.action_history[-1]
        self.total_payoff += payoffs[selected_action]
        
        # Update weights: w_i = w_i * exp(epsilon * payoff_i)
        if self.epsilon > 0:
            self.weights *= np.exp(self.epsilon * payoffs)
        
        # Calculate regret: max cumulative payoff - our cumulative payoff
        max_cumulative = np.max(self.cumulative_payoffs)
        our_cumulative = self.cumulative_payoffs[selected_action]
        regret = max_cumulative - our_cumulative
        self.regret_history.append(regret)
        
        # Store state and payoff info
        self.state_history.append(current_state)
        self.payoff_history.append(payoffs.copy())
    
    def run_algorithm(self, payoff_generator):
        """Run the algorithm for n rounds"""
        for round_num in range(self.n_rounds):
            # Select action
            action = self.select_action()
            
            # Generate payoffs for this round
            payoffs = payoff_generator(round_num)
            
            # Get current state from generator
            current_state = payoff_generator.current_state
            
            # Update weights
            self.update_weights(payoffs, current_state)
        
        return {
            'regret_history': self.regret_history,
            'total_payoff': self.total_payoff,
            'action_history': self.action_history,
            'cumulative_payoffs': self.cumulative_payoffs,
            'state_history': self.state_history,
            'payoff_history': self.payoff_history
        }


In [14]:
# Visualization functions for Pachinko analysis
def plot_pachinko_results(results, machine_name, epsilon):
    """Create comprehensive visualizations for Pachinko EW results"""
    
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle(f'Exponential Weights Algorithm - {machine_name} (ε={epsilon:.6f})', fontsize=16)
    
    # 1. Regret over time
    axes[0, 0].plot(results['regret_history'], linewidth=2)
    axes[0, 0].set_title('Regret Over Time')
    axes[0, 0].set_xlabel('Round')
    axes[0, 0].set_ylabel('Cumulative Regret')
    axes[0, 0].grid(True, alpha=0.3)
    
    # 2. Total payoff over time
    cumulative_payoff = np.cumsum([results['payoff_history'][i][results['action_history'][i]] 
                                  for i in range(len(results['action_history']))])
    axes[0, 1].plot(cumulative_payoff, linewidth=2, color='green')
    axes[0, 1].set_title('Total Payoff Over Time')
    axes[0, 1].set_xlabel('Round')
    axes[0, 1].set_ylabel('Cumulative Payoff')
    axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Action selection over time
    action_colors = ['blue', 'red']
    action_names = ['Normal', 'Rush']
    for i in range(2):
        action_rounds = [j for j, action in enumerate(results['action_history']) if action == i]
        if action_rounds:
            axes[0, 2].scatter(action_rounds, [i] * len(action_rounds), 
                             c=action_colors[i], alpha=0.6, s=1, label=action_names[i])
    axes[0, 2].set_title('Action Selection Over Time')
    axes[0, 2].set_xlabel('Round')
    axes[0, 2].set_ylabel('Action')
    axes[0, 2].set_yticks([0, 1])
    axes[0, 2].set_yticklabels(['Normal', 'Rush'])
    axes[0, 2].legend()
    axes[0, 2].grid(True, alpha=0.3)
    
    # 4. State transitions
    state_colors = {'normal': 'lightblue', 'rush': 'orange'}
    for i, state in enumerate(results['state_history']):
        axes[1, 0].scatter(i, 0, c=state_colors[state], alpha=0.7, s=10)
    axes[1, 0].set_title('State Transitions Over Time')
    axes[1, 0].set_xlabel('Round')
    axes[1, 0].set_ylabel('State')
    axes[1, 0].set_yticks([])
    axes[1, 0].grid(True, alpha=0.3)
    
    # 5. Payoff distribution by action
    normal_payoffs = [results['payoff_history'][i][0] for i in range(len(results['payoff_history']))]
    rush_payoffs = [results['payoff_history'][i][1] for i in range(len(results['payoff_history']))]
    
    axes[1, 1].hist(normal_payoffs, bins=20, alpha=0.7, label='Normal', color='blue')
    axes[1, 1].hist(rush_payoffs, bins=20, alpha=0.7, label='Rush', color='red')
    axes[1, 1].set_title('Payoff Distribution by Action')
    axes[1, 1].set_xlabel('Payoff')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)
    
    # 6. Cumulative payoffs by action
    normal_cumulative = np.cumsum(normal_payoffs)
    rush_cumulative = np.cumsum(rush_payoffs)
    
    axes[1, 2].plot(normal_cumulative, label='Normal', color='blue', linewidth=2)
    axes[1, 2].plot(rush_cumulative, label='Rush', color='red', linewidth=2)
    axes[1, 2].set_title('Cumulative Payoffs by Action')
    axes[1, 2].set_xlabel('Round')
    axes[1, 2].set_ylabel('Cumulative Payoff')
    axes[1, 2].legend()
    axes[1, 2].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(f'../../figures/pachinko_ew_{machine_name.replace(" ", "_")}_eps{epsilon:.6f}.png', 
                dpi=300, bbox_inches='tight')
    plt.show()

def plot_machine_comparison(results_dict, title):
    """Compare results across different machines"""
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle(title, fontsize=16)
    
    colors = plt.cm.Set1(np.linspace(0, 1, len(results_dict)))
    
    # Regret comparison
    for i, (name, results) in enumerate(results_dict.items()):
        axes[0, 0].plot(results['regret_history'], label=name, color=colors[i], linewidth=2)
    axes[0, 0].set_title('Regret Comparison')
    axes[0, 0].set_xlabel('Round')
    axes[0, 0].set_ylabel('Cumulative Regret')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)
    
    # Total payoff comparison
    for i, (name, results) in enumerate(results_dict.items()):
        cumulative_payoff = np.cumsum([results['payoff_history'][j][results['action_history'][j]] 
                                     for j in range(len(results['action_history']))])
        axes[0, 1].plot(cumulative_payoff, label=name, color=colors[i], linewidth=2)
    axes[0, 1].set_title('Total Payoff Comparison')
    axes[0, 1].set_xlabel('Round')
    axes[0, 1].set_ylabel('Cumulative Payoff')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)
    
    # Final regret comparison
    final_regrets = [results['regret_history'][-1] for results in results_dict.values()]
    machine_names = list(results_dict.keys())
    axes[1, 0].bar(machine_names, final_regrets, color=colors)
    axes[1, 0].set_title('Final Regret by Machine')
    axes[1, 0].set_ylabel('Final Regret')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(True, alpha=0.3)
    
    # Final total payoff comparison
    final_payoffs = [results['total_payoff'] for results in results_dict.values()]
    axes[1, 1].bar(machine_names, final_payoffs, color=colors)
    axes[1, 1].set_title('Final Total Payoff by Machine')
    axes[1, 1].set_ylabel('Total Payoff')
    axes[1, 1].tick_params(axis='x', rotation=45)
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(f'../../figures/pachinko_machine_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()


In [15]:
# Run EW Algorithm on Pachinko Machines
print("Running Exponential Weights Algorithm on Pachinko Machines")
print("=" * 60)

# Parameters
n_rounds = 1000
epsilon_values = [0.001, 0.01, 0.1]  # Different learning rates to test

# Store results for all machines
all_results = {}

# Test on each machine
for machine_idx in range(len(pachinko_data)):
    machine_data = pachinko_data.iloc[machine_idx]
    machine_name = machine_data['machine']
    
    print(f"\nTesting Machine {machine_idx + 1}: {machine_name}")
    print("-" * 50)
    
    # Test different learning rates for this machine
    machine_results = {}
    
    for epsilon in epsilon_values:
        print(f"\nTesting with ε = {epsilon:.3f}")
        
        # Create payoff generator for this machine
        payoff_gen = PachinkoPayoffGenerator(machine_data, n_rounds)
        
        # Create and run EW algorithm
        ew_algorithm = PachinkoExponentialWeights(epsilon, n_rounds)
        results = ew_algorithm.run_algorithm(payoff_gen.generate_payoffs)
        
        # Store results
        machine_results[f'ε={epsilon:.3f}'] = results
        
        # Print summary
        print(f"  Final Regret: {results['regret_history'][-1]:.4f}")
        print(f"  Total Payoff: {results['total_payoff']:.4f}")
        print(f"  Normal Action Payoff: {results['cumulative_payoffs'][0]:.4f}")
        print(f"  Rush Action Payoff: {results['cumulative_payoffs'][1]:.4f}")
        print(f"  Best Action: {'Normal' if results['cumulative_payoffs'][0] > results['cumulative_payoffs'][1] else 'Rush'}")
        
        # Count state transitions
        normal_states = sum(1 for state in results['state_history'] if state == 'normal')
        rush_states = sum(1 for state in results['state_history'] if state == 'rush')
        print(f"  Normal States: {normal_states}, Rush States: {rush_states}")
    
    # Store machine results
    all_results[machine_name] = machine_results
    
    # Plot results for this machine (using optimal epsilon)
    optimal_epsilon = 0.01  # Use middle value for visualization
    if f'ε={optimal_epsilon:.3f}' in machine_results:
        plot_pachinko_results(machine_results[f'ε={optimal_epsilon:.3f}'], 
                             machine_name, optimal_epsilon)


Running Exponential Weights Algorithm on Pachinko Machines

Testing Machine 1: e Tokyo Ghoul W (スマパチ)
--------------------------------------------------

Testing with ε = 0.001
Machine: e Tokyo Ghoul W (スマパチ)
Normal hit prob: 0.002501
Rush hit prob: 0.010493
Rush entry prob: 0.5100
Rush continue prob: 0.7500
Rush ST spins: 130
Normal payouts: [300, 1500]
Rush payouts: [3000, 6000]


AttributeError: 'function' object has no attribute 'current_state'

In [16]:
# Compare results across machines and learning rates
print("\n" + "="*60)
print("COMPARATIVE ANALYSIS")
print("="*60)

# Create comparison plots for each learning rate
for epsilon in epsilon_values:
    print(f"\nComparing machines with ε = {epsilon:.3f}")
    print("-" * 40)
    
    # Collect results for this epsilon across all machines
    epsilon_results = {}
    for machine_name, machine_results in all_results.items():
        if f'ε={epsilon:.3f}' in machine_results:
            epsilon_results[machine_name] = machine_results[f'ε={epsilon:.3f}']
    
    # Plot comparison
    plot_machine_comparison(epsilon_results, f'Machine Comparison (ε = {epsilon:.3f})')
    
    # Print summary statistics
    print(f"\nSummary for ε = {epsilon:.3f}:")
    for machine_name, results in epsilon_results.items():
        final_regret = results['regret_history'][-1]
        total_payoff = results['total_payoff']
        best_action = 'Normal' if results['cumulative_payoffs'][0] > results['cumulative_payoffs'][1] else 'Rush'
        print(f"  {machine_name}: Regret={final_regret:.2f}, Payoff={total_payoff:.2f}, Best={best_action}")

# Create summary table
print("\n" + "="*80)
print("SUMMARY TABLE")
print("="*80)

summary_data = []
for machine_name, machine_results in all_results.items():
    for epsilon_str, results in machine_results.items():
        epsilon_val = float(epsilon_str.split('=')[1])
        summary_data.append({
            'Machine': machine_name,
            'Epsilon': epsilon_val,
            'Final_Regret': results['regret_history'][-1],
            'Total_Payoff': results['total_payoff'],
            'Normal_Payoff': results['cumulative_payoffs'][0],
            'Rush_Payoff': results['cumulative_payoffs'][1],
            'Best_Action': 'Normal' if results['cumulative_payoffs'][0] > results['cumulative_payoffs'][1] else 'Rush'
        })

summary_df = pd.DataFrame(summary_data)
print(summary_df.to_string(index=False))

# Save results
summary_df.to_csv('../../data/pachinko_ew_results.csv', index=False)
print(f"\nResults saved to: ../../data/pachinko_ew_results.csv")



COMPARATIVE ANALYSIS

Comparing machines with ε = 0.001
----------------------------------------


NameError: name 'plt' is not defined

In [None]:
# Compare results across machines and learning rates
print("\n" + "="*60)
print("COMPARATIVE ANALYSIS")
print("="*60)

# Create comparison plots for each learning rate
for epsilon in epsilon_values:
    print(f"\nComparing machines with ε = {epsilon:.3f}")
    print("-" * 40)
    
    # Collect results for this epsilon across all machines
    epsilon_results = {}
    for machine_name, machine_results in all_results.items():
        if f'ε={epsilon:.3f}' in machine_results:
            epsilon_results[machine_name] = machine_results[f'ε={epsilon:.3f}']
    
    # Plot comparison
    plot_machine_comparison(epsilon_results, f'Machine Comparison (ε = {epsilon:.3f})')
    
    # Print summary statistics
    print(f"\nSummary for ε = {epsilon:.3f}:")
    for machine_name, results in epsilon_results.items():
        final_regret = results['regret_history'][-1]
        total_payoff = results['total_payoff']
        best_action = 'Normal' if results['cumulative_payoffs'][0] > results['cumulative_payoffs'][1] else 'Rush'
        print(f"  {machine_name}: Regret={final_regret:.2f}, Payoff={total_payoff:.2f}, Best={best_action}")

# Create summary table
print("\n" + "="*80)
print("SUMMARY TABLE")
print("="*80)

summary_data = []
for machine_name, machine_results in all_results.items():
    for epsilon_str, results in machine_results.items():
        epsilon_val = float(epsilon_str.split('=')[1])
        summary_data.append({
            'Machine': machine_name,
            'Epsilon': epsilon_val,
            'Final_Regret': results['regret_history'][-1],
            'Total_Payoff': results['total_payoff'],
            'Normal_Payoff': results['cumulative_payoffs'][0],
            'Rush_Payoff': results['cumulative_payoffs'][1],
            'Best_Action': 'Normal' if results['cumulative_payoffs'][0] > results['cumulative_payoffs'][1] else 'Rush'
        })

summary_df = pd.DataFrame(summary_data)
print(summary_df.to_string(index=False))

# Save results
summary_df.to_csv('../../data/pachinko_ew_results.csv', index=False)
print(f"\nResults saved to: ../../data/pachinko_ew_results.csv")


In [None]:
# State Transition Analysis
print("\n" + "="*60)
print("STATE TRANSITION ANALYSIS")
print("="*60)

def analyze_state_transitions(results, machine_name):
    """Analyze state transition patterns"""
    state_history = results['state_history']
    action_history = results['action_history']
    
    # Count transitions
    normal_to_rush = 0
    rush_to_normal = 0
    total_transitions = 0
    
    for i in range(1, len(state_history)):
        if state_history[i] != state_history[i-1]:
            total_transitions += 1
            if state_history[i-1] == 'normal' and state_history[i] == 'rush':
                normal_to_rush += 1
            elif state_history[i-1] == 'rush' and state_history[i] == 'normal':
                rush_to_normal += 1
    
    # Count state durations
    normal_durations = []
    rush_durations = []
    current_duration = 1
    current_state = state_history[0]
    
    for i in range(1, len(state_history)):
        if state_history[i] == current_state:
            current_duration += 1
        else:
            if current_state == 'normal':
                normal_durations.append(current_duration)
            else:
                rush_durations.append(current_duration)
            current_state = state_history[i]
            current_duration = 1
    
    # Add final duration
    if current_state == 'normal':
        normal_durations.append(current_duration)
    else:
        rush_durations.append(current_duration)
    
    print(f"\n{machine_name}:")
    print(f"  Total transitions: {total_transitions}")
    print(f"  Normal → Rush: {normal_to_rush}")
    print(f"  Rush → Normal: {rush_to_normal}")
    print(f"  Avg Normal duration: {np.mean(normal_durations):.2f}")
    print(f"  Avg Rush duration: {np.mean(rush_durations):.2f}")
    
    return {
        'total_transitions': total_transitions,
        'normal_to_rush': normal_to_rush,
        'rush_to_normal': rush_to_normal,
        'avg_normal_duration': np.mean(normal_durations),
        'avg_rush_duration': np.mean(rush_durations)
    }

# Analyze state transitions for each machine
state_analysis = {}
for machine_name, machine_results in all_results.items():
    # Use middle epsilon for analysis
    if 'ε=0.010' in machine_results:
        state_analysis[machine_name] = analyze_state_transitions(
            machine_results['ε=0.010'], machine_name
        )
