In [1]:
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple

In [2]:
class Bandit:
    """A single Bernoulli bandit arm with a running estimate of its payoff.

    Attributes:
        true_mean: Probability that a pull yields a reward of 1.0. The value
            is stored as given and is not range-checked; a value <= 0 makes
            every pull return 0.0 and a value >= 1 makes every pull return 1.0.
        estimated_mean: Sample average of all rewards passed to ``update``
            so far (0.0 before the first update).
        num_pulls: Number of times ``update`` has been called.
    """

    def __init__(self, true_mean: float) -> None:
        """Create an arm with the given success probability and no history."""
        self.true_mean = true_mean
        self.estimated_mean = 0.0
        self.num_pulls = 0

    def pull(self) -> float:
        """Draw one reward from the arm.

        Uses NumPy's global random state, so results are reproducible only
        if the caller seeds it (``np.random.seed``).

        Returns:
            1.0 with probability ``true_mean``, otherwise 0.0. The result is
            a float so that it matches the annotated return type and the
            ``reward: float`` parameter of ``update``.
        """
        return 1.0 if np.random.random() < self.true_mean else 0.0

    def update(self, reward: float) -> None:
        """Fold one observed reward into the running estimate.

        Args:
            reward: The reward received for the most recent pull.
        """
        self.num_pulls += 1
        # Incremental sample-mean update: new = old + (x - old) / n.
        # Equivalent to averaging all rewards, without storing them.
        self.estimated_mean += (reward - self.estimated_mean) / self.num_pulls