# Online Advertising as Multi-armed Bandit (MAB)

### Set up the MAB

In [7]:
import numpy as np

# Initialization
# Seed NumPy's global generator so that "Restart Kernel -> Run All" reproduces
# the same simulation; the cells below draw from np.random.* directly.
SEED = 42
np.random.seed(SEED)

# Number of arms and number of simulated trials
num_arms = 3
num_trials = 10000

# Probability distribution of each arm:
# pulling arm i draws a reward from Normal(mu[i], sigma[i])
mu = [7, 10, 6]
sigma = [0.45, 0.65, 0.35]
assert len(mu) == len(sigma) == num_arms, "mu and sigma need one entry per arm"

# Counters for each arm: number of pulls and summed reward
counts = np.zeros(num_arms)
rewards = np.zeros(num_arms)

# Select initial arm.
# The epsilon-greedy loop re-selects an arm at the start of every trial, so
# this value only matters to code that reads `a` before that loop runs.
a = np.random.choice(num_arms)

### $\epsilon$-greedy strategy

In [8]:
# Epsilon for epsilon-greedy algorithm: probability of exploring on a trial
eps = 0.1

# Start from fresh per-arm totals so that re-running this cell repeats the
# experiment instead of piling another num_trials pulls onto the previous run.
counts = np.zeros(num_arms)
rewards = np.zeros(num_arms)

for t in range(num_trials):
    # Select arm
    if np.random.rand() > eps:  # Exploit: arm with the highest mean reward so far
        # np.maximum(counts, 1) guards the division: an arm that has not been
        # pulled yet has rewards == 0 and therefore scores exactly 0, while
        # pulled arms get their exact sample mean (no small-constant fudge).
        a = np.argmax(rewards / np.maximum(counts, 1))
    else:  # Explore: uniformly random arm
        a = np.random.choice(num_arms)

    # Simulate the reward of the chosen arm: one draw from Normal(mu[a], sigma[a])
    reward = np.random.normal(mu[a], sigma[a])

    # Update counters
    counts[a] += 1
    rewards[a] += reward

# Estimated click-through rates: sample mean of the rewards seen per arm.
# An arm that was never pulled gets NaN instead of a divide-by-zero warning.
estimates = np.divide(
    rewards,
    counts,
    out=np.full(num_arms, np.nan),
    where=counts > 0,
)

### Print estimated click-through rates

In [9]:
# Show the per-arm estimates; compare against the true means mu = [7, 10, 6]
print(
    "Estimated click-through rates: ",
    estimates,
)

Estimated click-through rates:  [ 6.99670105 10.00601168  6.00450476]
