In [2]:
import numpy as np
from itertools import compress
from statsmodels.stats.proportion import proportions_ztest 

# Seed NumPy's legacy global RNG. Bandit.pull() draws from this global
# stream via np.random.binomial, so a fixed seed makes the run repeatable.
np.random.seed(0)

class Bandit:
    """A single Bernoulli arm that remembers every reward it has paid out.

    Attributes:
        mean: success probability used for each pull.
        rewards: list of the 0/1 outcomes, in the order they were drawn.
    """

    def __init__(self, mean):
        """Create an arm with success probability ``mean`` and no history."""
        self.rewards = []
        self.mean = mean

    def pull(self):
        """Draw one Bernoulli outcome, record it, and return it.

        The draw comes from NumPy's global random state, so results depend
        on whatever seed (if any) was set through ``np.random.seed``.
        """
        outcome = np.random.binomial(n=1, p=self.mean)
        self.rewards.append(outcome)
        return outcome

# Successive-elimination experiment: every surviving arm is pulled the same
# number of times per round, then arms whose success count is significantly
# below the current leader's are dropped.

# Five Bernoulli arms with increasing success probability (best is 0.10).
original = [Bandit(0.02), Bandit(0.04), Bandit(0.06), Bandit(0.08), Bandit(0.10)]

# Working list of arms still in the race. It is only ever rebound below, so
# `original` keeps every arm (and its reward history) for the final totals.
bandits = list(original)
simulations = 500  # pulls per surviving arm in each round
max_rounds = 10    # stop even if more than one arm is still alive
alpha = 0.05       # arms with p-value <= alpha against the leader are dropped

num_bandits = len(bandits)
num_iter = 0

while num_bandits > 1 and num_iter < max_rounds:
    num_iter += 1

    # Round-robin pulls: the order matters because all arms share the global
    # NumPy random stream, so changing it would change the seeded results.
    for sim in range(simulations):
        for bandit in bandits:
            bandit.pull()

    n_reward = [np.sum(bandit.rewards) for bandit in bandits]
    max_reward = max(n_reward)
    # Every survivor has been pulled in every round so far, so the first
    # arm's history length is the sample size for all of them.
    nobs = len(bandits[0].rewards)

    # Compare each arm with the leader using a two-proportion z-test.
    # An arm tied with the leader gets p = 1.0 without running the test:
    # that is what the test returns for equal counts in the normal case, and
    # it avoids the NaN produced when the pooled variance is zero (leader has
    # 0 successes, or every pull succeeded). A NaN would fail `> alpha` and
    # eliminate every arm, leaving `bandits` empty.
    p_value = [
        1.0 if count == max_reward
        else proportions_ztest((count, max_reward), (nobs, nobs))[1]
        for count in n_reward
    ]
    bandits = list(compress(bandits, np.array(p_value) > alpha))
    num_bandits = len(bandits)

# All survivors share the same pull count, so the first one is representative.
evaluations = len(bandits[0].rewards)
# Totals are taken over every arm, including the eliminated ones.
total_evaluations = np.sum([len(bandit.rewards) for bandit in original])
total_reward = np.sum([np.sum(bandit.rewards) for bandit in original])
avg_reward = total_reward / total_evaluations

  import pandas.util.testing as tm


In [3]:
# Report for the elimination run. Labels are user-facing Spanish text and are
# kept verbatim; the label's trailing space plus print's separator yields the
# two spaces seen before each value.

# Take the best achievable rate from the configured arms instead of
# repeating the literal 0.10, so the report stays correct if they change.
optimal_mean = max(bandit.mean for bandit in original)

print('Numero de evaluaciones del bandido seleccionado: ', evaluations)
print('Numero de evaluaciones totales: ', total_evaluations)
# Pulls spent on arms other than the surviving one.
print('Numero de evaluaciones sub-óptimas: ', total_evaluations - evaluations)
print('Numero de recompensas: ', total_reward)
print('Porcentaje de recompensas: ', "{:.2%}".format(avg_reward))
print('Porcentaje de recompensas del bandido óptimo: ', "{:.2%}".format(optimal_mean))

Numero de evaluaciones del bandido seleccionado:  3500
Numero de evaluaciones totales:  8500
Numero de evaluaciones sub-óptimas:  5000
Numero de recompensas:  692
Porcentaje de recompensas:  8.14%
Porcentaje de recompensas del bandido óptimo:  10.00%
