In [180]:
import numpy as np
import time
import random

In [62]:
# Each bandit has a specified probability of success (chance) and payoff
# Use this structure: https://stackoverflow.com/questions/13411668/global-dictionary-within-a-class-python
# Overview of class: https://docs.python.org/3/tutorial/classes.html

class Bandits():
    """Collection of simulated bandits belonging to one named scenario.

    Every bandit is stored in ``self.bandits`` under its name as a dict with
    three keys:

    * ``'payoff'``   - NumPy array of pre-drawn rewards, one entry per draw
    * ``'tries'``    - number of times the bandit was chosen (starts at 0)
    * ``'estimate'`` - list of value estimates (starts as ``[0]``)
    """

    def __init__( self, scenarioName ):
        """Create an empty scenario.

        Parameters
        ----------
        scenarioName : label stored on the instance as ``self.scenarioName``.
        """
        # Seeds NumPy's *global* generator from the wall clock, so the payoffs
        # drawn later differ from run to run.
        np.random.seed(int(time.time()))
        self.scenarioName = scenarioName
        self.allNames     = []
        self.bandits      = {}

    def addBandit(self, name, award, chance, draws, payoff = None, tries = 0):
        """Register a single bandit and pre-draw its payoffs.

        Parameters
        ----------
        name   : key under which the bandit is stored in ``self.bandits``.
        award  : reward paid on a successful draw.
        chance : probability of success of each draw.
        draws  : total number of draws generated for the bandit.
        payoff : accepted for interface compatibility; not used.
        tries  : accepted for interface compatibility; not used (the stored
                 counter always starts at 0).

        Notes
        -----
        The attributes ``self.name``, ``self.award``, ``self.chance``,
        ``self.draws``, ``self.payoff``, ``self.tries`` and ``self.estimate``
        are overwritten on every call, so they describe only the most
        recently added bandit. This method does not touch ``self.allNames``.
        """
        self.name    = name
        self.award   = award
        self.chance  = chance
        # Total number of draws for bandit
        self.draws   = draws
        # One 0/1 outcome per draw (binomial with n = 1), scaled by the award
        self.payoff  = award * np.array( np.random.binomial( n = 1, p = chance, size = draws ) )
        # Number of times bandit was chosen
        self.tries = 0
        # Estimate after each try
        self.estimate = [0]
        self.bandits[self.name] = { 'payoff': self.payoff,
                                    'tries' : self.tries,
                                    'estimate' : self.estimate }

    def addBandits( self, names, awards, chances, draws ):
        """Register several bandits at once.

        Parameters
        ----------
        names, awards, chances : parallel sequences; element ``i`` of each
            describes bandit ``i``. ``awards`` and ``chances`` are indexed by
            position in ``names``.
        draws : number of draws generated for every bandit.
        """
        for i, banditName in enumerate( names ):
            # Forward the caller's draws value (it used to be fixed at 10).
            self.addBandit( name = banditName, award = awards[i], chance = chances[i], draws = draws )
            # Add the new names to allNames
            self.allNames.append( banditName )

In [None]:
# Function calculates upper confidence bound of a bandit.
def calcConfidenceBound(bandit, step = None):
    """Return the upper confidence bound (UCB) of one bandit.

    The bound is the estimate stored at index ``bandit['tries']`` plus the
    exploration bonus ``sqrt(2 * ln(step) / (1 + tries))``.

    Parameters
    ----------
    bandit : dict with an ``'estimate'`` sequence and an integer ``'tries'``
        that is used to index it.
    step : current time step. When omitted, the notebook-level variable
        ``t`` is read, which keeps existing one-argument calls working.

    Returns
    -------
    The estimate plus the exploration bonus. The bonus is 0 when ``step`` is
    1 (``ln(1) == 0``) and is not a real number for ``step`` below 1.
    """
    if step is None:
        # Backward-compatible fallback: use the global time step.
        step = t

    tries = bandit['tries']
    # 1 + tries keeps the denominator non-zero for a bandit never pulled.
    bonus = np.sqrt( ( 2 * np.log( step ) ) / ( 1 + tries ) )

    return bandit['estimate'][tries] + bonus

In [91]:
# Empty scenario that will hold the five bandits added in the next cell
bandits = Bandits( scenarioName = '5Bandits' )

In [92]:
# Describe the five bandits: a unit award each, success chance rising from 0.1 to 0.9
names   = [ 'Bandit' + str( k ) for k in range( 1, 6 ) ]
awards  = [1, 1, 1, 1, 1]
chances = [.1,.3,.5,.7,.9]

# Register all of them in the scenario, with 10 draws requested per bandit
bandits.addBandits( names, awards, chances, 10 )

In [79]:
# Beta test upper confidence bound approach
# t is the time step read by calcConfidenceBound. With t = 1, np.log(t) is 0,
# so the exploration bonus vanishes and each UCB equals the bandit's estimate.
t = 1

In [214]:
# UCB of every bandit, in dictionary order: indexed estimate plus exploration bonus
estimates = [
    record['estimate'][record['tries']]
    + np.sqrt( ( 2 * np.log( t ) ) / ( 1 + record['tries'] ) )
    for record in bandits.bandits.values()
]

In [215]:
# Show the per-bandit UCB values held in `estimates`
estimates

[0.0, 0.0, 0.0, 0.0, 0.0]

In [207]:
# Largest UCB value across all bandits
max(estimates)

0.0

In [222]:
# Name of the bandit whose UCB is largest (the first one wins on ties)
banditKey = max(
    bandits.bandits.items(),
    key=lambda entry: entry[1]['estimate'][entry[1]['tries']]
    + np.sqrt( ( 2 * np.log( t ) ) / ( 1 + entry[1]['tries'] ) ),
)[0]

In [230]:
# Same selection as above, with the UCB computed by calcConfidenceBound
banditKey = max(
    bandits.bandits,
    key=lambda banditName: calcConfidenceBound( bandits.bandits[banditName] ),
)

In [241]:
# Highest upper confidence bound among all bandits
ucbMax = max( [ calcConfidenceBound( record ) for record in bandits.bandits.values() ] )

In [255]:
# Collect every bandit whose UCB equals the maximum, then pick one of them at
# random so that the choice among tied bandits does not depend on the order in
# which the bandits were added.
BanditsUCBMax = []
for key in bandits.bandits:
    value = bandits.bandits[key]
    if calcConfidenceBound(value) != ucbMax:
        continue
    BanditsUCBMax.append(key)
banditPulled = random.choice(BanditsUCBMax)

In [229]:
def calcConfidenceBound(bandit, step = None):
    """Return the upper confidence bound (UCB) of one bandit.

    The bound is the estimate stored at index ``bandit['tries']`` plus the
    exploration bonus ``sqrt(2 * ln(step) / (1 + tries))``.

    Parameters
    ----------
    bandit : dict with an ``'estimate'`` sequence and an integer ``'tries'``
        that is used to index it.
    step : current time step. When omitted, the notebook-level variable
        ``t`` is read, which keeps existing one-argument calls working.

    Returns
    -------
    The estimate plus the exploration bonus. The bonus is 0 when ``step`` is
    1 (``ln(1) == 0``) and is not a real number for ``step`` below 1.
    """
    if step is None:
        # Backward-compatible fallback: use the global time step.
        step = t

    tries = bandit['tries']
    # 1 + tries keeps the denominator non-zero for a bandit never pulled.
    bonus = np.sqrt( ( 2 * np.log( step ) ) / ( 1 + tries ) )

    return bandit['estimate'][tries] + bonus

In [227]:
# Spot-check the helper on a single bandit record
calcConfidenceBound( bandits.bandits['Bandit1'] )

0.0

In [None]:
# TODO: At the start, try each bandit once before switching to UCB-based selection

In [84]:
# NOTE(review): exampleUCB is not assigned in any visible cell; presumably it
# came from a deleted cell, so this line would fail on a fresh kernel. Confirm.
exampleUCB

0.0

In [None]:
# Method for getting the key of the value with highest upper confidence bound
# https://stackoverflow.com/questions/3282823/get-the-key-corresponding-to-the-minimum-value-within-a-dictionary