In [290]:
import numpy as np
import time
import random

In [291]:
# Each bandit has a specified probability of success (chance) and payoff
# Use this structure: https://stackoverflow.com/questions/13411668/global-dictionary-within-a-class-python
# Overview of class: https://docs.python.org/3/tutorial/classes.html

class Bandits():
    def __init__( self, scenarioName ):
        np.random.seed(int(time.time()))
        self.scenarioName = scenarioName
        self.allNames     = []
        self.bandits      = {}
        
    def addBandit(self, name, award, chance, draws, payoff = None, tries = 0):
        
        self.name    = name
        self.award   = award
        self.chance  = chance
        # Total number of draws for bandit
        self.draws   = draws
        self.payoff  = award * np.array( np.random.binomial( n = 1, p = chance, size = draws ) )
        # Number of times bandit was chosen
        self.tries = 0
        # Estimate after each try
        self.estimate = [0]
        self.bandits[self.name] = { 'payoff': self.payoff,
                                    'tries' : self.tries,
                                    'estimate' : self.estimate }
        
    def addBandits( self, names, awards, chances, draws ):

        for i in range( 0, len(names)):
            self.addBandit( name = names[i], award = awards[i], chance = chances[i], draws = 10 )
            # Add the new names to allNames
            self.allNames.append(names[i])

In [292]:
# Function calculates upper confidence bound of a bandit.
def CalcConfidenceBound(bandit):
    
    ucb = bandit['estimate'][bandit['tries']] + \
    np.sqrt( ( 2 * np.log( t ) )/ ( 1 + bandit['tries'] ) )
    
    return ucb

In [302]:
# Function determines which bandit has the highest upper confidence bound, pulls bandit, and
def ChooseBandit(bandits, t ):
    
    # Find the maximum UCB
    ucbMax = max( list( map( lambda x: CalcConfidenceBound(bandits.bandits[x]), bandits.bandits ) ) )
    
    # Identify bandit or bandits with the maximum UCB. Break ties randomly. This ensures the bandit that is chosen in the
    # case of ties is independent of bandit ordering. 
    BanditsUCBMax = list()
    for key, value in bandits.bandits.items():
        if CalcConfidenceBound(value) == ucbMax:
            BanditsUCBMax.append(key)
    pullBandit = random.choice(BanditsUCBMax)
    
    # Select bandit payoff, update estimate
    # t is initialized at 1 to prevent sqrt(0) in UCB calculation. t-1 is current payoff position.
    reward = bandits.bandits[pullBandit]['payoff'][t-1]
    
    # Calculate running average of reward across all draws 
    # Approach: https://math.stackexchange.com/questions/106313/regular-average-calculated-accumulatively
    # Total number of bandit pulls
    pulls = len(bandits.bandits[pullBandit]['estimate'])
    # Average up to, but no including, the latest reward
    currAverage = bandits.bandits[pullBandit]['estimate'][pulls-1]
    bandits.bandits[pullBandit]['estimate'] * len(bandits.bandits[pullBandit]['estimate'])

    newEstimate = ( pulls * currAverage + reward ) / ( pulls + 1 )
    bandits.bandits[pullBandit]['estimate'].append(newEstimate)
    
    return bandits

In [303]:
bandits = Bandits('5Bandits')

In [304]:
# Pass list of bandit attributes
names   = ['Bandit1','Bandit2','Bandit3','Bandit4','Bandit5']
awards  = [1] * 5
chances = [.1,.3,.5,.7,.9]

bandits.addBandits( names = names, awards = awards, chances = chances, draws = 10)

In [305]:
# Beta test upper confidence bound approach
t = 1

In [306]:
bandits = ChooseBandit(bandits,t)

In [307]:
bandits.bandits

{'Bandit1': {'payoff': array([0, 1, 0, 0, 0, 0, 1, 0, 0, 0]),
  'tries': 0,
  'estimate': [0]},
 'Bandit2': {'payoff': array([0, 0, 0, 0, 0, 1, 1, 0, 0, 0]),
  'tries': 0,
  'estimate': [0]},
 'Bandit3': {'payoff': array([0, 1, 0, 0, 0, 1, 1, 1, 1, 0]),
  'tries': 0,
  'estimate': [0]},
 'Bandit4': {'payoff': array([0, 1, 1, 1, 0, 1, 1, 1, 1, 1]),
  'tries': 0,
  'estimate': [0]},
 'Bandit5': {'payoff': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
  'tries': 0,
  'estimate': [0, 0.5]}}

In [None]:
# At the beginning, try each bandit once first