## Bandit Classes and Engine

In [1]:
import numpy as np
# Short alias for NumPy's uniform [0, 1) sampler; Bandits.pull calls it as rand().
rand = np.random.rand

In [2]:
class Bandits(object):
    """
    A collection of N Bernoulli slot machines ("arms").

    parameters:
        p_array: a (n,) Numpy array with one payout probability per
                 arm; each entry is expected to be >0 and <1.

    attributes:
        p: the payout probabilities, stored exactly as passed in.
        optimal: index of the arm with the largest payout probability.

    methods:
        pull( i ): play the ith arm and report whether it paid out.
        len( bandits ): the number of arms.
    """

    def __init__(self, p_array):
        self.p = p_array
        # Remember which arm is truly the best one.
        self.optimal = np.argmax(self.p)

    def pull(self, i):
        """Play arm i once; the result is truthy on a win, falsy otherwise."""
        # A win happens when a uniform draw lands below the arm's probability.
        # The result is a boolean, which acts as 1/0 in arithmetic.
        draw = rand()
        threshold = self.p[i]
        return draw < threshold

    def __len__(self):
        """Number of arms, i.e. the length of the probability array."""
        return len(self.p)

In [3]:
class BanditEngine(object):
    """
    Implements an online learning strategy to solve
    the Multi-Armed Bandit problem.

    parameters:
        bandits: an object exposing a .pull(i) method and a length
            (the number of arms), e.g. a Bandits instance.
        choice_function: a callable that receives this engine (which
            gives access to wins, trials, N, ...) and returns an int
            between 0 and n_bandits-1, the arm to pull next.
        learning_rate: factor applied to the chosen arm's accumulated
            wins and trials before each new result is added. With the
            default of 1 these are plain counts; a value below 1
            down-weights that arm's older observations.

    methods:
        sample_bandits(n): sample and train on n pulls.

    attributes:
        N: the cumulative number of samples
        wins: per-arm (discounted) sum of results, a (n_bandits,) array
        trials: per-arm (discounted) pull count, a (n_bandits,) array
        choices: the historical choices as a (N,) integer array
        score: the historical results as a (N,) float array

    """

    def __init__(self, bandits, choice_function, learning_rate=1):
        self.bandits = bandits
        self.choice_function = choice_function
        self.learning_rate = learning_rate

        n_bandits = len(self.bandits)
        self.wins = np.zeros(n_bandits, dtype=np.float64)
        self.trials = np.zeros(n_bandits, dtype=np.float64)
        self.N = 0
        # Histories are arrays from the start so their type does not change
        # after the first call to sample_bandits; choices hold arm indices,
        # hence the integer dtype.
        self.choices = np.zeros(0, dtype=int)
        self.score = np.zeros(0, dtype=np.float64)

    def sample_bandits(self, n=1):
        """Pull n arms in sequence, updating the statistics after each pull.

        Every pull asks choice_function for an arm, plays it, folds the
        result into wins/trials, and appends to the choices/score history.
        Returns None.
        """
        score = np.zeros(n, dtype=np.float64)
        choices = np.zeros(n, dtype=int)

        for k in range(n):
            # Let the strategy pick an arm from the engine's current state.
            choice = self.choice_function(self)

            # Play the chosen arm and update its (discounted) statistics.
            result = self.bandits.pull(choice)
            self.wins[choice] = self.learning_rate * self.wins[choice] + result
            self.trials[choice] = self.learning_rate * self.trials[choice] + 1
            score[k] = result
            choices[k] = choice
            # Incremented per pull so choice_function always sees the
            # up-to-date sample count.
            self.N += 1

        self.score = np.concatenate((self.score, score))
        self.choices = np.concatenate((self.choices, choices))