# Thompson Sampling for Bernoulli Bandits

In [7]:
import numpy as np

In [8]:
# Experiment configuration.
NUM_OF_BANDITS = 10
NUM_OF_TRIALS = 5000

# Every draw in this notebook goes through NumPy's global RNG
# (np.random.binomial / np.random.beta), so seed it once, up front, to make
# "Restart Kernel -> Run All" reproduce the same numbers.
SEED = 42
np.random.seed(SEED)

In [23]:
class TS_Bandit:
    """One arm of a Bernoulli bandit, with a Beta belief for Thompson sampling.

    The arm pays out 1 with probability ``probability`` and 0 otherwise. The
    belief about that probability is stored as Beta(alpha, beta); it starts at
    the flat Beta(1, 1) prior and is updated after every pull.
    """

    def __init__(self, probability):
        """Create an arm whose true success probability is ``probability``.

        Raises:
            ValueError: if ``probability`` is not within [0, 1] (NaN included).
        """
        # Fail fast: without this check a bad value only surfaces at the first
        # pull, as a ValueError raised from inside np.random.binomial.
        if not (0.0 <= probability <= 1.0):
            raise ValueError(
                f"probability must be within [0, 1], got {probability!r}"
            )

        # For log purposes (nothing in this class writes to it)
        self.result = []

        # latent variables
        self.probability = probability

        # Beta is the conjugate prior of the Bernoulli likelihood, so the
        # posterior stays a Beta distribution; alpha = beta = 1 is the uniform prior.
        self.alpha = 1
        self.beta = 1

    def update_posterior(self, reward):
        """Fold one observed reward into the belief.

        A truthy ``reward`` increments ``alpha``; a falsy one increments ``beta``.
        """
        if reward:
            self.alpha += 1
        else:
            self.beta += 1

    def pull_bandit_arm(self):
        """Pull the arm once, update the posterior with the outcome, and return it."""
        # A binomial draw with n=1 is a single Bernoulli trial:
        #   P(X = x) = p^x * (1 - p)^(1 - x),  x in {0, 1}
        # where p is the success probability.
        reward = np.random.binomial(n=1, p=self.probability, size=None)

        self.update_posterior(reward)

        return reward
        

In [29]:
def sample_reward_probability(bandit_list):
    """Draw one sample from each bandit's Beta(alpha, beta) belief.

    Returns a list with one draw per bandit, in the same order as ``bandit_list``.
    """
    draws = []
    for arm in bandit_list:
        draws.append(np.random.beta(arm.alpha, arm.beta))
    return draws

In [30]:
# Throwaway arm used to sanity-check the posterior update: a fair coin.
temp_bandit = TS_Bandit(probability=0.5)

In [31]:
# Pull the sanity-check arm many times; alpha and beta should each end up
# close to half the number of pulls for a fair coin.
N_SANITY_PULLS = 10000
for trial in range(N_SANITY_PULLS):
    reward = temp_bandit.pull_bandit_arm()

# `reward` holds only the outcome of the final pull.
print(reward)
print(f'{temp_bandit.alpha} || {temp_bandit.beta}')

0
4934 || 5068


In [32]:
# One arm per true success probability, in increasing order.
bandit_list = [TS_Bandit(p) for p in (0.5, 0.6, 0.7, 0.8, 0.85)]

In [34]:
# One Thompson-sampling draw per arm. Every TS_Bandit starts at alpha = beta = 1,
# so if no arm has been pulled yet these draws come from the flat Beta(1, 1)
# prior and say nothing about the arms' true probabilities.
sample_reward_probability(bandit_list)

[0.5383431170179358,
 0.49892870276328183,
 0.36410706427474815,
 0.24940233176366455,
 0.01596542331156315]