# Multi-Armed Bandit (MAB) Definition

## Class for simulating a slot machine

In [5]:
import numpy as np


# Class for a single slot machine. Rewards are Gaussian.
class GaussianBandit(object):
    """A single slot machine whose rewards are drawn from a normal distribution.

    Attributes:
        mean: Centre of the reward distribution.
        stdev: Standard deviation of the reward distribution.
    """

    def __init__(self, mean=0, stdev=1):
        self.mean, self.stdev = mean, stdev

    def pull_lever(self):
        """Draw one reward from the machine, rounded to one decimal place."""
        sample = np.random.normal(loc=self.mean, scale=self.stdev)
        return np.round(sample, decimals=1)


## Define a Gaussian bandit game

In [6]:
class GaussianBanditGame(object):
    """Game played over a collection of slot machines.

    The machines are kept in a randomly shuffled order. Machines are
    addressed by 1-based number, i.e. machine ``k`` is ``self.bandits[k - 1]``.

    Attributes:
        bandits: Shuffled list of machines; each must provide ``pull_lever()``.
        rewards: Rewards collected since the last reset, in play order.
        total_reward: Sum of ``rewards``.
        n_played: Number of pulls since the last reset.
    """

    def __init__(self, bandits):
        """Store a shuffled copy of *bandits* and start a fresh game."""
        # Shuffle a private copy: shuffling the argument in place would
        # silently reorder the caller's own list.
        self.bandits = list(bandits)
        np.random.shuffle(self.bandits)
        self.reset_game()

    def play(self, choice):
        """Pull the lever of machine number *choice* and record the reward.

        Args:
            choice: 1-based machine number, from 1 to ``len(self.bandits)``.

        Returns:
            The reward returned by the chosen machine.

        Raises:
            IndexError: If *choice* is outside ``1..len(self.bandits)``.
        """
        # Without this guard, 0 and negative numbers would wrap around through
        # Python's negative indexing and silently play a different machine.
        if not 1 <= choice <= len(self.bandits):
            raise IndexError(
                f"choice must be between 1 and {len(self.bandits)}, "
                f"got {choice}"
            )
        reward = self.bandits[choice - 1].pull_lever()
        self.rewards.append(reward)
        self.total_reward += reward
        self.n_played += 1
        return reward

    def user_play(self):
        """Run an interactive session on the console.

        Resets the game, then repeatedly asks for a machine number. Any
        integer outside ``1..len(self.bandits)`` (0 is the advertised one)
        ends the game; non-numeric input is rejected and asked for again.
        A summary is printed at the end if at least one round was played.
        """
        self.reset_game()
        n_machines = len(self.bandits)
        print("Game started. Enter 0 as input to end the game.")
        while True:
            print(f" -- Round {self.n_played}")
            answer = input(f"Choose a machine from 1 to {n_machines}: ")
            try:
                choice = int(answer)
            except ValueError:
                # A typo should not crash the session and lose the score.
                print("Invalid input. Please enter a whole number.")
                continue

            if not 1 <= choice <= n_machines:
                # game has finished
                break

            reward = self.play(choice)
            print(f"Machine {choice} gave a reward of {reward}.")
            avg_rew = self.total_reward / self.n_played
            print(f"Your average reward so far is {avg_rew}.")
        print("Game has ended.")

        if self.n_played > 0:
            print(
                f"Total reward is {self.total_reward}"
                f" after {self.n_played} round(s)."
            )
            avg_rew = self.total_reward / self.n_played
            print(f"Average reward is {avg_rew}.")

    def reset_game(self):
        """Clear the reward history and counters."""
        self.rewards = []
        self.total_reward = 0
        self.n_played = 0

## Play the multi-armed bandit game

In [7]:
# Three machines with different reward distributions (mean, standard deviation).
slotA = GaussianBandit(mean=5, stdev=3)
slotB = GaussianBandit(mean=6, stdev=2)
slotC = GaussianBandit(mean=1, stdev=5)

# Bundle the machines into one game.
game = GaussianBanditGame(bandits=[slotA, slotB, slotC])

In [8]:
# Start an interactive session: prompts on the console for a machine number
# each round until a number outside the valid range (e.g. 0) is entered.
game.user_play()

Game started. Enter 0 as input to end the game.
 -- Round 0
Machine 2 gave a reward of 7.4.
Your average reward so far is 7.4.
 -- Round 1
Machine 3 gave a reward of 3.7.
Your average reward so far is 5.550000000000001.
 -- Round 2
Machine 1 gave a reward of 8.5.
Your average reward so far is 6.533333333333334.
 -- Round 3
Machine 3 gave a reward of 6.4.
Your average reward so far is 6.5.
 -- Round 4
Machine 2 gave a reward of 7.3.
Your average reward so far is 6.659999999999999.
 -- Round 5
Machine 1 gave a reward of 8.2.
Your average reward so far is 6.916666666666667.
 -- Round 6
Machine 3 gave a reward of -3.8.
Your average reward so far is 5.385714285714286.
 -- Round 7
Machine 2 gave a reward of 4.4.
Your average reward so far is 5.2625.
 -- Round 8
Machine 1 gave a reward of -8.6.
Your average reward so far is 3.7222222222222223.
 -- Round 9
Machine 2 gave a reward of 6.4.
Your average reward so far is 3.9899999999999998.
 -- Round 10
Machine 3 gave a reward of 2.7.
Your average reward so far is approximately 3.87. [output truncated]

# Bernoulli Bandit for Online Advertisement

In [10]:
class BernoulliBandit(object):
    """An ad modelled as a Bernoulli trial.

    Attributes:
        p: Probability that a single display yields a reward of 1.
    """

    def __init__(self, p):
        self.p = p

    def display_ad(self):
        """Show the ad once; return 1 with probability ``p``, otherwise 0."""
        # A binomial draw with a single trial is a Bernoulli draw.
        return np.random.binomial(n=1, p=self.p)

In [11]:
# Five ads, each with its own probability of yielding a reward of 1 per
# display (presumably a click; confirm against how rewards are used later).
adA = BernoulliBandit(p=0.004)
adB = BernoulliBandit(p=0.016)
adC = BernoulliBandit(p=0.02)
adD = BernoulliBandit(p=0.028)
adE = BernoulliBandit(p=0.031)

# All candidate ads, in ascending order of reward probability.
ads = [
    adA,
    adB,
    adC,
    adD,
    adE,
]