
# Problem

Given multiple slot machiones and fixed cash, we have to maximize the chances of winning

In [1]:

import numpy as np

In [2]:
# We have 1000 in cash and each turn costs 1
number_of_turns = 1000


number_of_slot_machines = 6

#To keep track of wins and losses
number_of_positive_rewards = np.zeros(number_of_slot_machines)
number_of_negative_rewards = np.zeros(number_of_slot_machines)

#To get same results when running the notebook again
np.random.seed(33)

#Creating conversion rates for slot machines
conversion_rates = np.random.uniform(0.01, 0.15, number_of_slot_machines)

#This will never be known in real life!
for i in range(6):
  print('Conversion rate for slot machine {0}: {1:.2%}'.format(i, conversion_rates[i]))

Conversion rate for slot machine 0: 4.48%
Conversion rate for slot machine 1: 7.30%
Conversion rate for slot machine 2: 6.75%
Conversion rate for slot machine 3: 4.64%
Conversion rate for slot machine 4: 13.19%
Conversion rate for slot machine 5: 3.59%


## Data Set

In [3]:
#To get same results when running the notebook again
np.random.seed(55)

#2D array for wins and losses. 1 means win and vice-vers
outcomes = np.zeros((number_of_turns, number_of_slot_machines))

for turn_index in range(number_of_turns): #for each turn
    for slot_machine_index in range(number_of_slot_machines): #for each slot machine
        #if random number less than conversion rate then we've won else zero by default
        if np.random.rand() <= conversion_rates[slot_machine_index]:
            outcomes[turn_index][slot_machine_index] = 1


print(outcomes[0:15, 0:6]) 

[[0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0.]]


In [4]:
#conversion rate of each machine
for i in range(6):
  print('Mean for column {0}: {1:.2%}'.format(i, np.mean(outcomes[:, i])))

Mean for column 0: 4.10%
Mean for column 1: 7.10%
Mean for column 2: 6.30%
Mean for column 3: 4.90%
Mean for column 4: 12.30%
Mean for column 5: 3.40%


## Thompson Sampling

In [6]:
#for each turn
for turn_index in range(number_of_turns):
    index_of_machine_to_play = -1
    max_beta = -1 #want to find maximum value of beta

    #for each slot machine
    for slot_machine_index in range(number_of_slot_machines): 
        a = number_of_positive_rewards[slot_machine_index] + 1
        b = number_of_negative_rewards[slot_machine_index] + 1

        #random value from beta distribution
        random_beta = np.random.beta(a, b)

        #maximum beta
        if random_beta > max_beta:
            max_beta = random_beta
            index_of_machine_to_play = slot_machine_index #set the machine to play to the current machine
    
    #record of wins and losses
    if outcomes[turn_index][index_of_machine_to_play] == 1:
        number_of_positive_rewards[index_of_machine_to_play] += 1
    else:
        number_of_negative_rewards[index_of_machine_to_play] += 1

#times a machine was played
number_of_times_played = number_of_positive_rewards + number_of_negative_rewards 
for slot_machine_index in range(number_of_slot_machines): #for each slot machine
    print('Slot machine {0} was played {1} times'.format(slot_machine_index, number_of_times_played[slot_machine_index]))

#best machine
print('\nThe best slot machine to play is machine {}!'.format(np.argmax(number_of_times_played)))

Slot machine 0 was played 58.0 times
Slot machine 1 was played 50.0 times
Slot machine 2 was played 72.0 times
Slot machine 3 was played 105.0 times
Slot machine 4 was played 1622.0 times
Slot machine 5 was played 93.0 times

The best slot machine to play is machine 4!


### Compare with naive random sampling approach

In [7]:
#wins from thompson
total_wins_thompson_sampling = np.sum(number_of_positive_rewards)

#determine how many times we would win if we used naive approach
total_wins_random_sampling = 0
for turn_index in range(number_of_turns):
  index_of_machine_to_play = np.random.randint(0, number_of_slot_machines) #randomly choose machine
  if outcomes[turn_index][index_of_machine_to_play] == 1:
    total_wins_random_sampling += 1


print('Total wins with Thompson Sampling: {0:.0f}'.format(total_wins_thompson_sampling))
print('Total wins with Random Sampling: {0:.0f}'.format(total_wins_random_sampling))

Total wins with Thompson Sampling: 211
Total wins with Random Sampling: 49
