In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


Define Environment

In [None]:
# Size of the simulation: how many turns we get to play, and how many
# slot machines we can choose between on each turn.
number_of_turns = 10000
number_of_slot_machines = 10

# Give every slot machine a random conversion rate between 1% and 15%.
# The seed is fixed so that the same rates are drawn on every run.
np.random.seed(33)
conversion_rates = np.random.uniform(low=0.01, high=0.15, size=number_of_slot_machines)

# Per-machine tallies of wins (positive rewards) and losses (negative rewards).
# Both start at zero; they are later used to determine the shape of each
# machine's beta distribution.
number_of_positive_rewards = np.zeros(number_of_slot_machines)
number_of_negative_rewards = np.zeros(number_of_slot_machines)

# Print the conversion rate of each slot machine. Remember that in a real-world
# scenario the decision-maker would not know this information!
rate_line = "Slot machine {} has a conversion rate of {:.1f}%"
for i, rate in enumerate(conversion_rates):
    print(rate_line.format(i, rate * 100))



Slot machine 0 has a conversion rate of 4.5%
Slot machine 1 has a conversion rate of 7.3%
Slot machine 2 has a conversion rate of 6.8%
Slot machine 3 has a conversion rate of 4.6%
Slot machine 4 has a conversion rate of 13.2%
Slot machine 5 has a conversion rate of 3.6%
Slot machine 6 has a conversion rate of 1.3%
Slot machine 7 has a conversion rate of 14.3%
Slot machine 8 has a conversion rate of 10.5%
Slot machine 9 has a conversion rate of 7.8%


Create the data set

In [None]:
# Seed NumPy's global random number generator so that the dataset is reproducible.
np.random.seed(55)

# The dataset is a matrix with one row for each turn, and one column for each slot machine.
# Each item in the matrix represents the outcome of what would happen if we were to play
# a particular slot machine on that particular turn: a value of "1" indicates that we
# would win, while a value of "0" indicates that we would lose. The number of "wins" for
# each slot machine is determined by its conversion rate.

# Draw one random number between 0.0 and 1.0 for every (turn, slot machine) pair in a
# single call instead of a Python-level double loop. NumPy fills the array in row-major
# order from the same seeded stream, so the values match what drawing one number per
# (turn, machine) pair in nested loops would give.
random_draws = np.random.random((number_of_turns, number_of_slot_machines))

# A draw that is less than or equal to the machine's conversion rate is a win ("1");
# anything else is a loss ("0"). conversion_rates holds one rate per column, so the
# comparison broadcasts across all rows. The result is stored as floats (0.0 / 1.0).
outcomes = (random_draws <= conversion_rates).astype(float)

# display the first 15 rows of data
print(outcomes[:15])





[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 1.]]


In [None]:
# Show the mean of each column, i.e. the observed conversion rate of each slot
# machine in the generated dataset.
mean_line = "Mean for column {0}: {1:.2%}"
for i in range(number_of_slot_machines):
    column_mean = outcomes[:, i].mean()
    print(mean_line.format(i, column_mean))

Mean for column 0: 4.53%
Mean for column 1: 7.32%
Mean for column 2: 6.70%
Mean for column 3: 4.31%
Mean for column 4: 13.09%
Mean for column 5: 3.80%
Mean for column 6: 1.14%
Mean for column 7: 14.10%
Mean for column 8: 10.73%
Mean for column 9: 8.17%


Thompson Sampling

In [None]:
# Thompson Sampling: on every turn, draw one random value per slot machine from that
# machine's beta distribution and play the machine with the largest draw.
#
# Start from fresh win/loss tallies so that re-running this cell does not add a second
# round of plays on top of the counts left behind by a previous run of the same cell.
# NOTE: this cell does not seed the random number generator itself; its draws continue
# from the generator state left by the cells that ran before it.
number_of_positive_rewards = np.zeros(number_of_slot_machines)
number_of_negative_rewards = np.zeros(number_of_slot_machines)

# for each turn
for turn_index in range(number_of_turns):
  # Shape parameters of each machine's beta distribution: (wins so far + 1) and
  # (losses so far + 1). A single call draws one random value per slot machine,
  # in slot machine order.
  random_betas = np.random.beta(number_of_positive_rewards + 1,
                                number_of_negative_rewards + 1)

  # Play the machine with the largest draw; np.argmax returns the first index
  # when several draws tie for the maximum.
  index_of_machine_to_play = np.argmax(random_betas)

  # play the selected slot machine, and record whether we win or lose
  if outcomes[turn_index][index_of_machine_to_play] == 1:
    number_of_positive_rewards[index_of_machine_to_play] += 1
  else:
    number_of_negative_rewards[index_of_machine_to_play] += 1

# compute and display the total number of times each slot machine was played
number_of_times_played = number_of_positive_rewards + number_of_negative_rewards
for slot_machine_index in range(number_of_slot_machines):
  print("Slot machine {0} has been played {1} times".format(slot_machine_index, number_of_times_played[slot_machine_index]))

# identify and display the best slot machine to play (the one played most often)
print('\nOVerall Conclusion: The best slot machine to play is machine {}!'.format(np.argmax(number_of_times_played)))

Slot machine 0 has been played 142.0 times
Slot machine 1 has been played 86.0 times
Slot machine 2 has been played 99.0 times
Slot machine 3 has been played 178.0 times
Slot machine 4 has been played 380.0 times
Slot machine 5 has been played 83.0 times
Slot machine 6 has been played 57.0 times
Slot machine 7 has been played 8327.0 times
Slot machine 8 has been played 590.0 times
Slot machine 9 has been played 58.0 times

OVerall Conclusion: The best slot machine to play is machine 7!


In [None]:
# Total number of wins under the Thompson Sampling strategy: the sum of the
# positive-reward tallies across all slot machines.
total_wins_thompson_sampling = number_of_positive_rewards.sum()

# Display the result as a whole number.
wins_line = 'Total wins with Thompson Sampling: {0:.0f}'
print(wins_line.format(total_wins_thompson_sampling))

Total wins with Thompson Sampling: 1324
