In [1]:
from scipy.optimize import linear_sum_assignment
import numpy as np
from mab.environment import Environment
import random

# Customers: one pair per customer class.
# Presumably (mean, std-dev) of the daily number of customers -- confirm against
# mab.environment.Environment. Materialised as a list because a bare zip() is a
# single-use iterator and would be silently empty on a second read.
customerParams = list(zip([200, 200, 200, 200], [50, 50, 50, 50]))

# Discount applied to item 2 for each promo (index = promo id).
# NOTE(review): every column of conv_rate2 increases from row 0 to row 3, yet
# the discounts are not ordered the same way (0.5 sits between 0.0 and 0.10).
# Possibly 0.05 was intended -- confirm before changing; value kept as-is.
discounts = np.array([0.0, 0.5, 0.10, 0.25])

# Full prices: prices[0] is item 1, prices[1] is item 2.
prices = np.array([250, 400])

# Conversion rate of item 1, one entry per customer class.
conv_rate1 = [0.3, 0.4, 0.25, 0.45]

# Conversion rate of item 2, indexed as [promo (discounted price)][customer class].
conv_rate2 = np.array([[0.1, 0.2, 0.15, 0.2],
                       [0.15, 0.25, 0.2, 0.25],
                       [0.2, 0.35, 0.25, 0.4],
                       [0.4, 0.45, 0.35, 0.6]])

# Number of customer classes, derived from the data so it cannot drift.
n_classes = len(conv_rate1)


In [2]:
def simulation_offline(env, fractions):
    """Simulate the visit of a single customer and return what they spent.

    A customer class is drawn uniformly at random among the classes that still
    have customers left, and that class' counter in ``env.customers`` is
    decremented in place. The customer buys item 1 with probability
    ``conv_rate1[c]``. If they do, a promo is sampled from ``fractions[c]`` and
    they buy item 2 at the discounted price with probability
    ``conv_rate2[promo][c]``.

    Reads the module-level ``conv_rate1``, ``conv_rate2``, ``prices`` and
    ``discounts``.

    Parameters
    ----------
    env : object exposing ``customers``, an indexable, mutable sequence with the
        number of customers still to be served for each class.
    fractions : sequence indexed by customer class; ``fractions[c]`` holds the
        probability of handing each promo to a customer of class ``c``.

    Returns
    -------
    (c, promo, reward) : the customer class, the promo index assigned (``-1``
        when item 1 was not bought, so no promo was handed out) and the revenue
        generated by this customer.

    Raises
    ------
    ValueError
        If no class has customers left (the rejection loop below would
        otherwise never terminate).
    """
    class_count = len(env.customers)
    if all(env.customers[k] == 0 for k in range(class_count)):
        raise ValueError("no customers left to simulate")

    # Rejection sampling: redraw until a class with remaining customers is hit.
    c = random.randint(0, class_count - 1)
    while env.customers[c] == 0:
        c = random.randint(0, class_count - 1)
    env.customers[c] -= 1

    promo = -1
    reward = 0
    if random.random() >= conv_rate1[c]:
        # Item 1 not bought: no promo is assigned and nothing is earned.
        return c, promo, reward

    reward = prices[0]

    # Inverse-CDF sampling of the promo. Default to the last promo so that a
    # cumulative sum ending slightly below 1.0 (floating-point rounding) cannot
    # leave the sentinel -1 in place after a purchase.
    promo_p = np.cumsum(fractions[c])
    draw = random.random()
    promo = len(promo_p) - 1
    for j, upper_bound in enumerate(promo_p):
        if draw < upper_bound:
            promo = j
            break

    if random.random() < conv_rate2[promo][c]:
        reward += prices[1] * (1 - discounts[promo])
    return c, promo, reward

def simulation_day(env, fractions, verbose=True):
    """Simulate every customer of one day and return the mean reward per class.

    Parameters
    ----------
    env : object exposing ``customers``, the number of customers per class for
        the day. It is consumed: ``simulation_offline`` decrements it in place.
    fractions : per-class promo probabilities, forwarded unchanged to
        ``simulation_offline``.
    verbose : when True (default, the original behaviour) print the per-class
        averages before returning them.

    Returns
    -------
    numpy.ndarray with one entry per customer class: total reward collected
    from that class divided by the number of customers the class started the
    day with. A class that starts with zero customers yields ``nan`` (0/0).
    """
    # Snapshot the counts first: the loop below mutates env.customers.
    customer_numbers = np.array([num for num in env.customers])
    # Sized from the environment rather than hard-coded to four classes.
    rewards = np.zeros(len(customer_numbers))
    for _ in range(customer_numbers.sum()):
        c, _promo, reward = simulation_offline(env, fractions)
        rewards[c] += reward
    average_rewards = rewards / customer_numbers
    if verbose:
        print(average_rewards)
    return average_rewards

In [3]:
# Dirichlet concentration vectors to sample candidate promo allocations from
# (uniform, skewed towards the first promo, skewed towards the last promo).
testDistributions = np.array([[1, 1, 1, 1], [5, 3, 2, 1], [1, 2, 3, 5]])
# Number of candidates drawn from each concentration vector.
fractionSamples = 2
# Shape: (len(testDistributions), fractionSamples, number of promos).
temp = np.array([[np.random.dirichlet(dist) for _ in range(fractionSamples)]
                 for dist in testDistributions])
# Flatten to one candidate per row. The row length is the number of promos,
# taken from the samples themselves rather than from the number of customer
# classes, which only happens to be the same value.
candidate_fractions = temp.reshape(-1, temp.shape[-1])
candidate_fractions

array([[0.1411255 , 0.01991622, 0.52607155, 0.31288673],
       [0.02068627, 0.17163491, 0.53461154, 0.27306728],
       [0.30082679, 0.35136909, 0.3172457 , 0.03055842],
       [0.5675038 , 0.17659871, 0.15551516, 0.10038232],
       [0.13494996, 0.10091749, 0.36392393, 0.40020863],
       [0.08435941, 0.19475364, 0.44666284, 0.27422411]])

In [4]:
from mab.ts_learner import TS_Learner

# Learner that picks, for every customer class, one row of candidate_fractions
# (presumably via Thompson sampling, going by the class name).
ts_learner = TS_Learner(candidate_fractions)

# Run the simulation for one year, one iteration per day.
for day in range(1, 366):
    # One candidate index per customer class.
    fraction_idxs = ts_learner.select_fractions()
    print("Day", day, "- Selected Fractions:", fraction_idxs)
    fractions = [candidate_fractions[fraction_idxs[class_idx]] for class_idx in range(4)]

    # Build a fresh environment for the day; zip() is single-use, so the
    # customer parameters are recreated on every iteration.
    customerParams = zip([200,200,200,200], [50,50,50,50])
    env = Environment(customerParams, discounts, prices, conv_rate1, conv_rate2) #fractions, fraction_idxs)
    rewards = simulation_day(env, fractions)

    # Feed the observed mean reward of each class back to the learner.
    for class_idx in range(4):
        ts_learner.update(fraction_idxs[class_idx], rewards[class_idx], class_idx)

print()
print("Winners:")
print(ts_learner.select_fractions())
    

60 - Selected Fractions: [4, 4, 1, 1]
[ 98.78172589 123.47058824  69.52054795 177.18894009]
Day 161 - Selected Fractions: [4, 1, 1, 1]
[ 85.6302521  131.1         66.55172414 162.11711712]
Day 162 - Selected Fractions: [2, 0, 1, 1]
[ 96.22727273 120.          80.31847134 177.90393013]
Day 163 - Selected Fractions: [4, 4, 1, 4]
[115.18518519 155.03378378  76.91891892 162.71186441]
Day 164 - Selected Fractions: [4, 0, 1, 4]
[ 81.59574468 142.         109.79166667 186.54618474]
Day 165 - Selected Fractions: [0, 4, 1, 1]
[115.33333333 131.06666667  79.02912621 171.93333333]
Day 166 - Selected Fractions: [4, 4, 1, 1]
[ 91.88811189 155.23622047  72.56578947 160.83682008]
Day 167 - Selected Fractions: [4, 4, 1, 4]
[106.3        134.6031746  100.26666667 165.54744526]
Day 168 - Selected Fractions: [0, 4, 1, 4]
[ 88.89344262 164.27272727  73.51648352 179.68553459]
Day 169 - Selected Fractions: [4, 4, 1, 1]
[107.7254902  136.94736842  94.90825688 158.15668203]
Day 170 - Selected Fractions: [4, 4

In [5]:
# Display the candidate promo allocations again: the indices selected by the
# learner refer to rows of this array.
candidate_fractions

array([[0.1411255 , 0.01991622, 0.52607155, 0.31288673],
       [0.02068627, 0.17163491, 0.53461154, 0.27306728],
       [0.30082679, 0.35136909, 0.3172457 , 0.03055842],
       [0.5675038 , 0.17659871, 0.15551516, 0.10038232],
       [0.13494996, 0.10091749, 0.36392393, 0.40020863],
       [0.08435941, 0.19475364, 0.44666284, 0.27422411]])

In [6]:
# Expected-reward table: one row per customer class (4), one column per
# candidate allocation.
rewards = np.zeros((4, len(candidate_fractions)))

In [7]:
# Number of customers per class used for the closed-form expectation.
customer_off = [200, 200, 200, 200]

# The table is rebuilt from scratch instead of accumulated with `+=` into an
# array created in another cell, so re-running this cell no longer doubles
# every entry.
#
# rewards[i][j] = N_i * conv_rate1[i]
#                 * (prices[0]
#                    + prices[1] * sum_k candidate_fractions[j][k]
#                                        * (1 - discounts[k]) * conv_rate2[k][i])

# Expected number of item-1 buyers per class, shape (classes,).
first_item_buyers = np.asarray(customer_off) * np.asarray(conv_rate1)
# Expected item-2 revenue per item-1 buyer, shape (candidates, classes).
second_item_revenue = prices[1] * ((candidate_fractions * (1 - discounts)) @ conv_rate2)
# Shape (classes, candidates), matching the layout of the original loops.
rewards = first_item_buyers[:, None] * (prices[0] + second_item_revenue.T)
            
            

In [8]:
# Expected daily reward: rows are customer classes, columns are the candidate
# allocations (same order as the rows of candidate_fractions).
rewards

array([[19899.96395667, 19634.1961421 , 17944.9707144 , 18074.4650106 ,
        19959.18486098, 19457.01620125],
       [29664.84599358, 29156.94271182, 26858.63541905, 26990.14108507,
        29257.95604104, 28782.89659443],
       [16973.18625096, 16744.68377677, 15693.25591045, 15782.53424569,
        16845.43781449, 16592.24486653],
       [35492.3789171 , 34773.55371839, 30853.66448686, 31022.39167282,
        35125.60225165, 34214.96011984]])