In [1]:
from scipy.optimize import linear_sum_assignment
import numpy as np
from mab.environment import Environment
import random

#Customers
customerParams = zip([200,200,200,200], [50,50,50,50])
#discounts
discounts = np.array([0.0, 0.5, 0.10, 0.25])
#Prices
prices = np.array([250, 400])
#Conversion Rates for item 1
conv_rate1 = [0.3, 0.4, 0.25, 0.45]
#Conversion Rates for item 2 [discountedPrice x Class]
conv_rate2 = np.array([[0.1, 0.2, 0.15, 0.2],
                    [0.15, 0.25, 0.2, 0.25],
                    [0.2, 0.35, 0.25, 0.4],
                    [0.4, 0.45, 0.35, 0.6]])

n_classes = 4


In [2]:
def simulation_offline(env, fractions):
    reward = 0
    promo = -1
    c = random.randint(0, 3)
    while env.customers[c] == 0:
        c = random.randint(0, 3)
    env.customers[c] -= 1
    rand = random.random() # random variable to decide whether the customer buys or not
    if rand < conv_rate1[c]: # case when the customer buys the first item
        reward = prices[0]
        promo_p = np.cumsum(fractions[c])
        rand = random.random()
        for j in range(len(promo_p)):
            if rand < promo_p[j]:
                promo = j
                break
        rand = random.random()
        if rand < conv_rate2[promo][c]:
            reward += prices[1]*(1-discounts[promo])
    return c, promo, reward

def simulation_day(env, fractions):
    rewards = [0, 0, 0, 0]
    customer_numbers = np.array([num for num in env.customers])
    for i in range(np.array(env.customers).sum()):
        c, promo, reward = simulation_offline(env, fractions)
        rewards[c] += reward
    print(np.array(rewards) / customer_numbers)
    return np.array(rewards) / customer_numbers

In [3]:
testDistributions = np.array([[1,1,1,1], [5,3,2,1], [1,2,3,5]]) #Different kinds of distributions we want to test
fractionSamples = 2
temp = np.array([[np.random.dirichlet(dist) for x in range(fractionSamples)] for dist in testDistributions])
candidate_fractions = np.reshape(temp, (len(testDistributions)*fractionSamples, n_classes))
candidate_fractions

array([[0.48589393, 0.07648257, 0.00532058, 0.43230292],
       [0.02641697, 0.58954782, 0.026008  , 0.35802721],
       [0.42334113, 0.28502798, 0.23445409, 0.0571768 ],
       [0.43576017, 0.35961781, 0.12232586, 0.08229616],
       [0.05241566, 0.10008279, 0.2621244 , 0.58537715],
       [0.01326206, 0.22166788, 0.32594037, 0.43912968]])

In [4]:
from mab.ts_learner import TS_Learner
ts_learner = TS_Learner(candidate_fractions)
# iterate over a year
for i in range(365):
    
    
    fraction_idxs = ts_learner.select_fractions()
    print("Day", i+1, "- Selected Fractions:", fraction_idxs)
    fractions = []
    for j in range(4):
        fractions.append(candidate_fractions[fraction_idxs[j]])
    # set env for the day
    customerParams = zip([200,200,200,200], [50,50,50,50])
    env = Environment(customerParams, discounts, prices, conv_rate1, conv_rate2, fractions, fraction_idxs)    
    rewards = simulation_day(env, fractions)
    for j in range(4):
        ts_learner.update(fraction_idxs[j], rewards[j], j)
print()
print("Winners:")
print(ts_learner.select_fractions())
    

Day 1 - Selected Fractions: [3, 5, 1, 2]
[ 94.33035714 169.16201117  79.75       155.68807339]
3
5
1
2
Day 2 - Selected Fractions: [4, 4, 2, 0]
[105.10309278 145.01992032  84.85232068 177.71084337]
4
4
2
0
Day 3 - Selected Fractions: [0, 1, 5, 5]
[ 93.10344828 128.92156863  97.72321429 175.42635659]
0
1
5
5
Day 4 - Selected Fractions: [1, 0, 0, 4]
[ 92.72300469 152.55395683  81.75675676 179.20930233]
1
0
0
4
Day 5 - Selected Fractions: [2, 2, 4, 0]
[102.3943662  101.4953271   71.66023166 183.33333333]
2
2
4
0
Day 6 - Selected Fractions: [5, 3, 3, 3]
[ 82.1        118.56666667  73.87978142 144.9382716 ]
5
3
3
3
Day 7 - Selected Fractions: [0, 5, 2, 1]
[101.98019802 167.06521739  78.38842975 153.03191489]
0
5
2
1
Day 8 - Selected Fractions: [2, 0, 5, 4]
[ 85.08474576 120.          72.69565217 177.62711864]
2
0
5
4
Day 9 - Selected Fractions: [4, 5, 1, 4]
[105.30172414 149.58158996  86.59217877 178.56115108]
4
5
1
4
Day 10 - Selected Fractions: [4, 5, 3, 5]
[ 96.76829268 164.35185185  91.

In [5]:
candidate_fractions

array([[0.48589393, 0.07648257, 0.00532058, 0.43230292],
       [0.02641697, 0.58954782, 0.026008  , 0.35802721],
       [0.42334113, 0.28502798, 0.23445409, 0.0571768 ],
       [0.43576017, 0.35961781, 0.12232586, 0.08229616],
       [0.05241566, 0.10008279, 0.2621244 , 0.58537715],
       [0.01326206, 0.22166788, 0.32594037, 0.43912968]])

In [7]:
rewards = np.zeros([4,len(candidate_fractions)])

In [8]:
customer_off = [200, 200, 200, 200]
for i in range(4):
    rewards[i][:] += customer_off[i]*prices[0]*conv_rate1[i]
    for j in range(len(candidate_fractions)):
        for k in range(len(candidate_fractions[j])):
            rewards[i][j] += customer_off[i]*conv_rate1[i]*conv_rate2[k][i]*candidate_fractions[j][k]*(1-discounts[k])*prices[1]
            
            

In [9]:
rewards

array([[19439.37998412, 18814.73726126, 17953.58369123, 17814.11652961,
        20653.03949826, 20000.62727183],
       [28138.15440033, 26656.11437742, 26830.30180995, 26349.17953555,
        29700.07854811, 28999.62826679],
       [16404.17986483, 15755.02539342, 15695.30094919, 15509.03734309,
        17110.20239544, 16755.28446749],
       [33414.86986354, 31480.27182476, 30795.47119216, 30174.29431429,
        36208.00735706, 34931.08042508]])

In [10]:
for i in range(4):
    print(np.argmax(rewards[i]))

4
4
4
4


In [11]:
    env = Environment(customerParams, discounts, prices, conv_rate1, conv_rate2, [], [])    



ValueError: not enough values to unpack (expected 2, got 0)

In [12]:
def aaa():
    print(conv_rate1)
aaa()

[0.3, 0.4, 0.25, 0.45]
