In [1]:
from scipy.optimize import linear_sum_assignment
import numpy as np
from mab.environment import Environment

#Customers
customerParams = zip([200,200,200,200], [50,50,50,50])
#discounts
discounts = np.array([0.0, 0.5, 0.10, 0.25])
#Prices
prices = np.array([250, 400])
#Conversion Rates for item 1
conv_rate1 = [0.3, 0.4, 0.25, 0.45]
#Conversion Rates for item 2 [discountedPrice x Class]
conv_rate2 = np.array([[0.1, 0.2, 0.15, 0.2],
                    [0.15, 0.25, 0.2, 0.25],
                    [0.2, 0.35, 0.25, 0.4],
                    [0.4, 0.45, 0.35, 0.6]])

n_classes = 4


In [2]:
testDistributions = np.array([[1,1,1,1], [5,3,2,1], [1,2,3,5]]) #Different kinds of distributions we want to test
fractionSamples = 2
temp = np.array([[np.random.dirichlet(dist) for x in range(fractionSamples)] for dist in testDistributions])
candidate_fractions = np.reshape(temp, (len(testDistributions)*fractionSamples, n_classes))
candidate_fractions

array([[0.18968759, 0.0443646 , 0.03487403, 0.73107378],
       [0.43865535, 0.13902183, 0.18398583, 0.23833699],
       [0.68532442, 0.17903725, 0.12732931, 0.00830902],
       [0.44389737, 0.27066957, 0.22433461, 0.06109845],
       [0.00643465, 0.11718596, 0.55636583, 0.32001356],
       [0.11558527, 0.08617108, 0.04052628, 0.75771737]])

In [3]:
# will try bandit
# a round = a day, reward[c] = profit for the day for given customer class
from mab.ts_learner import TS_Learner
ts_learner = TS_Learner(len(candidate_fractions))
fraction_idxs = ts_learner.select_fractions()
ts_learner.update(4, 1, 3)


4


In [4]:
from mab.ts_learner import TS_Learner
ts_learner = TS_Learner(len(candidate_fractions))
# iterate over a year
for i in range(365):
    
    
    fraction_idxs = ts_learner.select_fractions()
    print("Day", i+1, "- Selected Fractions:", fraction_idxs)
    fractions = []
    for j in range(4):
        fractions.append(candidate_fractions[fraction_idxs[j]])
    # set env for the day
    customerParams = zip([200,200,200,200], [50,50,50,50])
    env = Environment(customerParams, discounts, prices, conv_rate1, conv_rate2, fractions, fraction_idxs)    
    rewards = env.round_day()
    for j in range(4):
        ts_learner.update(fraction_idxs[j], rewards[j], j)
print()
print("Winners:")
print(ts_learner.select_fractions())
    

Day 1 - Selected Fractions: [4, 4, 1, 0]
[104.12162162 151.10091743  86.99619772 191.27906977]
4
4
1
0
Day 2 - Selected Fractions: [5, 2, 5, 2]
[ 60.38461538 142.39263804  98.23170732 137.98701299]
5
2
5
2
Day 3 - Selected Fractions: [2, 0, 0, 5]
[ 89.74137931 161.11111111  86.26943005 152.42424242]
2
0
0
5
Day 4 - Selected Fractions: [3, 3, 3, 1]
[ 72.21804511 153.33333333  81.20535714 133.4       ]
3
3
3
1
Day 5 - Selected Fractions: [1, 5, 2, 4]
[100.48148148 133.82165605  80.78817734 167.30392157]
1
5
2
4
Day 6 - Selected Fractions: [0, 1, 4, 3]
[116.06557377 119.80099502  77.21311475 156.03773585]
0
1
4
3
Day 7 - Selected Fractions: [0, 3, 5, 0]
[ 97.55102041 124.5106383   92.41610738 172.68707483]
0
3
5
0
Day 8 - Selected Fractions: [4, 0, 5, 0]
[103.7890625  148.6013986  106.01851852 166.900369  ]
4
0
5
0
Day 9 - Selected Fractions: [1, 0, 5, 4]
[ 90.51546392 139.22222222  87.16216216 166.87116564]
1
0
5
4
Day 10 - Selected Fractions: [4, 4, 4, 0]
[117.47706422 159.1         99.

In [5]:
candidate_fractions

array([[0.18968759, 0.0443646 , 0.03487403, 0.73107378],
       [0.43865535, 0.13902183, 0.18398583, 0.23833699],
       [0.68532442, 0.17903725, 0.12732931, 0.00830902],
       [0.44389737, 0.27066957, 0.22433461, 0.06109845],
       [0.00643465, 0.11718596, 0.55636583, 0.32001356],
       [0.11558527, 0.08617108, 0.04052628, 0.75771737]])

In [6]:
rewards = np.zeros([4,len(candidate_fractions)])

In [7]:
customer_off = [200, 200, 200, 200]
for i in range(4):
    rewards[i][:] += customer_off[i]*prices[0]*conv_rate1[i]
    for j in range(len(candidate_fractions)):
        for k in range(len(candidate_fractions[j])):
            rewards[i][j] += customer_off[i]*conv_rate1[i]*conv_rate2[k][i]*candidate_fractions[j][k]*(1-discounts[k])*prices[1]
            
            

In [8]:
rewards

array([[20949.49349356, 18813.85724065, 17576.93320994, 17961.59324993,
        19933.97591116, 21063.15119906],
       [29638.58597837, 27792.09822348, 26475.44212803, 26844.77755646,
        29574.2396236 , 29676.28256444],
       [17152.86243045, 16173.21514305, 15530.65200103, 15703.30384738,
        16937.39329943, 17179.48243114],
       [36360.75394207, 32529.43234882, 30024.79739927, 30811.24552639,
        35468.38714188, 36520.22581647]])

In [9]:
for i in range(4):
    print(np.argmax(rewards[i]))

5
5
5
5
