In [50]:
from scipy.optimize import linear_sum_assignment
import Environment
import Shop
from mab.ucb_learner import *
from utilities import *

In [52]:
# Shop whose promo assignment is learned; it exposes the two price lists,
# the discount levels and (once configured below) the assignment learner.
shop_ucb = Shop.Shop()

# The simulated environment is built from the shop's own prices and discounts.
env = Environment.Environment(
    shop_ucb.prices1,
    shop_ucb.prices2,
    shop_ucb.discounts,
)

# Pass the environment's conversion rates for both items on to the shop.
shop_ucb.set_conv_rate(env.true_conv1, env.true_conv2)

# One bandit arm per (customer class, promo) pair, listed class by class so
# that the arm at position k is (k // n_promos, k % n_promos).
arms = [
    (class_id, promo_id)
    for class_id in range(shop_ucb.n_classes)
    for promo_id in range(len(shop_ucb.discounts))
]
n_arms = len(arms)
shop_ucb.set_assignment_learner('UCB', n_arms)

In [53]:
# Fixed price points for this experiment, picked by index from the
# environment's price lists (item 1 and item 2 respectively).
price1_index, price2_index = 1, 3
price1, price2 = env.prices1[price1_index], env.prices2[price2_index]

# Revenue of a customer who buys both items at full price; the simulation
# divides individual rewards by this value before feeding the learner.
max_ind_reward = price1 + price2

# Share of each promo among all promos handed out on a day (one entry per
# promo); multiplied by the expected customer total to get promo counts.
generated_promo_ratios = np.array([0.4, 0.25, 0.2, 0.15])

In [57]:
# Online learning of the (customer class -> promo) assignment with UCB.
#
# Each simulated day:
#   1. estimate how many customers of each class will show up,
#   2. split the promos to hand out according to `generated_promo_ratios`,
#   3. match expected customers to promos with the Hungarian algorithm, using
#      the learner's upper confidence bounds as edge weights,
#   4. turn the matching into per-class promo probabilities,
#   5. simulate the real customers of the day and update the learner.
days = 365

# Mean revenue per customer, one entry per simulated day.
u_rewards_per_class_per_day = []

n_classes = shop_ucb.n_classes
n_promos = len(shop_ucb.discounts)

customers_em = np.zeros(n_classes)           # running mean of observed customers per class
customers_ie = np.full(n_classes, 25)        # initial estimate, used on day 0 only

# Constant-time arm lookup instead of repeated linear `arms.index(...)` scans.
arm_index_of = {arm: idx for idx, arm in enumerate(arms)}

for day in range(days):
    if day == 0:
        shop_ucb.set_expected_customers(customers_ie)
    else:
        # env.customers still holds the previous day's arrivals; fold them
        # into the running mean over the `day` days observed so far.
        customers_em = (customers_em*(day-1) + env.customers) / day
        # Adding 0.99 before truncation rounds (almost) every fraction up.
        shop_ucb.set_expected_customers((customers_em + 0.99).astype(int))

    # do the matching and get promo fractions #############

    # shop_ucb.customers is read back right after set_expected_customers --
    # presumably the value just stored; verify against Shop.
    expected_counts = np.asarray(shop_ucb.customers)
    total_expected = expected_counts.sum()

    # Expected number of promos of each kind; truncation leftovers go to
    # promo 0 so there are exactly as many promos as expected customers.
    en_promos = (total_expected*generated_promo_ratios).astype(int)
    en_promos[0] += total_expected - en_promos.sum()

    # One entry per expected promo / customer, holding its promo / class id.
    expected_promos = np.repeat(np.arange(len(en_promos)), en_promos)
    expected_customers = np.repeat(np.arange(len(expected_counts)), expected_counts)

    # Upper confidence bound of every arm, laid out as a (class, promo) table.
    # Infinite bounds are capped at 1e3 so the assignment solver gets finite
    # weights.
    learner = shop_ucb.assignment_learner
    ucb_table = np.zeros((n_classes, n_promos))
    for (class_id, promo_id), arm_index in arm_index_of.items():
        bound = learner.confidence[arm_index] + learner.empirical_means[arm_index]
        ucb_table[class_id, promo_id] = 1e3 if bound == np.inf else bound

    # Edge weights between each expected customer (row) and promo (column).
    graph = ucb_table[np.ix_(expected_customers, expected_promos)]

    # match according to the weights (negated: the solver minimises cost)
    matched_c, matched_p = linear_sum_assignment(-graph)
    matched_tuples = [(expected_customers[c], expected_promos[p]) for c,p in zip(matched_c, matched_p)]

    # calculate the promo fractions for the simulation
    promo_fractions = np.zeros((n_classes, n_promos))
    for class_id, promo_id in matched_tuples:
        promo_fractions[class_id, promo_id] += 1

    # Normalise each class row by its expected customer count. A class with
    # no expected customers would give a 0/0 = NaN row, which
    # np.random.choice rejects as soon as such a customer shows up; those
    # rows fall back to the overall promo ratios instead.
    class_totals = expected_counts.reshape(-1, 1)
    has_customers = class_totals[:, 0] > 0
    promo_fractions[has_customers] /= class_totals[has_customers]
    promo_fractions[~has_customers] = generated_promo_ratios
    #######################################################

    print(promo_fractions)
    print("**************\n***************")

    # simulate the day like previous steps
    env.generate_next_day_customers()

    # One entry per real customer holding its class id, in random arrival order.
    customers = np.repeat(np.arange(len(env.customers)), np.asarray(env.customers))
    np.random.shuffle(customers)

    revenue_ucb = []

    for cust in customers:
        # select the promo for the customer with given class; drawn as a
        # plain int (not a length-1 array) so it can safely be used as an
        # index and as part of the arm key.
        selected_promo = int(np.random.choice(n_promos, p=promo_fractions[cust]))

        # Item 2 is only offered to customers who bought item 1.
        ucb_buy1 = env.round1(cust, price1)
        ucb_buy2 = 0
        if ucb_buy1:
            ucb_buy2 = env.round2(cust, selected_promo, price2)

        ### TODO should we consider all the shopping for reward or just item 2

        ucb_ind_reward = ucb_buy1*price1 + ucb_buy2*price2*(1-shop_ucb.discounts[selected_promo])

        # The learner is fed the reward scaled by the full-price revenue.
        norm_ucb_ind_reward = ucb_ind_reward / max_ind_reward

        shop_ucb.assignment_learner.update(arm_index_of[(int(cust), selected_promo)], norm_ucb_ind_reward)

        revenue_ucb.append(ucb_ind_reward)

    u_rewards_per_class_per_day.append(np.mean(revenue_ucb))




[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3)]


In [55]:
# Inspect the learner's per-arm confidence term (one value per arm, in the
# order of `arms`); the simulation loop adds it to `empirical_means` to get
# the matching weights.
shop_ucb.assignment_learner.confidence

array([0.29691989, 0.67924819, 0.48851187, 0.69086011, 0.47249071,
       0.56087073, 0.46145827, 0.567355  , 0.44152972, 0.39886827,
       0.3965955 , 0.7296296 , 0.3648148 , 0.48435424, 0.50165803,
       0.63804257])

In [56]:
# Inspect the per-class customer counts currently held by the shop --
# presumably the value last passed to set_expected_customers; verify in Shop.
shop_ucb.customers


array([28, 23, 27, 26])

In [40]:
# Scratch check: a (classes, promos) table divided by a 1-D per-class vector
# broadcasts along the LAST axis, so column j is divided by customers[j].
# That is not a per-class normalisation (rows do not sum to 1); the row-wise
# form needs the counts reshaped into a column vector.
# NOTE(review): the simulation loop already divides promo_fractions in place,
# so running this after the loop divides a second time.
promo_fractions / shop_ucb.customers

array([[0.        , 0.        , 0.74074074, 0.31818182],
       [0.81481481, 0.12      , 0.        , 0.        ],
       [0.7037037 , 0.        , 0.        , 0.36363636],
       [0.        , 0.88      , 0.        , 0.        ]])

In [43]:
# Scratch check: per-class normalisation. Reshaping the counts into a column
# vector makes the division broadcast row-wise, so every class row is divided
# by that class's customer count. reshape(-1, 1) infers the number of classes
# instead of hard-coding it.
# NOTE(review): the simulation loop already divides promo_fractions in place,
# so running this after the loop divides a second time.
promo_fractions / shop_ucb.customers.reshape(-1, 1)

array([[0.        , 0.        , 0.74074074, 0.25925926],
       [0.88      , 0.12      , 0.        , 0.        ],
       [0.7037037 , 0.        , 0.        , 0.2962963 ],
       [0.        , 1.        , 0.        , 0.        ]])