Consider the case in which all the users belong to class C1, and no information about the advertising and pricing curves is known beforehand. Apply the GP-UCB and GP-TS algorithms when using GPs to model the two advertising curves, reporting the plots of the average (over a sufficiently large number of runs) value and standard deviation of the cumulative regret, cumulative reward, instantaneous regret, and instantaneous reward.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import deque

from utils.User_Classes import UserClass
from utils.Clairvoyant import find_optimal_bid_for_class
from utils.tools import calculate_margin, calculate_price_index, fun
from utils.learners.TS_Learner import TS_Learner
#from p3.bidding_enviroment import BiddingEnvironment, fun
from utils.tools import fun
#from p3.GPTS_learner import  GPTS_Learner3

import warnings
warnings.filterwarnings("ignore")

# Environment

In [4]:
#TODO: move to p3
class Princing_Environment_3(): #m
    def __init__(self, n_arms, user_class):
        self.user_class = user_class
        self.time = 0
        self.n_arms = n_arms

    def round(self, user_class, price, n):
        successes = np.random.binomial(n, user_class.get_conversion_probabilities(price)) # Number of samples n
        return successes

In [None]:
class Bidding_Environment_3:
    def __init__(self, bids, access_sigma, cost_sigma, user_class, n_arms):
        self.bids = bids
        self.acc_means = self.initialize_accesses(user_classes=user_class, bids=bids)
        self.cost_means = self.initialize_cost(user_classes=user_class, bids=bids)
        self.acc_sigmas = np.ones(len(bids)) * access_sigma
        self.cost_sigmas = np.ones(len(bids)) * cost_sigma
        self.n_arms = n_arms

    #TODO: probabily remove
    def initialize_means(self, user_class, bids, price):
        means = np.zeros(len(bids))
        for i in range(len(means)):
                means[i] = fun(user_class, bids[i], price)
        return means
    
    def initialize_accesses(self, user_class, bids):
        means = np.zeros(len(bids))
        for j, b in enumerate(bids):
            means[j] = user_class.get_click_bids(b)
        return means

    def initialize_cost(self, user_class, bids):
        means = np.zeros(len(bids))
        for j, b in enumerate(bids):
            means[j] = user_class.get_cost_per_click(b)
        return means

    def round(self, pulled_arm):
        sample_accesses = np.random.normal(self.acc_means[pulled_arm], self.acc_sigmas[pulled_arm])
        sample_cost = np.random.normal(self.cost_means[pulled_arm], self.cost_sigmas[pulled_arm])
        #Handle the exceptions
        if sample_cost > self.bids[pulled_arm]:
            sample_cost = self.bids[pulled_arm]
        if sample_cost < 0:
            sample_cost = 0
        if int(sample_accesses) < 0:
            sample_accesses = self.acc_means[pulled_arm]

        return int(sample_accesses), sample_cost

In [2]:
#Environment
n_arms = 100

prices = [50, 100, 150, 200, 250]
bids = np.linspace(0.01, 3.0, n_arms)
access_sigma = 50
cost_sigma = 10

user = UserClass(name='C1')

T = 20
n_experiments = 5

gpts_rewards_per_experiment = []
gpucb_rewards_per_experiment = []


#optimum_bid = find_optimal_bid_for_class(user.user_index, calculate_price_index(prices[3]))

In [None]:
# Create Environment
pr_env = Princing_Environment_3(n_arms, user)
bid_env = Bidding_Environment_3(bids, access_sigma, cost_sigma, user, n_arms)

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import numpy as np
from utils.learners.Learner import Learner

#This learner consider the access and the cost
class GPTS_Learner3(Learner):
    def __init__(self, n_arms, arms):
        super().__init__(n_arms)
        self.arms = arms
        self.acc_means = np.zeros(self.n_arms)
        self.acc_sigmas = np.ones(self.n_arms)
        self.cost_means = np.zeros(self.n_arms)
        self.cost_sigmas = np.ones(self.n_arms)
        self.pulled_arms = []
        self.collected_clicks = []
        self.collected_costs = []
        alpha_acc = 1000
        kernel_acc = C(100, (100, 1e6)) * RBF(10, (1e-1, 1e6))
        self.gp_acc = GaussianProcessRegressor(kernel=kernel_acc, alpha=alpha_acc, normalize_y=False, n_restarts_optimizer=1)

        alpha_cost = 0.3
        kernel_cost = C(0.1, (1, 1e2)) * RBF(0.1, (1, 1e2))
        self.gp_cost = GaussianProcessRegressor(kernel=kernel_cost, alpha=alpha_cost, normalize_y=False, n_restarts_optimizer=1)

    def update_observations_gpts(self, pulled_arm, clicks, costs):
        # self.rewards_per_arm[pulled_arm].append(reward)
        self.collected_clicks = np.append(self.collected_clicks, clicks)
        self.collected_costs = np.append(self.collected_costs, costs)
        self.pulled_arms.append(self.arms[pulled_arm])

    def update_model(self):
        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_clicks
        self.gp_acc.fit(x, y)
        self.acc_means, self.acc_sigmas = self.gp_acc.predict(np.atleast_2d(self.arms).T, return_std=True)
        self.acc_sigmas = np.maximum(self.acc_sigmas, 30)

        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_costs
        self.gp_cost.fit(x, y)
        self.cost_means, self.cost_sigmas = self.gp_cost.predict(np.atleast_2d(self.arms).T, return_std=True)
        self.cost_sigmas = np.maximum(self.cost_sigmas, 0.01)

    def update(self, pulled_arm, clicks, costs):
        self.t += 1
        self.update_observations_gpts(pulled_arm, clicks, costs)
        self.update_model()

    def pull_arm(self, conv_rate, margin):
        """
        if self.t < self.n_arms:
            return self.t  # % self.n_arms
        try:
            conv_rate = pricing_learner.beta_parameters[price_idx, 0] / (pricing_learner.beta_parameters[price_idx, 0]
                                                                         + pricing_learner.beta_parameters[
                                                                             price_idx, 1])
        except ZeroDivisionError:
            conv_rate = 0
            print('DIV 0')
        #poisson = pricing_learner.poisson_vector[price_idx, 0] + 1
        """
        

        exp_rew = np.random.normal(self.acc_means * (np.ones(shape=self.n_arms) * margin * conv_rate - self.cost_means), 50) 
        #Probabily change in self.acc_means * (np.ones(shape=self.n_arms) in self.means

        bid_idx = np.argmax(exp_rew)

        return bid_idx

In [5]:
for e in tqdm(range(n_experiments), desc='Number of experiments'):
    # Create Learner
    pricing_learner = TS_Learner(n_arms)
    bidding_learner = GPTS_Learner3(n_arms, arms=bids)
    #bidding_learner = GPUCB_Learner3(n_arms, arms=bids)
    ts_rewards = []

    # generate empty deque
    arms = []
    bids_p = []
    first = True

    # Simulate experiments
    for d in range(T):

        # choose arms
        pricing_pulled_arm = pricing_learner.pull_arm()
        print("Pricing pulled arm:", pricing_pulled_arm)
        arms.append(pricing_pulled_arm)
        #Calculate the conversion rate of the price choose
        conv_rate =pricing_learner.beta_parameters[pricing_pulled_arm, 0] / (pricing_learner.beta_parameters[pricing_pulled_arm, 0]
            + pricing_learner.beta_parameters[pricing_pulled_arm, 1])

        # Fixed the price, I can find the bid that maximize the expected reward
        bidding_pulled_arm = bidding_learner.pull_arm(conv_rate=conv_rate, margin=calculate_margin(prices[pricing_pulled_arm]))

        # simulate accesses
        # empty daily reward
        ts_daily = {'reward': 0, 'successes': 0, 'clicks': 0}
        cost = 0 #costpc - ntrials = n_accesses

        n_accesses, cost = bid_env.round(pulled_arm=bidding_pulled_arm, user_c=i)
        # quanti acquisti
        ts_successes = pr_env.round(user, pricing_pulled_arm, n_accesses)
        # aggiorno beta
        #TODO: create a TS_Learner for the pricing that handles update of accesses and cost
        pricing_learner.update(pricing_pulled_arm, ts_successes, n_trials)

        # aggiorno reward_giornaliera
        ts_daily['reward'] += (ts_successes + ts_class_returns) * margin(prices[pricing_pulled_arm]) - n_trials * costpc
        ts_daily['successes'] += ts_successes
        ts_daily['clicks'] += n_trials

        # work on dequy
        ts_dicty = {'arm': pricing_pulled_arm, 'sample': ts_daily['successes']}
        ts_dequy.append(ts_dicty)
        pricing_learner.update_poisson(ts_dequy.popleft())
        bidding_learner.update(pulled_arm=bidding_pulled_arm, costs=costpc, clicks=ts_daily['clicks'])
        # save daily reward
        ts_rewards.append(ts_daily['reward'])
        if check_convergence(arms)[0] and first:
            Didi = {'arm': check_convergence(arms)[1], 'day': d, 'exp': e}
            first = False
            conv_arms.append(Didi)
            # print(check_convergence(arms)[1])
        bids_p.append(bidding_pulled_arm)
        if check_convergence(bids_p, crit=0.7)[0]:
            print('Conv bid', check_convergence(bids_p, crit=0.7)[1])

    final_bids.append(bids_p)
    # print(arms)
    ts_rewards.insert(0, 0)
    ts_final_rewards.append(ts_rewards)
final_bids = np.array(final_bids)



Number of experiments:   0%|          | 0/5 [00:00<?, ?it/s]




78


IndexError: list index out of range

# Test

In [None]:
from p1.pricing_environment import *

In [None]:
n_arms = 5

env = Environment_Pricing_2(n_arms=n_arms, p = p)
prices = env.prices

T = 365 #time steps for each experiment

n_experiments = 1000

ts_rewards_per_experiment = [] #list to store the collected rewards for TS_Learner over each experiment
pulled_arm_number = [0 for i in range(0,5)]
# Loop over the experiments
for e in tqdm(range(0, n_experiments)):
    env_pr = Environment_Pricing_2(n_arms=n_arms, p = p)
    ts_learner = TS_Learner(n_arms=n_arms)
    for t in range(0,T):
        #Thompson sampling
        pulled_arm = ts_learner.pull_arm()
        reward = env_pr.round(class_index=0, price_index=pulled_arm, bid=1)
        #print(reward)
        ts_learner.update(pulled_arm, reward/34627)
        ts_learner.update_observations(pulled_arm, reward)
        pulled_arm_number[pulled_arm] += 1


    ts_rewards_per_experiment.append(ts_learner.collected_rewards)

# Compute the mean and standard deviation of the cumulative reward at each round
mean_cum_reward_ts = np.mean(ts_rewards_per_experiment, axis=0)
#std_cum_reward_ts = np.std(ts_rewards_per_experiment, axis=0)

reward_ts = mean_cum_reward_ts

#std_cum_reward_ucb = np.std(ucb_reward_per_experiment, axis=0)

# Trash

In [None]:
class Pricing_Environment():
    def __init__(self, n_arms, probabilities, prices=None):
        self.n_arms = n_arms
        self.probabilities = probabilities
        self.prices = prices

    def round(self, pulled_arm, user_c, n_trials=1):
        successes = np.random.binomial(n_trials, self.probabilities[user_c.index][pulled_arm])
        number_returns = np.random.poisson(successes * self.poissons[user_c.index])
        marg = self.margins[pulled_arm] * (number_returns + successes)
        return successes, number_returns

In [None]:
class Bidding_Enviroment():
    def __init__(self, bids, acc_sigma, cost_sigma, user_classes, n_arms):
        self.bids = bids
        self.acc_means = self.initialize_accesses(user_classes=user_classes, bids=bids)
        self.cost_means = self.initialize_cost(user_classes=user_classes, bids=bids)
        self.acc_sigmas = np.ones(len(bids)) * acc_sigma
        self.cost_sigmas = np.ones(len(bids)) * cost_sigma
        self.n_arms = n_arms

    def initialize_accesses(self, user_classes, bids):
        means = np.zeros(shape=(len(user_classes), len(bids)))
        for ii, c in enumerate(user_classes):
            for j, b in enumerate(bids):
                means[ii, j] = c.clicks(b)
        return means

    def initialize_cost(self, user_classes, bids):
        means = np.zeros(shape=(len(user_classes), len(bids)))
        for ii, c in enumerate(user_classes):
            for j, b in enumerate(bids):
                means[ii, j] = cost_per_click(b)
        return means

    def round(self, pulled_arm, user_c):
        sample_accesses = np.random.normal(self.acc_means[user_c, pulled_arm], self.acc_sigmas[pulled_arm])
        sample_cost = np.random.normal(self.cost_means[user_c, pulled_arm], self.cost_sigmas[pulled_arm])
        if sample_cost > self.bids[pulled_arm]:
            sample_cost = self.bids[pulled_arm]
        if sample_cost < 0:
            sample_cost = 0
        if int(sample_accesses) < 0:
            sample_accesses = self.acc_means[user_c, pulled_arm]

        return int(sample_accesses), sample_cost

In [None]:
def fun(user_class, bid, price):
    mean_per_bid = user_class.get_click_bids(bid) * (user_class.get_conversion_per_price(price) * calculate_margin(price)) - user_class.get_click_bids(bid) * user_class.get_cost_per_click(bid)    #sample_cost
    return mean_per_bid

class BiddingEnvironment:
    def __init__(self, bids, sigma, user_class, price, n_arms):
        self.bids = bids
        self.means = self.initialize_means(user_class=user_class, bids=bids, price=price)
        self.sigmas = np.ones(len(bids)) * sigma
        self.price = price
        self.n_arms = n_arms

    def initialize_means(self, user_class, bids, price):
        means = np.zeros(len(bids))
        for i in range(len(means)):
                means[i] = fun(user_class, bids[i], price)
        return means

    def round(self, pulled_arm):
        return np.random.normal(self.means[pulled_arm], self.sigmas[pulled_arm])