# **Team**

> Balestrieri Niccolò - 10936955 <br>
  Bertogalli Andrea - 10702303 <br>
  Cavalieri Francesco - 11020855    
  Tombini Nicolò - 10912627


# Adversarial (highly non-stationary) environment

In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy import stats as scipystats
from scipy import optimize


np.random.seed(42)

In [None]:
class Environment:
    """Abstract base for the simulated environments defined in this notebook.

    Both methods raise ``NotImplementedError``; subclasses are expected to
    replace them.
    """

    def __init__(self):
        """Must be overridden by a concrete environment."""
        raise NotImplementedError()

    def round(self, a_t):
        """Play one round with action ``a_t``; must be overridden."""
        raise NotImplementedError()

## Pricing

### Environments definition

#### Adversarial Bandit

In [None]:
class AdversarialBanditPricingEnvironment(Environment):
    """Pricing environment with a drifting demand level and bandit feedback.

    The purchase probability of a price ``p`` is ``1 - p`` plus Gaussian noise
    centred on ``mu`` (standard deviation ``sigma``), clipped to ``[0, 1]``.
    ``mu`` starts at 0.3 and, after every round, moves by a uniform random
    step drawn from ``drift_range`` (then is clipped to ``[0, 1]``).
    """

    def __init__(self, time_steps, cost, prices,  sigma=0.05, drift_range=(-0.005, 0.005)):
        # Problem definition.
        self.prices = prices
        self.cost = cost
        self.time_steps = time_steps
        # Noise model.
        self.sigma = sigma
        self.drift_range = drift_range
        self.mu = 0.3
        # Progress counter used by ``is_terminated``.
        self.current_step = 0

    def update_means(self):
        """Apply one random drift step to ``mu`` and keep it inside ``[0, 1]``."""
        low, high = self.drift_range[0], self.drift_range[1]
        self.mu = np.clip(self.mu + np.random.uniform(low, high), 0, 1)

    def purchase_probabilities(self):
        """Sample one noisy purchase probability per price in ``self.prices``."""
        noisy = [
            (1 - price) + np.random.normal(self.mu, self.sigma)
            for price in self.prices
        ]
        return np.clip(noisy, 0, 1)

    def get_buying_probability(self, price):
        """Sample the purchase probability of a single ``price``.

        Unlike ``purchase_probabilities``, the noise term itself is clipped to
        ``[0, 1]`` before being added to ``1 - price``.
        """
        noise = np.clip(np.random.normal(self.mu, self.sigma), 0, 1)
        return np.clip((1 - price) + noise, 0, 1)

    def round(self, p_t, n_t):
        """Sell at price ``p_t`` to ``n_t`` customers and return the loss.

        Returns:
            ``(l_t, stats)`` where ``l_t`` is the best sampled counterfactual
            reward minus the realised reward, and ``stats`` holds the sampled
            per-price rewards (``'possible_rewards'``) and their maximum
            (``'max_reward'``).

        The realised reward and the per-price rewards come from independent
        random draws, so ``l_t`` is not guaranteed to be non-negative.
        """
        # Realised outcome for the price actually played.
        units_sold = np.random.binomial(n_t, self.get_buying_probability(p_t))
        realized_reward = (p_t - self.cost) * units_sold

        # Independently sampled outcome of every candidate price.
        probs = self.purchase_probabilities()
        possible_rewards = [
            (price - self.cost) * np.random.binomial(n_t, prob)
            for price, prob in zip(self.prices, probs)
        ]
        best_reward = max(possible_rewards)
        l_t = best_reward - realized_reward

        # The demand level only drifts while the horizon has not been reached.
        if not self.is_terminated():
            self.update_means()
            self.current_step += 1

        return l_t, {
            'possible_rewards': possible_rewards,
            'max_reward': best_reward,
        }

    def is_terminated(self):
        """Return True once ``time_steps`` rounds have been played."""
        return self.current_step >= self.time_steps


#### Adversarial Expert

In [None]:
class AdversarialExpertPricingEnvironment(Environment):
    """Pricing environment with a drifting demand level and full (expert) feedback.

    The purchase probability of a price ``p`` is ``1 - p`` plus Gaussian noise
    centred on ``mu`` (standard deviation ``sigma``), clipped to ``[0, 1]``.
    ``mu`` starts at 0.5 and, after every round, moves by a uniform random
    step drawn from ``drift_range`` (then is clipped to ``[0, 1]``).
    """

    def __init__(self, time_steps, cost, prices,  sigma=0.05, drift_range=(-0.01, 0.01)):
        self.time_steps = time_steps
        self.sigma = sigma
        self.drift_range = drift_range
        self.mu = 0.5
        self.current_step = 0
        self.cost = cost
        self.prices = prices

    def update_means(self):
        """Apply one random drift step to ``mu`` and keep it inside ``[0, 1]``."""
        drift = np.random.uniform(self.drift_range[0], self.drift_range[1])
        self.mu = np.clip(self.mu + drift, 0, 1)

    def purchase_probabilities(self):
        """Sample one noisy purchase probability per price in ``self.prices``."""
        probabilities = [
            (1 - price) + np.random.normal(self.mu, self.sigma)
            for price in self.prices
        ]
        return np.clip(probabilities, 0, 1)

    def get_buying_probability(self, price):
        """Sample the purchase probability of a single ``price``.

        The noise term is clipped to ``[0, 1]`` before being added to
        ``1 - price``; the sum is clipped again.
        """
        time_randomness = np.clip(np.random.normal(self.mu, self.sigma), 0, 1)
        return np.clip((1 - price) + time_randomness, 0, 1)

    def round(self, n_t):
        """Sample the reward of every price for ``n_t`` customers.

        Returns:
            ``(l_t, stats)`` where ``l_t`` is an array with one loss per price
            (best sampled reward minus that price's reward) and ``stats``
            holds the sampled rewards (``'possible_rewards'``, a list) and
            their maximum (``'max_reward'``).
        """
        probs = self.purchase_probabilities()
        possible_rewards = [
            (price - self.cost) * np.random.binomial(n_t, prob)
            for price, prob in zip(self.prices, probs)
        ]
        max_reward = max(possible_rewards)
        # Convert explicitly: ``scalar - list`` only broadcasts when the scalar
        # happens to be a NumPy type, i.e. it fails for a plain list of prices.
        l_t = max_reward - np.asarray(possible_rewards)

        # The demand level only drifts while the horizon has not been reached.
        if self.current_step < self.time_steps:
            self.update_means()
            self.current_step += 1

        stats = {
            'possible_rewards': possible_rewards,
            'max_reward': max_reward,
        }

        return l_t, stats

    def is_terminated(self):
        """Return True once ``time_steps`` rounds have been played."""
        return self.current_step >= self.time_steps

#### Probability over time

In [None]:
# Record, for every round, the sampled purchase probability of each candidate
# price so that its evolution over time can be plotted afterwards.
rounds = 100
prices = np.linspace(0, 1, 100)
adversarial_pricing_env = AdversarialBanditPricingEnvironment(time_steps=rounds, cost = 0, prices = prices)

# allProbabilities[t][i] is the probability sampled at round t for prices[i].
allProbabilities = []

while not adversarial_pricing_env.is_terminated():
  probabilities = adversarial_pricing_env.purchase_probabilities()
  allProbabilities.append(probabilities)
  # The played price and customer count are arbitrary here: the call is only
  # needed to advance the environment (drift of mu and step counter).
  adversarial_pricing_env.round(0.1, 10)

In [None]:
# Plot how the buying probability of three selected prices evolves over time.
fig = go.Figure()

for i in [0, 70, 90]:
    # allProbabilities holds one entry per round, so iterate over the rounds
    # (not over the number of prices) and pick price i in each of them.
    y = [round_probabilities[i] for round_probabilities in allProbabilities]
    fig.add_trace(go.Scatter(x=list(range(rounds)), y=y, mode='lines', name=f'Price {round(prices[i], 2)}'))

fig.update_layout(
    title='User Buying Probability Over Time',
    title_x=0.5,
    xaxis_title='Time',
    yaxis_title='Buying Probability',
    xaxis=dict(tickmode='linear', tick0=0, dtick=5, range=[0, rounds]),
    yaxis=dict(range=[0, 1]),
    legend=dict(title='Price'),
    font=dict(size=12, family="Arial"),
)

fig.show()

### Agents definition

In [None]:
class Agent:
    """Abstract learner interface: choose an arm, then learn from feedback.

    Every method raises ``NotImplementedError`` and is meant to be overridden.
    """

    def __init__(self):
        """Must be overridden by a concrete agent."""
        raise NotImplementedError()

    def pull_arm(self):
        """Select the next arm; must be overridden."""
        raise NotImplementedError()

    def update(self, r_t):
        """Incorporate the feedback ``r_t``; must be overridden."""
        raise NotImplementedError()

In [None]:
class EXP3Agent(Agent):
    """EXP3 learner over ``K`` arms using importance-weighted loss estimates."""

    def __init__(self, K, learning_rate):
        self.K = K
        self.learning_rate = learning_rate
        self.t = 0
        self.a_t = None                    # last arm played
        self.weights = np.ones(K)          # unnormalised arm weights
        self.x_t = np.full(K, 1 / K)       # current sampling distribution
        self.N_pulls = np.zeros(K)         # how many times each arm was played

    def pull_arm(self):
        """Sample an arm from the normalised weights and remember it."""
        total_weight = sum(self.weights)
        self.x_t = self.weights / total_weight
        candidate_arms = np.arange(self.K)
        self.a_t = np.random.choice(candidate_arms, p=self.x_t)
        return self.a_t

    def update(self, l_t):
        """Update the played arm's weight with the observed loss ``l_t``.

        The loss is divided by the probability with which the arm was drawn
        before entering the exponential update.
        """
        arm = self.a_t
        estimated_loss = l_t / self.x_t[arm]
        self.weights[arm] *= np.exp(-self.learning_rate * estimated_loss)
        self.N_pulls[arm] += 1
        self.t += 1

In [None]:
class HedgeAgent(Agent):
    """Hedge learner over ``K`` arms using the full loss vector each round."""

    def __init__(self, K, learning_rate):
        self.K = K
        self.learning_rate = learning_rate
        self.t = 0
        self.a_t = None                    # last arm played
        self.weights = np.ones(K)          # unnormalised arm weights
        self.x_t = np.full(K, 1 / K)       # current sampling distribution
        self.N_pulls = np.zeros(K)         # how many times each arm was played

    def pull_arm(self):
        """Sample an arm from the normalised weights and remember it."""
        total_weight = sum(self.weights)
        self.x_t = self.weights / total_weight
        candidate_arms = np.arange(self.K)
        self.a_t = np.random.choice(candidate_arms, p=self.x_t)
        return self.a_t

    def update(self, l_t):
        """Multiply every weight by ``exp(-learning_rate * loss)``.

        ``l_t`` carries one loss per arm; the pull counter of the arm that was
        actually played is incremented as well.
        """
        decay = np.exp(-self.learning_rate * l_t)
        self.weights *= decay
        self.N_pulls[self.a_t] += 1
        self.t += 1

### EXP3 Evaluation

In [None]:
# Configuration of the EXP3 experiment.
TRIALS = 8          # independent repetitions
T = 7000            # rounds per trial
CUSTOMERS = 30      # customers served in every round
COST = 0.3          # unit cost subtracted from the price

# Price grid: K equally spaced arms between MIN_PRICE and MAX_PRICE.
K = 20
MIN_PRICE = 0
MAX_PRICE = 1
PRICES = np.linspace(MIN_PRICE, MAX_PRICE, K)

# EXP3 learning rate.
LR = np.sqrt(np.log(K) / (K * T))

Here we test the EXP3 algorithm against an adversarial environment (bandit feedback: only the loss of the chosen price is observed).

In [None]:
# Per-trial results of the EXP3 experiment. "Cumulative expected regret" is
# kept for symmetry with the Hedge experiment but is not filled in here.
stats_per_trial = {
    "Cumulative regret": [],
    "Cumulative reward": [],
    "Cumulative expected regret": [],
    "Clairvoyant rewards": [],
    "Clairvoyant losses":[],
    "Best price": [],
    "Best price index": [],
    "Agent pulls": []
}

for trial in range(TRIALS):
    np.random.seed(trial)

    agent = EXP3Agent(K, LR)
    env = AdversarialBanditPricingEnvironment(
        time_steps=T,
        cost=COST,
        prices=PRICES
    )

    rewards = []        # sampled reward of every price, one row per round
    max_rewards = []    # best sampled reward of each round
    agent_losses = []   # loss suffered by the agent in each round

    for t in range(T):
        a_t = agent.pull_arm()
        l_t, stats = env.round(PRICES[a_t], CUSTOMERS)
        agent.update(l_t)

        rewards.append(stats['possible_rewards'])
        max_rewards.append(stats['max_reward'])
        agent_losses.append(l_t)

    rewards_per_arm = np.array(rewards)
    max_rewards_arr = np.array(max_rewards)
    agent_losses_arr = np.array(agent_losses)

    # Best fixed price in hindsight: the arm with the largest total reward.
    cumulative_rewards = np.sum(rewards_per_arm, axis = 0)
    best_price_index = np.argmax(cumulative_rewards)
    best_price = PRICES[best_price_index]
    agent_pulls = np.array(agent.N_pulls)

    clairvoyant_rewards = rewards_per_arm[:, best_price_index]
    clairvoyant_losses = max_rewards_arr - clairvoyant_rewards

    cumulative_regret = np.cumsum(agent_losses_arr - clairvoyant_losses)

    # The environment defines l_t = max_reward - r_t, so the agent's reward is
    # recovered as max_reward - l_t. Storing its running sum gives one value
    # per round, which is what the per-trial reward plot (x = range(T)) needs;
    # the per-arm totals have length K and cannot be drawn against time.
    agent_cumulative_reward = np.cumsum(max_rewards_arr - agent_losses_arr)

    stats_per_trial["Cumulative regret"].append(cumulative_regret)
    stats_per_trial["Cumulative reward"].append(agent_cumulative_reward)
    stats_per_trial["Clairvoyant rewards"].append(clairvoyant_rewards)
    stats_per_trial["Clairvoyant losses"].append(clairvoyant_losses)
    stats_per_trial["Best price"].append(best_price)
    stats_per_trial["Best price index"].append(best_price_index)
    stats_per_trial["Agent pulls"].append(agent_pulls)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.cm as cm

def generate_colormap_colors(n):
    """Return ``n`` opaque ``rgba(r, g, b, 1)`` strings from Matplotlib's ``tab10``.

    The palette is cycled when ``n`` exceeds its size, so traces beyond the
    palette length do not all collapse onto the last colour.
    """
    colormap = plt.colormaps['tab10']
    colors = []
    for i in range(n):
        # Integer lookups past the palette size are clipped, hence the modulo.
        red, green, blue, _alpha = colormap(i % colormap.N)
        colors.append(f'rgba({int(red * 255)}, {int(green * 255)}, {int(blue * 255)}, 1)')
    return colors

colors = generate_colormap_colors(TRIALS)

# Column 1 shows the reward and column 2 the regret, so the subplot titles
# must be listed in that same order to match the traces and y-axis labels.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Cumulative Reward per Trial', 'Cumulative Regret per Trial'))

for i in range(TRIALS):

    # Left panel: reward (legend entry hidden to avoid duplicating the trial).
    fig.add_trace(go.Scatter(
        x=list(range(T)),
        y=stats_per_trial["Cumulative reward"][i],
        mode='lines',
        name=f'Trial {i+1}',
        line=dict(color=colors[i]),
        showlegend = False
    ), row=1, col=1)

    # Right panel: regret (this trace carries the legend entry of the trial).
    fig.add_trace(go.Scatter(
        x=list(range(T)),
        y=stats_per_trial["Cumulative regret"][i],
        mode='lines',
        name=f'Trial {i+1}',
        line=dict(color=colors[i]),
        showlegend=True,
    ), row=1, col=2)

fig.update_layout(
    legend=dict(title='Trials'),
    font=dict(size=12, family="Arial"),
    title='Metrics per Trial',
    title_x=0.5,
    xaxis_title='t',
)

fig.update_yaxes(title_text='Cumulative Regret', row=1, col=2)
fig.update_yaxes(title_text='Cumulative Reward', row=1, col=1)

fig.show()


In [None]:
# Mean cumulative regret across trials, with a band of +/- one standard error.
cumulative_regret = np.array(stats_per_trial["Cumulative regret"])
t_values = np.arange(T)

average_regret = cumulative_regret.mean(axis=0)
regret_sd = cumulative_regret.std(axis=0)
standard_error = regret_sd / np.sqrt(TRIALS)
lower_bound = average_regret - standard_error
upper_bound = average_regret + standard_error

# The band is a closed polygon: lower bound forwards, upper bound backwards.
uncertainty_band = go.Scatter(
    x=np.concatenate([t_values, t_values[::-1]]),
    y=np.concatenate([lower_bound, upper_bound[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 0, 255, 0.2)',
    line={'color': 'rgba(255,255,255,0)'},
    hoverinfo='skip',
    showlegend=True,
    name='Uncertainty',
)
mean_line = go.Scatter(
    x=t_values,
    y=average_regret,
    mode='lines',
    line={'color': 'blue'},
    name='Average Regret',
)

fig = go.Figure()
fig.add_trace(uncertainty_band)
fig.add_trace(mean_line)

fig.update_layout(
    title='Cumulative regret of EXP3',
    xaxis_title='t',
    yaxis_title='Regret',
    width=600,
    legend={'orientation': 'h', 'yanchor': 'bottom', 'y': 1.02, 'xanchor': 'right', 'x': 1},
    hovermode='x unified',
)

fig.show()

In [None]:
import plotly.graph_objs as go

# Heatmap of how often each arm was pulled in every trial, with the price that
# collected the largest total reward in that trial marked in red.
agent_pulls = stats_per_trial["Agent pulls"]
best_price = stats_per_trial["Best price"]

heatmap = go.Heatmap(
    colorscale='Viridis',
    z=agent_pulls,
    x=PRICES,
    y=list(range(1, len(agent_pulls) + 1)),
)

scatter = go.Scatter(
    name='Best Price',
    mode='markers',
    marker={'size': 10, 'color': 'red', 'symbol': 'circle'},
    x=[best_price[trial] for trial in range(TRIALS)],
    y=list(range(1, TRIALS + 1)),
)

layout = go.Layout(
    title="Number of pulls per arm",
    xaxis={'title': "Arms"},
    yaxis={'title': "Trials"},
    width=600,
)

fig = go.Figure(layout=layout)
fig.add_trace(heatmap)
fig.add_trace(scatter)

fig.show()


### Hedge Evaluation

In [None]:
# Configuration of the Hedge experiment.
TRIALS = 8          # independent repetitions
T = 7000            # rounds per trial
CUSTOMERS = 30      # customers served in every round
COST = 0.3          # unit cost subtracted from the price

# Price grid: K equally spaced arms between MIN_PRICE and MAX_PRICE.
K = 20
MIN_PRICE = 0
MAX_PRICE = 1
PRICES = np.linspace(MIN_PRICE, MAX_PRICE, K)

# Hedge learning rate.
LR = np.sqrt(np.log(K) / T)

Here we test the Hedge algorithm against an adversarial environment (expert feedback: the losses of all prices are observed at every round).

In [None]:
# Per-trial results of the Hedge experiment.
stats_per_trial = {
    "Cumulative regret": [],
    "Cumulative reward": [],
    "Cumulative expected regret": [],
    "Clairvoyant rewards": [],
    "Clairvoyant losses":[],
    "Best price": [],
    "Best price index": [],
    "Agent pulls": []
}

for trial in range(TRIALS):
    np.random.seed(trial)

    agent = HedgeAgent(K, LR)
    env = AdversarialExpertPricingEnvironment(
        time_steps=T,
        cost=COST,
        prices=PRICES
    )

    expected_agent_losses = []  # loss averaged over the agent's distribution
    rewards = []                # sampled reward of every price, one row per round
    max_rewards = []            # best sampled reward of each round
    agent_losses = []           # loss of the arm actually played

    for t in range(T):
        a_t = agent.pull_arm()
        l_t, stats = env.round(CUSTOMERS)
        agent.update(l_t)

        rewards.append(stats['possible_rewards'])
        max_rewards.append(stats['max_reward'])
        agent_losses.append(l_t[a_t])
        # agent.x_t is only refreshed inside pull_arm, so it is still the
        # distribution from which a_t was drawn.
        expected_agent_losses.append(np.dot(l_t, agent.x_t))

    rewards_per_arm = np.array(rewards)
    max_rewards_arr = np.array(max_rewards)
    agent_losses_arr = np.array(agent_losses)

    # Best fixed price in hindsight: the arm with the largest total reward.
    cumulative_rewards = np.sum(rewards_per_arm, axis = 0)
    best_price_index = np.argmax(cumulative_rewards)
    best_price = PRICES[best_price_index]
    agent_pulls = np.array(agent.N_pulls)

    clairvoyant_rewards = rewards_per_arm[:, best_price_index]
    clairvoyant_losses = max_rewards_arr - clairvoyant_rewards

    cumulative_regret = np.cumsum(agent_losses_arr - clairvoyant_losses)
    cumulative_expected_regret = np.cumsum(np.array(expected_agent_losses) - clairvoyant_losses)

    # The loss of the played arm is max_reward - reward, so the agent's reward
    # is max_reward - loss. Storing its running sum gives one value per round,
    # which is what the per-trial reward plot (x = range(T)) needs; the
    # per-arm totals have length K and cannot be drawn against time.
    agent_cumulative_reward = np.cumsum(max_rewards_arr - agent_losses_arr)

    stats_per_trial["Cumulative regret"].append(cumulative_regret)
    stats_per_trial["Cumulative reward"].append(agent_cumulative_reward)
    stats_per_trial["Clairvoyant rewards"].append(clairvoyant_rewards)
    stats_per_trial["Clairvoyant losses"].append(clairvoyant_losses)
    stats_per_trial["Best price"].append(best_price)
    stats_per_trial["Best price index"].append(best_price_index)
    stats_per_trial["Agent pulls"].append(agent_pulls)
    stats_per_trial["Cumulative expected regret"].append(cumulative_expected_regret)

In [None]:
import plotly.graph_objs as go

# Heatmap of how often each arm was pulled in every trial, with the price that
# collected the largest total reward in that trial marked in red.
agent_pulls = stats_per_trial["Agent pulls"]
best_price = stats_per_trial["Best price"]

heatmap = go.Heatmap(
    colorscale='Viridis',
    z=agent_pulls,
    x=PRICES,
    y=list(range(1, len(agent_pulls) + 1)),
)

scatter = go.Scatter(
    name='Best Price',
    mode='markers',
    marker={'size': 10, 'color': 'red', 'symbol': 'circle'},
    x=[best_price[trial] for trial in range(TRIALS)],
    y=list(range(1, TRIALS + 1)),
)

layout = go.Layout(
    title="Number of pulls per arm",
    xaxis={'title': "Arms"},
    yaxis={'title': "Trials"},
    width=600,
)

fig = go.Figure(layout=layout)
fig.add_trace(heatmap)
fig.add_trace(scatter)

fig.show()


In [None]:
import plotly.graph_objects as go
import matplotlib.cm as cm

def generate_colormap_colors(n):
    """Return ``n`` opaque ``rgba(r, g, b, 1)`` strings from Matplotlib's ``tab10``.

    The palette is cycled when ``n`` exceeds its size, so traces beyond the
    palette length do not all collapse onto the last colour.
    """
    colormap = plt.colormaps['tab10']
    colors = []
    for i in range(n):
        # Integer lookups past the palette size are clipped, hence the modulo.
        red, green, blue, _alpha = colormap(i % colormap.N)
        colors.append(f'rgba({int(red * 255)}, {int(green * 255)}, {int(blue * 255)}, 1)')
    return colors

colors = generate_colormap_colors(TRIALS)


def _show_metric_per_trial(metric_key, title, yaxis_title):
    """Draw one line per trial for ``stats_per_trial[metric_key]`` and show it."""
    figure = go.Figure()
    for trial_index in range(TRIALS):
        figure.add_trace(go.Scatter(
            x=list(range(T)),
            y=stats_per_trial[metric_key][trial_index],
            mode='lines',
            name=f'Trial {trial_index+1}',
            line={'color': colors[trial_index]},
        ))

    figure.update_layout(
        title=title,
        title_x=0.5,
        xaxis_title='t',
        yaxis_title=yaxis_title,
        legend={'title': 'Trials'},
        font={'size': 12, 'family': "Arial"},
    )
    figure.show()
    return figure


# The three figures share the same recipe and are shown in this order.
fig_regret = _show_metric_per_trial(
    "Cumulative regret", 'Cumulative Regret per Trial', 'Cumulative Regret')

fig_expected_regret = _show_metric_per_trial(
    "Cumulative expected regret", 'Cumulative Expected Regret per Trial', 'Cumulative Expected Regret')

fig_reward = _show_metric_per_trial(
    "Cumulative reward", 'Cumulative Reward per Trial', 'Cumulative Reward')


In [None]:
# Mean cumulative regret across trials, with a band of +/- one standard error.
cumulative_regret = np.array(stats_per_trial["Cumulative regret"])
t_values = np.arange(T)

average_regret = cumulative_regret.mean(axis=0)
regret_sd = cumulative_regret.std(axis=0)
standard_error = regret_sd / np.sqrt(TRIALS)
lower_bound = average_regret - standard_error
upper_bound = average_regret + standard_error

# The band is a closed polygon: lower bound forwards, upper bound backwards.
uncertainty_band = go.Scatter(
    x=np.concatenate([t_values, t_values[::-1]]),
    y=np.concatenate([lower_bound, upper_bound[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 0, 255, 0.2)',
    line={'color': 'rgba(255,255,255,0)'},
    hoverinfo='skip',
    showlegend=True,
    name='Uncertainty',
)
mean_line = go.Scatter(
    x=t_values,
    y=average_regret,
    mode='lines',
    line={'color': 'blue'},
    name='Average Regret',
)

fig = go.Figure()
fig.add_trace(uncertainty_band)
fig.add_trace(mean_line)

fig.update_layout(
    title='Cumulative regret of HEDGE',
    xaxis_title='t',
    yaxis_title='Regret',
    width=600,
    legend={'orientation': 'h', 'yanchor': 'bottom', 'y': 1.02, 'xanchor': 'right', 'x': 1},
    hovermode='x unified',
)

fig.show()

## Bidding

### Environment definition

In [None]:
class AdversarialBiddingEnvironment(Environment):
    """Generates competing bids whose common mean drifts from round to round.

    Each of the ``num_agents`` bids is drawn from a normal distribution with
    mean ``mu`` and standard deviation ``sigma`` and clipped to ``[0, 1]``.
    ``mu`` starts at 0.5 and moves by a uniform step from ``drift_range``
    every round.
    """

    def __init__(self, num_agents, time_steps, sigma=0.2, drift_range=(-0.1, 0.1)):
        self.num_agents = num_agents
        self.time_steps = time_steps
        self.current_step = 0
        self.mu = 0.5
        self.sigma = sigma
        self.drift_range = drift_range

    def update_means(self):
        """Apply one random drift step to ``mu`` and keep it inside ``[0, 1]``."""
        low, high = self.drift_range[0], self.drift_range[1]
        self.mu = np.clip(self.mu + np.random.uniform(low, high), 0, 1)

    def get_bids(self):
        """Sample the current bids of all competitors."""
        raw_bids = np.random.normal(self.mu, self.sigma, size=self.num_agents)
        return np.clip(raw_bids, 0, 1)

    def round(self):
        """Advance the environment by one step unless the horizon is reached."""
        if self.is_terminated():
            return
        self.update_means()
        self.current_step += 1

    def is_terminated(self):
        """Return True once ``time_steps`` rounds have been played."""
        return self.current_step >= self.time_steps

In [None]:
# Collect the competitors' bids for every user so they can be plotted below.
n_users = 100
n_advertisers = 5

adversarial_bidding_env = AdversarialBiddingEnvironment(num_agents=n_advertisers, time_steps=n_users)

# allBids[t] holds the n_advertisers bids sampled at step t.
allBids = []

while not adversarial_bidding_env.is_terminated():
  bids = adversarial_bidding_env.get_bids()
  allBids.append(bids)
  # Advances the step counter and drifts the mean of the bid distribution.
  adversarial_bidding_env.round()

In [None]:
# Mean competing bid per step, with the min-max range drawn as a filled band.
fig = go.Figure()

mean_bids = [np.mean(step_bids) for step_bids in allBids]
min_bids = [np.min(step_bids) for step_bids in allBids]
max_bids = [np.max(step_bids) for step_bids in allBids]

fig.add_trace(go.Scatter(
    x=list(range(n_users)), y=mean_bids, mode='lines', name='Mean Bid',
))
# The band is drawn by filling from the (invisible) max line down to the
# (invisible) min line, so the min trace must be added immediately before it.
fig.add_trace(go.Scatter(
    x=list(range(n_users)), y=min_bids, mode='lines',
    fill=None, showlegend=False, line={'width': 0},
))
fig.add_trace(go.Scatter(
    x=list(range(n_users)), y=max_bids, mode='lines',
    fill='tonexty', name='Bid Range', line={'width': 0},
))

fig.update_layout(
    title='Competing Bids Over Time',
    title_x=0.5,
    xaxis_title='Time',
    yaxis_title='Bid Value',
    xaxis={'tickmode': 'linear', 'tick0': 0, 'dtick': 5, 'range': [0, n_users]},
    legend={'title': 'Bids'},
    font={'size': 12, 'family': "Arial"},
)

fig.show()

### Auction definition

In [None]:
class Auction:
    """Template for an auction: pick the winners, then price their clicks.

    Subclasses provide ``__init__``, ``get_winners`` and
    ``get_payments_per_click``; ``round`` chains the latter two.
    """

    def __init__(self, *args, **kwargs):
        """Must be overridden by a concrete auction."""
        raise NotImplementedError()

    def get_winners(self, bids):
        """Return ``(winners, values)`` for the given bids; must be overridden."""
        raise NotImplementedError()

    def get_payments_per_click(self, winners, values, bids):
        """Return what each winner pays per click; must be overridden."""
        raise NotImplementedError()

    def round(self, bids):
        """Run one auction and return ``(winners, payments_per_click)``."""
        winners, values = self.get_winners(bids)
        return winners, self.get_payments_per_click(winners, values, bids)

In [None]:
class GeneralizedFirstPriceAuction(Auction):
    """Multi-slot auction in which every winner pays its own bid."""

    def __init__(self, ctrs, slots):
        self.slots = slots
        self.ctrs = ctrs
        self.n_adv = len(self.ctrs)

    def get_winners(self, bids):
        """Rank advertisers by ``ctr * bid`` and keep the top ``slots``.

        Returns the winners' indices (best first) and their ranking values.
        """
        adv_values = self.ctrs * bids
        best_first = np.argsort(adv_values)[::-1]
        winners = best_first[:self.slots]
        return winners, adv_values[winners]

    def get_payments_per_click(self, winners, values, bids):
        """Return the winners' own bids, rounded to two decimals."""
        return bids[winners].round(2)

### Agents definition

In [None]:
class Agent:
    """Abstract learner interface: choose an arm, then learn from feedback.

    Every method raises ``NotImplementedError`` and is meant to be overridden.
    """

    def __init__(self):
        """Must be overridden by a concrete agent."""
        raise NotImplementedError()

    def pull_arm(self):
        """Select the next arm; must be overridden."""
        raise NotImplementedError()

    def update(self, r_t):
        """Incorporate the feedback ``r_t``; must be overridden."""
        raise NotImplementedError()

In [None]:
class HedgeAgent(Agent):
    """Hedge learner over ``K`` arms using the full loss vector each round.

    This version keeps no per-arm pull counter.
    """

    def __init__(self, K, learning_rate):
        self.K = K
        self.learning_rate = learning_rate
        self.t = 0
        self.a_t = None                    # last arm played
        self.weights = np.ones(K)          # unnormalised arm weights
        self.x_t = np.full(K, 1 / K)       # current sampling distribution

    def pull_arm(self):
        """Sample an arm from the normalised weights and remember it."""
        total_weight = sum(self.weights)
        self.x_t = self.weights / total_weight
        candidate_arms = np.arange(self.K)
        self.a_t = np.random.choice(candidate_arms, p=self.x_t)
        return self.a_t

    def update(self, l_t):
        """Multiply every weight by ``exp(-learning_rate * loss)``."""
        decay = np.exp(-self.learning_rate * l_t)
        self.weights *= decay
        self.t += 1

### Bidders definition

In [None]:
class BiddingAgent:
    """Abstract bidder interface: place a bid, then learn from the outcome.

    Every method raises ``NotImplementedError`` and is meant to be overridden.
    """

    def __init__(self):
        """Must be overridden by a concrete bidder."""
        raise NotImplementedError()

    def bid(self):
        """Return the next bid; must be overridden."""
        raise NotImplementedError()

    def update(self):
        """Incorporate the auction outcome; must be overridden."""
        raise NotImplementedError()

In [None]:
class HedgeMultiplicativePacingAgent(BiddingAgent):
    """Budget-constrained bidder: Hedge picks the bid, ``lmbd`` paces the spend.

    ``rho = budget / T`` is the per-round spending target and ``lmbd`` is the
    multiplier that penalises spending above it.
    """

    def __init__(self, bids_set, valuation, budget, T, eta):
        self.bids_set = bids_set
        self.K = len(bids_set)
        self.valuation = valuation
        self.budget = budget
        self.T = T
        self.eta = eta
        self.rho = self.budget/self.T
        # One Hedge arm per candidate bid.
        self.hedge = HedgeAgent(self.K, np.sqrt(np.log(self.K)/T))
        self.lmbd = 1
        self.t = 0

    def bid(self):
        """Return a bid chosen by Hedge, or 0 once less than 1 of budget is left."""
        return 0 if self.budget < 1 else self.bids_set[self.hedge.pull_arm()]

    def update(self, f_t, c_t, m_t):
        """Update Hedge, the multiplier and the budget after one auction.

        Args:
            f_t: realised utility of the round (not used by the update).
            c_t: realised payment of the round.
            m_t: highest competing bid, used to evaluate every candidate bid.
        """
        candidate_bids = np.asarray(self.bids_set)
        # 1 where the candidate bid would have matched or beaten m_t, else 0.
        would_win = (candidate_bids >= m_t).astype(int)
        f_t_full = (self.valuation - candidate_bids) * would_win
        c_t_full = candidate_bids * would_win

        # Utility penalised by overspending, rescaled before it is fed to Hedge.
        lagrangian = f_t_full - self.lmbd * (c_t_full - self.rho)
        lagrangian_range = 2 + (1 - self.rho) / self.rho
        self.hedge.update((2 - lagrangian) / lagrangian_range)

        next_lmbd = self.lmbd - self.eta * (self.rho - c_t)
        self.lmbd = np.clip(next_lmbd, a_min=0, a_max=1/self.rho)
        self.budget -= c_t

### Algorithms execution

In [None]:
# Configuration of the bidding experiment.
SLOTS = 3                   # slots handed out by the auction
ADVERTISERS = 5             # competing advertisers
VALUE = 0.4                 # the learning agent's valuation
BUDGET = 25                 # the learning agent's total budget
USERS = 150                 # number of auctions (one per user)
BIDS = np.linspace(0,1,11)  # candidate bids available to the agent
LR = 1/np.sqrt(USERS)       # step size of the pacing multiplier
RHO = BUDGET/USERS          # per-auction spending target

adversarial_bidding_env = AdversarialBiddingEnvironment(num_agents=ADVERTISERS, time_steps=USERS)

# Pre-generate the competitors' bids for all users; every trial replays them.
COMPETING_BIDS = []

while not adversarial_bidding_env.is_terminated():
  bids = adversarial_bidding_env.get_bids()
  COMPETING_BIDS.append(bids)
  adversarial_bidding_env.round()

COMPETING_BIDS = np.array(COMPETING_BIDS)
# Highest competing bid faced in each auction.
M_T = np.max(COMPETING_BIDS, axis = 1)

#### Hedge Multiplicative Pacing Strategy

In [None]:
# Empirical probability that each candidate bid beats the highest competing
# bid. The comparison must use the bid itself: comparing the budget with M_T
# is always true (bids live in [0, 1]) and makes every probability equal 1.
win_probabilities = np.array([np.sum(b > M_T) / USERS for b in BIDS])

# Clairvoyant benchmark: the distribution gamma over bids that maximises the
# expected utility while spending at most RHO per auction in expectation.
c = -(VALUE-BIDS)*win_probabilities     # linprog minimises, hence the sign
A_ub = [BIDS*win_probabilities]         # expected payment ...
b_ub = [RHO]                            # ... bounded by the spending target
A_eq = [np.ones(len(BIDS))]             # gamma sums to one
b_eq = [1]
res = optimize.linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=(0,1))
gamma = res.x
expected_clairvoyant_utilities = [-res.fun for u in range(USERS)]
expected_clairvoyant_bids = [sum(BIDS*gamma*win_probabilities) for u in range(USERS)]

Let's consider multiple trials

In [None]:
# Repeat the bidding experiment; the competing bids are the same in every
# trial, only the agent's own randomness changes with the seed.
n_epochs = 10
all_regrets = []
all_payments = []
for i in range(n_epochs):
    agent = HedgeMultiplicativePacingAgent(bids_set = BIDS,
                                        valuation=VALUE,
                                        budget=BUDGET,
                                        T=USERS,
                                        eta=LR)

    auction = GeneralizedFirstPriceAuction(np.ones(ADVERTISERS+1), SLOTS)
    # Plain lists while looping (np.append copies the whole array each time).
    utilities = []
    my_bids = []
    my_payments = []

    np.random.seed(i)
    for u in range(USERS):
        my_bid = agent.bid()
        # The learning agent is always advertiser 0.
        bids = np.append(my_bid, COMPETING_BIDS[u,:].ravel())
        winners, payments_per_click = auction.round(bids=bids)

        my_win = int(0 in winners)
        # First-price rule: a winner pays its own bid. This matches the
        # auction's payments, the agent's internal update and the clairvoyant
        # benchmark, all of which price a win at the bid (not at M_T).
        # NOTE(review): the agent wins whenever it gets any of the SLOTS,
        # while its update and the benchmark count a win only when the bid
        # beats M_T -- confirm which rule is intended.
        f_t, c_t = (VALUE - my_bid)*my_win, my_bid*my_win
        agent.update(f_t, c_t, M_T[u])

        my_bids.append(my_bid)
        utilities.append(f_t)
        my_payments.append(c_t)

    utilities = np.array(utilities)
    my_bids = np.array(my_bids)
    my_payments = np.array(my_payments)

    all_regrets.append(np.cumsum(expected_clairvoyant_utilities-utilities))
    all_payments.append(np.cumsum(my_payments))

avg_regret = np.array(all_regrets).mean(axis=0)
std_regret = np.array(all_regrets).std(axis=0)

avg_payments = np.array(all_payments).mean(axis=0)
std_payments = np.array(all_payments).std(axis=0)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.cm as cm

def generate_colormap_colors(n):
    """Return ``n`` opaque ``rgba(r, g, b, 1)`` strings from Matplotlib's ``tab10``.

    The palette is cycled when ``n`` exceeds its size, so traces beyond the
    palette length do not all collapse onto the last colour.
    """
    colormap = plt.colormaps['tab10']
    colors = []
    for i in range(n):
        # Integer lookups past the palette size are clipped, hence the modulo.
        red, green, blue, _alpha = colormap(i % colormap.N)
        colors.append(f'rgba({int(red * 255)}, {int(green * 255)}, {int(blue * 255)}, 1)')
    return colors

colors = generate_colormap_colors(n_epochs)

# Left panel: cumulative regret; right panel: cumulative payments vs. budget.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Cumulative Regret per Trial', 'Cumulative Payments per Trial'))

# One line per trial in each panel; only the regret line gets a legend entry.
for i in range(n_epochs):
    for column, series, in_legend in ((1, all_regrets[i], True), (2, all_payments[i], False)):
        fig.add_trace(go.Scatter(
            x=list(range(USERS)),
            y=series,
            mode='lines',
            name=f'Trial {i+1}',
            line={'color': colors[i]},
            showlegend=in_legend,
        ), row=1, col=column)

# Mean across trials plus a +/- one standard deviation band, for each panel.
panel_summaries = (
    (1, 'Regret', avg_regret, std_regret),
    (2, 'Payments', avg_payments, std_payments),
)
for column, label, mean_curve, std_curve in panel_summaries:
    fig.add_trace(go.Scatter(
        x=list(range(USERS)),
        y=mean_curve,
        mode='lines',
        name=f'Mean {label}',
        line={'color': 'black', 'dash': 'solid'},
        showlegend=True,
    ), row=1, col=column)

    # Closed polygon: lower edge forwards, upper edge backwards.
    fig.add_trace(go.Scatter(
        x=np.concatenate([list(range(USERS)), list(range(USERS))[::-1]]),
        y=np.concatenate([mean_curve - std_curve, (mean_curve + std_curve)[::-1]]),
        fill='toself',
        fillcolor='rgba(128,128,128,0.2)',
        line={'color': 'rgba(255,255,255,0)'},
        name=f'{label} ± Std',
        showlegend=True,
    ), row=1, col=column)

fig.update_layout(
    legend={'title': 'Trials'},
    font={'size': 12, 'family': "Arial"},
    title='Metrics per Trial',
    title_x=0.5,
    xaxis_title='t',
)

# Horizontal reference line at the total budget, with a label at mid-horizon.
fig.add_trace(go.Scatter(
    x=[0, USERS - 1],
    y=[BUDGET, BUDGET],
    mode='lines',
    line={'color': 'red', 'width': 2},
    name='Budget',
    showlegend=True,
), row=1, col=2)

fig.add_annotation(
    x=USERS - USERS/2,
    y=BUDGET,
    text="Budget",
    showarrow=True,
    arrowhead=2,
    ax=0,
    ay=-20,
    font={'color': 'red'},
    row=1,
    col=2,
)

fig.update_yaxes(title_text='Cumulative Regret', row=1, col=1)
fig.update_yaxes(title_text='Cumulative Payments', row=1, col=2)

fig.show()

## Pricing + Auctions

In [None]:
class Logs:
    """Append-only store of simulation metrics, one list per metric name."""

    def __init__(self):
        metric_names = (
            'pricing_cumulative_regret',
            'pricing_cumulative_reward',
            'chosen_prices_per_trial',
            'bidding_cumulative_regret',
            'bidding_cumulative_payments',
            'bidding_cumulative_wins',
            'bidding_bids',
        )
        self.records = {name: [] for name in metric_names}

    def add_log(self, field, value):
        """Append ``value`` to the list of ``field``.

        Raises ``KeyError`` when ``field`` is not one of the known metrics.
        """
        self.records[field].append(value)

    def get_logs(self):
        """Return the underlying ``{metric: values}`` dictionary."""
        return self.records

In [None]:
class CampainSettings:
    """Bundle of constants for the combined pricing + auction simulation."""

    def __init__(self):
        # Simulation size.
        self.n_trials = 2
        self.days = 365
        self.USERS = 500

        # Auction side.
        self.ADVERTISERS = 3
        self.SLOT = 3
        self.ctrs = np.ones(self.ADVERTISERS+1)  # one extra entry besides the competitors
        self.VALUE = 0.6
        self.BUDGET = 120
        self.RHO = self.BUDGET/self.USERS
        self.LR_BIDDING = 1/np.sqrt(self.USERS)

        # Pricing side.
        self.cost = 0.4
        self.K = 20
        self.MIN_PRICE = 0
        self.MAX_PRICE = 1
        self.PRICES = np.linspace(self.MIN_PRICE, self.MAX_PRICE, self.K)
        self.LR_PRICING = np.sqrt(np.log(self.K) / (self.K * self.days))

        # The bid grid reuses the price bounds.
        self.BIDS = np.linspace(self.MIN_PRICE, self.MAX_PRICE, 11)

In [None]:
def _sample_competing_bids(campain_settings):
    """Collect the competitors' bids for every auction of one day.

    A fresh ``AdversarialBiddingEnvironment`` is stepped until it reports
    termination; the bids read at each step are stacked row by row.
    """
    env = AdversarialBiddingEnvironment(num_agents=campain_settings.ADVERTISERS,
                                        time_steps=campain_settings.USERS)
    rows = []
    while not env.is_terminated():
        rows.append(env.get_bids())
        env.round()
    return np.array(rows)


def _clairvoyant_bidding_utility(campain_settings, max_competing_bids):
    """Return the per-auction expected utility of the best bid distribution in hindsight.

    Solves a linear program over distributions on ``campain_settings.BIDS``
    that maximises ``(VALUE - bid) * win_probability`` while keeping the
    expected payment ``bid * win_probability`` below ``RHO``.
    """
    candidate_bids = campain_settings.BIDS

    # Empirical probability that each candidate bid beats the highest competing
    # bid. The comparison must use the candidate bid itself: comparing the
    # total budget instead yields the same probability for every bid.
    win_probabilities = np.array([
        np.count_nonzero(b > max_competing_bids) / campain_settings.USERS
        for b in candidate_bids
    ])

    # linprog minimises, hence the sign flip on the objective.
    c = -(campain_settings.VALUE - candidate_bids) * win_probabilities
    res = optimize.linprog(
        c,
        A_ub=[candidate_bids * win_probabilities],
        b_ub=[campain_settings.RHO],
        A_eq=[np.ones(len(candidate_bids))],
        b_eq=[1],
        bounds=(0, 1),
    )
    return -res.fun


def pricing_auction_simulation(campain_settings, logs):
    """Run the joint pricing + bidding simulation and store the metrics in ``logs``.

    For every trial a new pricing agent and pricing environment are created.
    On each day the pricing agent picks a price, a fresh bidding agent plays
    ``campain_settings.USERS`` auctions against pre-sampled competing bids, and
    the number of auctions won is passed to the pricing environment together
    with the chosen price.

    Args:
        campain_settings: configuration object; the attributes read here are
            ``n_trials``, ``days``, ``USERS``, ``K``, ``LR_PRICING``,
            ``LR_BIDDING``, ``cost``, ``PRICES``, ``BIDS``, ``ctrs``, ``SLOT``,
            ``VALUE``, ``BUDGET``, ``RHO`` and ``ADVERTISERS``.
        logs: object exposing ``add_log(field, value)``. One entry per trial is
            appended to each of ``pricing_cumulative_regret``,
            ``pricing_cumulative_reward``, ``chosen_prices_per_trial``,
            ``bidding_cumulative_regret``, ``bidding_cumulative_payments``,
            ``bidding_cumulative_wins`` and ``bidding_bids``.

    Returns:
        None. Results are only available through ``logs``.
    """
    n_users = campain_settings.USERS

    for trial in tqdm(range(campain_settings.n_trials), desc="Trial n°"):
        pricing_agent = EXP3Agent(campain_settings.K, campain_settings.LR_PRICING)
        pricing_env = AdversarialBanditPricingEnvironment(
            time_steps=campain_settings.days,
            cost=campain_settings.cost,
            prices=campain_settings.PRICES,
        )

        pricing_rewards = []
        pricing_max_rewards = []
        pricing_agent_losses = []
        chosen_prices = []

        bidding_regret_per_day = []
        bidding_payments_per_day = []
        bidding_wins_per_day = []
        bidding_bids_per_day = []

        for day in range(campain_settings.days):
            # NOTE(review): seeds repeat across trials (trial 0 / day 1 and
            # trial 1 / day 0 both use seed 1) - confirm this is intended.
            np.random.seed(trial + day)
            p_a_t = pricing_agent.pull_arm()
            chosen_price = campain_settings.PRICES[p_a_t]

            auction = GeneralizedFirstPriceAuction(campain_settings.ctrs, campain_settings.SLOT)
            bidding_agent = HedgeMultiplicativePacingAgent(bids_set=campain_settings.BIDS,
                                                           valuation=campain_settings.VALUE,
                                                           budget=campain_settings.BUDGET,
                                                           T=campain_settings.USERS,
                                                           eta=campain_settings.LR_BIDDING)

            competing_bids = _sample_competing_bids(campain_settings)
            m_t = np.max(competing_bids, axis=1)  # highest competing bid per auction

            clairvoyant_utility = _clairvoyant_bidding_utility(campain_settings, m_t)

            # Preallocated per-auction traces (avoids repeated np.append copies).
            utilities = np.zeros(n_users)
            my_bids = np.zeros(n_users)
            my_payments = np.zeros(n_users)
            my_wins = np.zeros(n_users)

            for u in range(n_users):
                my_bid = bidding_agent.bid()
                bids = np.append(my_bid, competing_bids[u, :].ravel())
                winners, _ = auction.round(bids=bids)
                my_win = int(0 in winners)  # index 0 is the learner's own bid

                # NOTE(review): utility and payment are computed from the highest
                # competing bid rather than from my_bid, although the auction is
                # first-price and the benchmark uses (VALUE - bid) - confirm.
                f_t = (campain_settings.VALUE - m_t[u]) * my_win
                c_t = m_t[u] * my_win
                bidding_agent.update(f_t, c_t, m_t[u])

                utilities[u] = f_t
                my_payments[u] = c_t
                my_wins[u] = my_win
                my_bids[u] = my_bid

            # The number of auctions won today is handed to the pricing
            # environment alongside the chosen price.
            p_l_t, p_stats = pricing_env.round(chosen_price, np.count_nonzero(my_wins == 1))
            pricing_agent.update(p_l_t)

            chosen_prices.append(chosen_price)
            pricing_rewards.append(p_stats['possible_rewards'])
            pricing_max_rewards.append(p_stats['max_reward'])
            pricing_agent_losses.append(p_l_t)

            # Per-day bidding statistics.
            bidding_regret_per_day.append(np.cumsum(clairvoyant_utility - utilities))
            bidding_payments_per_day.append(np.cumsum(my_payments))
            bidding_wins_per_day.append(np.cumsum(my_wins)[-1])
            bidding_bids_per_day.append(my_bids)

        # Per-trial pricing statistics: regret against the single price with the
        # highest total reward over the whole trial.
        rewards_per_day = np.array(pricing_rewards)
        pricing_cumulative_rewards = np.sum(rewards_per_day, axis=0)
        best_arm = np.argmax(pricing_cumulative_rewards)

        pricing_clairvoyant_rewards = rewards_per_day[:, best_arm]
        pricing_clairvoyant_losses = np.array(pricing_max_rewards) - pricing_clairvoyant_rewards
        pricing_cumulative_regret = np.cumsum(np.array(pricing_agent_losses) - pricing_clairvoyant_losses)

        logs.add_log('pricing_cumulative_regret', pricing_cumulative_regret)
        logs.add_log('pricing_cumulative_reward', pricing_cumulative_rewards)
        logs.add_log('chosen_prices_per_trial', chosen_prices)

        # Per-trial bidding statistics.
        logs.add_log('bidding_cumulative_regret', bidding_regret_per_day)
        logs.add_log('bidding_cumulative_payments', bidding_payments_per_day)
        logs.add_log('bidding_cumulative_wins', bidding_wins_per_day)
        logs.add_log('bidding_bids', bidding_bids_per_day)

In [None]:
# Build the configuration and an empty metrics store, then run the simulation;
# the simulation writes its results into `logs`.
campain_settings = CampainSettings()
logs = Logs()
pricing_auction_simulation(campain_settings=campain_settings, logs=logs)

Trial n°: 100%|██████████| 2/2 [01:09<00:00, 34.79s/it]


### Pricing plots

In [None]:
out = logs.get_logs()

# Mean and standard deviation across trials for each logged pricing metric.
mean_pricing_regrets = np.array(out['pricing_cumulative_regret']).mean(axis=0)
std_pricing_regrets = np.array(out['pricing_cumulative_regret']).std(axis=0)

mean_chosen_prices = np.array(out['chosen_prices_per_trial']).mean(axis=0)
std_chosen_prices = np.array(out['chosen_prices_per_trial']).std(axis=0)

mean_pricing_rewards = np.array(out['pricing_cumulative_reward']).mean(axis=0)
std_pricing_rewards = np.array(out['pricing_cumulative_reward']).std(axis=0)


def _add_mean_band(figure, x, mean, half_width, line_color, fill_color, name, row=None, col=None):
    """Add a mean line plus a shaded ``mean ± half_width`` band to ``figure``.

    The upper bound is added without fill and the lower bound fills up to it
    (``fill='tonexty'``), so the trace order matters.
    """
    figure.add_trace(go.Scatter(
        x=x,
        y=mean,
        mode='lines',
        line=dict(color=line_color),
        name=name,
    ), row=row, col=col)
    figure.add_trace(go.Scatter(
        x=x,
        y=mean + half_width,
        mode='lines',
        line=dict(width=0),
        showlegend=False
    ), row=row, col=col)
    figure.add_trace(go.Scatter(
        x=x,
        y=mean - half_width,
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        fillcolor=fill_color,
        showlegend=False
    ), row=row, col=col)


days_axis = np.arange(campain_settings.days)
# 'pricing_cumulative_reward' holds one total per candidate price (rewards
# summed over all days of a trial), so it is plotted against the price grid,
# not against the day index.
price_axis = campain_settings.PRICES
sqrt_n_trials = np.sqrt(campain_settings.n_trials)

fig = make_subplots(rows=1, cols=2,
                    column_widths=[0.5, 0.5],
                    subplot_titles=("Average Cumulative Regret", "Average Cumulative Reward per Price"))

# Left panel: cumulative pricing regret over days, band = standard error.
_add_mean_band(fig, days_axis, mean_pricing_regrets, std_pricing_regrets / sqrt_n_trials,
               'blue', 'rgba(0, 0, 255, 0.2)', 'Mean Pricing Regret', row=1, col=1)
fig.update_xaxes(title_text='$t$', row=1, col=1)
fig.update_yaxes(title_text='Regret', row=1, col=1)

# Right panel: total reward of each candidate price, band = standard error.
_add_mean_band(fig, price_axis, mean_pricing_rewards, std_pricing_rewards / sqrt_n_trials,
               'green', 'rgba(0, 255, 0, 0.2)', 'Mean Pricing Reward', row=1, col=2)
fig.update_xaxes(title_text='Price', row=1, col=2)
fig.update_yaxes(title_text='Reward', row=1, col=2)

fig.update_layout(title="Pricing Data Analysis: Regret and Reward", height=400, width=1000, showlegend=False)

fig.show()

fig_prices = make_subplots(rows=1, cols=1, subplot_titles=("Chosen Prices Over Time",))

# Chosen price per day; here the band is one full standard deviation.
_add_mean_band(fig_prices, days_axis, mean_chosen_prices, std_chosen_prices,
               'blue', 'rgba(0, 0, 255, 0.2)', 'Mean Chosen Price')

fig_prices.update_xaxes(title_text='$t$')
fig_prices.update_yaxes(title_text='Price')

fig_prices.update_layout(title="Pricing Data Analysis: Chosen Prices Over Time", height=400, width=1000, showlegend=False)

fig_prices.show()


 ### Auction plots

In [None]:
def plot_auctions(data, x_axis, y_axis, title, budget=None, settings=None):
    """Plot a per-auction bidding metric, averaged over trials, for a sample of days.

    Roughly one day out of every tenth of the horizon is selected; the selected
    days are laid side by side on the x axis, separated by green vertical
    lines, with a shaded band of one standard error around the mean.

    Args:
        data: nested sequence indexed as ``data[trial][day]``, each entry being
            a sequence with one value per auction of that day.
        x_axis: x-axis title.
        y_axis: y-axis title.
        title: figure title. When it contains "payments" (case-insensitive)
            and ``budget`` is given, a dashed red budget line is drawn.
        budget: optional y value of the budget line.
        settings: optional configuration object providing ``days``, ``USERS``
            and ``n_trials``; defaults to the module-level ``campain_settings``.
    """
    if settings is None:
        settings = campain_settings

    users = settings.USERS
    n_days = settings.days
    n_trials = settings.n_trials

    # Mean and standard deviation across trials.
    stacked = np.array(data)
    mean = stacked.mean(axis=0)
    std = stacked.std(axis=0)

    # Sample about ten evenly spaced days; the step is at least 1 so that
    # horizons shorter than ten days do not produce a zero range step.
    indices = list(range(0, n_days, max(1, n_days // 10)))
    print(mean.shape)

    mean_new = mean[indices].flatten()
    std_new = std[indices].flatten()
    half_width = std_new / np.sqrt(n_trials)

    fig = go.Figure()

    fig.add_trace(go.Scatter(y=mean_new, mode='lines', name='Dati', showlegend=False))

    # Upper bound carries no fill; the lower bound fills up to it. Filling both
    # would shade the area between the mean and the upper bound twice.
    fig.add_trace(go.Scatter(
        y=mean_new + half_width,
        mode='lines',
        line=dict(width=0),
        showlegend=False
    ))
    fig.add_trace(go.Scatter(
        y=mean_new - half_width,
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        fillcolor='rgba(0, 255, 0, 0.2)',
        showlegend=False
    ))

    # Vertical separators at the end of each sampled day.
    y_low, y_high = mean_new.min(), mean_new.max()
    for x in range(users, len(mean_new) + users, users):
        fig.add_shape(
            type="line",
            x0=x, y0=y_low, x1=x, y1=y_high,
            line=dict(color="green", width=2)
        )

    if "payments" in title.lower() and budget is not None:
        fig.add_shape(
            type="line",
            x0=0, y0=budget, x1=len(mean_new), y1=budget,
            line=dict(color="red", width=3, dash="dash"),
            name="Budget"
        )

    # One tick per sampled day, centred in that day's segment.
    tick_vals = list(range(users // 2, len(mean_new), users))
    tick_text = [f"Day {i}" for i in indices]

    fig.update_layout(
        title=title,
        xaxis_title=x_axis,
        yaxis_title=y_axis,
        xaxis=dict(
            tickmode='array',
            tickvals=tick_vals,
            ticktext=tick_text
        )
    )

    fig.show()


In [None]:
# Cumulative bidding regret within each sampled day, averaged over trials.
out = logs.get_logs()
data = out["bidding_cumulative_regret"]

plot_auctions(
    data,
    x_axis="Trials",
    y_axis="Regret",
    title="Avg cumulative regret per day",
)

(365, 500)


In [None]:
# Cumulative payments within each sampled day, averaged over trials; passing
# the budget lets plot_auctions draw the budget line for this chart.
out = logs.get_logs()
data = out["bidding_cumulative_payments"]

plot_auctions(
    data,
    x_axis="Trials",
    y_axis="Payments",
    title="Avg payments per day",
    budget=campain_settings.BUDGET,
)

(365, 500)


In [None]:
# Number of auctions won on each day, averaged over trials, as a bar chart.
out = logs.get_logs()
data = out["bidding_cumulative_wins"]
mean_trials = np.mean(data, axis=0)

days = [f"Day {i}" for i in range(campain_settings.days)]

fig = go.Figure()
fig.add_trace(go.Bar(x=days, y=mean_trials, text=mean_trials, textposition='auto'))

# Only every tenth day gets an x-axis tick label.
fig.update_layout(
    title="Wins per Day",
    xaxis_title="Days",
    yaxis_title="Wins",
    xaxis={
        'tickmode': 'array',
        'tickvals': list(range(0, campain_settings.days, 10)),
        'ticktext': days[::10],
    },
)
fig.show()

In [None]:
# Bids placed by the learner within each sampled day, averaged over trials.
out = logs.get_logs()
data = out["bidding_bids"]

plot_auctions(
    data,
    x_axis="Trials",
    y_axis="Bids",
    title="Avg bids per day",
)

(365, 500)
