In [None]:
import sys
sys.path.append("../")

%load_ext autoreload
%autoreload 2

import random
import pandas as pd
import numpy as np

from tqdm import tqdm
import warnings

from simulator.simulation.utils_visualization import plot_metric_with_error
from simulator.validation.check_results import autobidder_check

from simulator.model.robust_ce import RobustBidCE
from simulator.model.simple import SimpleBid

# Show up to 1000 rows and every column when a DataFrame is displayed.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = None

# This notebook is a guide to simulating and checking the Robust PID Bidder for all the campaigns in the dataset

In [None]:
# Loss identifier for this experiment: it is forwarded to autobidder_check and
# plot_metric_with_error, and embedded in the names of the generated CSV files.
loss_type = 'CE'

# Load data

In [None]:
# Auction format label; its lower-cased form names the sub-folder and file
# that the noised stats are written to later in the notebook.
auction_mode = 'FPA'

# Input CSVs: the campaign table and the stats table.
data_dir = "../data"
campaigns_path = f"{data_dir}/subsample_campaigns.csv"
stats_path = f"{data_dir}/subsample_stats.csv"

In [None]:
# Read both input tables into DataFrames. stats_df is later grouped by its
# 'campaign_id' column and its 'CTRPredicts' column is the one that gets noised.
campaign_df = pd.read_csv(campaigns_path)
stats_df = pd.read_csv(stats_path)

In [None]:
# Summary statistics of the campaign table, shown as the cell output.
campaign_df.describe()

## Robust LP Bid vs LP Bid

In [None]:
def find_worst_x(b, eps):
    """Solve ``-(x*log(b) + (1-x)*log(1-b)) = eps`` for ``x`` and clip the result.

    Args:
        b: Probability or NumPy array of probabilities. ``log(b)`` and
            ``log(1-b)`` are taken directly, so values should lie strictly
            between 0 and 1. At ``b == 0.5`` the denominator is zero and the
            equation has no finite solution.
        eps: Cross-entropy level the solution has to reach.

    Returns:
        The solution ``x`` clipped to ``[1e-8, 1 - 1e-8]``.

    Side effects:
        Prints the mean of the unclipped solution.
    """
    log_one_minus_b = np.log(1 - b)
    log_odds = np.log(b) - log_one_minus_b
    unclipped = -(eps + log_one_minus_b) / log_odds
    # Diagnostic output: mean of the raw solution before it is clipped.
    print(np.mean(unclipped))
    return np.clip(unclipped, 1e-8, 1 - 1e-8)

def create_worst_case_data(stats_df, stats_path, old_ctr, eps, auction_mode, seed):
    """Add a ``CTRPredicts_noised`` column from ``find_worst_x`` and save the frame.

    NOTE(review): a later cell defines another ``create_worst_case_data`` with a
    different signature; once that cell has run, it replaces this function.

    Args:
        stats_df: DataFrame to extend; it is modified in place.
        stats_path: Not used; kept so the signature stays unchanged.
        old_ctr: Values passed to ``find_worst_x`` as ``b``.
        eps: Value passed to ``find_worst_x`` as ``eps``.
        auction_mode: Not used; kept so the signature stays unchanged.
        seed: Seed applied to Python's ``random`` and NumPy's global RNG.

    Side effects:
        Seeds both global RNGs and writes ``stats_df`` (including its index)
        to ``../data/stats_noised.csv``.
    """
    # The noised column itself is deterministic; seeding is kept only as a
    # side effect for code that draws random numbers after this call.
    random.seed(seed)
    np.random.seed(seed)
    stats_df['CTRPredicts_noised'] = find_worst_x(old_ctr, eps)
    stats_df.to_csv("../data/stats_noised.csv")


In [None]:
def calculate_log_cross_entropy(p, p_noised):
    """Return the summed binary cross-entropy of ``p_noised`` measured against ``p``.

    ``p_noised`` is first clipped to ``[1e-12, 1 - 1e-12]`` so that both
    logarithms stay finite.

    Args:
        p: Reference probabilities.
        p_noised: Perturbed probabilities, same shape as ``p``.

    Returns:
        ``-sum(p * log(q) + (1 - p) * log(1 - q))`` where ``q`` is the clipped
        ``p_noised``.
    """
    floor = 1e-12
    q = np.clip(p_noised, floor, 1 - floor)
    positive_term = p * np.log(q)
    negative_term = (1 - p) * np.log(1 - q)
    return -np.sum(positive_term + negative_term)

In [None]:
def add_noise_to_match_lce(p, target_eps, max_iter=1000, tol=5, seed=None):
    """Search for a noisy copy of ``p`` whose cross-entropy against ``p`` is near ``target_eps``.

    Every iteration draws fresh Gaussian noise, adds it to ``p``, clips the
    result to ``[0.001, 0.99]`` and scores it with
    ``calculate_log_cross_entropy(p, candidate)``. The noise scale starts at
    0.01 and is multiplied by 1.05 after an undershoot and by 0.95 otherwise.

    Args:
        p: NumPy array of probabilities (``p.copy()`` and ``len(p)`` are used).
        target_eps: Cross-entropy value the candidate should reach.
        max_iter: Maximum number of noise draws.
        tol: Absolute deviation from ``target_eps`` at which the search stops.
        seed: If not ``None``, seeds NumPy's global RNG before drawing.

    Returns:
        The first candidate within ``tol`` of the target. If none is found,
        the closest candidate seen, or a copy of ``p`` when no iteration
        produced a comparable deviation (for example ``max_iter == 0``).
    """
    if seed is not None:
        np.random.seed(seed)

    scale = 0.01
    closest = p.copy()
    smallest_gap = float('inf')

    for _ in range(max_iter):
        candidate = np.clip(p + scale * np.random.randn(len(p)), 0.001, 0.99)
        lce = calculate_log_cross_entropy(p, candidate)
        gap = abs(lce - target_eps)

        if gap < smallest_gap:
            smallest_gap, closest = gap, candidate.copy()

        # Stop as soon as the current draw is close enough to the target.
        if gap < tol:
            return candidate

        # Too little cross-entropy -> more noise next time; otherwise less.
        scale *= 1.05 if lce < target_eps else 0.95

    return closest

def create_worst_case_data(stats_df, stats_save_path, eps, auction_mode, seed):
    """Add per-campaign noised CTR predictions to ``stats_df`` and save it as CSV.

    For every ``campaign_id`` group, the ``CTRPredicts`` values are perturbed
    with ``add_noise_to_match_lce`` and written to ``CTRPredicts_noised``.

    Args:
        stats_df: DataFrame with ``campaign_id`` and ``CTRPredicts`` columns;
            it is modified in place.
        stats_save_path: Destination CSV path (written without the index).
        eps: Target cross-entropy handed to ``add_noise_to_match_lce``.
        auction_mode: Not used; kept so the signature stays unchanged.
        seed: Seed handed to ``add_noise_to_match_lce``; the same seed is used
            for every campaign.

    Returns:
        The same ``stats_df`` object, now holding ``CTRPredicts_noised``.

    Warns:
        UserWarning: if at least one campaign raised, or ended more than 1.0
            away from ``eps``.
    """
    failed_campaigns = 0

    for campaign_id, group in stats_df.groupby('campaign_id'):
        old_ctr = group['CTRPredicts'].values
        try:
            noised_ctr = add_noise_to_match_lce(old_ctr, eps, seed=seed)
            achieved_ce = calculate_log_cross_entropy(old_ctr, noised_ctr)
            # A miss is only counted; the noised values are still stored.
            # NOTE(review): the search stops within its default tol=5, which
            # is looser than this 1.0 threshold - confirm that is intended.
            if abs(achieved_ce - eps) > 1.0:
                failed_campaigns += 1
            stats_df.loc[stats_df.campaign_id == campaign_id, 'CTRPredicts_noised'] = noised_ctr
        except Exception as e:
            # Best effort: report and continue. The campaign's rows keep
            # whatever 'CTRPredicts_noised' held before this call.
            print(f"Failed with campaign_id {campaign_id}: {str(e)}")
            failed_campaigns += 1

    if failed_campaigns > 0:
        warnings.warn(f"N={failed_campaigns} campaigns failed")

    stats_df.to_csv(stats_save_path, index=False)
    return stats_df

In [None]:
# Experiment grid: target cross-entropy levels and random seeds.
eps_set = [i*10 + 130 for i in range(8)]
seeds = list(range(20))

# Not read again in this cell; kept defined in case other cells rely on them.
n_campaigns = len(campaign_df)
bidder_types = ['simple', 'robust']
old_ctr = np.array(stats_df.CTRPredicts.copy())

metrics_list = []

# Loop-invariant settings: the noised stats file is overwritten on every
# (eps, seed) iteration, and the CPC value is the same for all runs.
stats_save_path = f"../data/{auction_mode.lower()}/stats_{auction_mode.lower()}_filtered_train_noised_{loss_type}.csv"
cpc = 300.


def _run_bidder(bidder_cls, eps):
    """Run autobidder_check for one bidder class on the current noised stats file."""
    return autobidder_check(
        bidder=bidder_cls,
        params={"input_campaigns": campaigns_path,
                "input_stats": stats_save_path,
                'eps': eps,
                'p': 1,
                'q': 1,
                'LP': True,
                'CPC': cpc},
        loss_type=loss_type
    )


def _score_record(result, eps, bidder_type, seed):
    """Turn one autobidder_check result into a row for the metrics table."""
    score = result['score']
    return {
        'eps': eps,
        'bidder_type': bidder_type,
        'seed': seed,
        'tvc': score[0],
        'cpc_percent': score[1],
        'cpc_avg': score[2]
    }


for eps in tqdm(eps_set):
    for seed in seeds:
        # Add noise to data
        create_worst_case_data(stats_df, stats_save_path, eps, auction_mode, seed)

        # Simple bid
        res_simple = _run_bidder(SimpleBid, eps)
        metrics_list.append(_score_record(res_simple, eps, 'simple', seed))

        # Robust bid
        res_robust = _run_bidder(RobustBidCE, eps)
        metrics_list.append(_score_record(res_robust, eps, 'robust', seed))

metrics_df = pd.DataFrame(metrics_list, columns=['eps', 'bidder_type', 'seed', 'tvc', 'cpc_percent', 'cpc_avg'])


In [None]:
# Mean and standard deviation of each metric over the seeds, for every
# (eps, bidder_type) pair.
agg_metrics = (
    metrics_df
    .groupby(['eps', 'bidder_type'])
    .agg(
        mean_tvc=pd.NamedAgg(column='tvc', aggfunc='mean'),
        std_tvc=pd.NamedAgg(column='tvc', aggfunc='std'),
        mean_cpc_percent=pd.NamedAgg(column='cpc_percent', aggfunc='mean'),
        std_cpc_percent=pd.NamedAgg(column='cpc_percent', aggfunc='std'),
        mean_cpc_avg=pd.NamedAgg(column='cpc_avg', aggfunc='mean'),
        std_cpc_avg=pd.NamedAgg(column='cpc_avg', aggfunc='std'),
    )
    .reset_index()
)

agg_metrics

In [None]:
# Save the aggregated metrics as CSV under ../images. index=False is not
# passed, so the row index is written as the first column.
agg_metrics.to_csv(f'../images/metrics_{loss_type.lower()}_BAT.csv')

In [None]:
# One plot_metric_with_error call per aggregated metric.
# Each entry: (mean column, std column, metric name, y-axis label).
metric_plots = [
    ('mean_tvc', 'std_tvc', 'TVC', 'Total Value Clicks'),
    ('mean_cpc_percent', 'std_cpc_percent', 'CPC Percent', 'Cost per Click (%)'),
    ('mean_cpc_avg', 'std_cpc_avg', 'CPC Avg', 'Average Cost per Click'),
]

for mean_col, std_col, metric_name, y_label in metric_plots:
    plot_metric_with_error(
        eps_set=eps_set,
        agg_metrics=agg_metrics,
        metric_mean_col=mean_col,
        metric_std_col=std_col,
        metric_name=metric_name,
        y_label=y_label,
        loss_type=loss_type
    )
