In [None]:
import sys
sys.path.append("../")

%load_ext autoreload
%autoreload 2

import random
import pandas as pd
import numpy as np

from tqdm import tqdm

from simulator.simulation.utils_visualization import plot_metric_with_error, plot_2d_heatmaps_interpolated
from simulator.validation.check_results import autobidder_check

from simulator.model.robust_mse_2 import RobustBidMSE_CTRCVR
from simulator.model.simple import SimpleBid

# Let rendered DataFrames show up to 1000 rows and an unlimited number of columns.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = None

# This notebook is a guide to simulating and checking the Robust PID Bidder on all the campaigns in the dataset
### This is a case of double (CTR+CVR) uncertainty

In [None]:
# Loss type label: forwarded to autobidder_check and the plotting helper, and used
# (lower-cased) in the name of the saved metrics file.
loss_type = 'MSE'

# Load data

In [None]:
auction_mode = 'FPA'

# data paths
# The lower-cased auction mode names both the data sub-directory and the files inside it.
_mode = auction_mode.lower()
campaigns_path = f"../data/data/{_mode}/campaigns_{_mode}_filtered_train_final.csv"
stats_path = f"../data/data/{_mode}/stats_{_mode}_filtered_train_final.csv"
# Destination of the noised copy of the stats table.
STATS_PATH_NOISED = f"../data/data/{_mode}/stats_{_mode}_filtered_train_noised_CTR_CVR.csv"

In [None]:
# Read the campaign table first, then the stats table, from their configured CSV paths.
campaign_df, stats_df = (
    pd.read_csv(csv_path) for csv_path in (campaigns_path, stats_path)
)

In [None]:
# Path of the larger campaigns file (presumably the unfiltered set — confirm it exists locally).
campaigns_path_big = f"../data/{auction_mode.lower()}/campaigns.csv"
# Read from `campaigns_path_big`: reading `campaigns_path` here would only duplicate `campaign_df`.
campaign_big = pd.read_csv(campaigns_path_big)

In [None]:
# Summary statistics of the campaign table.
campaign_df.describe()

In [None]:
# Duplicate the `CRPredicts` column under the name `CVR`
# (presumably the name downstream simulator code expects — confirm).
stats_df['CVR'] = stats_df['CRPredicts']

In [None]:
# Summary statistics of the stats table.
stats_df.describe()

## Robust LP Bid vs LP Bid

In [None]:
def create_noised_stats_mse(stats_df, old_ctr, eps_ctr, eps_cvr, auction_mode, seed, output_path=None):
    """Add seeded noise to the CTR and CVR predictions of every campaign and save the table.

    For each `campaign_id`, a non-negative random vector is scaled to L2 norm
    `sqrt(2 * eps)` and added to the campaign's predictions. The results are clipped
    and stored in the new columns `CTRPredicts_noised` (clipped to [0.01, 0.1]) and
    `CRPredicts_noised` (clipped to [0.001, 0.4]).

    Args:
        stats_df: DataFrame with `campaign_id`, `CTRPredicts` and `CRPredicts` columns.
            It is modified in place (the two `*_noised` columns are added/overwritten).
        old_ctr: Unused; the CTR predictions are read per campaign from `stats_df`.
            Kept so existing callers do not break.
        eps_ctr: Noise budget for `CTRPredicts`.
        eps_cvr: Noise budget for `CRPredicts`.
        auction_mode: Unused; kept so existing callers do not break.
        seed: Seed applied to both `random` and `numpy.random` before any noise is drawn.
        output_path: CSV destination. Defaults to the notebook-level `STATS_PATH_NOISED`.

    Returns:
        None. The noised table is written to `output_path`.
    """
    random.seed(seed)
    np.random.seed(seed)

    if output_path is None:
        output_path = STATS_PATH_NOISED

    for campaign_id, group in stats_df.groupby('campaign_id'):
        # One row mask per campaign serves both columns.
        campaign_rows = stats_df.campaign_id == campaign_id
        # Draw order (CTR first, then CVR) determines the noise obtained for a given seed.
        stats_df.loc[campaign_rows, 'CTRPredicts_noised'] = _noised_predictions(
            group['CTRPredicts'].values, eps_ctr, 0.01, 0.1
        )
        stats_df.loc[campaign_rows, 'CRPredicts_noised'] = _noised_predictions(
            group['CRPredicts'].values, eps_cvr, 0.001, 0.4
        )

    stats_df.to_csv(output_path)


def _noised_predictions(values, eps, lower, upper):
    """Return `values` plus uniform-random noise of L2 norm sqrt(2 * eps), clipped to [lower, upper]."""
    noise = np.random.rand(values.size)
    noise = noise / np.linalg.norm(noise) * np.sqrt(2 * eps)
    return np.clip(values + noise, lower, upper)


In [None]:
import time
# Elapsed time in seconds of each robust-bidder autobidder_check call; filled by the experiment loop.
robust_times = []

In [None]:
# Noise budgets for the CTR and CVR predictions. Every value is listed exactly once:
# repeating one (e.g. writing both 0.001 and 1e-3) re-runs the same grid point and gives it
# duplicated samples in the per-(eps_ctr, eps_cvr, bidder_type) aggregation.
eps_set_ctr = [10., 5., 1., 0.5, 0.1, 0.01, 1e-3, 1e-4, 1e-5]
eps_set_cvr = [10., 5., 1., 0.5, 0.1, 0.01, 1e-3, 1e-4, 1e-5]
bidder_types = ['simple', 'robust']
seeds = list(range(3))

# Passed as the 'CPC' parameter to both bidders; it never changes, so it is set once.
CPC = 100.

metrics_list = []

# Not read by create_noised_stats_mse (it takes CTRPredicts from each campaign group);
# kept only because the function's signature expects the argument.
old_ctr = np.array(stats_df.CTRPredicts.copy())


def _metrics_record(eps_ctr, eps_cvr, bidder_type, seed, result):
    """Turn one autobidder_check result into a row of the metrics table."""
    score = result['score']
    return {
        'eps_ctr': eps_ctr,
        'eps_cvr': eps_cvr,
        'bidder_type': bidder_type,
        'seed': seed,
        'tvc': score[0],
        'cpc_percent': score[1],
        'cpc_avg': score[2],
    }


for eps_ctr in tqdm(eps_set_ctr, desc='eps_ctr'):
    for eps_cvr in tqdm(eps_set_cvr, desc='eps_cvr', leave=False):
        for seed in seeds:
            # Rewrites STATS_PATH_NOISED with noise for this (eps_ctr, eps_cvr, seed) point;
            # both bidders below read that same file.
            create_noised_stats_mse(stats_df, old_ctr, eps_ctr, eps_cvr, auction_mode, seed)

            # Simple bid
            res_simple = autobidder_check(
                bidder=SimpleBid,
                params={"input_campaigns": campaigns_path,
                        "input_stats": STATS_PATH_NOISED,
                        'eps_ctr': eps_ctr,
                        'eps_cvr': eps_cvr,
                        'p': 1,
                        'q': 1,
                        'LP': True,
                        'CPC': CPC},
                loss_type=loss_type
            )
            metrics_list.append(_metrics_record(eps_ctr, eps_cvr, 'simple', seed, res_simple))

            # Robust bid (only this call is timed; perf_counter is a monotonic interval clock)
            start = time.perf_counter()
            res_robust = autobidder_check(
                bidder=RobustBidMSE_CTRCVR,
                params={"input_campaigns": campaigns_path,
                        "input_stats": STATS_PATH_NOISED,
                        'eps_ctr': eps_ctr,
                        'eps_cvr': eps_cvr,
                        'gamma': 1.,
                        'u_0': 1.,
                        'LP': True,
                        'CPC': CPC},
                loss_type=loss_type
            )
            end = time.perf_counter()
            robust_times.append(end - start)

            metrics_list.append(_metrics_record(eps_ctr, eps_cvr, 'robust', seed, res_robust))

metrics_df = pd.DataFrame(metrics_list, columns=['eps_ctr', 'eps_cvr', 'bidder_type', 'seed', 'tvc', 'cpc_percent', 'cpc_avg'])

In [None]:
# Mean duration of one robust autobidder_check call, then divided by 317.
# NOTE(review): 317 is presumably the number of campaigns per run — confirm and name the constant.
mean_robust_run_seconds = sum(robust_times) / len(robust_times)
mean_robust_run_seconds / 317

In [None]:
# Mean and standard deviation of each metric over the runs that share the same
# (eps_ctr, eps_cvr, bidder_type), i.e. over the seeds.
_seed_aggregations = {
    'mean_tvc': pd.NamedAgg(column='tvc', aggfunc='mean'),
    'std_tvc': pd.NamedAgg(column='tvc', aggfunc='std'),
    'mean_cpc_percent': pd.NamedAgg(column='cpc_percent', aggfunc='mean'),
    'std_cpc_percent': pd.NamedAgg(column='cpc_percent', aggfunc='std'),
    'mean_cpc_avg': pd.NamedAgg(column='cpc_avg', aggfunc='mean'),
    'std_cpc_avg': pd.NamedAgg(column='cpc_avg', aggfunc='std'),
}

agg_metrics = (
    metrics_df
    .groupby(['eps_ctr', 'eps_cvr', 'bidder_type'])
    .agg(**_seed_aggregations)
    .reset_index()
)

agg_metrics

In [None]:
# CSV destination for the aggregated metrics; the file name embeds the lower-cased loss type.
METRICS_SAVE_PATH = f'../results/metrics_{loss_type.lower()}_BAT.csv'

In [None]:
# Persist the aggregated metrics (the DataFrame index is written as the first CSV column).
# NOTE(review): this assumes the ../results directory already exists — confirm.
agg_metrics.to_csv(METRICS_SAVE_PATH)

In [None]:
# Plot the TVC metric using its mean and std columns from agg_metrics.
plot_metric_with_error(
    agg_metrics=agg_metrics,
    metric_mean_col='mean_tvc',
    metric_std_col='std_tvc',
    metric_name='TVC',
    y_label='Total Value Clicks',
    loss_type=loss_type
)

# The two calls below are disabled; uncomment them to plot the CPC metrics the same way.
# plot_metric_with_error(
#     agg_metrics=agg_metrics,
#     metric_mean_col='mean_cpc_percent',
#     metric_std_col='std_cpc_percent',
#     metric_name='CPC Percent',
#     y_label='Cost per Click (%)',
#     loss_type=loss_type
# )

# plot_metric_with_error(
#     agg_metrics=agg_metrics,
#     metric_mean_col='mean_cpc_avg',
#     metric_std_col='std_cpc_avg',
#     metric_name='CPC Avg',
#     y_label='Average Cost per Click',
#     loss_type=loss_type
# )

In [None]:
# Draw the 2-D heatmaps from the saved metrics CSV.
# NOTE(review): the meaning of the second argument '1' is not visible in this notebook —
# check plot_2d_heatmaps_interpolated in simulator.simulation.utils_visualization.
plot_2d_heatmaps_interpolated(METRICS_SAVE_PATH, '1')