In [1]:
import sys

sys.path.append("../")

%load_ext autoreload
%autoreload 2

import random

import numpy as np
import pandas as pd
from simulator.model.robust_mse_2 import RobustBidMSE_CTRCVR
from simulator.model.simple import SimpleBid
from simulator.simulation.utils_visualization import plot_2d_heatmaps_interpolated, plot_metric_with_error
from simulator.validation.check_results import autobidder_check
from tqdm import tqdm

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)

# This notebook provides a guideline how to simulate and check Robust PID Bidder for all the campaigns in the dataset
### This is a case of double (CTR+CVR) uncertainty

In [2]:
loss_type = 'MSE'

# Load data

In [3]:
auction_mode = 'FPA'

# data paths
campaigns_path = f"../data/data/{auction_mode.lower()}/campaigns_{auction_mode.lower()}_filtered_train_final.csv"
stats_path = f"../data/data/{auction_mode.lower()}/stats_{auction_mode.lower()}_filtered_train_final.csv"
STATS_PATH_NOISED = f"../data/data/{auction_mode.lower()}/stats_{auction_mode.lower()}_filtered_train_noised_CTR_CVR.csv"

In [4]:
campaign_df = pd.read_csv(campaigns_path)
stats_df = pd.read_csv(stats_path)

In [5]:
campaigns_path_big = f"../data/{auction_mode.lower()}/campaigns.csv"
campaign_big = pd.read_csv(campaigns_path)

In [6]:
campaign_df.describe()

Unnamed: 0.1,Unnamed: 0,campaign_id,loc_id,item_id,campaign_start,campaign_end,auction_budget,microcat_ext,logical_category,region_id,start_hour
count,317.0,317.0,317.0,317.0,317.0,317.0,317.0,317.0,317.0,317.0,317.0
mean,158.0,72211700.0,643730.753943,3464000000.0,529567100.0,529653500.0,482.268864,1310064.0,2.669811,642908.586751,6.599369
std,91.654242,4545295.0,13312.992329,797231600.0,8416.271,8416.271,653.280628,946311.4,1.465637,9740.578484,3.776225
min,0.0,13795870.0,621630.0,294430500.0,529535000.0,529621400.0,3.84,18.0,1.0,621590.0,0.0
25%,79.0,72879040.0,637640.0,2891171000.0,529564500.0,529650900.0,134.4,44730.0,1.15,637640.0,5.0
50%,158.0,72991970.0,639740.0,3820329000.0,529569100.0,529655500.0,268.8,1289833.0,3.1,637680.0,6.0
75%,237.0,73114090.0,653140.0,4058614000.0,529572600.0,529659000.0,614.4,2169972.0,3.41,652560.0,7.0
max,316.0,73277500.0,806540.0,4577361000.0,529577300.0,529663700.0,6669.12,2300143.0,5.37,662530.0,23.0


In [7]:
stats_df['CVR'] = stats_df['CRPredicts']

In [8]:
stats_df.describe()

Unnamed: 0,campaign_id,period,item_id,contact_price_bin,AuctionVisibilitySurplus,AuctionClicksSurplus,AuctionContactsSurplus,AuctionWinBidSurplus,CTRPredicts,CRPredicts,AuctionCount,x_platfrom_num,CTRPredicts_noised,CVR
count,199859.0,199859.0,199859.0,199859.0,199859.0,199859.0,199859.0,199859.0,199859.0,199859.0,199859.0,37931.0,199859.0,199859.0
mean,72178700.0,529604900.0,3453895000.0,37.570087,2.292433,0.1985176,0.01606526,253.203315,0.040505,0.098286,14.537314,19.625531,0.055559,0.098286
std,4690015.0,27260.8,798646800.0,14.994818,7.123523,0.5280265,0.04728542,942.599109,0.039297,0.097094,40.122623,29.47137,0.043702,0.097094
min,13795870.0,529534800.0,294430500.0,-44.0,4.057939e-07,4.759838e-08,6.842122e-09,0.0,0.0017,0.003221,1.0,1.0,0.01,0.003221
25%,72874020.0,529581600.0,2888165000.0,30.0,0.1446765,0.01195774,0.0005472256,0.92,0.015004,0.027795,3.0,6.0,0.01,0.027795
50%,72991970.0,529603200.0,3815980000.0,41.0,0.5714305,0.04944516,0.002595186,28.69,0.024219,0.058972,5.0,11.0,0.058031,0.058972
75%,73113870.0,529624800.0,4058614000.0,48.0,1.861864,0.1784186,0.01129214,175.15,0.05242,0.144046,13.0,21.0,0.1,0.144046
max,73277500.0,529660800.0,4577361000.0,77.0,375.1635,37.18276,1.909585,72046.96,0.633199,0.823119,1967.0,270.0,0.1,0.823119


## Robust LP Bid vs LP Bid

In [9]:
def create_noised_stats_mse(stats_df, old_ctr, eps_ctr, eps_cvr, auction_mode, seed):
    random.seed(seed)
    np.random.seed(seed)

    grouped = stats_df.groupby('campaign_id')
    for campaign_id, group in grouped:
        old_ctr = group['CTRPredicts'].values
        noise = np.random.rand(old_ctr.size)
        noise = noise / np.linalg.norm(noise) * np.sqrt(2 * eps_ctr)
        stats_df.loc[stats_df.campaign_id == campaign_id, 'CTRPredicts_noised'] = np.clip(old_ctr + noise, 0.01, 0.1)

        old_cvr = group['CRPredicts'].values
        noise = np.random.rand(old_cvr.size)
        noise = noise / np.linalg.norm(noise) * np.sqrt(2 * eps_cvr)
        stats_df.loc[stats_df.campaign_id == campaign_id, 'CRPredicts_noised'] = np.clip(old_cvr + noise, 0.001, 0.4)

    stats_df.to_csv(STATS_PATH_NOISED)


In [10]:
robust_times = []
risk_bid_times = []
non_robust_times = []

In [11]:
eps_set_ctr = [10., 5., 1., 0.1, 0.01, 0.001, 1e-3, 1e-4, 1e-5] # [5., 1., 0.5, 0.1, 0.01, 0.001, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5]
eps_set_cvr = [10., 5., 1., 0.1, 0.01, 0.001, 1e-3, 1e-4, 1e-5] # [0.5, 0.1, 0.01, 0.001, 1e-3, 1e-4, 1e-5]
bidder_types = ['simple', 'robust']
seeds = list(range(5))

# metrics_df = pd.DataFrame(columns=['eps', 'bidder_type', 'seed', 'tvc', 'cpc_percent', 'cpc_avg'])
metrics_list = []
T = 10
old_ctr = np.array(stats_df.CTRPredicts.copy())
T_times = {}
T_times_non_robust = {}
for eps_ctr in eps_set_ctr:
    for eps_cvr in eps_set_cvr:
        # for T in [1, 10, 100, 1000, int(1e4), int(1e5)]:
        robust_times = []
        non_robust_times = []
        for seed in tqdm(seeds):
            create_noised_stats_mse(stats_df, old_ctr, eps_ctr, eps_cvr, auction_mode, seed)

            CPC = 100.
            # start_non_robust = time.time()
            # Simple bid
            res_simple = autobidder_check(
                bidder=SimpleBid,
                params={"input_campaigns": campaigns_path,
                        "input_stats": STATS_PATH_NOISED,
                        'eps_ctr': eps_ctr,
                        'eps_cvr': eps_cvr,
                        'p': 1,
                        'q': 1,
                        'LP': True,
                        'CPC': CPC},
                loss_type=loss_type
            )
            # end_non_robust = time.time()
            # non_robust_times.append(start_non_robust-end_non_robust)
            times_campaigns_nonrobust = sum(res_simple['mean_times']) / len(res_simple['mean_times'])
            non_robust_times.append(times_campaigns_nonrobust)
            metrics_list.append({
                'eps_ctr': eps_ctr,
                'eps_cvr': eps_cvr,
                'bidder_type': 'simple',
                'seed': seed,
                'tvc': res_simple['score'][0],
                'cpc_percent': res_simple['score'][1],
                'cpc_avg': res_simple['score'][2]
            })
            # start = time.time()

            # Robust bid
            res_robust = autobidder_check(
                bidder=RobustBidMSE_CTRCVR,
                params={"input_campaigns": campaigns_path,
                        "input_stats": STATS_PATH_NOISED,
                        'eps_ctr': eps_ctr,
                        'eps_cvr': eps_cvr,
                        'gamma': 1.,
                        'u_0': 1.,
                        'LP': True,
                        'CPC': CPC,
                        'T': T
                        },
                loss_type=loss_type
            )
            # end = time.time()
            # robust_times.append(end-start)
            times_campaigns_robust = sum(res_robust['mean_times']) / len(res_robust['mean_times'])
            robust_times.append(times_campaigns_robust)

            metrics_list.append({
                'eps_ctr': eps_ctr,
                'eps_cvr': eps_cvr,
                'bidder_type': 'robust',
                'seed': seed,
                'tvc': res_robust['score'][0],
                'cpc_percent': res_robust['score'][1],
                'cpc_avg': res_robust['score'][2]
            })
            # T_times[T] = sum(robust_times) / len(robust_times)
            # T_times_non_robust[T] = sum(non_robust_times) / len(non_robust_times)
metrics_df = pd.DataFrame(metrics_list, columns=['eps_ctr', 'eps_cvr', 'bidder_type', 'seed', 'tvc', 'cpc_percent', 'cpc_avg'])

100%|██████████| 5/5 [05:22<00:00, 64.56s/it]
100%|██████████| 5/5 [05:08<00:00, 61.74s/it]
100%|██████████| 5/5 [06:18<00:00, 75.68s/it]
100%|██████████| 5/5 [07:30<00:00, 90.14s/it]
100%|██████████| 5/5 [08:17<00:00, 99.57s/it] 
100%|██████████| 5/5 [14:23<00:00, 172.67s/it]
100%|██████████| 5/5 [14:08<00:00, 169.63s/it]
100%|██████████| 5/5 [14:08<00:00, 169.77s/it]
100%|██████████| 5/5 [14:08<00:00, 169.74s/it]
100%|██████████| 5/5 [05:09<00:00, 61.81s/it]
100%|██████████| 5/5 [05:03<00:00, 60.72s/it]
100%|██████████| 5/5 [04:52<00:00, 58.54s/it]
100%|██████████| 5/5 [06:58<00:00, 83.67s/it]
100%|██████████| 5/5 [07:40<00:00, 92.08s/it]
100%|██████████| 5/5 [12:56<00:00, 155.22s/it]
100%|██████████| 5/5 [12:54<00:00, 154.98s/it]
100%|██████████| 5/5 [13:52<00:00, 166.45s/it]
100%|██████████| 5/5 [13:49<00:00, 165.98s/it]
100%|██████████| 5/5 [07:02<00:00, 84.47s/it]
100%|██████████| 5/5 [05:08<00:00, 61.74s/it]
100%|██████████| 5/5 [04:59<00:00, 59.83s/it]
100%|██████████| 5/5 [05:

In [24]:
T_r_ms = {k: v * 1e6 for k, v in T_times.items()}
T_r_ms

{100000: 8399.893104965844}

In [21]:
T_nr_ms = {k: v * 1e6 for k, v in T_times_non_robust.items()}
T_nr_ms

{1: 1706.9458356734385,
 10: 1691.3340606539293,
 100: 1728.2422990362843,
 1000: 1707.752529945456,
 10000: 1731.9220244154283,
 100000: 1711.8722104735716}

In [None]:
sum(robust_times) / len(robust_times) / 317

In [12]:
agg_metrics = metrics_df.groupby(['eps_ctr', 'eps_cvr', 'bidder_type']).agg(
    mean_tvc=('tvc', 'mean'),
    std_tvc=('tvc', 'std'),
    mean_cpc_percent=('cpc_percent', 'mean'),
    std_cpc_percent=('cpc_percent', 'std'),
    mean_cpc_avg=('cpc_avg', 'mean'),
    std_cpc_avg=('cpc_avg', 'std')
).reset_index()

agg_metrics

Unnamed: 0,eps_ctr,eps_cvr,bidder_type,mean_tvc,std_tvc,mean_cpc_percent,std_cpc_percent,mean_cpc_avg,std_cpc_avg
0,1e-05,1e-05,robust,2.533994,0.000263,0.0,0.0,89.05959,0.007814
1,1e-05,1e-05,simple,1.709728,0.003388,0.0,0.0,353.646266,0.285041
2,1e-05,0.0001,robust,2.53729,0.000257,0.0,0.0,89.549034,0.018512
3,1e-05,0.0001,simple,1.709728,0.003388,0.0,0.0,353.646266,0.285041
4,1e-05,0.001,robust,2.538282,0.000189,0.0,0.0,89.243812,0.008504
5,1e-05,0.001,simple,1.709728,0.003194,0.0,0.0,353.646266,0.268739
6,1e-05,0.01,robust,2.528044,0.000962,0.0,0.0,84.370863,0.027729
7,1e-05,0.01,simple,1.709728,0.003388,0.0,0.0,353.646266,0.285041
8,1e-05,0.1,robust,2.534149,0.004381,0.0,0.0,89.080637,0.089994
9,1e-05,0.1,simple,1.709728,0.003388,0.0,0.0,353.646266,0.285041


In [13]:
METRICS_SAVE_PATH = f'../results/metrics_{loss_type.lower()}_BAT_CTR_CVR.csv'

In [14]:
agg_metrics.to_csv(METRICS_SAVE_PATH)

In [None]:
plot_metric_with_error(
    agg_metrics=agg_metrics,
    metric_mean_col='mean_tvc',
    metric_std_col='std_tvc',
    metric_name='TVC',
    y_label='Total Value Clicks',
    loss_type=loss_type
)

# plot_metric_with_error(
#     agg_metrics=agg_metrics,
#     metric_mean_col='mean_cpc_percent',
#     metric_std_col='std_cpc_percent',
#     metric_name='CPC Percent',
#     y_label='Cost per Click (%)',
#     loss_type=loss_type
# )

# plot_metric_with_error(
#     agg_metrics=agg_metrics,
#     metric_mean_col='mean_cpc_avg',
#     metric_std_col='std_cpc_avg',
#     metric_name='CPC Avg',
#     y_label='Average Cost per Click',
#     loss_type=loss_type
# )

In [None]:
plot_2d_heatmaps_interpolated(METRICS_SAVE_PATH, '1')