In [0]:
from __future__ import print_function, division

from google.colab import drive
drive.mount('/content/gdrive')

import numpy as np
import pandas as pd
import math
import pickle
import torch
import time
from tqdm import tqdm, tqdm_notebook
from sklearn.externals import joblib

from sklearn.metrics import balanced_accuracy_score, roc_auc_score, \
                            classification_report, log_loss

from sklearn.linear_model import LogisticRegression

import sys
sys.path.append('/content/gdrive/My Drive/Colab Notebooks/')
from multi_agent_simulator import BiddingAgent, BiddingEnvironment, BidStrategy


import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="ticks")
pd.set_option('display.max_columns', 100)

%matplotlib inline
%config InlineBackend.figure_format = 'svg'
%load_ext autoreload
%autoreload 2

In [0]:
# Read the preprocessed validation split from the shared HDF5 store.
validation = pd.read_hdf(
    '/content/gdrive/My Drive/Colab Notebooks/we_data/preprocessed.h5',
    key='validation',
)

# pCTR using different models

In [3]:
# load the trained LR
# NOTE: pickle.load can execute arbitrary code -- only load model files you trust.
# The context manager closes the file handle as soon as the model is read.
with open("/content/gdrive/My Drive/Colab Notebooks/lr_model", "rb") as model_file:
    lr = pickle.load(model_file)

# use LR to predict pCTR: column 1 of predict_proba is kept as the click
# probability; 'payprice' and 'click' are not model features and are dropped
pCTR_lr = lr.predict_proba(validation.drop(columns=['payprice', 'click']))[:, 1]



In [4]:
# load the best NN
file = 'nmodel_roc_0.856_balanced_acc_0.000_model_loss_0.017.pt'
PATH = '/content/gdrive/My Drive/Colab Notebooks/' + file

model = torch.load(PATH)
model.eval()

# use NN to predict pCTR
# The validation frame is scored in 200 chunks (presumably to keep each
# forward pass small -- the chunk count is not tuned anywhere in this notebook).
batches = np.array_split(validation, 200)

pCTR_NN = []
# Inference only: no_grad() stops autograd from recording a graph for every
# forward pass, which would cost memory and time for gradients never used.
with torch.no_grad():
    for batch in batches:
        # 'click' and 'payprice' are not model inputs
        data = batch.drop(columns=['click', 'payprice']).values
        data = torch.from_numpy(data).float()

        output = model(data)

        # squeeze() drops singleton dimensions so the values can be appended
        # to the flat prediction list
        y_hat_prob = output.float().cpu().numpy().squeeze()
        pCTR_NN.extend(y_hat_prob)

pCTR_NN = np.array(pCTR_NN)
# free the model and the last batch's tensors; only pCTR_NN is needed later
del(data, model, output, y_hat_prob)
print('done')

done


In [5]:

# Load different models
# NOTE: pickle.load can execute arbitrary code -- only load model files you trust.
# The context manager closes the file handle as soon as the model is read.
with open("/content/gdrive/My Drive/Colab Notebooks/XGB_ROC_0.8911", "rb") as model_file:
    xgb = pickle.load(model_file)

# column 1 of predict_proba is kept as the pCTR; 'payprice' and 'click' are
# not model features and are dropped
pCTR_XGB = xgb.predict_proba(validation.drop(columns=['payprice', 'click']))[:, 1]
print('done')


done


In [0]:
# Baseline CTR of the validation set: share of rows with click == 1.
click_counts = np.bincount(validation['click'].values)
no_click, click = click_counts
avgCTR = click / click_counts.sum()

# Set environment & agents

In [0]:
# Create the bidding environment that every simulated agent registers its
# bids with. It is given a copy of the validation frame, not the frame itself.
environment = BiddingEnvironment(validation.copy())

In [8]:
%%time
# From previous analysis the best performing models are included

bids_const1 = BidStrategy.const_bidding(300, environment.length)
bids_const2 = BidStrategy.const_bidding(1000, environment.length)

bids_random1 = BidStrategy.random_bidding(300, 400, environment.length)
bids_random2 = BidStrategy.random_bidding(1000, 1500, environment.length)
bids_random3 = BidStrategy.random_bidding(10000, 15000, environment.length)
bids_random4 = BidStrategy.random_bidding(100000, 150000, environment.length)


bids_opportunistic_lr_1 = BidStrategy.opportunistic(pCTR_lr, 150000, 0.8)
bids_opportunistic_lr_2 = BidStrategy.opportunistic(pCTR_lr, 200000, 0.9)

bids_opportunistic_NN_1 = BidStrategy.opportunistic(pCTR_NN, 150000, 0.8)
bids_opportunistic_NN_2 = BidStrategy.opportunistic(pCTR_NN, 200000, 0.9)

bids_opportunistic_XGB_1 = BidStrategy.opportunistic(pCTR_XGB, 150000, 0.8)
bids_opportunistic_XGB_2 = BidStrategy.opportunistic(pCTR_XGB, 200000, 0.9)


bids_linear_lr_1 = BidStrategy.linear_bidding(pCTR_lr, avgCTR, 3.52)
bids_linear_lr_2 = BidStrategy.linear_bidding(pCTR_lr, avgCTR, 3.54)

bids_linear_NN_1 = BidStrategy.linear_bidding(pCTR_NN, avgCTR, 70)
bids_linear_NN_2 = BidStrategy.linear_bidding(pCTR_NN, avgCTR, 60)

bids_linear_XGB_1 = BidStrategy.linear_bidding(pCTR_XGB, avgCTR, 202)
bids_linear_XGB_2 = BidStrategy.linear_bidding(pCTR_XGB, avgCTR, 203)


bids_ortb1_lr_1 = BidStrategy.ortb1(pCTR_lr, 15, 0.000025)
bids_ortb1_lr_2 = BidStrategy.ortb1(pCTR_lr, 45, 0.000047)

bids_ortb1_NN_1 = BidStrategy.ortb1(pCTR_NN, 30 ,0.000002)
bids_ortb1_NN_2 = BidStrategy.ortb1(pCTR_NN, 50 ,0.000002)

bids_ortb1_XGB_1 = BidStrategy.ortb1(pCTR_XGB, 80, 0.000001)
bids_ortb1_XGB_2 = BidStrategy.ortb1(pCTR_XGB, 70, 0.000001)


bids_ortb2_lr_1 = BidStrategy.ortb2(pCTR_lr, 90, 0.000090)
bids_ortb2_lr_2 = BidStrategy.ortb2(pCTR_lr, 80, 0.000086)

bids_ortb2_NN_1 = BidStrategy.ortb2(pCTR_NN, 20, 6.210526e-7)
bids_ortb2_NN_2 = BidStrategy.ortb2(pCTR_NN, 50, 2.184211e-6)

bids_ortb2_XGB_1 = BidStrategy.ortb2(pCTR_XGB, 80, 0.000001)
bids_ortb2_XGB_2 = BidStrategy.ortb2(pCTR_XGB, 70, 0.000001)


bids_optimal_second_price_NN_1 = BidStrategy.second_price(pCTR_NN, 6250*1000, 303925, 4e6)
bids_optimal_second_price_NN_2 = BidStrategy.second_price(pCTR_NN, 6250*1000, 303925, 4.16e6)

bids_optimal_second_price_XGB_1 = BidStrategy.second_price(pCTR_XGB, 6250*1000, 303925, 13450000)
bids_optimal_second_price_XGB_2 = BidStrategy.second_price(pCTR_XGB, 6250*1000, 303925, 13684000)


# register bids to the environment
environment.register_bid(bids_const1)
environment.register_bid(bids_const2)
environment.register_bid(bids_random1)
environment.register_bid(bids_random2)
environment.register_bid(bids_random3)
environment.register_bid(bids_random4)
environment.register_bid(bids_opportunistic_lr_1)
environment.register_bid(bids_opportunistic_lr_2)
environment.register_bid(bids_opportunistic_NN_1)
environment.register_bid(bids_opportunistic_NN_2)
environment.register_bid(bids_opportunistic_XGB_1)
environment.register_bid(bids_opportunistic_XGB_2)
environment.register_bid(bids_linear_lr_1)
environment.register_bid(bids_linear_lr_2)
environment.register_bid(bids_linear_NN_1)
environment.register_bid(bids_linear_NN_2)
environment.register_bid(bids_linear_XGB_1)
environment.register_bid(bids_linear_XGB_2)
environment.register_bid(bids_ortb1_lr_1)
environment.register_bid(bids_ortb1_lr_2)
environment.register_bid(bids_ortb1_NN_1)
environment.register_bid(bids_ortb1_NN_2)
environment.register_bid(bids_ortb1_XGB_1)
environment.register_bid(bids_ortb1_XGB_2)
environment.register_bid(bids_ortb2_lr_1)
environment.register_bid(bids_ortb2_lr_2)
environment.register_bid(bids_ortb2_NN_1)
environment.register_bid(bids_ortb2_NN_2)
environment.register_bid(bids_ortb2_XGB_1)
environment.register_bid(bids_ortb2_XGB_2)
environment.register_bid(bids_optimal_second_price_NN_1)
environment.register_bid(bids_optimal_second_price_NN_2)
environment.register_bid(bids_optimal_second_price_XGB_1)
environment.register_bid(bids_optimal_second_price_XGB_2)

print('Registerd {} agents\' bids to the environment'.format(environment.number_bids))

Registerd 34 agents' bids to the environment
CPU times: user 884 ms, sys: 21.9 ms, total: 906 ms
Wall time: 906 ms


In [0]:
%%time 
# OPTIONAL: RECALCULATE #2 WHEN NEW BIDS HAVE BEEN ADDED LATER
# NOTE(review): the timing and array recorded under this cell look like they
# came from a run with the call below un-commented -- confirm before relying on them.
# environment.calculate_bids_budget_constrained()

CPU times: user 16.8 s, sys: 19.2 ms, total: 16.8 s
Wall time: 16.8 s


array([1.27949000e+05, 1.06027000e+05, 1.21025000e+05, ...,
       1.69004333e+02, 6.89538879e+01, 7.81720428e+01])

In [9]:
# number of teams that we are bidding against
actual_agents = 27
# number of bid vectors registered in the simulated environment
agents_simulated = environment.number_bids

# Keep the total money in the simulation equal to the real game's total by
# dividing it evenly over the simulated agents.
total_money_in_game = actual_agents * 6250*1000
money_per_agent_simulation = total_money_in_game / agents_simulated
environment.budget = money_per_agent_simulation

print('Corrected budget per agent {:.10}'.format(environment.budget))

Corrected budget per agent 4963235.294


# Find best performing strategies in this environment

In [0]:
# register agents that have been optimized for this environment
# (one second-price bidder on the NN pCTR, two linear bidders on NN / LR pCTR)
optimized_bids = [
    BidStrategy.second_price(pCTR_NN, 6250*1000, environment.length, 58000),
    BidStrategy.linear_bidding(pCTR_NN, avgCTR, 5.6),
    BidStrategy.linear_bidding(pCTR_lr, avgCTR, 11),
]
for optimized_bid in optimized_bids:
    environment.register_bid(optimized_bid)

In [11]:
# Redetermine the total budget in the environment
# (the number of registered bids changed, so the per-agent share changes too)

# number of teams that we are bidding against
actual_agents = 27
# number of bid vectors registered in the simulated environment
agents_simulated = environment.number_bids

# Divide the real game's total money evenly over the simulated agents.
total_money_in_game = actual_agents * 6250*1000
money_per_agent_simulation = total_money_in_game / agents_simulated
environment.budget = money_per_agent_simulation

print('Corrected budget per agent {:.10}'.format(environment.budget))

# Recompute with the new budget; kept as the last expression so the
# returned array is displayed as the cell output.
environment.calculate_bids_budget_constrained()

Corrected budget per agent 4560810.811


array([1.23876000e+05, 1.46311000e+05, 1.33646000e+05, ...,
       1.59474960e+02, 6.89538879e+01, 7.81720428e+01])

In [15]:
# Optimal bidding strategy given the total environment
# Our own agent: second-price bids computed from the XGB pCTR, evaluated with
# a budget of 6250*1000 against the bids registered in `environment`.

bids = BidStrategy.second_price(pCTR_XGB, 6250*1000, environment.length, 55000000)
agent = BiddingAgent(6250*1000, environment)

# NOTE(review): criteria='2' presumably selects the second-price /
# budget-constrained clearing rule -- confirm in multi_agent_simulator.
agent.simulate(bids, criteria='2')
# last expression: the statistics dict is shown as the cell output
agent.statistics()

{'CTR': 0.005535055350553505,
 'aCPC': 347.2221939915927,
 'aCPM': 1921.8940626840922,
 'budget_left': 0.50815132932658,
 'clicks': 18,
 'impressions': 3252,
 'items': 303925,
 'lost': 199963,
 'spend': 6249999.491848668,
 'too_expensive': 100710}

# Apply strategy on test set for submission

In [0]:
# Read the preprocessed test split from the shared HDF5 store.
test = pd.read_hdf(
    '/content/gdrive/My Drive/Colab Notebooks/we_data/preprocessed.h5',
    key='test',
)

In [0]:
# Score the test set with the XGB model; column 1 of predict_proba is the pCTR.
test_features = test.drop(columns=['payprice', 'click'])
test_probabilities = xgb.predict_proba(test_features)
pCTR_XGB_testset = test_probabilities[:, 1]
print('done')

done


In [0]:
# Test-set features (same two columns dropped as for the validation set)
X_test = test.drop(columns=['click', 'payprice'])

# Same second-price strategy and parameters as evaluated on the validation set
bids = BidStrategy.second_price(pCTR_XGB_testset, 6250*1000, len(X_test), 55000000)

# get bid id's
# NOTE(review): assumes test.csv rows are in the same order as the
# preprocessed test frame -- confirm against the preprocessing notebook.
test_raw = pd.read_csv(
    '/content/gdrive/My Drive/Colab Notebooks/we_data/test.csv')

# export to file: two columns, bidid and bidprice (rounded to 1 decimal)
df_bids = (
    pd.DataFrame(
        np.round(bids, 1), index=test_raw['bidid'].values, columns=['bidprice'])
    .rename_axis('bidid')
    .reset_index()
)

# timestamp in the file name keeps earlier submissions from being overwritten
timestamp = time.strftime('%Y-%m-%d_%H:%M:%S')
file = '/content/gdrive/My Drive/Colab Notebooks/bid_attemnt_second_price_55000000_XGB_{}.csv'.format(
    timestamp)
df_bids.to_csv(file, index=False)