## Question 2 - Basic Bidding Strategies

### Import Libraries

In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import time
import random

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

from sklearn.preprocessing import MultiLabelBinarizer
from random import randrange

pd.set_option('display.max_columns', 999)
pd.set_option("display.max_rows",999)

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import warnings
warnings.filterwarnings('ignore')

In [2]:
def memory(df):
    print("Memory usage of the dataframe is {:.2f} MB".format(
        df.memory_usage().sum() / 1024**2))

In [3]:
convert = True
path = 'C:/Users/User/OneDrive/UCL/term_2/git/maai_bid_strategy'

if(convert):
    train_df = pd.read_csv(path+'/we_data/train.csv')
    train_df.to_hdf('train.h5', 'train')
    
else:
    train_df = pd.read_hdf('train.h5', 'train')
    
memory(train_df)

Memory usage of the dataframe is 463.67 MB


In [4]:
convert = True
path = 'C:/Users/User/OneDrive/UCL/term_2/git/maai_bid_strategy'

if(convert):
    val_df = pd.read_csv(path+'/we_data/validation.csv')
    val_df.to_hdf('validation.h5', 'validation')
    
else:
    val_df = pd.read_hdf('validation.h5', 'validation')
    
memory(val_df)

Memory usage of the dataframe is 57.97 MB


## Define Functions to Get Winning Impressions, and Metrics for Evaluation

In [41]:
def eval_bid(bid,df):
    impression = 0.0
    clicks = 0
    cost = 0.0
    budget = 6250*1000  ## Given budget of 6,250 CNY Fen
        
    for price_paid, click in df[['payprice','click']].values:
            if price_paid <= budget and bid >= price_paid:
                impression += 1
                clicks += click
                cost += price_paid
                budget -= price_paid
    return impression, clicks, cost

def competitor_bids(rand_bid, other_bids,df):
    impression = 0.0
    clicks = 0
    cost = 0.0
    budget = 6250*1000  ## Given budget of 6,250 CNY Fen

    for price_paid, click in df[['payprice','click']].values:
        if price_paid <= budget and rand_bid > max(other_bids) and rand_bid >= price_paid:
            impression += 1
            clicks += click
            if max(other_bids) >= price_paid:
                cost += max(other_bids)
                budget -= max(other_bids)
            else:
                cost += price_paid
                budget -= price_paid                
    return impression, clicks, cost

def bounds(df, l_list, u_list):
    
    df['lower_bound'] = l_list
    df['upper_bound'] = u_list
    
    return df

def calc_impression(df, input_list):
    
    input_list = [int(x) for x in input_list]
    df['impression_won'] = input_list
    
    return df

def calc_clicks(df, input_list):
    
    df['clicks'] = input_list
    df['ctr'] = (df['clicks']/df['impression_won'] * 100).round(4).astype(str)
    
    return df 

def calc_costs(df,input_list):
    
    input_list = [x / 1000 for x in input_list]
    df['total_spend'] = input_list
    df['cpm'] = (df['total_spend']/df['impression_won'] * 1000).round(2).astype(str)
    df['cpc'] = (df['total_spend']/df['clicks']).round(2).astype(str)
    
    return df

### Part 1 - Constant Bidding

#### Finding Optimal Constant Bid Value from Training Set

In [6]:
%%time
min_bid = np.min(val_df["payprice"].values)
max_bid = np.max(val_df["payprice"].values)

bid_range = np.arange(min_bid+2, max_bid+2, 2) # determines the range that bids should be in

### Find Optimal Constant Bid from Training Set

impression_list = []
clicks_list = []
cost_list = []
for i in bid_range:
    [impressions, clicks, costs] = eval_bid(i,train_df)
    impression_list.append(impressions)
    clicks_list.append(clicks)
    cost_list.append(costs)

Wall time: 5min 57s


In [7]:
const_tr = pd.DataFrame()
const_tr['constant_bids'] = bid_range   

const_tr = calc_impression(const_tr,impression_list)
const_tr = calc_clicks(const_tr,clicks_list)
const_tr = calc_costs(const_tr,cost_list)

In [46]:
## Find Optimal Bid based on Max CTR

opt_const_ctr = const_tr[const_tr['ctr'] == max(const_tr['ctr'])]
opt_const_ctr

Unnamed: 0,constant_bids,impression_won,clicks,ctr,total_spend,cpm,cpc
81,164,101029,72,0.0713,6249.999,61.86,86.81


In [52]:
## Find Optimal Bid based on Max CLICKS

opt_const_clk = const_tr[const_tr['clicks'] == max(const_tr['clicks'])]
opt_const_clk

Unnamed: 0,constant_bids,impression_won,clicks,ctr,total_spend,cpm,cpc
11,24,411972,134,0.0325,6249.999,15.17,46.64


In [58]:
### Evaluate Optimal Constant Bid found, on Validation Set (based on max CTR)

the_bid_ctr = opt_const_ctr.iloc[0]['constant_bids']

[impressions, clicks, costs] = eval_bid(the_bid_ctr,val_df)

ctr = (clicks/impressions * 100).round(4).astype(str)
cpm = ((costs/1000)/impressions * 1000).round(2).astype(str)
cpc = ((costs/1000)/clicks).round(2).astype(str)


data = {'optimal_bid' : the_bid_ctr, 'impression_won' : impressions, 'clicks' : clicks
        ,'ctr' : ctr , 'total_spend' : costs/1000,'cpm' : cpm,'cpc' : cpc}

val_metrics_ctr = pd.DataFrame(data, index=[0])
print('Evaluation of Optimal Bid  (found from Training Set) on Validation Set - based on Max CTR')
print('-----------------------------------------------------------------------------------------')
print(val_metrics_ctr)
print(' ')
print(' ')



### Evaluate Optimal Constant Bid found, on Validation Set (based on max CLICKS)

the_bid_clk = opt_const_clk.iloc[0]['constant_bids']

[impressions, clicks, costs] = eval_bid(the_bid_clk,val_df)

ctr = (clicks/impressions * 100).round(4).astype(str)
cpm = ((costs/1000)/impressions * 1000).round(2).astype(str)
cpc = ((costs/1000)/clicks).round(2).astype(str)

data = {'optimal_bid' : the_bid_clk, 'impression_won' : impressions, 'clicks' : clicks
        ,'ctr' : ctr , 'total_spend' : costs/1000,'cpm' : cpm,'cpc' : cpc}

val_metrics_clk = pd.DataFrame(data, index=[0])
print('Evaluation of Optimal Bid (found from Training Set) on Validation Set - based on Max CLICKS')
print('-------------------------------------------------------------------------------------------')
print(val_metrics_clk)

Evaluation of Optimal Bid  (found from Training Set) on Validation Set - based on Max CTR
-----------------------------------------------------------------------------------------
   optimal_bid  impression_won  clicks     ctr  total_spend    cpm     cpc
0          164        100993.0      57  0.0564       6250.0  61.89  109.65
 
 
Evaluation of Optimal Bid (found from Training Set) on Validation Set - based on Max CLICKS
-------------------------------------------------------------------------------------------
   optimal_bid  impression_won  clicks     ctr  total_spend    cpm    cpc
0           24         58778.0      15  0.0255      893.006  15.19  59.53


#### Find Optimal Constant Bid directly from Validation Set

In [60]:
%%time
min_bid = np.min(val_df["payprice"].values)
max_bid = np.max(val_df["payprice"].values)

bid_range = np.arange(min_bid+2, max_bid+2, 2) # determines the range that bids should be in

### Find Optimal Constant Bid from Training Set

impression_list = []
clicks_list = []
cost_list = []
for i in bid_range:
    [impressions, clicks, costs] = eval_bid(i,val_df)
    impression_list.append(impressions)
    clicks_list.append(clicks)
    cost_list.append(costs)

Wall time: 50 s


In [61]:
const_val = pd.DataFrame()
const_val['constant_bids'] = bid_range   

const_val = calc_impression(const_val,impression_list)
const_val = calc_clicks(const_val,clicks_list)
const_val = calc_costs(const_val,cost_list)

opt_const_ctr = const_val[const_val['ctr'] == max(const_val['ctr'])]
print('Evaluation of Optimal Bid from Validation Set - based on Max CTR')
print('----------------------------------------------------------------')
print(opt_const_ctr)
print(' ')
print(' ')

opt_const_clk = const_val[const_val['clicks'] == max(const_val['clicks'])]
print('Evaluation of Optimal Bid from Validation Set - based on Max CLICKS')
print('-------------------------------------------------------------------')
print(opt_const_clk)

Evaluation of Optimal Bid from Validation Set - based on Max CTR
----------------------------------------------------------------
     constant_bids  impression_won  clicks     ctr  total_spend    cpm     cpc
128            258           82751      54  0.0653       6250.0  75.53  115.74
 
 
Evaluation of Optimal Bid from Validation Set - based on Max CLICKS
-------------------------------------------------------------------
    constant_bids  impression_won  clicks     ctr  total_spend    cpm    cpc
38             78          146336      67  0.0458       6250.0  42.71  93.28


#### Comments :

### Part 2 - Random Bidding

In [41]:
%%time
m = 30
bid_range = np.arange(min_bid, max_bid-m, 1)
upper_bound = []
lower_bound = []
impression_list = []
clicks_list = []
cost_list = []
for i in range(1,501):
    rnd = random.choice(bid_range)
    lower_bound.append(rnd)
    upper_bound.append(rnd+m)
    i_list = []
    cl_list = []
    c_list = []
    for j in range(rnd,rnd+m,2):
        [impressions, clicks, costs] = eval_bid(j,val_df)
        i_list.append(impressions)
        cl_list.append(clicks)
        c_list.append(costs)
    impression_list.append(sum(i_list)/len(i_list))
    clicks_list.append(sum(cl_list)/len(cl_list))
    cost_list.append(sum(c_list)/len(c_list))

Wall time: 45min 20s


In [42]:
rand_df = pd.DataFrame()
rand_df = bounds(rand_df,lower_bound,upper_bound)

rand_df = calc_impression(rand_df,impression_list)
rand_df = calc_clicks(rand_df,clicks_list)
rand_df = calc_costs(rand_df,cost_list)

In [43]:
rand_df

Unnamed: 0,lower_bound,upper_bound,impression_won,clicks,ctr,total_spend,cpm,cpc
0,82,112,128544,62.933333,0.049,6249.999,48.62,99.31
1,250,280,82277,52.8,0.0642,6249.999,75.96,118.37
2,205,235,87477,55.2,0.0631,6249.999,71.45,113.22
3,76,106,133472,64.266667,0.0481,6249.999,46.83,97.25
4,121,151,112185,57.466667,0.0512,6249.999,55.71,108.76
5,189,219,90937,57.6,0.0633,6249.999,68.73,108.51
6,115,145,114324,57.333333,0.0501,6249.999,54.67,109.01
7,145,175,103095,57.333333,0.0556,6249.999,60.62,109.01
8,202,232,88030,55.733333,0.0633,6249.999,71.0,112.14
9,125,155,110686,57.4,0.0519,6249.999,56.47,108.88


In [39]:
val_metrics

Unnamed: 0,optimal_bid,impression_won,clicks,ctr,total_spend,cpm,cpc
0,164,100994.0,57,0.0564,6249.999,61.88,109.65


In [44]:
best_bound_ctr = rand_df[rand_df['ctr'] == max(rand_df['ctr'])]
best_bound_ctr

Unnamed: 0,lower_bound,upper_bound,impression_won,clicks,ctr,total_spend,cpm,cpc
32,249,279,82367,52.933333,0.0643,6249.999,75.88,118.07
46,249,279,82367,52.933333,0.0643,6249.999,75.88,118.07
149,249,279,82367,52.933333,0.0643,6249.999,75.88,118.07
449,249,279,82367,52.933333,0.0643,6249.999,75.88,118.07


#### Comments:

### Part 3 - Random Bidding with  Competitions

Takes Very Very Long to Run.

##### n = 50

In [13]:
%%time
m = 30
n = 50
bid_range = np.arange(min_bid, max_bid-m, 1)
# comp_bid = np.arange(min_bid, max_bid-m, 1)
upper_bound = []
lower_bound = []
impression_list = []
clicks_list = []
cost_list = []
for i in range(1,501):
    rnd = random.choice(bid_range)
    compt = random.sample(range(min_bid, max_bid-m), n)
    lower_bound.append(rnd)
    upper_bound.append(rnd+m)
    i_list = []
    cl_list = []
    c_list = []
    for j in range(rnd,rnd+m,1):
        [impressions, clicks, costs] = competitor_bids(j,compt,val_df)
        i_list.append(impressions)
        cl_list.append(clicks)
        c_list.append(costs)
    impression_list.append(sum(i_list)/len(i_list))
    clicks_list.append(sum(cl_list)/len(cl_list))
    cost_list.append(sum(c_list)/len(c_list))

Wall time: 2h 44min 40s


In [14]:
comp50_df = pd.DataFrame()
comp50_df = bounds(comp50_df,lower_bound,upper_bound)

comp50_df = calc_impression(comp50_df,impression_list)
comp50_df = calc_clicks(comp50_df,clicks_list)
comp50_df = calc_costs(comp50_df,cost_list)

In [15]:
comp50_df

Unnamed: 0,lower_bound,upper_bound,impression_won,clicks,ctr,total_spend,cpm,cpc
0,46,76,0,0.0,,0.0,,
1,84,114,0,0.0,,0.0,,
2,145,175,0,0.0,,0.0,,
3,230,260,0,0.0,,0.0,,
4,10,40,0,0.0,,0.0,,
5,198,228,0,0.0,,0.0,,
6,26,56,0,0.0,,0.0,,
7,256,286,14879,8.866667,0.0596,3958.4267,266.04,446.44
8,2,32,0,0.0,,0.0,,
9,84,114,0,0.0,,0.0,,


In [16]:
# best_bound2_ctr = comp_df[(comp_df['ctr'] == max(comp_df['ctr'])) & (comp_df['ctr'] != 'nan')]
comp50_bound_ctr = comp50_df[(comp50_df['ctr'] != 'nan')]
comp50_bound_ctr[(comp50_bound_ctr['ctr'] == max(comp50_bound_ctr['ctr']))]

Unnamed: 0,lower_bound,upper_bound,impression_won,clicks,ctr,total_spend,cpm,cpc
62,265,295,19359,11.666667,0.0603,5208.4604,269.05,446.44
102,260,290,15488,9.333333,0.0603,4166.765,269.03,446.44
248,268,298,21681,13.066667,0.0603,5833.472667,269.06,446.44
258,248,278,6195,3.733333,0.0603,1666.698933,269.04,446.44
440,260,290,15488,9.333333,0.0603,4166.765,269.03,446.44


#### n = 100

In [17]:
%%time
m = 30
n = 100
bid_range = np.arange(min_bid, max_bid-m, 1)
# comp_bid = np.arange(min_bid, max_bid-m, 1)
upper_bound = []
lower_bound = []
impression_list = []
clicks_list = []
cost_list = []
for i in range(1,501):
    rnd = random.choice(bid_range)
    compt = random.sample(range(min_bid, max_bid-m), n)
    lower_bound.append(rnd)
    upper_bound.append(rnd+m)
    i_list = []
    cl_list = []
    c_list = []
    for j in range(rnd,rnd+m,1):
        [impressions, clicks, costs] = competitor_bids(j,compt,val_df)
        i_list.append(impressions)
        cl_list.append(clicks)
        c_list.append(costs)
    impression_list.append(sum(i_list)/len(i_list))
    clicks_list.append(sum(cl_list)/len(cl_list))
    cost_list.append(sum(c_list)/len(c_list))

Wall time: 3h 44min 25s


In [18]:
comp100_df = pd.DataFrame()
comp100_df = bounds(comp100_df,lower_bound,upper_bound)

comp100_df = calc_impression(comp100_df,impression_list)
comp100_df = calc_clicks(comp100_df,clicks_list)
comp100_df = calc_costs(comp100_df,cost_list)

comp100_df

Unnamed: 0,lower_bound,upper_bound,impression_won,clicks,ctr,total_spend,cpm,cpc
0,77,107,0,0.0,,0.0,,
1,59,89,0,0.0,,0.0,,
2,141,171,0,0.0,,0.0,,
3,152,182,0,0.0,,0.0,,
4,50,80,0,0.0,,0.0,,
5,128,158,0,0.0,,0.0,,
6,262,292,18724,11.2,0.0598,5000.122933,267.04,446.44
7,21,51,0,0.0,,0.0,,
8,195,225,0,0.0,,0.0,,
9,79,109,0,0.0,,0.0,,


In [19]:
comp100_bound_ctr = comp100_df[(comp100_df['ctr'] != 'nan')]
comp100_bound_ctr[(comp100_bound_ctr['ctr'] == max(comp100_bound_ctr['ctr']))]

Unnamed: 0,lower_bound,upper_bound,impression_won,clicks,ctr,total_spend,cpm,cpc
20,255,285,11616,7.0,0.0603,3125.0735,269.03,446.44
40,261,291,16262,9.8,0.0603,4375.0993,269.04,446.44
53,262,292,17036,10.266667,0.0603,4583.440933,269.04,446.44
61,262,292,17036,10.266667,0.0603,4583.440933,269.04,446.44
73,257,287,13165,7.933333,0.0603,3541.7506,269.03,446.44
104,249,279,6970,4.2,0.0603,1875.0405,269.02,446.44
125,251,281,8519,5.133333,0.0603,2291.7149,269.01,446.44
147,253,283,10067,6.066667,0.0603,2708.3966,269.04,446.44
167,251,281,8519,5.133333,0.0603,2291.7149,269.01,446.44
226,254,284,10842,6.533333,0.0603,2916.7338,269.02,446.44
