In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import math
import pickle
from tqdm import tqdm, tqdm_notebook

from sklearn.metrics import balanced_accuracy_score, roc_auc_score, \
                            classification_report, log_loss

from sklearn.linear_model import LogisticRegression

import matplotlib as mpl

import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="ticks")
pd.set_option('display.max_columns', 100)

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [2]:
validation = pd.read_hdf('preprocessed.h5', 'validation')

In [3]:
class BiddingEnvironment(object):
    def __init__(self, data):
        """Initate new environment in which agents operate
        
        Parameters
        ----------
        data : pandas DataFrame
            DataFrame containing all items up for auction. Fields payprice 
        
        """
        self.lenght = len(data)
        self.original_bids = data['payprice'].values
        self.click = data['click'].values
        self.other_bids = False
        self.other_bids_registred = False

    def get_bids(self):
        """
            combines multiple bids if present
            
            self.original_bids contains a list of single bids per row 
            self.other_bisd might be present containing a list of extra bids
            bid by other agents in the evironment
            
            joins both list if other_bids are present
            
        """
        if (self.other_bids_registred):
            return np.c_[self.original_bids, self.other_bids]

        return self.original_bids

    def eval_click(self, row):
        """Determine if this item resulted in a click"""
        return self.click[row]

    def register_bid(self, new_bids):
        """ Register bids of a """
        if len(new_bids) != self.lenght:
            raise ValueError(
                'Number of bids must equal the length of the environment')

        # some additional bids have been registerd
        if (self.other_bids_registred):
            self.other_bids = np.c_[self.other_bids, new_bids]

        # no additional bids have been addded to the environmnet yet
        else:
            self.other_bids = new_bids
            self.other_bids_registred = True


class BiddingAgent(object):
    """Builds bidding agent
    
    Attributes
    ----------
    
    
    """

    def __init__(self, budget, data):
        """Initate new agent
        
        Parameters
        ----------
        budget : int
            set the maximum budget for the agent
        data : pandas DataFrame
            DataFrame containing all items up for auction
        
        """

        self.budget = budget
        self.data = data
        self.clicks = 0
        self.spend = 0
        self.impressions = 0
        self.too_expensive = 0
        self.lost = 0
        self.ctr = 0
        self.aCPM = 0
        self.aCPC = 0
        self.budget_remaining = budget

    def simulate(self, bids):
        """Simulates and executes the strategy for the agent
        
        Parameters
        ----------
        bids : list
            list containing bids for every item
        
        """

        self.reset_agent()

        if len(bids) != self.data.lenght:
            raise ValueError('Input data and bids are not equal in lenght')

        other_bids = self.data.get_bids()

        # loop through all bids
        for x in range(len(bids)):
            current_bid = bids[x]
            current_other = other_bids[x]
            won = self.win_auction(current_bid, current_other)

            if won:
                second_higest_bid = np.max(current_other)

                # not enough budget left
                if (second_higest_bid) > self.budget_remaining:
                    self.too_expensive += 1
                else:
                    self.spend += second_higest_bid
                    self.clicks += self.data.eval_click(x)
                    self.impressions += 1
                    self.budget_remaining -= second_higest_bid
            else:
                self.lost += 1

        self.ctr = self.ctr_function()
        self.aCPM = self.aCPM_function()
        self.aCPC = self.aCPC_function()

    def reset_agent(self):
        """
        Resets current agent to initial state
        """

        self.clicks = 0
        self.spend = 0
        self.impressions = 0
        self.too_expensive = 0
        self.lost = 0
        self.ctr = 0
        self.aCPM = 0
        self.aCPC = 0
        self.budget_remaining = self.budget

    def win_auction(self, bid, other_bids):
        """
        Check if bid is higher or equal to one or more bids.
        Return True when bid is higher than all elements given
        """

        return np.all(np.greater_equal(bid, other_bids))

    def statistics(self):
        """Return statistics"""

        return ({
            'CTR': self.ctr,
            'aCPM': self.aCPM,
            'aCPC': self.aCPC,
            'spend': self.spend,
            'impressions': self.impressions,
            'clicks': self.clicks,
            'lost': self.lost,
            'budget_left': self.budget_remaining,
            'spend': self.spend
        })

    def ctr_function(self):
        """Calculate click through rate"""
        if (self.impressions == 0):
            return 0
        return self.clicks / self.impressions

    def aCPM_function(self):
        """Calcaule avaerage cost per mille"""
        if (self.impressions):
            return 0
        return self.spend / self.impressions

    def aCPC_function(self):
        """Calculate cost per click"""
        if self.clicks == 0:
            return 0
        return (self.spend / 1000) / self.clicks


class BidStrategy:
    @staticmethod
    def const_bidding(bid, lenght):
        """Bids a constant value for every item
        
        Parameters
        ----------
        lenght : int
            number of bids to place
        
        """
        return np.repeat(bid, lenght)

    @staticmethod
    def random_bidding(lower_bound, upper_bound, lenght):
        """Bid a random value within lower and upper bound
        
        Parameters
        ----------
        lenght : int
            number of bids to place
        lower_bound : int
            lower bound of the random range
        upper_bound : int
            upper bound of the random range
        
        """

        return np.random.randint(lower_bound, upper_bound, size=lenght)

    @staticmethod
    def linear_bidding(pCTR, avgCTR, const):
        """Linear bidding strategy
        
        Parameters
        ----------
        pCTR : list
            list of proabilities P(click=1) for every item
        avgCTR : float
            average click through rate for the dataet
        const : float
            constant value that can be used to optimise a KPI
        
        """

        return const * (pCTR / avgCTR)

    @staticmethod
    def ortb1(pCTR, const, lamda):
        """Optimal Real Time Bidding #1
        
        Parameters
        ----------
        pCTR : list
            list of proabilities P(click=1) for every item
        lamda : float
            scaling parameter
        const : float
            constant value that can be used to optimise a KPI
        
        """
        return np.sqrt(np.multiply((const / lamda), pCTR) + const**2) - const

    @staticmethod
    def ortb2(pCTR, const, lamda):
        """Optimal Real Time Bidding #2
        
        Parameters
        ----------
        pCTR : list
            list of proabilities P(click=1) for every item
        lamda : float
            scaling parameter
        const : float
            constant value that can be used to optimise a KPI
        
        """
        return const * (((pCTR + np.sqrt(const**2 * lamda**2 + pCTR**2)) /
                         (const * lamda))**(1 / 3) - (
                             (const * lamda) /
                             (pCTR + np.sqrt(const**2 * lamda**2 + pCTR**2)))**
                        (1 / 3))

    @staticmethod
    def second_price(pCTR, B, T, l):
        """Optimal Second price auction bidding strategy
        
        Parameters
        ----------
        pCTR : list
            list of proabilities P(click=1) for every item
        B : int
            total campaign budget
        T : int
            total number of items
        l : float
            constant value that can be used to optimise a KPI
        
        """

        return 2 * pCTR * (((B * (l**2))) / T)**(1 / 3)

    @staticmethod
    def opportunistic(pCTR, bid_price, treshold):
        """Bid constant price when pCTR is above a treshold
        
        Parameters
        ----------
        pCTR : list
            list of proabilities P(click=1) for every item
        bid_price : int
            price to bid when pCTR is above set treshold
        treshold : float
            total number of items
        
        """
        bids = pCTR
        bids[bids >= treshold] = bid_price
        bids[bids < treshold] = 1

        return bids

In [4]:
# constant bidding
environment = BiddingEnvironment(validation.copy())

In [5]:
# example grid search const biddidng strategy
results = []
for const_bid in tqdm(np.arange(200,300,5)):

    agent = BiddingAgent(6250*1000, environment)
    bids = BidStrategy.const_bidding(const_bid, environment.lenght)

    agent.simulate(bids)
    results.append(agent.statistics())
    
results = pd.DataFrame(results)
results.sort_values('CTR', ascending=False).head(5)

100%|██████████| 20/20 [01:31<00:00,  4.00s/it]


Unnamed: 0,CTR,aCPC,aCPM,budget_left,clicks,impressions,lost,spend
11,0.000651,115.740741,0,0,54,82956,4397,6250000
2,0.000649,107.758621,0,0,58,89317,14386,6250000
10,0.000647,115.740741,0,0,54,83412,5102,6250000
12,0.000643,117.924528,0,0,53,82483,3702,6250000
4,0.000641,111.607143,0,0,56,87320,11174,6250000


In [6]:
# linear bidding based on LR pCTR

# load the trained LR
lr = pickle.load(open( "lr_model", "rb" ))

# user LR to predict pCTR
pCTR_lr = lr.predict_proba(validation.drop(columns=['payprice', 'click']))[:, 1]

# get baseline avgCTR
no_click, click = np.bincount(validation['click'].values)
avgCTR = click / (no_click + click)

# init agent
agent = BiddingAgent(6250*1000, environment)

# get bids provided given strategy
bids = BidStrategy.linear_bidding(pCTR_lr, avgCTR, 3.52)

# simulate the of the agent in the environment
agent.simulate(bids)

# get the perforamnce
agent.statistics()

{'CTR': 0.0012896971375648094,
 'aCPM': 0,
 'aCPC': 41.52029530201342,
 'spend': 6186524,
 'impressions': 115531,
 'clicks': 149,
 'lost': 188394,
 'budget_left': 63476}

In [7]:
# add agent to evironment without simulating it

bids_random1 = BidStrategy.random_bidding(300, 400, environment.lenght)
bids_random2 = BidStrategy.random_bidding(1000, 1500, environment.lenght)

# register bids to the environment
environment.register_bid(bids_random1)
environment.register_bid(bids_random2)

In [12]:
environment.get_bids()

array([[  23,  338, 1149],
       [  75,  366, 1051],
       [  65,  316, 1491],
       ...,
       [ 108,  300, 1173],
       [  50,  340, 1470],
       [  40,  346, 1162]])