In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import math
import pickle
from tqdm import tqdm, tqdm_notebook

from sklearn.metrics import balanced_accuracy_score, roc_auc_score, \
                            classification_report, log_loss

from sklearn.linear_model import LogisticRegression

import matplotlib as mpl

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display configuration.
# seaborn "ticks" theme for every matplotlib figure drawn below.
sns.set(style="ticks")
# Let pandas show up to 100 columns before eliding them in DataFrame reprs.
pd.set_option('display.max_columns', 100)

# Render figures inline in the notebook, using SVG as the inline figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [145]:
# Load the table stored under key 'validation' in the HDF5 file
# 'preprocessed.h5'. Later cells read its 'payprice' and 'click' columns and
# feed the remaining columns to the pCTR model.
validation = pd.read_hdf('preprocessed.h5', 'validation')

In [314]:
class BiddingAgent(object):
    """Replays a list of bids against logged auctions under a fixed budget.

    For every row of ``data`` the agent wins when its bid is greater than or
    equal to every competing bid. A winner pays the highest competing bid
    (second-price rule) as long as that price still fits in the remaining
    budget.

    Attributes
    ----------
    budget : int
        budget the agent started with
    data : pandas DataFrame
        auction log; must contain the columns 'payprice' and 'click', and may
        contain 'agent_bids'
    budget_remaining : number
        budget not yet spent
    spend : number
        sum of the prices paid for won impressions
    impressions : int
        number of auctions won and paid for
    clicks : number
        sum of the 'click' column over the bought impressions
    too_expensive : int
        auctions won on price but skipped because the budget was insufficient
    lost : int
        auctions in which the bid was below at least one competing bid
    ctr, aCPM, aCPC : number
        KPIs refreshed at the end of every call to `simulate`

    """

    def __init__(self, budget, data):
        """Initiate a new agent

        Parameters
        ----------
        budget : int
            set the maximum budget for the agent
        data : pandas DataFrame
            DataFrame containing all items up for auction

        """

        self.budget = budget
        self.data = data
        self.clicks = 0
        self.spend = 0
        self.impressions = 0
        self.too_expensive = 0
        self.lost = 0
        self.ctr = 0
        self.aCPM = 0
        self.aCPC = 0

        self.budget_remaining = budget

    def simulate(self, bids):
        """Simulates and executes the strategy for the agent

        Counters are not reset, so calling this twice on the same agent keeps
        accumulating on top of the previous run.

        Parameters
        ----------
        bids : list
            list containing bids for every item (one per row of ``data``,
            in row order)

        Raises
        ------
        ValueError
            if the number of bids differs from the number of rows in ``data``

        """

        if len(bids) != len(self.data):
            raise ValueError('Input data and bids are not equal in lenght')

        other_bids = self.combine_bids()

        # Auctions are processed in row order, so the budget is consumed
        # first-come-first-served.
        for x in range(len(bids)):
            if not self.win_auction(bids[x], other_bids[x]):
                self.lost += 1
                continue

            # Second-price rule: the winner pays the best competing bid.
            market_price = np.max(other_bids[x])

            if market_price > self.budget_remaining:
                # Won on price, but not enough budget left to pay for it.
                self.too_expensive += 1
                continue

            self.spend += market_price
            self.clicks += self.get_click(x)
            self.impressions += 1
            self.budget_remaining -= market_price

        self.ctr = self.ctr_function()
        self.aCPM = self.aCPM_function()
        self.aCPC = self.aCPC_function()

    def combine_bids(self):
        """
            combines multiple bids if present

            data['payprice'] contains a single bid per row
            data['agent_bids'] might be present containing extra bids per row

            Returns the 'payprice' values on their own when there is no
            'agent_bids' column, otherwise both columns stacked side by side
            with ``np.c_`` so that row ``i`` holds every competing bid of
            auction ``i``.

            NOTE(review): assumes the 'agent_bids' values stack into numeric
            columns next to 'payprice' - confirm against the code that
            creates that column.

        """
        if 'agent_bids' not in self.data.columns:
            return self.data['payprice'].values

        return np.c_[self.data['payprice'].values, self.data['agent_bids'].values]

    def win_auction(self, bid, other_bids):
        """
        Return True when bid is greater than or equal to every element of
        other_bids (ties count as a win).
        """

        return np.all(np.greater_equal(bid, other_bids))

    def get_click(self, row):
        """Return the 'click' value of the auction at position ``row``"""
        # Positional lookup: bids and competing bids are indexed by position,
        # so the click must be too, whatever index the DataFrame carries.
        return self.data['click'].iloc[row]

    def statistics(self):
        """Return the KPIs and counters of the agent as a dict"""

        return {
            'CTR': self.ctr,
            'aCPM': self.aCPM,
            'aCPC': self.aCPC,
            'spend': self.spend,
            'impressions': self.impressions,
            'clicks': self.clicks,
            'lost': self.lost,
            'budget_left': self.budget_remaining,
        }

    def ctr_function(self):
        """Calculate click through rate (0 when nothing was bought)"""
        if self.impressions == 0:
            return 0
        return self.clicks / self.impressions

    def aCPM_function(self):
        """Calculate average spend per bought impression (0 when nothing was bought)

        NOTE(review): reported as cost per mille, which presumes 'payprice'
        is already quoted per thousand impressions - confirm.
        """
        if self.impressions == 0:
            return 0
        return self.spend / self.impressions

    def aCPC_function(self):
        """Calculate cost per click: spend / 1000 per click (0 when there are no clicks)"""
        if self.clicks == 0:
            return 0
        return (self.spend / 1000) / self.clicks

class BidStrategy(object):
    """Collection of stateless bid-price formulas.

    Every strategy is a static method that returns one bid per item. The
    pCTR-based strategies accept a numpy array or a plain list/tuple of
    probabilities; lists and tuples are converted to float arrays first,
    any other input is used as given.
    """

    @staticmethod
    def _as_array(pCTR):
        """Turn a plain list/tuple into a float array; pass anything else through"""
        if isinstance(pCTR, (list, tuple)):
            return np.asarray(pCTR, dtype=float)
        return pCTR

    @staticmethod
    def const_bidding(bid, lenght):
        """Bids a constant value

        Parameters
        ----------
        bid : number
            value to bid on every item
        lenght : int
            number of bids to place

        """
        return np.repeat(bid, lenght)

    @staticmethod
    def random_bidding(lower_bound, upper_bound, lenght):
        """Bid a random integer in [lower_bound, upper_bound)

        Parameters
        ----------
        lenght : int
            number of bids to place
        lower_bound : int
            lower bound of the random range (inclusive)
        upper_bound : int
            upper bound of the random range (exclusive, as in
            ``np.random.randint``)

        """

        return np.random.randint(lower_bound, upper_bound, size=lenght)

    @staticmethod
    def linear_bidding(pCTR, avgCTR, const):
        """Linear bidding strategy: const * pCTR / avgCTR

        Parameters
        ----------
        pCTR : list
            list of probabilities P(click=1) for every item
        avgCTR : float
            average click through rate for the dataset
        const : float
            constant value that can be used to optimise a KPI

        """
        pCTR = BidStrategy._as_array(pCTR)

        return const * (pCTR / avgCTR)

    @staticmethod
    def ortb1(pCTR, const, lamda):
        """Optimal Real Time Bidding #1: sqrt(const / lamda * pCTR + const^2) - const

        Parameters
        ----------
        pCTR : list
            list of probabilities P(click=1) for every item
        lamda : float
            scaling parameter
        const : float
            constant value that can be used to optimise a KPI

        """
        return np.sqrt(np.multiply((const / lamda), pCTR) + const**2) - const

    @staticmethod
    def ortb2(pCTR, const, lamda):
        """Optimal Real Time Bidding #2

        With ``r = (pCTR + sqrt(const^2 * lamda^2 + pCTR^2)) / (const * lamda)``
        the bid is ``const * (r^(1/3) - r^(-1/3))``.

        Parameters
        ----------
        pCTR : list
            list of probabilities P(click=1) for every item
        lamda : float
            scaling parameter
        const : float
            constant value that can be used to optimise a KPI

        """
        pCTR = BidStrategy._as_array(pCTR)

        # Both cube-root terms share the same two quantities; compute them once.
        upper = pCTR + np.sqrt(const**2 * lamda**2 + pCTR**2)
        lower = const * lamda
        third = 1.0 / 3.0

        return const * ((upper / lower)**third - (lower / upper)**third)

    @staticmethod
    def second_price(pCTR, B, T, l):
        """Second price auction bidding strategy: 2 * pCTR * (B * l^2 / T)^(1/3)

        Parameters
        ----------
        pCTR : list
            list of probabilities P(click=1) for every item
        B : int
            total campaign budget
        T : int
            total number of items
        l : float
            constant value that can be used to optimise a KPI

        """
        pCTR = BidStrategy._as_array(pCTR)

        # Item-independent scale factor, identical for every bid.
        scale = ((B * (l**2)) / T)**(1.0 / 3.0)

        return 2 * pCTR * scale

In [317]:
# constant bidding
# Baseline run: bid the same value (258) on every row of `validation`
# with a budget of 6250*1000.
agent2 = BiddingAgent(6250*1000, validation)
bids2 = BidStrategy.const_bidding(258, len(validation))

# Replay the auctions with these bids, then display the resulting KPIs.
agent2.simulate(bids2)
agent2.statistics()

{'CTR': 0.000652560089908279,
 'aCPM': 75.52778818382859,
 'aCPC': 115.74074074074075,
 'spend': 6250000,
 'impressions': 82751,
 'clicks': 54,
 'lost': 4109,
 'budget_left': 0}

In [316]:
# linear bidding based on LR pCTR

# Load the fitted model; the `with` block closes the file handle again.
# NOTE: unpickling executes arbitrary code - only load a model file you trust.
with open("lr_model", "rb") as model_file:
    lr = pickle.load(model_file)

# Second column of predict_proba is taken as P(click=1); the two columns that
# are not model features are dropped before predicting.
pCTR_lr = lr.predict_proba(validation.drop(columns=['payprice', 'click']))[:, 1]

# Share of clicked rows in the validation set. The unpacking assumes 'click'
# takes exactly the values 0 and 1.
no_click, click = np.bincount(validation['click'].values)
avgCTR = click / (no_click + click)

# Bid proportionally to pCTR / avgCTR, scaled by the constant 3.52.
agent = BiddingAgent(6250*1000, validation)
bids = BidStrategy.linear_bidding(pCTR_lr, avgCTR, 3.52)

agent.simulate(bids)
agent.statistics()

{'CTR': 0.0012896971375648094,
 'aCPM': 53.54860600185231,
 'aCPC': 41.52029530201342,
 'spend': 6186524,
 'impressions': 115531,
 'clicks': 149,
 'lost': 188394,
 'budget_left': 63476}