In [4]:
import pandas as pd

train = pd.read_csv('/Users/liuce/Downloads/we_data/train.csv')
validation = pd.read_csv('/Users/liuce/Downloads/we_data/validation.csv')
test = pd.read_csv('/Users/liuce/Downloads/we_data/test.csv')

In [5]:
payprice_train = train['payprice']
clicks_train = train['click']

In [6]:
import sys

# show how far a loop goes
def progress(count, total, status='Complete'):
    bar_len = 60
    filled_len = int(round(bar_len * count / total))
    percents = round(100.0 * count / total, 1)
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%s %s\r' % (bar, percents, '%', status))
    if count==total:
        sys.stdout.write('\n')
    sys.stdout.flush()

In [31]:
# bidprice : is a constant value
# payprice  : is a list of market prices
# clicks    : is the list of user feedback on clicks

import random

def random_evaluation (lowerbound, upperbound, payprice, clicks, budget=6250000):
    win_num = 0.
    total_cost = 0.
    click_num = 0.
    
    N = len(payprice)
    for i in range(0, N): # for each bid request
        
        bidprice = random.randint(lowerbound, upperbound) # random int bidding price
        
        if bidprice > payprice[i]:
            if total_cost + payprice[i] > budget:
                break
            else:
                win_num = win_num + 1
                total_cost = total_cost + payprice[i]
                click_num += clicks[i]
                
    if win_num == 0:
        ctr = 0
    else:
        ctr = click_num / win_num
        
    if click_num == 0:
        cpc = -1
    else:
        cpc = total_cost / click_num
        
    #print('lowerbound {}, upperbound {}, win_num {}, total_cost {}, click_num {}, ctr {}, cpc {}'.format(lowerbound, upperbound, win_num, total_cost, click_num, ctr, cpc))
    return win_num, total_cost, click_num, ctr, cpc


# Training

In [1]:
import random

In [3]:
random.randint(1,3) # test random usage

3

In [33]:
N = 25  # half interval

logs = []

for mean in range(25, 201, 25): # mean range
    
    print('Mean = ', mean)

    for i in range(1, N + 1):
        progress(i - 1, N, ) # 显示进度
        
        # i is 1/2 * (the length of interval)
        upperbound = mean + i
        lowerbound = mean - i
        
        imp, total_cost, click_num, ctr, cpc = random_evaluation(lowerbound, upperbound, payprice_train, clicks_train)
        logs.append((upperbound, lowerbound, imp, total_cost, click_num, ctr, cpc))

Mean =  25

In [35]:
df_logs = pd.DataFrame(logs)
df_logs.columns = ['lowerbound', 'upperbound', 'imp', 'totalcost', 'clicks', 'ctr', 'cpc']
df_logs[:5]

Unnamed: 0,lowerbound,upperbound,imp,totalcost,clicks,ctr,cpc
0,26,24,411730.0,6249992.0,135.0,0.000328,46296.237037
1,27,23,411826.0,6249991.0,132.0,0.000321,47348.416667
2,28,22,412852.0,6249999.0,134.0,0.000325,46641.783582
3,29,21,413637.0,6249990.0,133.0,0.000322,46992.406015
4,30,20,416654.0,6249999.0,131.0,0.000314,47709.916031


In [43]:
df_logs['mean'] = (df_logs['upperbound'] + df_logs['lowerbound']) / 2

In [44]:
df_logs.sort_values(by=['ctr','cpc'], ascending=[False, True])[:10]

Unnamed: 0,lowerbound,upperbound,imp,totalcost,clicks,ctr,cpc,mean
160,186,164,97471.0,6249960.0,69.0,0.000708,90579.130435,175.0
168,194,156,97709.0,6249998.0,69.0,0.000706,90579.681159,175.0
171,197,153,97769.0,6249870.0,69.0,0.000706,90577.826087,175.0
169,195,155,97595.0,6249928.0,68.0,0.000697,91910.705882,175.0
165,191,159,97605.0,6249966.0,68.0,0.000697,91911.264706,175.0
157,183,167,97613.0,6249984.0,68.0,0.000697,91911.529412,175.0
158,184,166,97632.0,6249869.0,68.0,0.000696,91909.838235,175.0
164,190,160,97637.0,6249981.0,68.0,0.000696,91911.485294,175.0
167,193,157,97645.0,6249973.0,68.0,0.000696,91911.367647,175.0
166,192,158,97703.0,6249989.0,68.0,0.000696,91911.602941,175.0


In [36]:
df_logs.to_csv('randomBidding_Result.csv')

# Predict

# Evaluation

# Functions

In [None]:
def evaluate(bids, data_set, budget):
    '''
    bids = dictionary (key=bidid, value=bidprice)
    data_set = path to csv
    budget = spending is capped at this value
    '''
    print('Evaluating bidding strategy:')
    df = data_set
    spent, impressions, clicks = 0, 0, 0

    n = len(df)

    payPriceErrors = 0

    for i in range(0, n):
        
        progress(i+1, n)
        
        bidid = df.bidid.values[i]
        actualBidPrice = df.bidprice.values[i]
        payPrice = df.payprice.values[i]
        clicked = df.click.values[i]
        biddedPrice = bids[bidid] # prediction
        
        if payPrice > actualBidPrice:
            payPriceErrors += 1
        #if spent+biddedPrice <= budget and payPrice < actualBidPrice:  # ensure pay price is less than bid price to remove garbage results
        elif spent+payPrice <= budget:
            if biddedPrice >= actualBidPrice:
                #print('update')
                spent += payPrice
                impressions += 1
                clicks += clicked

    print(str(payPriceErrors)+' rows ignored because payprice > bidprice')

    if impressions > 0:
        ctr = clicks / impressions
    else:
        ctr = 0

    if clicks > 0:
        cpc = spent / clicks
    else:
        cpc = 0

    print('{:<12}\t{:<12}\t{:<12}\t{:<12}\t{:<12}'.format('spent', 'impressions', 'clicks', 'ctr', 'cpc'))
    print('{:<12}\t{:<12}\t{:<12}\t{:.10f}\t{:.10f}'.format(spent, impressions, clicks, ctr, cpc))
    return spent, impressions, clicks, ctr, cpc