In [1]:
# --- importing dependencies for data wrangling, visualization, plotting and preprocessing
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import vowpalwabbit as pyvw

from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.metrics import *
from workalendar.europe import Netherlands, NetherlandsWithSchoolHolidays

# Calendar used to look up public holidays for the holiday feature below.
# NOTE(review): the variable is named `de_calendar` but it is built from the
# workalendar `Netherlands` calendar — confirm which country's holidays are intended.
de_calendar = Netherlands()

In [2]:
# Load the raw floor-price observations from S3 and preview the first rows.
# NOTE(review): reading an s3:// URL with pandas presumably needs s3fs and
# configured AWS credentials — confirm for the target environment.
df = pd.read_csv('s3://floor-ai/ebayk-floor-ai/floor-price-data/ebay_k_floor_price_v1.csv')
df.head()

Unnamed: 0,date,adunit,day,hour,upr,yield,fill_rate
0,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,1,11,0.56,0.0,0.0
1,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,1,11,0.28,0.0,0.0
2,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,1,11,0.59,0.0,0.0
3,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/feed,1,11,0.55,0.0,0.0
4,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/feed,1,21,0.55,0.0,0.0


In [3]:
# Remove (in place) every row that has no 'yield' or no 'fill_rate' value,
# then show the remaining null count per column.
rows_missing_target = df[df['yield'].isna() | df['fill_rate'].isna()].index
df.drop(index = rows_missing_target, inplace = True)
df.isnull().sum()

date         0
adunit       0
day          0
hour         0
upr          0
yield        0
fill_rate    0
dtype: int64

In [4]:
# Drop the 'day' column shipped with the file; a 'day' column is recomputed from 'date' in the next cell.
df.drop('day', axis = 1, inplace = True)

In [5]:
def get_day_num(date_column):
    """Return the weekday number of every date in ``date_column``.

    The values are parsed with ``pd.to_datetime`` and the ``.dt.dayofweek``
    accessor of the parsed series is returned (Monday is 0, Sunday is 6).
    """
    parsed_dates = pd.to_datetime(date_column)
    return parsed_dates.dt.dayofweek

# Recompute 'day' as the weekday number derived from the 'date' column.
df['day'] = get_day_num(df['date'])
df.head()

Unnamed: 0,date,adunit,hour,upr,yield,fill_rate,day
0,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,0.0,3
1,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,0.0,3
2,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,0.0,3
3,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,0.0,3
4,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,0.0,3


In [7]:
def get_holiday_df(years = (2022, 2023), calendar = None):
    """Build a table of public holidays for the requested years.

    Parameters
    ----------
    years : iterable of int, optional
        Calendar years to include. Defaults to ``(2022, 2023)``, which is
        what the function previously hard-coded.
    calendar : object, optional
        Any object exposing ``holidays(year)`` that yields ``(date, name)``
        pairs. Defaults to the notebook-level ``de_calendar``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'date'`` (converted to ``str``) and ``'holiday'``. The
        per-year frames are concatenated as-is, so the index restarts for
        each year. An empty frame with both columns is returned when
        ``years`` is empty.
    """
    if calendar is None:
        calendar = de_calendar

    yearly_frames = [
        pd.DataFrame(
            calendar.holidays(year),
            columns = ['date', 'holiday']
        )
        for year in years
    ]

    # pd.concat refuses an empty list, so short-circuit that case.
    if not yearly_frames:
        return pd.DataFrame(columns = ['date', 'holiday'])

    holiday_df = pd.concat(yearly_frames, axis = 0)

    # Dates are stored as strings so they can be compared with the raw 'date' column.
    holiday_df['date'] = holiday_df['date'].astype(str)
    return holiday_df


# Materialise the holiday table once so it can be reused by the feature code below.
holiday_df = get_holiday_df()


def get_holiday_var(date, holiday_df):
    """Return ``'yes'`` when ``date`` appears in ``holiday_df['date']``, else ``'no'``.

    ``date`` is converted with ``str`` before the lookup, so the entries of
    ``holiday_df['date']`` are expected to be strings in the same format.
    """
    known_dates = holiday_df['date'].tolist()
    return 'yes' if str(date) in known_dates else 'no'

# Flag rows whose date is a public holiday ('yes' / 'no').
# One vectorised membership test replaces the per-row Python call, which
# rebuilt the list of holiday dates for every single row of the frame.
df['is_holiday'] = (
    df['date']
    .astype(str)
    .isin(holiday_df['date'])
    .map({True: 'yes', False: 'no'})
)
df.head()

Unnamed: 0,date,adunit,hour,upr,yield,fill_rate,day,is_holiday
0,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,0.0,3,no
1,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,0.0,3,no
2,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,0.0,3,no
3,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,0.0,3,no
4,2022-12-01,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,0.0,3,no


In [8]:
# 'date' has been turned into the 'day' and 'is_holiday' features; remove it in place.
df.drop(columns = 'date', inplace = True)
df.head()

Unnamed: 0,adunit,hour,upr,yield,fill_rate,day,is_holiday
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,0.0,3,no
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,0.0,3,no
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,0.0,3,no
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,0.0,3,no
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,0.0,3,no


In [9]:
def get_weekend_var(day_num_col):
    """Label each weekday number as weekend (``'yes'``) or not (``'no'``).

    Parameters
    ----------
    day_num_col : pandas.Series
        Weekday numbers as produced by ``Series.dt.dayofweek``
        (Monday is 0, Saturday is 5, Sunday is 6).

    Returns
    -------
    pandas.Series
        ``'yes'`` for Saturday and Sunday, ``'no'`` otherwise.
    """
    # Saturday is 5 in the dayofweek convention, so the comparison must be
    # inclusive; a strict `> 5` would only mark Sunday as weekend.
    is_weekend = day_num_col.apply(
        lambda day_num: 'yes' if day_num >= 5 else 'no'
    )
    return is_weekend

# Derive the weekend flag from the weekday number computed earlier.
df['is_weekend'] = get_weekend_var(df['day'])
df.head()

Unnamed: 0,adunit,hour,upr,yield,fill_rate,day,is_holiday,is_weekend
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,0.0,3,no,no
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,0.0,3,no,no
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,0.0,3,no,no
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,0.0,3,no,no
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,0.0,3,no,no


In [10]:
# Overwrite 'yield' with yield multiplied by fill_rate, then discard 'fill_rate'.
# Because 'fill_rate' is dropped here, this cell cannot be executed twice on the same frame.
df['yield'] = df['yield'].mul(df['fill_rate'])
df.drop(columns = 'fill_rate', inplace = True)
df.head()

Unnamed: 0,adunit,hour,upr,yield,day,is_holiday,is_weekend
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,3,no,no
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,3,no,no
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,3,no,no
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,3,no,no
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,3,no,no


In [11]:
def get_probability(df):
    """Empirical share of each floor price ('upr') within its context.

    For every row the result is the number of rows sharing the same context
    (adunit, hour, day, is_holiday, is_weekend) *and* the same 'upr',
    divided by the number of rows sharing the same context.

    Returns a Series aligned with ``df``.
    """
    context_keys = ['adunit', 'hour', 'day', 'is_holiday', 'is_weekend']
    action_keys = context_keys + ['upr']

    rows_per_action = df.groupby(action_keys)['upr'].transform('count')
    rows_per_context = df.groupby(context_keys)['upr'].transform('count')

    return rows_per_action / rows_per_context

# Attach the empirical probability of the observed floor price within its context.
df['action_probability'] = get_probability(df)
df.head()

Unnamed: 0,adunit,hour,upr,yield,day,is_holiday,is_weekend,action_probability
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,3,no,no,0.25
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,3,no,no,1.0
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,3,no,no,1.0
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,3,no,no,0.5
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,3,no,no,0.363636


In [23]:
# Inspect the first ten rows of the engineered frame.
df.head(10)

Unnamed: 0,adunit,hour,upr,yield,day,is_holiday,is_weekend,action_probability
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,3,no,no,0.25
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,3,no,no,1.0
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,3,no,no,1.0
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,3,no,no,0.5
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,3,no,no,0.363636
5,/7233/eBay_Kleinanzeigen_X/ATF/Header,0,0.58,0.0,3,no,no,1.0
6,/7233/eBay_Kleinanzeigen_X/ATF/Header,0,0.58,0.001206,3,no,no,1.0
7,/7233/eBay_Kleinanzeigen_X/ATF/Header,0,0.58,0.001643,3,no,no,1.0
8,/7233/eBay_Kleinanzeigen_X/ATF/Header,0,0.58,0.00116,3,no,no,1.0
9,/7233/eBay_Kleinanzeigen_X/ATF/Header,0,0.58,0.003558,3,no,no,1.0


In [54]:
import warnings
# Silences every warning for the rest of the session, including ones that may point at real problems.
warnings.filterwarnings("ignore")

# Ad positions (lower-cased) that are accepted for paths needing validation.
_KNOWN_AD_POSITIONS = frozenset(
    [
        'left', 'feed', 'right', 'bottom',
        'gallery', 'top', 'partnership', 'right1',
        'header', 'belly', 'map', 'mid', 'bot', 'img',
    ]
)

# Prefixes a segment must start with to be accepted as a page type.
_PAGE_TYPE_PREFIXES = ('AT', 'BT', 'AA')

_NOT_AVAILABLE = 'not_available'


def get_adunit_values(adunit):
    """Split an ad-unit path into ``(page_type, ad_position)``.

    The path is split on ``'/'`` and the text before the first slash is
    ignored. The last two segments are the candidates for page type and
    ad position.

    * Exactly five segments: the last two segments are returned unchanged.
    * Any other length with at least two segments: the page type is kept
      only if it starts with ``AT``, ``BT`` or ``AA``, and the ad position
      only if its lower-cased form is a known position; anything else
      becomes ``'not_available'``. The original casing of an accepted ad
      position is preserved.
    * Fewer than two segments, or a non-string input: both values are
      ``'not_available'``.

    Always returns a 2-tuple, so the result can be expanded into two columns.
    """
    if not isinstance(adunit, str):
        return _NOT_AVAILABLE, _NOT_AVAILABLE

    segments = adunit.split('/')[1:]

    # Too short to hold both a page type and a position.
    if len(segments) < 2:
        return _NOT_AVAILABLE, _NOT_AVAILABLE

    page_type, ad_position = segments[-2], segments[-1]

    # Full-length paths are trusted as they are.
    if len(segments) == 5:
        return page_type, ad_position

    # Shorter paths (and unexpectedly longer ones, which used to fall
    # through and yield None) are validated segment by segment.
    if not page_type.startswith(_PAGE_TYPE_PREFIXES):
        page_type = _NOT_AVAILABLE

    if ad_position.lower() not in _KNOWN_AD_POSITIONS:
        ad_position = _NOT_AVAILABLE

    return page_type, ad_position
            
            
# Split every ad-unit path into its (page_type, ad_position) pair and build
# the two-column frame in a single constructor call. Expanding the tuples with
# `Series.apply(pd.Series)` creates one Series object per row, which is very
# slow on a frame of this size.
# Any None result is expanded to a NaN pair, as the Series expansion did.
adunit_pairs = [
    pair if pair is not None else (np.nan, np.nan)
    for pair in df['adunit'].map(get_adunit_values)
]
result = pd.DataFrame(adunit_pairs, index = df.index)

prep_df = pd.concat(
    [
        df, result.rename(
            columns = {
                0:'page_type', 
                1:'ad_position'
            }
        )
    ], axis = 1
)

prep_df.head()

Unnamed: 0,adunit,hour,upr,yield,day,is_holiday,is_weekend,action_probability,page_type,ad_position
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,0.56,0.0,3,no,no,0.25,ATF,left
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,0.28,0.0,3,no,no,1.0,ATF,right
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,0.59,0.0,3,no,no,1.0,BTF,belly
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,0.55,0.0,3,no,no,0.5,not_available,feed
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,0.55,0.0,3,no,no,0.363636,not_available,feed


In [60]:
# Encoders that turn the categorical columns into numeric codes; both are
# exported further down so the same mapping can be reused later.
ordinal_encoder = OrdinalEncoder()
label_encoder = LabelEncoder()

# Context columns encoded together by the ordinal encoder.
ordinal_cols = [
    'page_type', 'ad_position'
]

# Replace the page_type / ad_position strings with their codes (floats in the output below).
prep_df[ordinal_cols] = ordinal_encoder.fit_transform(prep_df[ordinal_cols])
# Replace each distinct floor price ('upr') with an integer class id.
# NOTE(review): LabelEncoder ids start at 0 — confirm this matches the action numbering the bandit expects.
prep_df['upr'] = label_encoder.fit_transform(prep_df['upr'])

In [61]:
# Display the encoded frame.
prep_df

Unnamed: 0,adunit,hour,upr,yield,day,is_holiday,is_weekend,action_probability,page_type,ad_position
0,/7233/AAX/eBay_Kleinanzeigen_X/ATF/left,11,34,0.000000,3,no,no,0.250000,0.0,5.0
1,/7233/AAX/eBay_Kleinanzeigen_X/ATF/right,11,6,0.000000,3,no,no,1.000000,0.0,8.0
2,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,11,37,0.000000,3,no,no,1.000000,1.0,1.0
3,/7233/AAX/eBay_Kleinanzeigen_X/feed,11,33,0.000000,3,no,no,0.500000,2.0,3.0
4,/7233/AAX/eBay_Kleinanzeigen_X/feed,21,33,0.000000,3,no,no,0.363636,2.0,3.0
...,...,...,...,...,...,...,...,...,...,...
1064206,/7233/eBay_Kleinanzeigen_X/BTF/left,8,15,0.000173,1,no,no,0.204936,1.0,5.0
1064207,/7233/eBay_Kleinanzeigen_X/feed,15,33,0.000414,6,no,yes,0.791822,2.0,3.0
1064208,/7233/eBay_Kleinanzeigen_X/BTF/bottom,19,53,0.000466,6,yes,yes,0.245989,1.0,2.0
1064209,/7233/eBay_Kleinanzeigen_X/ATF/top,9,24,0.000520,1,no,no,0.437996,0.0,9.0


In [62]:
# Keep the raw ad-unit paths in a separate variable and remove the column
# from the training frame in the same step.
original_adunits = prep_df.pop('adunit')

prep_df.head()

Unnamed: 0,hour,upr,yield,day,is_holiday,is_weekend,action_probability,page_type,ad_position
0,11,34,0.0,3,no,no,0.25,0.0,5.0
1,11,6,0.0,3,no,no,1.0,0.0,8.0
2,11,37,0.0,3,no,no,1.0,1.0,1.0
3,11,33,0.0,3,no,no,0.5,2.0,3.0
4,21,33,0.0,3,no,no,0.363636,2.0,3.0


In [63]:
# Create a Vowpal Wabbit contextual-bandit workspace with one action per distinct encoded 'upr' value.
agent = pyvw.Workspace(f"--cb {prep_df['upr'].nunique()}", quiet = True)


def train(agent, df, action_offset = 1):
    """Feed every row of ``df`` to a Vowpal Wabbit contextual-bandit learner.

    Each row becomes one example of the form
    ``action:cost:probability | name=value name=value ...``.

    Parameters
    ----------
    agent : object
        Learner exposing ``learn(example_string)``.
    df : pandas.DataFrame
        Must contain ``'upr'`` (integer-encoded action), ``'yield'``
        (reward) and ``'action_probability'``. Every other column is used
        as a context feature.
    action_offset : int, optional
        Added to the encoded action before it is sent to the learner.
        VW's ``--cb`` numbers actions from 1 to k, whereas the encoded
        ``'upr'`` starts at 0, so the default of 1 shifts 0..k-1 to 1..k.
        Subtract the same offset from predicted actions before decoding
        them. Pass 0 to send the encoded value unchanged.

    Returns
    -------
    The same ``agent`` after learning from all rows.

    Notes
    -----
    * The cost is the negated reward rounded to 4 decimals, because VW
      minimises cost.
    * Features are written as ``column=value`` tokens. Bare values would
      make different columns indistinguishable (e.g. hour 3 vs day 3, or
      the two yes/no flags). Prediction examples must use the same format.
    """
    target_cols = [
        'upr',
        'yield',
        'action_probability'
    ]
    feature_cols = [
        column for column in df.columns if column not in target_cols
    ]

    # Pull the columns out once; per-row `df.iloc[row]` access builds a new
    # Series for every row and dominates the run time on large frames.
    actions = df['upr'].tolist()
    rewards = df['yield'].tolist()
    probabilities = df['action_probability'].tolist()
    feature_values = [df[column].tolist() for column in feature_cols]

    for row in range(len(df)):
        action = int(actions[row]) + action_offset
        cost = -float(np.round(float(rewards[row]), 4))
        features = ' '.join(
            f'{column}={values[row]}'
            for column, values in zip(feature_cols, feature_values)
        )
        learning_example = (
            f'{action}:{cost}:{probabilities[row]} | {features}'
        )

        agent.learn(learning_example)
    return agent

# Train on the full prepared frame; `model` is the same workspace object as `agent`.
model = train(agent, prep_df)

In [65]:
# Persist the trained workspace to disk.
# NOTE(review): presumably the 'model/' directory has to exist beforehand — confirm.
model.save("model/floor_ai.model")

In [66]:
import pickle as pkl


def export_object(object_, file_name):
    """Pickle ``object_`` into ``file_name`` and print a confirmation line.

    The file is opened in binary write mode and the highest pickle protocol
    available is used.
    """
    with open(file_name, 'wb') as sink:
        pickler = pkl.Pickler(sink, protocol = pkl.HIGHEST_PROTOCOL)
        pickler.dump(object_)

    message = f'object {object_} has been saved as {file_name}'
    print(message)
    
    
def import_object(file_name):
    """Load and return the pickled object stored in ``file_name``.

    A confirmation line is printed once the object has been restored.
    """
    # Unpickling can execute arbitrary code; only use this on files you trust.
    with open(file_name, 'rb') as source:
        restored = pkl.Unpickler(source).load()

    message = f'object {restored} has been loaded successfully'
    print(message)
    return restored

In [67]:
# Save the fitted 'upr' label encoder next to the model.
export_object(label_encoder, 'model/label_encoder.pkl')

object LabelEncoder() has been saved as model/label_encoder.pkl


In [68]:
# Save the fitted page_type / ad_position encoder next to the model.
export_object(ordinal_encoder, 'model/ordinal_encoder.pkl')

object OrdinalEncoder() has been saved as model/ordinal_encoder.pkl


In [2]:
import pandas as pd
# Read a second extract (January to June, per the file name) for inspection only; the result is not assigned.
# NOTE(review): this uses the 's3a://' scheme while the first load uses 's3://' — confirm both resolve in the target environment.
pd.read_csv('s3a://floor-ai/ebayk-floor-ai/floor-price-data/training_data_january_june.csv')

Unnamed: 0,date,adunit,day,hour,upr,yield,fill_rate
0,2022-01-27,/7233/AAX/eBay_Kleinanzeigen_X/feed,6,19,0.49,0.003348,0.09206
1,2022-01-27,/7233/AAX/eBay_Kleinanzeigen_X/feed,6,19,0.49,0.000000,0.09206
2,2022-01-27,/7233/AAX/eBay_Kleinanzeigen_X/feed,6,19,0.49,0.000547,0.09206
3,2022-01-27,/7233/AAX/eBay_Kleinanzeigen_X/feed,6,19,0.49,0.000650,0.09206
4,2022-01-27,/7233/AAX/eBay_Kleinanzeigen_X/feed,6,19,0.49,0.001065,0.09206
...,...,...,...,...,...,...,...
9919995,2022-06-12,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,5,0,1.10,0.001345,0.08578
9919996,2022-06-12,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,5,0,1.10,0.001808,0.08578
9919997,2022-06-12,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,5,0,1.10,0.002569,0.08578
9919998,2022-06-12,/7233/AAX/eBay_Kleinanzeigen_X/BTF/belly,5,0,1.10,0.004579,0.08578


In [4]:
import vowpalwabbit as pyvw
# Reload the saved workspace from disk; this rebinds `model`.
# NOTE(review): the action count 108 is hard-coded; presumably it should equal
# the number of distinct 'upr' values used at training time — confirm.
model = pyvw.Workspace(
    "--cb 108 -i model/floor_ai.model", 
    quiet=True
)

In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Tiny two-category frame used to try out LabelEncoder in the cells below.
test_df = pd.DataFrame(
    {
        'key':['boy', 'girl', 'boy', 'girl', 'boy', 'girl', 'boy', 'girl']
    }
)

test_df

Unnamed: 0,key
0,boy
1,girl
2,boy
3,girl
4,boy
5,girl
6,boy
7,girl


In [3]:
# Scratch experiment: fit a fresh encoder on the distinct keys only.
# Note that this rebinds `label_encoder`, the same name used earlier for the
# encoder fitted on 'upr'; in a shared kernel the earlier object is no longer
# reachable under that name after this cell runs.
label_encoder = LabelEncoder()

label_encoder.fit(test_df['key'].unique())

LabelEncoder()

In [4]:
# Encode the full column with the encoder fitted on the unique keys.
label_encoder.transform(test_df['key'])

array([0, 1, 0, 1, 0, 1, 0, 1])