In [1]:
import os
import json

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import shap
import xgboost as xgb
import pickle
import scipy

from connections.db_class import RedshiftDB
from connections.db_config import rs_de_conn



In [2]:
# Move the working directory one level up from wherever the kernel started.
# NOTE(review): this is relative and not idempotent - re-running the cell
# climbs one more directory each time, so run it exactly once per kernel.
os.chdir('../')

In [3]:
# Database handle built from the imported `rs_de_conn` config; `con.read_sql`
# is what the data-loading cell calls to fetch the evaluation set.
con = RedshiftDB(rs_de_conn)

In [4]:
# Feature columns, in the exact order they are selected from the database and
# handed to every model. Must stay a list: it is used as a column selector.
model_features = [
    # identifiers and request-level fields
    'product_id',
    'search_type_id',
    'average_published_price',
    'advance_purchase_days',
    'number_of_nights',
    'number_of_adults',
    'number_of_children',
    # hotel-level fields
    'rating',
    'stars',
    'srq_hotel_distance',
    'previous_user_hotel_interaction',
    # srq_* z-scores
    'srq_price_zscore',
    'srq_spread_zscore',
    'srq_distance_zscore',
    'srq_rating_zscore',
    'srq_stars_zscore',
    # user_* differences
    'user_price_diff',
    'user_stars_diff',
    'user_rating_diff',
    # region_* fields
    'region_distance_diff',
    'region_cumulative_bookings',
    'region_meanprice_diff',
    'region_rating_diff',
    'region_stars_diff',
    # hotel_* cumulative fields
    'hotel_cumulative_bookings',
    'hotel_cumulative_share',
    # remaining fields (order kept as in the original list)
    'user_spread_diff',
    'bayesian_refundability_rate',
    'bayesian_refundability_premium',
]

In [5]:
# Load the evaluation set: rows of ranking_features_all with display_rank <= 3,
# joined on session_id to the experiment variant of the session. Rows whose
# variant is 'dual' are dropped. The `{}` placeholder is filled with the
# comma-separated model_features column names.
# NOTE(review): the `if f != 'popawi'` filter is a no-op for the current
# model_features list, which has no 'popawi' entry.
# NOTE(review): the CTE is DISTINCT on (session_id, variant); a session that
# appears with more than one variant across the three experiments would
# duplicate its rows in the join - TODO confirm this cannot happen.
data = con.read_sql("""
WITH variants AS
(
SELECT DISTINCT session_id, variant
FROM experiment_assignment
WHERE experiment_name = 'Config:tw_sr_smart-rank-api'
    OR experiment_name = 'Config:testarossa.experiment.tw_sr.search.result.ranking.api_ppw.enabled'
    OR experiment_name = 'Config:testarossa.experiment.tw_sr.search.result.ranking.api.enabled'
)
SELECT r.search_request_id, 
    {},
    r.is_popawi_1,
    r.label,
    v.variant
FROM search_result_ranking_qa.ranking_features_all r
JOIN variants v 
    ON v.session_id = r.session_id
WHERE r.display_rank <= 3
    AND variant != 'dual'
""".format(', '.join(f for f in model_features if f != 'popawi')))

In [6]:
# Number of loaded rows per experiment variant.
data['variant'].value_counts()

alternative    831958
original       816523
Name: variant, dtype: int64

In [7]:
# Distribution of the target `label` over all loaded rows.
data['label'].value_counts()

0    1448166
1     146630
2      40441
3      13244
Name: label, dtype: int64

# Ability of the direct method to estimate on-policy and off-policy value

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
# split into samples
def get_value(df, model, model_type, clip_min=0, clip_max=4):
    """Score ``df`` with ``model`` and compare predictions to the mean label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``label`` column and every column named in the
        module-level ``model_features`` list.
    model : object
        Fitted estimator exposing ``predict``.
    model_type : str
        One of ``'boost'``, ``'lambda'``, ``'rf'`` or ``'lm'``. For ``'lm'``
        missing feature values are replaced by 0 before predicting; the other
        types receive the features with missing values left in place.
    clip_min, clip_max : float, optional
        Bounds the predictions are clipped to. The defaults (0 and 4) are the
        values that used to be hard-coded.
        NOTE(review): the label counts shown in this notebook only go up to 3;
        confirm that 4 is the intended upper bound.

    Returns
    -------
    tuple
        ``(true_value, bias, rmse)`` where ``true_value`` is the mean of
        ``df['label']``, ``bias`` is the mean of ``true_value - prediction``
        and ``rmse`` is the root mean square of that same difference. Both
        error terms are measured against the scalar mean label, not against
        each row's own label.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported values. Previously this
        case fell through both branches and failed with an UnboundLocalError.
    """
    # Validate before touching the data so a typo fails immediately and clearly.
    if model_type not in ('boost', 'lambda', 'rf', 'lm'):
        raise ValueError(
            "Unknown model_type {!r}; expected 'boost', 'lambda', 'rf' or 'lm'".format(model_type)
        )

    true_value = df['label'].mean()

    features = df[model_features].astype('float')
    if model_type == 'lm':
        # Only the linear model gets missing values replaced by 0.
        features = features.fillna(0)

    pred_value = np.clip(np.asarray(model.predict(features)), clip_min, clip_max)

    error = true_value - pred_value
    bias = error.mean()
    rmse = np.sqrt(np.mean(error ** 2))
    return true_value, bias, rmse

def _fit_direct_model(train, model_type):
    """Fit one model of ``model_type`` on ``train`` (sorted by search_request_id)."""
    features = train[model_features].astype('float')
    labels = train['label']

    if model_type == 'boost':
        model = xgb.XGBRegressor(n_estimators=50)
        model.fit(features, labels)
    elif model_type == 'lm':
        # The linear model gets missing values replaced by 0.
        model = LinearRegression()
        model.fit(features.fillna(0), labels)
    elif model_type == 'lambda':
        # Group sizes in ascending request order; this matches the row order
        # only because `train` is sorted by search_request_id.
        group_sizes = train['search_request_id'].value_counts(sort=False).sort_index()
        model = xgb.XGBRanker(n_estimators=50)
        # `group` must be a keyword: passing it positionally triggers the
        # "Pass `group` as keyword args" deprecation warning.
        model.fit(features, labels, group=group_sizes)
    elif model_type == 'rf':
        model = xgb.XGBRFRegressor(n_estimators=50)
        model.fit(features, labels)
    else:
        raise ValueError(
            "Unknown model_type {!r}; expected 'boost', 'lm', 'lambda' or 'rf'".format(model_type)
        )
    return model


def off_policy_estimate(v0, v1, model_type, name, n_splits=10, random_state=None):
    """Repeatedly fit a model on half of ``v0`` and score it on-policy and off-policy.

    Each repetition samples half of the distinct ``search_request_id`` values
    of ``v0`` for training, fits a model of ``model_type`` on those rows, then
    calls ``get_value`` on the held-out rows of ``v0`` (on-policy) and on all
    of ``v1`` (off-policy).

    Parameters
    ----------
    v0 : pandas.DataFrame
        Data the model is trained on and evaluated on-policy. Needs
        ``search_request_id``, ``label`` and the ``model_features`` columns.
    v1 : pandas.DataFrame
        Data the model is evaluated on off-policy; never used for training.
    model_type : str
        ``'boost'`` (XGBRegressor), ``'lm'`` (LinearRegression), ``'lambda'``
        (XGBRanker) or ``'rf'`` (XGBRFRegressor).
        NOTE(review): for ``'lambda'`` the ranker's predictions are presumably
        ranking scores rather than label estimates, so its bias/RMSE may not
        be comparable with the regressors - confirm.
    name : str
        Label stored under ``'Name'`` in the result.
    n_splits : int, optional
        Number of random train/test repetitions (default 10, as before).
    random_state : int or None, optional
        Seed for the request-level splits. ``None`` (default) keeps the
        original behaviour of drawing from pandas' default random source.

    Returns
    -------
    dict
        ``'Name'`` plus, for both ``'On Policy'`` and ``'Off Policy'``:
        ``Value`` (mean true value), ``Bias`` (mean bias), ``Std`` (standard
        deviation of the bias across repetitions) and ``RMSE`` (mean RMSE).

    Raises
    ------
    ValueError
        If ``model_type`` is not supported or ``n_splits`` is smaller than 1.
    """
    # Fail before any expensive work when the arguments are unusable.
    if model_type not in ('boost', 'lm', 'lambda', 'rf'):
        raise ValueError(
            "Unknown model_type {!r}; expected 'boost', 'lm', 'lambda' or 'rf'".format(model_type)
        )
    if n_splits < 1:
        raise ValueError("n_splits must be at least 1, got {!r}".format(n_splits))

    # One shared generator so every repetition draws a different split while
    # the whole sequence stays reproducible for a given seed.
    rng = None if random_state is None else np.random.RandomState(random_state)

    bias_v0s, bias_v1s = [], []
    true_value_v0s, true_value_v1s = [], []
    rmse_v0s, rmse_v1s = [], []

    for _ in range(n_splits):
        # Split by request so all rows of one request land on the same side.
        train_requests = v0['search_request_id'].drop_duplicates().sample(frac=0.5, random_state=rng)
        in_train = v0['search_request_id'].isin(train_requests)
        v0_train = v0[in_train].sort_values('search_request_id')
        v0_test = v0[~in_train]

        model = _fit_direct_model(v0_train, model_type)

        # On-policy: held-out rows from the same data the model was fitted on.
        true_value_v0, bias_v0, rmse_v0 = get_value(v0_test, model, model_type)
        true_value_v0s.append(true_value_v0)
        bias_v0s.append(bias_v0)
        rmse_v0s.append(rmse_v0)

        # Off-policy: all rows of the other data set.
        true_value_v1, bias_v1, rmse_v1 = get_value(v1, model, model_type)
        true_value_v1s.append(true_value_v1)
        bias_v1s.append(bias_v1)
        rmse_v1s.append(rmse_v1)

    results = {}
    results['Name'] = name
    results['On Policy Value'] = np.mean(true_value_v0s)
    results['Off Policy Value'] = np.mean(true_value_v1s)
    results['On Policy Bias'] = np.mean(bias_v0s)
    results['Off Policy Bias'] = np.mean(bias_v1s)
    results['On Policy Std'] = np.std(bias_v0s)
    results['Off Policy Std'] = np.std(bias_v1s)
    results['On Policy RMSE'] = np.mean(rmse_v0s)
    results['Off Policy RMSE'] = np.mean(rmse_v1s)

    return results

In [10]:
# Split the loaded rows by experiment variant.
v0 = data.loc[data['variant'].eq('original')]
v1 = data.loc[data['variant'].eq('alternative')]

In [11]:
# Evaluate every model type in both directions: 'OP' trains on v0 and scores
# v1 off-policy, 'NP' trains on v1 and scores v0 off-policy.
performance = []
performance.extend(
    off_policy_estimate(train_arm, other_arm, model_type, '{} - {}'.format(tag, label))
    for tag, train_arm, other_arm in (('OP', v0, v1), ('NP', v1, v0))
    for model_type, label in (
        ('boost', 'Boosting'),
        ('lm', 'Linear Regression'),
        ('lambda', 'LambdaMART'),
        ('rf', 'Random Forest'),
    )
)

Pass `group` as keyword args.  Passing these as positional arguments will be considered as error in future releases.
Pass `group` as keyword args.  Passing these as positional arguments will be considered as error in future releases.


In [22]:
# One row per evaluated model/direction; columns are the keys of the result dicts.
# NOTE(review): execution counts jump from In [11] to In [22], and the saved
# tables show 'OP - Random Forest Regression', a name the current In [11] cell
# does not produce - the outputs look stale; restart the kernel and run all.
performance_df = pd.DataFrame(performance)

In [26]:
# Emit the on-policy columns as a LaTeX table (print keeps the raw LaTeX text).
print(performance_df[['Name', 'On Policy Value', 'On Policy Bias', 'On Policy Std', 'On Policy RMSE']].to_latex())

\begin{tabular}{llrrrr}
\toprule
{} &                           Name &  On Policy Value &  On Policy Bias &  On Policy Std &  On Policy RMSE \\
\midrule
0 &                  OP - Boosting &         0.137233 &       -0.000772 &       0.000707 &        0.165774 \\
1 &         OP - Linear Regression &         0.137395 &       -0.000399 &       0.001017 &        0.132199 \\
2 &                OP - LambdaMART &         0.137365 &       -0.298251 &       0.002409 &        0.555895 \\
3 &  OP - Random Forest Regression &         0.137157 &        0.000016 &       0.000849 &        0.143139 \\
4 &                  NP - Boosting &         0.186631 &       -0.000270 &       0.000734 &        0.184320 \\
5 &         NP - Linear Regression &         0.186394 &       -0.000720 &       0.000991 &        0.146489 \\
6 &                NP - LambdaMART &         0.186658 &       -0.250004 &       0.003538 &        0.576260 \\
7 &             NP - Random Forest &         0.186704 &        0.000409 &    

In [27]:
# Emit the off-policy columns as a LaTeX table (print keeps the raw LaTeX text).
print(performance_df[['Name', 'Off Policy Value', 'Off Policy Bias', 'Off Policy Std', 'Off Policy RMSE']].to_latex())

\begin{tabular}{llrrrr}
\toprule
{} &                           Name &  Off Policy Value &  Off Policy Bias &  Off Policy Std &  Off Policy RMSE \\
\midrule
0 &                  OP - Boosting &          0.186435 &        -0.009941 &        0.001531 &         0.212893 \\
1 &         OP - Linear Regression &          0.186435 &         0.002677 &        0.000688 &         0.172784 \\
2 &                OP - LambdaMART &          0.186435 &        -0.442453 &        0.004265 &         0.699071 \\
3 &  OP - Random Forest Regression &          0.186435 &        -0.006005 &        0.001467 &         0.191687 \\
4 &                  NP - Boosting &          0.137336 &        -0.010458 &        0.001054 &         0.158688 \\
5 &         NP - Linear Regression &          0.137336 &        -0.012108 &        0.000564 &         0.116803 \\
6 &                NP - LambdaMART &          0.137336 &        -0.115359 &        0.004427 &         0.418053 \\
7 &             NP - Random Forest &         