In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import tensorflow as tf

import local_pkg_path
import counterfactual_evaluation as cfeval
from counterfactual_evaluation.propensity_estimators import ContextualPositionBasedModel

Read train and test data

In [2]:
# Data fold to process; interpolated into every ../build/simulation/{fold}/ path used below.
fold = 'Fold1'

In [3]:
# Load the simulated train/validation rankings for this fold and collect the
# distinct query ids; nqids is later used to size the training-query subsets.
rankings_csv = f'../build/simulation/{fold}/sim_exp_train_vali_rankings.csv'
sim_exp_train_vali_rankings = pd.read_csv(rankings_csv)
qid_column = sim_exp_train_vali_rankings['qid']
nqids = qid_column.nunique()
unique_qids = qid_column.unique()

In [4]:
# Load the examination test features and tag every row with its 0-based
# position inside its query group (order of appearance in the CSV).
propensity_test_features = pd.read_csv(f'../build/simulation/{fold}/examination_test.csv')
rank_within_query = propensity_test_features.groupby('qid').cumcount()
propensity_test_features['rank'] = rank_within_query

In [5]:
# Examination features keyed by (partition, qd_id). Despite the name, the preview
# below shows 'train' rows too, and the frame is later merged onto training clicks.
ltr_test_features = pd.read_csv(f'../build/simulation/{fold}/examination_features.csv')

In [6]:
# Load the names of the examination feature columns used to select model inputs.
# NOTE(review): pickle.load can execute arbitrary code; only load this file when it
# was produced by a trusted step of this pipeline.
with open(f'../build/simulation/{fold}/examination_fc_names.pkl', 'rb') as f:
    print(f'Loading examination feature columns')
    examination_fc = pickle.load(f)

Loading examination feature columns


In [7]:
# Display the loaded examination feature column names.
examination_fc

array(['53', '54', '63', '99', '103', '106', '108', '126', '129', '133'],
      dtype='<U3')

In [8]:
# Preview the propensity test features, including the derived 'rank' column.
propensity_test_features.head()

Unnamed: 0,qid,y,53,54,63,99,103,106,108,126,129,133,rank
0,1,1.0,-0.471854,-0.24279,-0.672084,0.607835,-0.814787,3.742025,-0.740711,-0.140762,-0.822696,-0.036124,0
1,1,1.0,-0.471854,0.188449,-0.184864,0.607835,0.492574,-0.373532,0.197622,0.863501,1.539698,-0.036124,1
2,1,1.0,-0.471854,-0.467668,-0.672084,0.607835,-0.814787,-0.373532,-0.740711,2.096005,-0.61493,-0.036124,2
3,1,0.0,-0.471854,-0.330532,-0.672084,0.607835,-0.814787,-0.373532,-0.740711,0.498314,-0.953782,-0.036124,3
4,1,0.0,-0.471854,-0.247741,-0.672084,0.607835,-0.814787,-0.373532,-0.740711,0.589611,-0.895698,-0.036124,4


In [9]:
# Preview the (partition, qd_id)-keyed examination features.
ltr_test_features.head()

Unnamed: 0,partition,53,54,63,99,103,106,108,126,129,133,qd_id
0,train,1.976927,0.453423,1.764048,0.607835,1.511765,-0.373532,1.048441,-0.4603,-0.809199,-0.029974,0
1,train,1.364728,1.090312,1.155011,0.607835,1.511765,1.802201,0.800594,0.178776,-0.705149,-0.036124,1
2,train,1.364728,1.103194,1.155011,0.607835,1.511765,-0.373532,0.800594,0.133128,-0.602185,-0.036124,2
3,train,1.627095,0.574641,1.416023,0.607835,1.511765,2.97195,0.915295,0.08748,-0.033838,-0.036124,3
4,train,1.160662,0.128851,0.951999,0.607835,1.511765,3.023082,0.700753,0.270073,-0.008724,-0.036124,4


In [10]:
# NOTE(review): same statement as in cell In [4]; it recomputes the per-query
# 0-based row counter and yields the same 'rank' column, so this cell is redundant.
propensity_test_features['rank'] = propensity_test_features.groupby('qid').cumcount()

In [11]:
def train_and_test(graph, train_features, pos_labels, neg_labels, test_features, model_dir,
                   epochs=500, nfeatures=10, nranks=10, examination_nhidden_layers=10, relevance_nhidden_layers=10):
    """Train a ContextualPositionBasedModel, checkpoint it, and evaluate it on each test set.

    The model is built and run in a session bound to ``graph``. Every epoch is one
    full-batch step on ``train_features`` / ``pos_labels`` / ``neg_labels``.
    A checkpoint is written every 10 epochs and after the final epoch, so the
    checkpoint restored before testing contains all training steps.

    Args:
        graph: ``tf.Graph`` the session is created on.
        train_features: array fed to ``model.features`` on every training step.
        pos_labels: array fed to ``model.y_pos``.
        neg_labels: array fed to ``model.y_neg``.
        test_features: iterable of feature arrays; each one is evaluated separately.
        model_dir: directory in which ``model.ckpt`` checkpoints are written.
        epochs: number of training steps.
        nfeatures, nranks, examination_nhidden_layers, relevance_nhidden_layers:
            forwarded to the ``ContextualPositionBasedModel`` constructor.

    Returns:
        A list with one entry per element of ``test_features``. Each entry is
        ``[DataFrame(norm_p_logits[:, 1:]), DataFrame(logits[:, 0, :]), DataFrame(logits[:, :, 0])]``.
    """
    # Use the tf.compat.v1 API throughout, consistent with the Saver/logging calls below.
    with tf.compat.v1.Session(graph=graph) as sess:
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.05)
        model = ContextualPositionBasedModel(nfeatures, examination_nhidden_layers, relevance_nhidden_layers, nranks, optimizer)
        # Resolve the global step once, before the Saver and the initializer are built,
        # so that it is saved and initialized even if the model did not create it.
        # NOTE(review): presumably the model's train_op increments this step — confirm.
        global_step = tf.compat.v1.train.get_or_create_global_step()
        saver = tf.compat.v1.train.Saver(
            tf.compat.v1.global_variables(),
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        sess.run(tf.compat.v1.global_variables_initializer())
        checkpoint_prefix = f'{model_dir}/model.ckpt'
        train_feed = {model.features: train_features, model.y_pos: pos_labels, model.y_neg: neg_labels}
        # train
        for epoch in range(epochs):
            train_loss, _ = sess.run([model.loss, model.train_op], feed_dict=train_feed)
            if epoch % 10 == 0:
                tf.compat.v1.logging.info(f'Epoch: {epoch}; Loss: {train_loss}')
            # Also checkpoint after the final epoch; otherwise the restore below would
            # discard every step taken since the last multiple of 10.
            if epoch % 10 == 0 or epoch == epochs - 1:
                saver.save(sess, checkpoint_prefix, global_step=global_step)
        # test
        saver.restore(sess, tf.compat.v1.train.latest_checkpoint(model_dir))
        test_results = []
        for test_feature in test_features:
            propensity_normed, click_probabilities = sess.run([model.norm_p_logits, model.logits], feed_dict={model.features: test_feature})
            pos_1_k, pos_k_1 = click_probabilities[:, 0, :], click_probabilities[:, :, 0]
            # The first column of the normalised propensities is dropped.
            test_results.append([pd.DataFrame(propensity_normed[:, 1:]), pd.DataFrame(pos_1_k), pd.DataFrame(pos_k_1)])
        return test_results

In [12]:
# Model inputs for evaluation, in fixed order: [0] propensity test set, [1] LTR feature table.
test_features = [
    frame[examination_fc].values
    for frame in (propensity_test_features, ltr_test_features)
]

Create model dir

In [13]:
# Create the checkpoint directory for this fold. exist_ok=True makes the cell
# safe to re-run and avoids the separate exists() check.
Path(f'../build/simulation/{fold}/cpbm_models').mkdir(parents=True, exist_ok=True)

In [None]:
# Number of rank positions. It sizes the label tensors, names the per-position
# columns, and is passed to train_and_test as nranks so the three stay in sync.
max_rank = 10
position_cols = list(range(1, max_rank + 1))   # 1..max_rank
swap_position_cols = position_cols[1:]         # 2..max_rank


def _melt_click_probabilities(click_probs, value_name, avg_clicks, nq):
    """Attach per-position click probabilities to the propensity test rows, in long format.

    Args:
        click_probs: DataFrame returned by train_and_test for the propensity test set
            (assumed to have max_rank columns, row-aligned with propensity_test_features).
        value_name: name of the melted value column ('pair_1_k' or 'pair_k_1').
        avg_clicks: value stored in the 'avg_clicks' column.
        nq: value stored in the 'nqueries' column.

    Returns:
        Long DataFrame with one row per (test row, exam_position in 2..max_rank).
    """
    meta = propensity_test_features[['y', 'qid', 'rank']].copy()
    meta[position_cols] = click_probs
    meta['avg_clicks'] = avg_clicks
    meta['nqueries'] = nq
    return meta.melt(id_vars=['qid', 'y', 'rank', 'avg_clicks', 'nqueries'], value_vars=swap_position_cols,
                     var_name='exam_position', value_name=value_name)


test_propensity_results = []
test_ltr_results = []
for avg_clicks in [5, 10, 25, 50]:
    for nq in [nqids // i for i in [100, 10, 2, 1]]:
        # Fresh graph per configuration so that models do not share variables.
        g = tf.Graph()
        print(f'Loading train clicks with {avg_clicks} sessions, {nq} queries')
        train_click = pd.read_csv(f'../build/simulation/{fold}/sim_exp_swap_cpbm_train_clicks_{avg_clicks}_{nq}.csv')
        train_click['intervention_rank_idx'] = train_click['intervention_rank_idx'].astype(int)
        # preprocess input features and labels
        print('preprocess input features and labels')
        nclicks = train_click['click_idx'].max() + 1
        # Label tensors indexed by (click_idx, rank_idx, intervention_rank_idx).
        pos_labels = np.zeros((nclicks, max_rank, max_rank))
        neg_labels = np.zeros((nclicks, max_rank, max_rank))
        label_index = (train_click['click_idx'].values,
                       train_click['rank_idx'].values,
                       train_click['intervention_rank_idx'].values)
        pos_labels[label_index] = train_click['inverse_frequency_weighted_pos_click']
        neg_labels[label_index] = train_click['inverse_frequency_weighted_neg_click']
        # One feature row per click_idx, sorted by click_idx.
        # NOTE(review): rows line up with the first axis of the label tensors only if
        # click_idx covers 0..nclicks-1 without gaps — confirm upstream.
        train_click_features = train_click.drop_duplicates(subset=['click_idx']).sort_values('click_idx').merge(ltr_test_features, how='left', on=['partition', 'qd_id'])
        train_features = train_click_features[examination_fc].values
        model_dir = f'../build/simulation/{fold}/cpbm_models/cpbm_{avg_clicks}_{nq}'

        # train and test
        print(f'train and test nsess: {avg_clicks}, nqueries: {nq}')

        test_sets_res = train_and_test(g, train_features, pos_labels, neg_labels, test_features, model_dir,
                                       epochs=1000, nranks=max_rank)
        # propensity results
        _, test_1_k, test_k_1 = test_sets_res[0]
        test_1_k = _melt_click_probabilities(test_1_k, 'pair_1_k', avg_clicks, nq)
        test_k_1 = _melt_click_probabilities(test_k_1, 'pair_k_1', avg_clicks, nq)
        # Joins on all shared columns (qid, y, rank, avg_clicks, nqueries, exam_position).
        merged = test_1_k.merge(test_k_1)
        test_propensity_results.append(merged)

        # save test results
        print('saving test results')
        test_propensity_normed, _, _ = test_sets_res[1]
        test_meta = ltr_test_features[['partition', 'qd_id']].copy()
        ratio_cols = [str(x) for x in swap_position_cols]
        test_meta[ratio_cols] = test_propensity_normed
        test_res = test_meta.melt(id_vars=['partition', 'qd_id'], value_vars=ratio_cols,
                                  var_name='exam_position', value_name='cpbm_propensity_ratio_hat')
        test_res['avg_clicks'] = avg_clicks
        test_res['nqueries'] = nq
        test_ltr_results.append(test_res)
test_propensity_results = pd.concat(test_propensity_results, ignore_index=True)
test_ltr_results = pd.concat(test_ltr_results, ignore_index=True)

In [16]:
# Preview the long-format propensity-ratio estimates per (partition, qd_id, exam_position).
test_ltr_results.head()

Unnamed: 0,partition,qd_id,exam_position,cpbm_propensity_ratio_hat,avg_clicks,nqueries
0,train,0,2,0.324793,5,3
1,train,1,2,0.338517,5,3
2,train,2,2,0.325198,5,3
3,train,3,2,0.360588,5,3
4,train,4,2,0.392882,5,3


In [19]:
# Preview the merged pair_1_k / pair_k_1 values on the propensity test set.
test_propensity_results.head()

Unnamed: 0,qid,y,rank,avg_clicks,nqueries,exam_position,pair_1_k,pair_k_1
0,1,1.0,0,5,3,2,0.742119,0.425353
1,1,1.0,1,5,3,2,0.999298,0.349601
2,1,1.0,2,5,3,2,0.947938,0.403232
3,1,0.0,3,5,3,2,0.971236,0.395866
4,1,0.0,4,5,3,2,0.974597,0.394374


In [20]:
# cpbm_tau is the row-wise difference pair_1_k - pair_k_1.
pair_1_k_values = test_propensity_results['pair_1_k']
pair_k_1_values = test_propensity_results['pair_k_1']
test_propensity_results['cpbm_tau'] = pair_1_k_values - pair_k_1_values

In [21]:
# Persist the propensity test results (including cpbm_tau) for this fold, without the index.
test_propensity_results.to_csv(f'../build/simulation/{fold}/sim_exp_swap_cpbm_propensity_test_results.csv', index=False)

In [22]:
# Inverse of the estimated propensity ratio; a ratio of exactly 0 yields inf,
# which the following cell replaces with a finite value.
test_ltr_results['inverse_cpbm_propensity_ratio_hat'] = 1 / test_ltr_results['cpbm_propensity_ratio_hat']

In [23]:
# Replace +inf with a large finite value (1e6); -inf and NaN are left untouched.
test_ltr_results['inverse_cpbm_propensity_ratio_hat'] = test_ltr_results['inverse_cpbm_propensity_ratio_hat'].replace(np.inf, 1e6)

In [24]:
# Persist the unclipped LTR results; the "Clipped IPS" section reads this file back.
test_ltr_results.to_csv(f'../build/simulation/{fold}/sim_exp_swap_cpbm_ltr_test_results.csv', index=False)

### Clipped IPS

In [25]:
# NOTE(review): fold is set again here, presumably so this section can be run
# without the cells above — confirm. Keep it in sync with the value in cell In [2].
fold = 'Fold1'

In [26]:
# Reload the unclipped LTR results from the CSV written in cell In [24].
test_ltr_results = pd.read_csv(f'../build/simulation/{fold}/sim_exp_swap_cpbm_ltr_test_results.csv')

In [27]:
# Preview the reloaded results, now including inverse_cpbm_propensity_ratio_hat.
test_ltr_results.head()

Unnamed: 0,partition,qd_id,exam_position,cpbm_propensity_ratio_hat,avg_clicks,nqueries,inverse_cpbm_propensity_ratio_hat
0,train,0,2,0.324793,5,3,3.07888
1,train,1,2,0.338517,5,3,2.954066
2,train,2,2,0.325198,5,3,3.075051
3,train,3,2,0.360588,5,3,2.773247
4,train,4,2,0.392882,5,3,2.545296


In [28]:
# Summary statistics of the estimated propensity ratio before clipping.
test_ltr_results['cpbm_propensity_ratio_hat'].describe()

count    547632.000000
mean          0.453289
std           0.280874
min           0.000273
25%           0.239423
50%           0.446032
75%           0.650167
max           1.614855
Name: cpbm_propensity_ratio_hat, dtype: float64

In [29]:
# Summary statistics of the inverse ratio before clipping.
test_ltr_results['inverse_cpbm_propensity_ratio_hat'].describe()

count    547632.000000
mean         46.705738
std         274.679558
min           0.619251
25%           1.538066
50%           2.241990
75%           4.176707
max        3659.505900
Name: inverse_cpbm_propensity_ratio_hat, dtype: float64

In [30]:
# Clipping threshold: inverse propensity ratios are capped at 1 / threshold below.
threshold = 0.01

In [31]:
# Share of rows whose inverse ratio is at or above the cap 1 / threshold (the rows that get clipped).
(test_ltr_results['inverse_cpbm_propensity_ratio_hat'] >= 1 / threshold).value_counts(normalize=True)

False    0.960088
True     0.039912
Name: inverse_cpbm_propensity_ratio_hat, dtype: float64

In [32]:
# Share of rows whose inverse ratio is at or below 1 / (1 - threshold).
# Informational only: the cells shown here clip the upper end but apply no lower clipping.
(test_ltr_results['inverse_cpbm_propensity_ratio_hat'] <= 1 / (1 - threshold)).value_counts(normalize=True)

False    0.966956
True     0.033044
Name: inverse_cpbm_propensity_ratio_hat, dtype: float64

In [33]:
# Clipped IPS: cap the inverse propensity ratio at 1 / threshold.
inverse_ratio_cap = 1 / threshold
test_ltr_results['inverse_cpbm_propensity_ratio_hat'] = (
    test_ltr_results['inverse_cpbm_propensity_ratio_hat'].clip(upper=inverse_ratio_cap)
)

In [34]:
# Summary statistics after clipping; the maximum should now equal 1 / threshold.
test_ltr_results['inverse_cpbm_propensity_ratio_hat'].describe()

count    547632.000000
mean          9.068006
std          21.393802
min           0.619251
25%           1.538066
50%           2.241990
75%           4.176707
max         100.000000
Name: inverse_cpbm_propensity_ratio_hat, dtype: float64

In [35]:
# Persist the clipped-IPS LTR results for this fold, without the index.
test_ltr_results.to_csv(f'../build/simulation/{fold}/sim_exp_swap_cpbm_clipped_ips_ltr_test_results.csv', index=False)