In [1]:
import numpy as np
import pandas as pd
import pickle

from xgboost import XGBRegressor
from causalml.inference.meta import BaseXRegressor
from causalml.dataset import synthetic_data

The sklearn.utils.testing module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.


Read train and test data

In [2]:
# Dataset fold to process; it is interpolated into the `../build/simulation/{fold}/...`
# paths used for reading inputs and writing results in this notebook.
fold = 'Fold3'

Total number of queries

In [3]:
# Load the train/validation rankings for the selected fold.
rankings_path = f'../build/simulation/{fold}/sim_exp_train_vali_rankings.csv'
sim_exp_train_vali_rankings = pd.read_csv(rankings_path)

# Distinct query ids and how many there are; `nqids` sizes the query subsamples
# used when locating the simulated click files further down.
qid_column = sim_exp_train_vali_rankings['qid']
unique_qids = qid_column.unique()
nqids = qid_column.nunique()

Test data for evaluating propensity accuracy

In [4]:
# Load the propensity test set and, in the same expression, add a `rank` column
# holding each row's 0-based position within its `qid` group (file order).
propensity_test_features = (
    pd.read_csv(f'../build/simulation/{fold}/examination_test.csv')
    .assign(rank=lambda frame: frame.groupby('qid').cumcount())
)

Examination context features for LTR

In [5]:
# Examination features that are scored with the fitted learners further down.
examination_features_path = f'../build/simulation/{fold}/examination_features.csv'
ltr_test_features = pd.read_csv(examination_features_path)

Examination feature columns

In [6]:
# Load the names of the examination feature columns; `examination_fc` is used
# below to select the model inputs from each DataFrame.
#
# SECURITY NOTE(review): `pickle.load` can execute arbitrary code while
# unpickling. This is only safe if the file is trusted (presumably it is written
# by this project's own build step -- confirm). A plain-text/JSON list of column
# names would avoid the risk entirely.
with open(f'../build/simulation/{fold}/examination_fc_names.pkl', 'rb') as f:
    # Plain string: the message has no placeholders, so no f-string is needed.
    print('Loading examination feature columns')
    examination_fc = pickle.load(f)

Loading examination feature columns


In [7]:
# Show the feature-column names that were just loaded.
examination_fc

array(['13', '38', '53', '63', '64', '88', '107', '126', '129', '133'],
      dtype='<U3')

In [8]:
# Preview the propensity test data, including the derived `rank` column.
propensity_test_features.head()

Unnamed: 0,qid,y,13,38,53,63,64,88,107,126,129,133,rank
0,1,1.0,-1.051746,-0.804303,-0.537913,-0.734882,-0.837864,-0.738388,-1.217684,-0.81953,-0.112918,-0.015019,0
1,1,1.0,-0.501171,1.30828,1.787716,1.619355,2.332349,0.743675,-1.217684,-0.357546,-0.692904,-0.015019,1
2,1,1.0,0.599979,1.30828,0.857462,0.677657,0.430221,0.743675,-1.217684,1.574389,-0.67061,-0.015019,2
3,1,0.0,0.875267,-0.804303,-0.537913,-0.734882,-0.784275,-0.738388,-1.217684,0.608421,-0.696357,-0.015019,3
4,1,0.0,-1.051746,-0.804303,-0.537913,-0.734882,0.776053,-0.738388,-0.584129,-0.52554,-0.695462,-0.015019,4


In [9]:
# Preview the examination features read from examination_features.csv.
ltr_test_features.head()

Unnamed: 0,partition,13,38,53,63,64,88,107,126,129,133,qd_id
0,train,0.875267,0.251989,-0.537913,-0.092812,0.446275,0.041744,-0.371683,0.986409,-0.182869,-0.015019,0
1,train,0.599979,0.251989,-0.537913,-0.028612,0.619698,0.041744,0.022067,0.398429,-0.678922,-0.015019,1
2,train,1.976418,-0.804303,-0.537913,-0.734882,0.517947,-0.738388,-0.335721,1.322397,0.206786,-0.015019,2
3,train,-0.501171,0.251989,-0.537913,0.442229,0.692588,0.041744,-0.174736,-0.693534,2.066825,0.000475,3
4,train,-1.051746,0.251989,-0.537913,1.030792,0.747242,0.041744,-0.174736,-1.239516,2.08085,-0.015019,4


In [10]:
# Propensity score handed to the X-learner as `p` for every row, both when
# fitting and when predicting.
TREATMENT_PROPENSITY = 0.5

# The feature matrices of the two evaluation sets do not depend on any loop
# variable, so extract them once instead of once per fitted learner.
propensity_test_X = propensity_test_features[examination_fc].values
ltr_test_X = ltr_test_features[examination_fc].values


def predict_tau(learner, frame, feature_matrix, treatment_rank):
    """Return a copy of ``frame`` annotated with the learner's predictions.

    Args:
        learner: Fitted ``BaseXRegressor``.
        frame: DataFrame whose examination feature columns produced
            ``feature_matrix``; it is copied, never modified.
        feature_matrix: ``frame[examination_fc].values``.
        treatment_rank: Value written to the ``treatment_rank`` column.

    Returns:
        A copy of ``frame`` with two extra columns: ``tau_pred`` (the flattened
        output of ``learner.predict``) and ``treatment_rank``.
    """
    tau_pred = learner.predict(
        feature_matrix,
        p=TREATMENT_PROPENSITY * np.ones(feature_matrix.shape[0]),
    )
    scored = frame.copy()
    scored['tau_pred'] = tau_pred.flatten()
    scored['treatment_rank'] = treatment_rank
    return scored


def save_test_results(per_group_frames, kind, avg_click, nq):
    """Concatenate the per-treatment-group frames and write them to one CSV.

    Args:
        per_group_frames: List of DataFrames returned by ``predict_tau``.
        kind: ``'propensity'`` or ``'ltr'``; selects the output file name.
        avg_click: Click setting of the current run (part of the file name).
        nq: Number of queries of the current run (part of the file name).

    Returns:
        The concatenated DataFrame that was written.
    """
    results = pd.concat(per_group_frames, ignore_index=True)
    saveto = f'../build/simulation/{fold}/sim_exp_swap_xlearner_{kind}_test_results_{avg_click}_{nq}.csv'
    print('Saving estimation results to', saveto)
    results.to_csv(saveto, index=False)
    return results


# One run per (avg_click, nq) pair; nq is the total query count divided by
# 100, 10, 2 and 1, matching the suffixes of the simulated click files.
for avg_click in [5, 10, 25, 50]:
    for nq in [nqids // divisor for divisor in [100, 10, 2, 1]]:
        data = pd.read_csv(f'../build/simulation/{fold}/sim_exp_swap_causal_forests_train_clicks_{avg_click}_{nq}.csv')
        propensity_frames = []
        ltr_frames = []
        # A separate X-learner is fitted for each treatment group 2..10.
        for treatment_group in range(2, 11):
            train_pairs = data[data['treatment_group'] == treatment_group].copy()

            # X learner: features -> click outcome, with a constant propensity.
            learner_x = BaseXRegressor(learner=XGBRegressor())
            learner_x.fit(
                X=train_pairs[examination_fc].values,
                treatment=train_pairs['treatment'].values,
                y=train_pairs['click'].values,
                p=TREATMENT_PROPENSITY * np.ones(len(train_pairs)),
            )

            # Results are labelled with treatment_group - 1 as `treatment_rank`.
            treatment_rank = treatment_group - 1
            # predict tau on the test data for estimation evaluation
            propensity_frames.append(
                predict_tau(learner_x, propensity_test_features, propensity_test_X, treatment_rank)
            )
            # predict tau on the ltr examination features
            ltr_frames.append(
                predict_tau(learner_x, ltr_test_features, ltr_test_X, treatment_rank)
            )

        # Same order as before: propensity results first, then LTR results.
        propensity_test_results = save_test_results(propensity_frames, 'propensity', avg_click, nq)
        ltr_test_results = save_test_results(ltr_frames, 'ltr', avg_click, nq)

Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_propensity_test_results_5_159.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_ltr_test_results_5_159.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_propensity_test_results_5_1599.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_ltr_test_results_5_1599.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_propensity_test_results_5_7996.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_ltr_test_results_5_7996.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_propensity_test_results_5_15992.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_ltr_test_results_5_15992.csv
Saving estimation results to ../build/simulation/Fold3/sim_exp_swap_xlearner_propensity_test_results_10_159.csv
Saving estimation re