In [2]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import xgboost
import os
import math
import itertools
import shutil
from sklearn.datasets import load_svmlight_files

# Setup lambda boosting

In [3]:
(x_train, y_train, qid_train, x_test, y_test, qid_test,
 x_valid, y_valid, qid_valid) = load_svmlight_files(
    ("evaldata.txt", "testdata.txt", "evaldata.txt"), 
    query_id=True, zero_based=True)

# instantiate the matrices
dtrain = xgboost.DMatrix(x_train, y_train)
dvalid = xgboost.DMatrix(x_valid, y_valid)
dtest = xgboost.DMatrix(x_test, y_test)

# set the group counts from the query IDs
dtrain.set_group([len(list(items)) for _key, items in itertools.groupby(qid_train)])
dtest.set_group([len(list(items)) for _key, items in itertools.groupby(qid_test)])
dvalid.set_group([len(list(items)) for _key, items in itertools.groupby(qid_valid)])

# save the query IDs for testing
qid_train = qid_train
qid_test = qid_test
qid_valid = qid_valid

params = {'booster': 'gbtree', 'tree_method': 'hist', 'gpu_id': -1, 'predictor': 'cpu_predictor'}

# Train the dataset

In [4]:
# specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]

num_trees=2500
check_metric_improvement_rounds=10

evals_result = {}
params['objective'] = 'rank:pairwise'
params['eval_metric'] = ['ndcg', 'map']
bstc = xgboost.train(params, dtrain, num_boost_round=num_trees,
                     early_stopping_rounds=check_metric_improvement_rounds,
                     evals=watchlist, evals_result=evals_result)

[0]	eval-ndcg:0.93880	eval-map:0.91925	train-ndcg:0.94188	train-map:0.92315
Multiple eval metrics have been passed: 'train-map' will be used for early stopping.

Will train until train-map hasn't improved in 10 rounds.
[1]	eval-ndcg:0.93851	eval-map:0.91883	train-ndcg:0.94220	train-map:0.92359
[2]	eval-ndcg:0.93712	eval-map:0.91701	train-ndcg:0.94119	train-map:0.92224
[3]	eval-ndcg:0.93783	eval-map:0.91797	train-ndcg:0.94185	train-map:0.92312
[4]	eval-ndcg:0.93773	eval-map:0.91785	train-ndcg:0.94217	train-map:0.92360
[5]	eval-ndcg:0.93704	eval-map:0.91693	train-ndcg:0.94211	train-map:0.92352
[6]	eval-ndcg:0.93747	eval-map:0.91750	train-ndcg:0.94215	train-map:0.92358
[7]	eval-ndcg:0.93790	eval-map:0.91802	train-ndcg:0.94287	train-map:0.92451
[8]	eval-ndcg:0.93744	eval-map:0.91742	train-ndcg:0.94266	train-map:0.92423
[9]	eval-ndcg:0.93664	eval-map:0.91639	train-ndcg:0.94217	train-map:0.92360
[10]	eval-ndcg:0.93674	eval-map:0.91653	train-ndcg:0.94244	train-map:0.92394
[11]	eval-ndcg:0.936

In [5]:
map_metric = evals_result['train']['map'][-1]
print(map_metric)
ndcg_metric = evals_result['train']['ndcg'][-1]
print(ndcg_metric)

0.923203
0.941857
