In [1]:
import pyltr

In [4]:
from os.path import expanduser

In [5]:
# Only Fold1 of MSLR-WEB10K is used in this notebook.
# Keep the trailing slash: file names are appended to this string directly.
FOLD1_DIR = '~/Downloads/MSLR-WEB10K/Fold1/'
path = expanduser(FOLD1_DIR)

In [6]:
# Read the train / validation / test splits of the fold with pyltr's LETOR
# reader. All three files are opened up front and are closed together when
# the `with` block exits.
read_split = pyltr.data.letor.read_dataset

train_file = path + 'train.txt'
vali_file = path + 'vali.txt'
test_file = path + 'test.txt'

with open(train_file) as trainfile, \
        open(vali_file) as valifile, \
        open(test_file) as evalfile:
    # read_dataset yields four values per split; the fourth is discarded.
    # NOTE(review): the first three are presumably the feature matrix,
    # relevance labels and query ids -- confirm against the pyltr docs.
    TX, Ty, Tqids, _ = read_split(trainfile)
    VX, Vy, Vqids, _ = read_split(valifile)
    EX, Ey, Eqids, _ = read_split(evalfile)

In [7]:
# Interactive API lookup: prints the LambdaMART class documentation
# (constructor signature and parameter descriptions). The call only prints;
# it assigns nothing, so it can be dropped from a finished notebook.
help(pyltr.models.LambdaMART)

Help on class LambdaMART in module pyltr.models.lambdamart:

class LambdaMART(pyltr.models._models.AdditiveModel)
 |  LambdaMART(metric=None, learning_rate=0.1, n_estimators=100, query_subsample=1.0, subsample=1.0, min_samples_split=2, min_samples_leaf=1, max_depth=3, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=True)
 |  
 |  Tree-based learning to rank model.
 |  
 |  Parameters
 |  ----------
 |  
 |  metric : object
 |      The metric to be maximized by the model.
 |  learning_rate : float, optional (default=0.1)
 |      Shrinks the contribution of each tree by `learning_rate`.
 |      There is a trade-off between learning_rate and n_estimators.
 |  n_estimators : int, optional (default=100)
 |      The number of boosting stages to perform. Gradient boosting
 |      is fairly robust to over-fitting so a large number usually
 |      results in better performance.
 |  max_depth : int, optional (default=3)
 |      Maximum depth of the individual reg

In [11]:
# Optimisation target: NDCG with k=10. The same metric instance is handed to
# both the validation monitor and the model.
metric = pyltr.metrics.NDCG(k=10)

# Validation monitor, needed for early stopping & trimming. It is given the
# validation split (VX, Vy, Vqids) and the metric above.
# NOTE(review): stop_after=15 is presumably the number of non-improving
# iterations tolerated before training stops -- confirm against the pyltr docs.
monitor = pyltr.models.monitors.ValidationMonitor(
    VX, Vy, Vqids,
    metric=metric,
    stop_after=15,
)

# Hyper-parameters gathered in one mapping so they can be read (and tuned)
# in a single place before the estimator is built.
lambdamart_params = dict(
    metric=metric,
    n_estimators=100,
    learning_rate=0.02,
    max_features="auto",
    query_subsample=0.5,
    min_samples_leaf=64,
    verbose=1,        # print the per-iteration progress table
    random_state=42,  # fixed seed
)
model = pyltr.models.LambdaMART(**lambdamart_params)

# Kept as the cell's last expression so the value returned by fit() is still
# echoed as the cell output.
model.fit(TX, Ty, Tqids, monitor=monitor)

 Iter  Train score  OOB Improve    Remaining                           Monitor Output 
    1       0.1786       0.1745       57.35m      C:      0.1803 B:      0.1803 S:  0
    2       0.2183       0.0389       64.42m      C:      0.2216 B:      0.2216 S:  0
    3       0.2313       0.0084       62.77m      C:      0.2297 B:      0.2297 S:  0
    4       0.2354       0.0017       61.15m      C:      0.2315 B:      0.2315 S:  0
    5       0.2266       0.0007       60.06m      C:      0.2318 B:      0.2318 S:  0
    6       0.2299       0.0007       59.21m      C:      0.2326 B:      0.2326 S:  0
    7       0.2281       0.0019       57.79m      C:      0.2340 B:      0.2340 S:  0
    8       0.2349       0.0002       56.83m      C:      0.2344 B:      0.2344 S:  0
    9       0.2335       0.0002       55.85m      C:      0.2345 B:      0.2345 S:  0
   10       0.2346      -0.0002       55.07m      C:      0.2344 B:      0.2345 S:  1
   15       0.2597       0.0002       51.85m      C: 

<pyltr.models.lambdamart.LambdaMART at 0x1a21370d30>

In [17]:
# Evaluation: score the test split with the fitted model and report its mean
# metric value next to the value obtained for a random ranking.
Epred = model.predict(EX)

random_score = metric.calc_mean_random(Eqids, Ey)
print('Random ranking:', random_score)

model_score = metric.calc_mean(Eqids, Ey, Epred)
print('NDCG@10 of our model:', model_score)

Random ranking: 0.18912831330223595
NDCG@10 of our model: 0.3721021688093439
