In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from catboost import CatBoostRegressor, CatBoostClassifier, Pool, CatBoost
import numpy as np

In [2]:
def read_batches(filename):
    with open(filename, 'r') as handler:
        next(handler)
        x_by_q = defaultdict(lambda: [])
        y_by_q = defaultdict(lambda: [])
        for line in handler:
            splited = list(map(float, line.strip().split()))
            q = splited[2]
            y_by_q[q].append(splited[1])
            x_by_q[q].append(splited[3:])
        X, Y = [], []
        for q in x_by_q:
            X.append(np.array(x_by_q[q]))
            Y.append(np.array(y_by_q[q]))
        return X, Y

def train_test_split(X, Y, test_size=0.2):
    assert len(X) == len(Y)
    permutation = np.random.permutation(len(X))
    split = int(len(permutation) * (1 - test_size))
    train_indices = permutation[:split]
    test_indices = permutation[split:]
    return (
        list(np.array(X)[train_indices]),
        list(np.array(Y)[train_indices]),
        list(np.array(X)[test_indices]),
        list(np.array(Y)[test_indices]),
    )

def generate_data(X, Y):
    permutation = np.random.permutation(len(X))
    index = 0
    while True:
        yield X[permutation[index]], Y[permutation[index]]
        index += 1
        if index >= len(X):
            index = 0

In [3]:
X, Y = read_batches('train.tsv')
train_X, train_Y, test_X, test_Y = train_test_split(X, Y)
features_num = 46

In [4]:
def grouped(X, Y):
    grouped_X, grouped_Y, grouped_doc_ids = [], [], []
    current_id = 0
    for x, y in zip(X, Y):
        assert len(x) == len(y)
        grouped_X += list(x)
        grouped_Y += list(y)
        grouped_doc_ids += [current_id] * len(x)
        current_id += 1
    return grouped_X, grouped_Y, grouped_doc_ids

In [6]:
grouped_X, grouped_Y, grouped_doc_ids = grouped(train_X, train_Y)
train_pool = Pool(grouped_X, grouped_Y, group_id=grouped_doc_ids)

In [7]:
param = {'loss_function':'YetiRank'}
catboost_ranking_model = CatBoost(param)

In [8]:
catboost_ranking_model.fit(train_pool)

0:	total: 185ms	remaining: 3m 4s
1:	total: 278ms	remaining: 2m 18s
2:	total: 373ms	remaining: 2m 3s
3:	total: 472ms	remaining: 1m 57s
4:	total: 575ms	remaining: 1m 54s
5:	total: 676ms	remaining: 1m 51s
6:	total: 774ms	remaining: 1m 49s
7:	total: 867ms	remaining: 1m 47s
8:	total: 973ms	remaining: 1m 47s
9:	total: 1.07s	remaining: 1m 45s
10:	total: 1.17s	remaining: 1m 44s
11:	total: 1.27s	remaining: 1m 44s
12:	total: 1.37s	remaining: 1m 44s
13:	total: 1.47s	remaining: 1m 43s
14:	total: 1.57s	remaining: 1m 42s
15:	total: 1.67s	remaining: 1m 42s
16:	total: 1.77s	remaining: 1m 42s
17:	total: 1.87s	remaining: 1m 42s
18:	total: 1.97s	remaining: 1m 41s
19:	total: 2.07s	remaining: 1m 41s
20:	total: 2.16s	remaining: 1m 40s
21:	total: 2.27s	remaining: 1m 40s
22:	total: 2.37s	remaining: 1m 40s
23:	total: 2.47s	remaining: 1m 40s
24:	total: 2.57s	remaining: 1m 40s
25:	total: 2.66s	remaining: 1m 39s
26:	total: 2.76s	remaining: 1m 39s
27:	total: 2.87s	remaining: 1m 39s
28:	total: 2.97s	remaining: 1m 3

232:	total: 24.3s	remaining: 1m 19s
233:	total: 24.4s	remaining: 1m 19s
234:	total: 24.5s	remaining: 1m 19s
235:	total: 24.6s	remaining: 1m 19s
236:	total: 24.7s	remaining: 1m 19s
237:	total: 24.8s	remaining: 1m 19s
238:	total: 24.9s	remaining: 1m 19s
239:	total: 25s	remaining: 1m 19s
240:	total: 25.1s	remaining: 1m 19s
241:	total: 25.2s	remaining: 1m 18s
242:	total: 25.3s	remaining: 1m 18s
243:	total: 25.4s	remaining: 1m 18s
244:	total: 25.5s	remaining: 1m 18s
245:	total: 25.6s	remaining: 1m 18s
246:	total: 25.8s	remaining: 1m 18s
247:	total: 25.9s	remaining: 1m 18s
248:	total: 26s	remaining: 1m 18s
249:	total: 26.1s	remaining: 1m 18s
250:	total: 26.2s	remaining: 1m 18s
251:	total: 26.3s	remaining: 1m 18s
252:	total: 26.4s	remaining: 1m 17s
253:	total: 26.5s	remaining: 1m 17s
254:	total: 26.6s	remaining: 1m 17s
255:	total: 26.7s	remaining: 1m 17s
256:	total: 26.8s	remaining: 1m 17s
257:	total: 26.9s	remaining: 1m 17s
258:	total: 27s	remaining: 1m 17s
259:	total: 27.1s	remaining: 1m 17

467:	total: 50.4s	remaining: 57.3s
468:	total: 50.5s	remaining: 57.2s
469:	total: 50.6s	remaining: 57.1s
470:	total: 50.7s	remaining: 57s
471:	total: 50.9s	remaining: 56.9s
472:	total: 51s	remaining: 56.8s
473:	total: 51.1s	remaining: 56.7s
474:	total: 51.2s	remaining: 56.6s
475:	total: 51.3s	remaining: 56.5s
476:	total: 51.4s	remaining: 56.4s
477:	total: 51.5s	remaining: 56.3s
478:	total: 51.6s	remaining: 56.2s
479:	total: 51.8s	remaining: 56.1s
480:	total: 51.9s	remaining: 56s
481:	total: 52s	remaining: 55.9s
482:	total: 52.1s	remaining: 55.8s
483:	total: 52.2s	remaining: 55.7s
484:	total: 52.4s	remaining: 55.6s
485:	total: 52.5s	remaining: 55.5s
486:	total: 52.6s	remaining: 55.4s
487:	total: 52.7s	remaining: 55.3s
488:	total: 52.8s	remaining: 55.2s
489:	total: 52.9s	remaining: 55.1s
490:	total: 53s	remaining: 55s
491:	total: 53.2s	remaining: 54.9s
492:	total: 53.3s	remaining: 54.8s
493:	total: 53.4s	remaining: 54.7s
494:	total: 53.5s	remaining: 54.6s
495:	total: 53.6s	remaining: 54.

703:	total: 1m 18s	remaining: 33.1s
704:	total: 1m 18s	remaining: 33s
705:	total: 1m 18s	remaining: 32.9s
706:	total: 1m 19s	remaining: 32.7s
707:	total: 1m 19s	remaining: 32.6s
708:	total: 1m 19s	remaining: 32.5s
709:	total: 1m 19s	remaining: 32.4s
710:	total: 1m 19s	remaining: 32.3s
711:	total: 1m 19s	remaining: 32.2s
712:	total: 1m 19s	remaining: 32.1s
713:	total: 1m 19s	remaining: 32s
714:	total: 1m 19s	remaining: 31.9s
715:	total: 1m 20s	remaining: 31.8s
716:	total: 1m 20s	remaining: 31.7s
717:	total: 1m 20s	remaining: 31.6s
718:	total: 1m 20s	remaining: 31.5s
719:	total: 1m 20s	remaining: 31.4s
720:	total: 1m 20s	remaining: 31.3s
721:	total: 1m 20s	remaining: 31.2s
722:	total: 1m 21s	remaining: 31.1s
723:	total: 1m 21s	remaining: 30.9s
724:	total: 1m 21s	remaining: 30.8s
725:	total: 1m 21s	remaining: 30.7s
726:	total: 1m 21s	remaining: 30.6s
727:	total: 1m 21s	remaining: 30.5s
728:	total: 1m 21s	remaining: 30.4s
729:	total: 1m 21s	remaining: 30.3s
730:	total: 1m 22s	remaining: 30

932:	total: 1m 45s	remaining: 7.56s
933:	total: 1m 45s	remaining: 7.45s
934:	total: 1m 45s	remaining: 7.33s
935:	total: 1m 45s	remaining: 7.22s
936:	total: 1m 45s	remaining: 7.11s
937:	total: 1m 45s	remaining: 7s
938:	total: 1m 45s	remaining: 6.88s
939:	total: 1m 46s	remaining: 6.77s
940:	total: 1m 46s	remaining: 6.66s
941:	total: 1m 46s	remaining: 6.54s
942:	total: 1m 46s	remaining: 6.43s
943:	total: 1m 46s	remaining: 6.32s
944:	total: 1m 46s	remaining: 6.21s
945:	total: 1m 46s	remaining: 6.09s
946:	total: 1m 46s	remaining: 5.98s
947:	total: 1m 46s	remaining: 5.87s
948:	total: 1m 47s	remaining: 5.75s
949:	total: 1m 47s	remaining: 5.64s
950:	total: 1m 47s	remaining: 5.53s
951:	total: 1m 47s	remaining: 5.41s
952:	total: 1m 47s	remaining: 5.3s
953:	total: 1m 47s	remaining: 5.19s
954:	total: 1m 47s	remaining: 5.08s
955:	total: 1m 47s	remaining: 4.96s
956:	total: 1m 47s	remaining: 4.85s
957:	total: 1m 48s	remaining: 4.74s
958:	total: 1m 48s	remaining: 4.62s
959:	total: 1m 48s	remaining: 4.

<catboost.core.CatBoost at 0x11742a0f0>

In [9]:
""" Reference from https://gist.github.com/bwhite/3726239
"""

import numpy as np

def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain
    """
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max

def get_ranked_relevances(model, X, Y):
    all_ranked_relevances = []
    for x, y in zip(X, Y):
        prediction = model.predict(x)
        assert len(prediction) == len(y)
        pairs = list(zip(prediction, y))
        ranked_relevances = [
            y
            for prediction, y in sorted(pairs, key=lambda pair: -pair[0])
        ]
        all_ranked_relevances.append(ranked_relevances)
    return all_ranked_relevances

In [14]:
ranked_relevances = get_ranked_relevances(catboost_ranking_model, test_X, test_Y)
ndcg = np.array([ndcg_at_k(r, k=5) for r in ranked_relevances]).mean()
print(ndcg)
ranked_relevances = get_ranked_relevances(catboost_ranking_model, test_X, test_Y)
ndcg = np.array([ndcg_at_k(r, k=10) for r in ranked_relevances]).mean()
print(ndcg)

0.471759808563698
0.4972841895493263


In [31]:
param = {'loss_function':'YetiRankPairwise'}
other_catboost_ranking_model = CatBoost(param)

In [32]:
other_catboost_ranking_model.fit(train_pool)

0:	total: 64.1ms	remaining: 1m 3s
1:	total: 125ms	remaining: 1m 2s
2:	total: 187ms	remaining: 1m 2s
3:	total: 250ms	remaining: 1m 2s
4:	total: 315ms	remaining: 1m 2s
5:	total: 379ms	remaining: 1m 2s
6:	total: 441ms	remaining: 1m 2s
7:	total: 502ms	remaining: 1m 2s
8:	total: 565ms	remaining: 1m 2s
9:	total: 628ms	remaining: 1m 2s
10:	total: 691ms	remaining: 1m 2s
11:	total: 752ms	remaining: 1m 1s
12:	total: 815ms	remaining: 1m 1s
13:	total: 879ms	remaining: 1m 1s
14:	total: 940ms	remaining: 1m 1s
15:	total: 1.01s	remaining: 1m 1s
16:	total: 1.07s	remaining: 1m 2s
17:	total: 1.14s	remaining: 1m 2s
18:	total: 1.2s	remaining: 1m 2s
19:	total: 1.27s	remaining: 1m 2s
20:	total: 1.33s	remaining: 1m 2s
21:	total: 1.4s	remaining: 1m 2s
22:	total: 1.46s	remaining: 1m 2s
23:	total: 1.53s	remaining: 1m 2s
24:	total: 1.6s	remaining: 1m 2s
25:	total: 1.67s	remaining: 1m 2s
26:	total: 1.74s	remaining: 1m 2s
27:	total: 1.81s	remaining: 1m 2s
28:	total: 1.88s	remaining: 1m 2s
29:	total: 1.94s	remaining

242:	total: 16.8s	remaining: 52.3s
243:	total: 16.9s	remaining: 52.3s
244:	total: 16.9s	remaining: 52.2s
245:	total: 17s	remaining: 52.2s
246:	total: 17.1s	remaining: 52.1s
247:	total: 17.2s	remaining: 52s
248:	total: 17.2s	remaining: 52s
249:	total: 17.3s	remaining: 52s
250:	total: 17.4s	remaining: 51.9s
251:	total: 17.5s	remaining: 51.9s
252:	total: 17.6s	remaining: 51.8s
253:	total: 17.6s	remaining: 51.8s
254:	total: 17.7s	remaining: 51.7s
255:	total: 17.8s	remaining: 51.7s
256:	total: 17.9s	remaining: 51.6s
257:	total: 17.9s	remaining: 51.6s
258:	total: 18s	remaining: 51.5s
259:	total: 18.1s	remaining: 51.5s
260:	total: 18.2s	remaining: 51.4s
261:	total: 18.2s	remaining: 51.4s
262:	total: 18.3s	remaining: 51.3s
263:	total: 18.4s	remaining: 51.3s
264:	total: 18.5s	remaining: 51.2s
265:	total: 18.6s	remaining: 51.2s
266:	total: 18.6s	remaining: 51.2s
267:	total: 18.7s	remaining: 51.1s
268:	total: 18.8s	remaining: 51.1s
269:	total: 18.9s	remaining: 51s
270:	total: 19s	remaining: 51s
2

479:	total: 34.9s	remaining: 37.8s
480:	total: 35s	remaining: 37.8s
481:	total: 35.1s	remaining: 37.7s
482:	total: 35.2s	remaining: 37.7s
483:	total: 35.3s	remaining: 37.6s
484:	total: 35.3s	remaining: 37.5s
485:	total: 35.4s	remaining: 37.4s
486:	total: 35.5s	remaining: 37.4s
487:	total: 35.6s	remaining: 37.3s
488:	total: 35.6s	remaining: 37.2s
489:	total: 35.7s	remaining: 37.2s
490:	total: 35.8s	remaining: 37.1s
491:	total: 35.9s	remaining: 37s
492:	total: 36s	remaining: 37s
493:	total: 36s	remaining: 36.9s
494:	total: 36.1s	remaining: 36.8s
495:	total: 36.2s	remaining: 36.8s
496:	total: 36.3s	remaining: 36.7s
497:	total: 36.3s	remaining: 36.6s
498:	total: 36.4s	remaining: 36.6s
499:	total: 36.5s	remaining: 36.5s
500:	total: 36.6s	remaining: 36.4s
501:	total: 36.6s	remaining: 36.4s
502:	total: 36.7s	remaining: 36.3s
503:	total: 36.8s	remaining: 36.2s
504:	total: 36.9s	remaining: 36.2s
505:	total: 37s	remaining: 36.1s
506:	total: 37s	remaining: 36s
507:	total: 37.1s	remaining: 35.9s
5

716:	total: 53.8s	remaining: 21.2s
717:	total: 53.9s	remaining: 21.2s
718:	total: 53.9s	remaining: 21.1s
719:	total: 54s	remaining: 21s
720:	total: 54.1s	remaining: 20.9s
721:	total: 54.2s	remaining: 20.9s
722:	total: 54.3s	remaining: 20.8s
723:	total: 54.4s	remaining: 20.7s
724:	total: 54.4s	remaining: 20.6s
725:	total: 54.5s	remaining: 20.6s
726:	total: 54.6s	remaining: 20.5s
727:	total: 54.7s	remaining: 20.4s
728:	total: 54.8s	remaining: 20.4s
729:	total: 54.9s	remaining: 20.3s
730:	total: 54.9s	remaining: 20.2s
731:	total: 55s	remaining: 20.1s
732:	total: 55.1s	remaining: 20.1s
733:	total: 55.2s	remaining: 20s
734:	total: 55.3s	remaining: 19.9s
735:	total: 55.3s	remaining: 19.8s
736:	total: 55.4s	remaining: 19.8s
737:	total: 55.5s	remaining: 19.7s
738:	total: 55.6s	remaining: 19.6s
739:	total: 55.7s	remaining: 19.6s
740:	total: 55.7s	remaining: 19.5s
741:	total: 55.8s	remaining: 19.4s
742:	total: 55.9s	remaining: 19.3s
743:	total: 56s	remaining: 19.3s
744:	total: 56.1s	remaining: 1

953:	total: 1m 12s	remaining: 3.52s
954:	total: 1m 13s	remaining: 3.44s
955:	total: 1m 13s	remaining: 3.37s
956:	total: 1m 13s	remaining: 3.29s
957:	total: 1m 13s	remaining: 3.21s
958:	total: 1m 13s	remaining: 3.14s
959:	total: 1m 13s	remaining: 3.06s
960:	total: 1m 13s	remaining: 2.98s
961:	total: 1m 13s	remaining: 2.91s
962:	total: 1m 13s	remaining: 2.83s
963:	total: 1m 13s	remaining: 2.75s
964:	total: 1m 13s	remaining: 2.68s
965:	total: 1m 13s	remaining: 2.6s
966:	total: 1m 14s	remaining: 2.53s
967:	total: 1m 14s	remaining: 2.45s
968:	total: 1m 14s	remaining: 2.37s
969:	total: 1m 14s	remaining: 2.3s
970:	total: 1m 14s	remaining: 2.22s
971:	total: 1m 14s	remaining: 2.14s
972:	total: 1m 14s	remaining: 2.07s
973:	total: 1m 14s	remaining: 1.99s
974:	total: 1m 14s	remaining: 1.91s
975:	total: 1m 14s	remaining: 1.84s
976:	total: 1m 14s	remaining: 1.76s
977:	total: 1m 14s	remaining: 1.68s
978:	total: 1m 14s	remaining: 1.61s
979:	total: 1m 15s	remaining: 1.53s
980:	total: 1m 15s	remaining: 

<catboost.core.CatBoost at 0x117448b70>

In [33]:
ranked_relevances = get_ranked_relevances(other_catboost_ranking_model, test_X, test_Y)
ndcg = np.array([ndcg_at_k(r, k=5) for r in ranked_relevances]).mean()
print(ndcg)
ranked_relevances = get_ranked_relevances(other_catboost_ranking_model, test_X, test_Y)
ndcg = np.array([ndcg_at_k(r, k=10) for r in ranked_relevances]).mean()
print(ndcg)

0.4738711504978495
0.500528763956977


In [18]:
param = {'loss_function':'QuerySoftMax'}
catboost_ranking_model_3 = CatBoost(param)
catboost_ranking_model_3.fit(train_pool)

0:	learn: 3.6348252	total: 189ms	remaining: 3m 8s
1:	learn: 3.6284355	total: 349ms	remaining: 2m 53s
2:	learn: 3.6230393	total: 531ms	remaining: 2m 56s
3:	learn: 3.6176183	total: 690ms	remaining: 2m 51s
4:	learn: 3.6117692	total: 863ms	remaining: 2m 51s
5:	learn: 3.6066898	total: 1.02s	remaining: 2m 49s
6:	learn: 3.6018049	total: 1.2s	remaining: 2m 50s
7:	learn: 3.5970885	total: 1.38s	remaining: 2m 51s
8:	learn: 3.5924267	total: 1.56s	remaining: 2m 52s
9:	learn: 3.5881311	total: 1.73s	remaining: 2m 51s
10:	learn: 3.5845982	total: 1.91s	remaining: 2m 51s
11:	learn: 3.5805060	total: 2.07s	remaining: 2m 50s
12:	learn: 3.5773972	total: 2.25s	remaining: 2m 50s
13:	learn: 3.5744615	total: 2.4s	remaining: 2m 49s
14:	learn: 3.5709513	total: 2.58s	remaining: 2m 49s
15:	learn: 3.5684856	total: 2.75s	remaining: 2m 49s
16:	learn: 3.5657416	total: 2.93s	remaining: 2m 49s
17:	learn: 3.5628455	total: 3.11s	remaining: 2m 49s
18:	learn: 3.5602459	total: 3.29s	remaining: 2m 49s
19:	learn: 3.5576814	tota

158:	learn: 3.4784217	total: 31.2s	remaining: 2m 44s
159:	learn: 3.4782137	total: 31.4s	remaining: 2m 44s
160:	learn: 3.4779452	total: 31.6s	remaining: 2m 44s
161:	learn: 3.4778272	total: 31.8s	remaining: 2m 44s
162:	learn: 3.4776466	total: 32s	remaining: 2m 44s
163:	learn: 3.4775061	total: 32.2s	remaining: 2m 44s
164:	learn: 3.4773211	total: 32.4s	remaining: 2m 43s
165:	learn: 3.4770550	total: 32.6s	remaining: 2m 43s
166:	learn: 3.4768425	total: 32.8s	remaining: 2m 43s
167:	learn: 3.4766518	total: 33s	remaining: 2m 43s
168:	learn: 3.4764558	total: 33.2s	remaining: 2m 43s
169:	learn: 3.4760082	total: 33.4s	remaining: 2m 43s
170:	learn: 3.4757648	total: 33.7s	remaining: 2m 43s
171:	learn: 3.4754921	total: 33.9s	remaining: 2m 43s
172:	learn: 3.4752950	total: 34.1s	remaining: 2m 43s
173:	learn: 3.4751522	total: 34.3s	remaining: 2m 42s
174:	learn: 3.4750001	total: 34.6s	remaining: 2m 43s
175:	learn: 3.4748113	total: 34.9s	remaining: 2m 43s
176:	learn: 3.4744809	total: 35.1s	remaining: 2m 4

314:	learn: 3.4563403	total: 1m 5s	remaining: 2m 21s
315:	learn: 3.4562234	total: 1m 5s	remaining: 2m 21s
316:	learn: 3.4560457	total: 1m 5s	remaining: 2m 21s
317:	learn: 3.4559544	total: 1m 5s	remaining: 2m 21s
318:	learn: 3.4557174	total: 1m 5s	remaining: 2m 20s
319:	learn: 3.4555980	total: 1m 6s	remaining: 2m 20s
320:	learn: 3.4555953	total: 1m 6s	remaining: 2m 20s
321:	learn: 3.4554527	total: 1m 6s	remaining: 2m 20s
322:	learn: 3.4552811	total: 1m 6s	remaining: 2m 20s
323:	learn: 3.4552329	total: 1m 7s	remaining: 2m 19s
324:	learn: 3.4550514	total: 1m 7s	remaining: 2m 19s
325:	learn: 3.4549712	total: 1m 7s	remaining: 2m 19s
326:	learn: 3.4548738	total: 1m 7s	remaining: 2m 19s
327:	learn: 3.4548735	total: 1m 7s	remaining: 2m 19s
328:	learn: 3.4547928	total: 1m 8s	remaining: 2m 19s
329:	learn: 3.4547045	total: 1m 8s	remaining: 2m 18s
330:	learn: 3.4545462	total: 1m 8s	remaining: 2m 18s
331:	learn: 3.4544588	total: 1m 8s	remaining: 2m 18s
332:	learn: 3.4544239	total: 1m 9s	remaining: 

469:	learn: 3.4438987	total: 1m 38s	remaining: 1m 50s
470:	learn: 3.4438935	total: 1m 38s	remaining: 1m 50s
471:	learn: 3.4438583	total: 1m 38s	remaining: 1m 50s
472:	learn: 3.4437264	total: 1m 38s	remaining: 1m 49s
473:	learn: 3.4436647	total: 1m 38s	remaining: 1m 49s
474:	learn: 3.4436646	total: 1m 39s	remaining: 1m 49s
475:	learn: 3.4435615	total: 1m 39s	remaining: 1m 49s
476:	learn: 3.4435387	total: 1m 39s	remaining: 1m 49s
477:	learn: 3.4435208	total: 1m 39s	remaining: 1m 48s
478:	learn: 3.4434077	total: 1m 39s	remaining: 1m 48s
479:	learn: 3.4433030	total: 1m 39s	remaining: 1m 48s
480:	learn: 3.4432441	total: 1m 40s	remaining: 1m 48s
481:	learn: 3.4431413	total: 1m 40s	remaining: 1m 47s
482:	learn: 3.4431271	total: 1m 40s	remaining: 1m 47s
483:	learn: 3.4430087	total: 1m 40s	remaining: 1m 47s
484:	learn: 3.4428658	total: 1m 40s	remaining: 1m 47s
485:	learn: 3.4428602	total: 1m 41s	remaining: 1m 46s
486:	learn: 3.4428601	total: 1m 41s	remaining: 1m 46s
487:	learn: 3.4428601	total:

622:	learn: 3.4356320	total: 2m 8s	remaining: 1m 17s
623:	learn: 3.4356311	total: 2m 8s	remaining: 1m 17s
624:	learn: 3.4356174	total: 2m 8s	remaining: 1m 17s
625:	learn: 3.4356068	total: 2m 9s	remaining: 1m 17s
626:	learn: 3.4355504	total: 2m 9s	remaining: 1m 16s
627:	learn: 3.4355294	total: 2m 9s	remaining: 1m 16s
628:	learn: 3.4355281	total: 2m 9s	remaining: 1m 16s
629:	learn: 3.4355270	total: 2m 9s	remaining: 1m 16s
630:	learn: 3.4353981	total: 2m 10s	remaining: 1m 16s
631:	learn: 3.4353743	total: 2m 10s	remaining: 1m 15s
632:	learn: 3.4353559	total: 2m 10s	remaining: 1m 15s
633:	learn: 3.4353455	total: 2m 10s	remaining: 1m 15s
634:	learn: 3.4352974	total: 2m 10s	remaining: 1m 15s
635:	learn: 3.4352700	total: 2m 11s	remaining: 1m 15s
636:	learn: 3.4352245	total: 2m 11s	remaining: 1m 14s
637:	learn: 3.4352225	total: 2m 11s	remaining: 1m 14s
638:	learn: 3.4351535	total: 2m 11s	remaining: 1m 14s
639:	learn: 3.4350239	total: 2m 11s	remaining: 1m 14s
640:	learn: 3.4350237	total: 2m 11s	

777:	learn: 3.4287305	total: 2m 37s	remaining: 44.9s
778:	learn: 3.4287304	total: 2m 37s	remaining: 44.7s
779:	learn: 3.4287010	total: 2m 37s	remaining: 44.5s
780:	learn: 3.4286309	total: 2m 37s	remaining: 44.2s
781:	learn: 3.4286179	total: 2m 37s	remaining: 44s
782:	learn: 3.4285897	total: 2m 38s	remaining: 43.8s
783:	learn: 3.4285864	total: 2m 38s	remaining: 43.6s
784:	learn: 3.4284880	total: 2m 38s	remaining: 43.4s
785:	learn: 3.4284221	total: 2m 38s	remaining: 43.2s
786:	learn: 3.4283372	total: 2m 38s	remaining: 43s
787:	learn: 3.4283371	total: 2m 39s	remaining: 42.8s
788:	learn: 3.4283371	total: 2m 39s	remaining: 42.6s
789:	learn: 3.4283370	total: 2m 39s	remaining: 42.4s
790:	learn: 3.4283366	total: 2m 39s	remaining: 42.1s
791:	learn: 3.4282309	total: 2m 39s	remaining: 42s
792:	learn: 3.4281488	total: 2m 39s	remaining: 41.8s
793:	learn: 3.4280398	total: 2m 40s	remaining: 41.6s
794:	learn: 3.4280369	total: 2m 40s	remaining: 41.3s
795:	learn: 3.4280154	total: 2m 40s	remaining: 41.1s

933:	learn: 3.4234298	total: 3m 4s	remaining: 13.1s
934:	learn: 3.4234289	total: 3m 4s	remaining: 12.9s
935:	learn: 3.4234286	total: 3m 5s	remaining: 12.7s
936:	learn: 3.4233636	total: 3m 5s	remaining: 12.5s
937:	learn: 3.4232952	total: 3m 5s	remaining: 12.3s
938:	learn: 3.4232944	total: 3m 5s	remaining: 12.1s
939:	learn: 3.4232588	total: 3m 5s	remaining: 11.9s
940:	learn: 3.4232583	total: 3m 6s	remaining: 11.7s
941:	learn: 3.4232499	total: 3m 6s	remaining: 11.5s
942:	learn: 3.4231588	total: 3m 6s	remaining: 11.3s
943:	learn: 3.4231552	total: 3m 6s	remaining: 11.1s
944:	learn: 3.4231549	total: 3m 6s	remaining: 10.9s
945:	learn: 3.4231541	total: 3m 6s	remaining: 10.7s
946:	learn: 3.4231480	total: 3m 7s	remaining: 10.5s
947:	learn: 3.4231477	total: 3m 7s	remaining: 10.3s
948:	learn: 3.4231457	total: 3m 7s	remaining: 10.1s
949:	learn: 3.4231448	total: 3m 7s	remaining: 9.87s
950:	learn: 3.4231436	total: 3m 7s	remaining: 9.67s
951:	learn: 3.4231408	total: 3m 7s	remaining: 9.47s
952:	learn: 

<catboost.core.CatBoost at 0x1174487f0>

In [19]:
def test_model(model):
    ranked_relevances = get_ranked_relevances(model, test_X, test_Y)
    ndcg = np.array([ndcg_at_k(r, k=5) for r in ranked_relevances]).mean()
    print(ndcg)
    ranked_relevances = get_ranked_relevances(model, test_X, test_Y)
    ndcg = np.array([ndcg_at_k(r, k=10) for r in ranked_relevances]).mean()
    print(ndcg)

In [20]:
test_model(catboost_ranking_model_3)

0.47355161565665016
0.49636876407908664


In [25]:
param = {'loss_function':'QueryRMSE'}
catboost_ranking_model_4 = CatBoost(param)
catboost_ranking_model_4.fit(train_pool)

0:	learn: 0.4700280	total: 73ms	remaining: 1m 12s
1:	learn: 0.4691400	total: 121ms	remaining: 1m
2:	learn: 0.4682552	total: 166ms	remaining: 55s
3:	learn: 0.4674372	total: 216ms	remaining: 53.7s
4:	learn: 0.4667570	total: 266ms	remaining: 52.9s
5:	learn: 0.4659631	total: 317ms	remaining: 52.5s
6:	learn: 0.4652574	total: 365ms	remaining: 51.8s
7:	learn: 0.4646190	total: 418ms	remaining: 51.8s
8:	learn: 0.4639387	total: 471ms	remaining: 51.8s
9:	learn: 0.4632364	total: 526ms	remaining: 52.1s
10:	learn: 0.4626138	total: 580ms	remaining: 52.1s
11:	learn: 0.4620530	total: 635ms	remaining: 52.3s
12:	learn: 0.4614646	total: 682ms	remaining: 51.8s
13:	learn: 0.4608953	total: 731ms	remaining: 51.5s
14:	learn: 0.4603495	total: 785ms	remaining: 51.6s
15:	learn: 0.4598130	total: 832ms	remaining: 51.2s
16:	learn: 0.4593021	total: 883ms	remaining: 51.1s
17:	learn: 0.4588126	total: 931ms	remaining: 50.8s
18:	learn: 0.4583522	total: 985ms	remaining: 50.9s
19:	learn: 0.4579329	total: 1.03s	remaining: 5

164:	learn: 0.4415817	total: 8.32s	remaining: 42.1s
165:	learn: 0.4415544	total: 8.37s	remaining: 42s
166:	learn: 0.4415313	total: 8.41s	remaining: 42s
167:	learn: 0.4415012	total: 8.46s	remaining: 41.9s
168:	learn: 0.4414740	total: 8.51s	remaining: 41.9s
169:	learn: 0.4414405	total: 8.57s	remaining: 41.8s
170:	learn: 0.4414041	total: 8.62s	remaining: 41.8s
171:	learn: 0.4413825	total: 8.67s	remaining: 41.8s
172:	learn: 0.4413431	total: 8.72s	remaining: 41.7s
173:	learn: 0.4413214	total: 8.78s	remaining: 41.7s
174:	learn: 0.4412900	total: 8.83s	remaining: 41.6s
175:	learn: 0.4412589	total: 8.88s	remaining: 41.6s
176:	learn: 0.4412310	total: 8.93s	remaining: 41.5s
177:	learn: 0.4412026	total: 8.98s	remaining: 41.5s
178:	learn: 0.4411746	total: 9.04s	remaining: 41.5s
179:	learn: 0.4411386	total: 9.09s	remaining: 41.4s
180:	learn: 0.4410987	total: 9.14s	remaining: 41.4s
181:	learn: 0.4410527	total: 9.19s	remaining: 41.3s
182:	learn: 0.4410187	total: 9.24s	remaining: 41.3s
183:	learn: 0.44

323:	learn: 0.4368850	total: 16.5s	remaining: 34.5s
324:	learn: 0.4368665	total: 16.6s	remaining: 34.5s
325:	learn: 0.4368307	total: 16.6s	remaining: 34.4s
326:	learn: 0.4368248	total: 16.7s	remaining: 34.3s
327:	learn: 0.4367858	total: 16.7s	remaining: 34.3s
328:	learn: 0.4367747	total: 16.8s	remaining: 34.2s
329:	learn: 0.4367493	total: 16.8s	remaining: 34.2s
330:	learn: 0.4367292	total: 16.9s	remaining: 34.1s
331:	learn: 0.4367083	total: 16.9s	remaining: 34.1s
332:	learn: 0.4366735	total: 17s	remaining: 34s
333:	learn: 0.4366228	total: 17s	remaining: 34s
334:	learn: 0.4365963	total: 17.1s	remaining: 33.9s
335:	learn: 0.4365788	total: 17.1s	remaining: 33.8s
336:	learn: 0.4365532	total: 17.2s	remaining: 33.8s
337:	learn: 0.4365337	total: 17.2s	remaining: 33.7s
338:	learn: 0.4365146	total: 17.3s	remaining: 33.7s
339:	learn: 0.4364755	total: 17.3s	remaining: 33.6s
340:	learn: 0.4364476	total: 17.4s	remaining: 33.6s
341:	learn: 0.4364336	total: 17.4s	remaining: 33.5s
342:	learn: 0.436405

485:	learn: 0.4336918	total: 23.6s	remaining: 24.9s
486:	learn: 0.4336794	total: 23.6s	remaining: 24.9s
487:	learn: 0.4336588	total: 23.7s	remaining: 24.8s
488:	learn: 0.4336206	total: 23.7s	remaining: 24.8s
489:	learn: 0.4335978	total: 23.8s	remaining: 24.7s
490:	learn: 0.4335752	total: 23.8s	remaining: 24.7s
491:	learn: 0.4335441	total: 23.9s	remaining: 24.6s
492:	learn: 0.4335435	total: 23.9s	remaining: 24.6s
493:	learn: 0.4335103	total: 23.9s	remaining: 24.5s
494:	learn: 0.4334843	total: 24s	remaining: 24.5s
495:	learn: 0.4334481	total: 24s	remaining: 24.4s
496:	learn: 0.4334368	total: 24.1s	remaining: 24.4s
497:	learn: 0.4334269	total: 24.1s	remaining: 24.3s
498:	learn: 0.4334140	total: 24.2s	remaining: 24.3s
499:	learn: 0.4333915	total: 24.2s	remaining: 24.2s
500:	learn: 0.4333803	total: 24.3s	remaining: 24.2s
501:	learn: 0.4333637	total: 24.3s	remaining: 24.1s
502:	learn: 0.4333399	total: 24.4s	remaining: 24.1s
503:	learn: 0.4333300	total: 24.4s	remaining: 24s
504:	learn: 0.4333

644:	learn: 0.4306620	total: 30.8s	remaining: 17s
645:	learn: 0.4306410	total: 30.9s	remaining: 16.9s
646:	learn: 0.4306303	total: 30.9s	remaining: 16.9s
647:	learn: 0.4306299	total: 31s	remaining: 16.8s
648:	learn: 0.4306069	total: 31s	remaining: 16.8s
649:	learn: 0.4306067	total: 31s	remaining: 16.7s
650:	learn: 0.4305973	total: 31.1s	remaining: 16.7s
651:	learn: 0.4305590	total: 31.1s	remaining: 16.6s
652:	learn: 0.4305583	total: 31.2s	remaining: 16.6s
653:	learn: 0.4305583	total: 31.2s	remaining: 16.5s
654:	learn: 0.4305492	total: 31.2s	remaining: 16.5s
655:	learn: 0.4305347	total: 31.3s	remaining: 16.4s
656:	learn: 0.4305069	total: 31.3s	remaining: 16.4s
657:	learn: 0.4305049	total: 31.4s	remaining: 16.3s
658:	learn: 0.4304952	total: 31.4s	remaining: 16.3s
659:	learn: 0.4304639	total: 31.5s	remaining: 16.2s
660:	learn: 0.4304495	total: 31.6s	remaining: 16.2s
661:	learn: 0.4304418	total: 31.6s	remaining: 16.1s
662:	learn: 0.4304138	total: 31.6s	remaining: 16.1s
663:	learn: 0.430386

805:	learn: 0.4283224	total: 37.7s	remaining: 9.09s
806:	learn: 0.4283044	total: 37.8s	remaining: 9.04s
807:	learn: 0.4283043	total: 37.8s	remaining: 8.99s
808:	learn: 0.4282982	total: 37.9s	remaining: 8.94s
809:	learn: 0.4282778	total: 37.9s	remaining: 8.89s
810:	learn: 0.4282661	total: 38s	remaining: 8.85s
811:	learn: 0.4282653	total: 38s	remaining: 8.8s
812:	learn: 0.4282562	total: 38s	remaining: 8.75s
813:	learn: 0.4282409	total: 38.1s	remaining: 8.71s
814:	learn: 0.4282339	total: 38.1s	remaining: 8.66s
815:	learn: 0.4281992	total: 38.2s	remaining: 8.61s
816:	learn: 0.4281844	total: 38.2s	remaining: 8.57s
817:	learn: 0.4281780	total: 38.3s	remaining: 8.52s
818:	learn: 0.4281777	total: 38.3s	remaining: 8.47s
819:	learn: 0.4281639	total: 38.4s	remaining: 8.42s
820:	learn: 0.4281637	total: 38.4s	remaining: 8.37s
821:	learn: 0.4281636	total: 38.4s	remaining: 8.32s
822:	learn: 0.4281527	total: 38.5s	remaining: 8.27s
823:	learn: 0.4281340	total: 38.5s	remaining: 8.22s
824:	learn: 0.42811

965:	learn: 0.4261885	total: 44.4s	remaining: 1.56s
966:	learn: 0.4261760	total: 44.5s	remaining: 1.52s
967:	learn: 0.4261293	total: 44.5s	remaining: 1.47s
968:	learn: 0.4261193	total: 44.6s	remaining: 1.43s
969:	learn: 0.4261190	total: 44.6s	remaining: 1.38s
970:	learn: 0.4261170	total: 44.6s	remaining: 1.33s
971:	learn: 0.4261165	total: 44.6s	remaining: 1.29s
972:	learn: 0.4260874	total: 44.7s	remaining: 1.24s
973:	learn: 0.4260732	total: 44.7s	remaining: 1.19s
974:	learn: 0.4260545	total: 44.8s	remaining: 1.15s
975:	learn: 0.4260240	total: 44.8s	remaining: 1.1s
976:	learn: 0.4260240	total: 44.9s	remaining: 1.06s
977:	learn: 0.4260137	total: 44.9s	remaining: 1.01s
978:	learn: 0.4260026	total: 45s	remaining: 965ms
979:	learn: 0.4260025	total: 45s	remaining: 918ms
980:	learn: 0.4259903	total: 45s	remaining: 872ms
981:	learn: 0.4259859	total: 45.1s	remaining: 826ms
982:	learn: 0.4259491	total: 45.1s	remaining: 780ms
983:	learn: 0.4259491	total: 45.1s	remaining: 734ms
984:	learn: 0.42594

<catboost.core.CatBoost at 0x117448ac8>

In [26]:
test_model(catboost_ranking_model_4)

0.4660686668478522
0.4927768336312227


In [27]:
param = {'loss_function':'PairLogitPairwise'}
catboost_ranking_model_5 = CatBoost(param)
catboost_ranking_model_5.fit(train_pool)

0:	learn: 0.6835133	total: 196ms	remaining: 3m 15s
1:	learn: 0.6745593	total: 413ms	remaining: 3m 26s
2:	learn: 0.6661238	total: 597ms	remaining: 3m 18s
3:	learn: 0.6583265	total: 784ms	remaining: 3m 15s
4:	learn: 0.6509552	total: 977ms	remaining: 3m 14s
5:	learn: 0.6440766	total: 1.16s	remaining: 3m 13s
6:	learn: 0.6376502	total: 1.37s	remaining: 3m 14s
7:	learn: 0.6314963	total: 1.57s	remaining: 3m 14s
8:	learn: 0.6257888	total: 1.77s	remaining: 3m 14s
9:	learn: 0.6205143	total: 1.99s	remaining: 3m 16s
10:	learn: 0.6153944	total: 2.18s	remaining: 3m 16s
11:	learn: 0.6105849	total: 2.4s	remaining: 3m 17s
12:	learn: 0.6061624	total: 2.6s	remaining: 3m 17s
13:	learn: 0.6020323	total: 2.81s	remaining: 3m 18s
14:	learn: 0.5978863	total: 3.03s	remaining: 3m 18s
15:	learn: 0.5942853	total: 3.22s	remaining: 3m 18s
16:	learn: 0.5906502	total: 3.44s	remaining: 3m 18s
17:	learn: 0.5872795	total: 3.64s	remaining: 3m 18s
18:	learn: 0.5840060	total: 3.92s	remaining: 3m 22s
19:	learn: 0.5809522	tot

158:	learn: 0.4738178	total: 32.9s	remaining: 2m 54s
159:	learn: 0.4734521	total: 33.1s	remaining: 2m 53s
160:	learn: 0.4730531	total: 33.3s	remaining: 2m 53s
161:	learn: 0.4726168	total: 33.5s	remaining: 2m 53s
162:	learn: 0.4722918	total: 33.7s	remaining: 2m 53s
163:	learn: 0.4719019	total: 34s	remaining: 2m 53s
164:	learn: 0.4715252	total: 34.2s	remaining: 2m 52s
165:	learn: 0.4711462	total: 34.4s	remaining: 2m 52s
166:	learn: 0.4707787	total: 34.6s	remaining: 2m 52s
167:	learn: 0.4704486	total: 34.8s	remaining: 2m 52s
168:	learn: 0.4701373	total: 35.1s	remaining: 2m 52s
169:	learn: 0.4697713	total: 35.4s	remaining: 2m 52s
170:	learn: 0.4693621	total: 35.6s	remaining: 2m 52s
171:	learn: 0.4690703	total: 35.8s	remaining: 2m 52s
172:	learn: 0.4686712	total: 36s	remaining: 2m 52s
173:	learn: 0.4682689	total: 36.3s	remaining: 2m 52s
174:	learn: 0.4679226	total: 36.6s	remaining: 2m 52s
175:	learn: 0.4675216	total: 36.9s	remaining: 2m 52s
176:	learn: 0.4671106	total: 37.2s	remaining: 2m 5

314:	learn: 0.4255797	total: 1m 10s	remaining: 2m 33s
315:	learn: 0.4253637	total: 1m 11s	remaining: 2m 33s
316:	learn: 0.4250912	total: 1m 11s	remaining: 2m 33s
317:	learn: 0.4248457	total: 1m 11s	remaining: 2m 33s
318:	learn: 0.4245256	total: 1m 11s	remaining: 2m 33s
319:	learn: 0.4242916	total: 1m 11s	remaining: 2m 32s
320:	learn: 0.4240083	total: 1m 12s	remaining: 2m 32s
321:	learn: 0.4237343	total: 1m 12s	remaining: 2m 32s
322:	learn: 0.4235091	total: 1m 12s	remaining: 2m 32s
323:	learn: 0.4232728	total: 1m 12s	remaining: 2m 31s
324:	learn: 0.4229761	total: 1m 12s	remaining: 2m 31s
325:	learn: 0.4227021	total: 1m 13s	remaining: 2m 31s
326:	learn: 0.4224254	total: 1m 13s	remaining: 2m 30s
327:	learn: 0.4221013	total: 1m 13s	remaining: 2m 30s
328:	learn: 0.4218261	total: 1m 13s	remaining: 2m 30s
329:	learn: 0.4215695	total: 1m 13s	remaining: 2m 30s
330:	learn: 0.4213165	total: 1m 14s	remaining: 2m 29s
331:	learn: 0.4210947	total: 1m 14s	remaining: 2m 29s
332:	learn: 0.4208793	total:

467:	learn: 0.3902428	total: 1m 42s	remaining: 1m 56s
468:	learn: 0.3900023	total: 1m 42s	remaining: 1m 56s
469:	learn: 0.3898300	total: 1m 42s	remaining: 1m 55s
470:	learn: 0.3896397	total: 1m 42s	remaining: 1m 55s
471:	learn: 0.3894204	total: 1m 43s	remaining: 1m 55s
472:	learn: 0.3891742	total: 1m 43s	remaining: 1m 55s
473:	learn: 0.3890223	total: 1m 43s	remaining: 1m 54s
474:	learn: 0.3888302	total: 1m 43s	remaining: 1m 54s
475:	learn: 0.3886676	total: 1m 44s	remaining: 1m 54s
476:	learn: 0.3885061	total: 1m 44s	remaining: 1m 54s
477:	learn: 0.3883156	total: 1m 44s	remaining: 1m 54s
478:	learn: 0.3880962	total: 1m 44s	remaining: 1m 53s
479:	learn: 0.3879273	total: 1m 44s	remaining: 1m 53s
480:	learn: 0.3877105	total: 1m 45s	remaining: 1m 53s
481:	learn: 0.3874789	total: 1m 45s	remaining: 1m 53s
482:	learn: 0.3873060	total: 1m 45s	remaining: 1m 52s
483:	learn: 0.3870747	total: 1m 45s	remaining: 1m 52s
484:	learn: 0.3868502	total: 1m 45s	remaining: 1m 52s
485:	learn: 0.3866528	total:

620:	learn: 0.3610008	total: 2m 11s	remaining: 1m 20s
621:	learn: 0.3608060	total: 2m 12s	remaining: 1m 20s
622:	learn: 0.3606581	total: 2m 12s	remaining: 1m 20s
623:	learn: 0.3604749	total: 2m 12s	remaining: 1m 19s
624:	learn: 0.3603142	total: 2m 12s	remaining: 1m 19s
625:	learn: 0.3601468	total: 2m 12s	remaining: 1m 19s
626:	learn: 0.3599695	total: 2m 12s	remaining: 1m 19s
627:	learn: 0.3598003	total: 2m 13s	remaining: 1m 18s
628:	learn: 0.3596107	total: 2m 13s	remaining: 1m 18s
629:	learn: 0.3594146	total: 2m 13s	remaining: 1m 18s
630:	learn: 0.3592391	total: 2m 13s	remaining: 1m 18s
631:	learn: 0.3590949	total: 2m 13s	remaining: 1m 17s
632:	learn: 0.3588853	total: 2m 14s	remaining: 1m 17s
633:	learn: 0.3587526	total: 2m 14s	remaining: 1m 17s
634:	learn: 0.3586219	total: 2m 14s	remaining: 1m 17s
635:	learn: 0.3584582	total: 2m 14s	remaining: 1m 17s
636:	learn: 0.3583100	total: 2m 14s	remaining: 1m 16s
637:	learn: 0.3581009	total: 2m 14s	remaining: 1m 16s
638:	learn: 0.3579507	total:

776:	learn: 0.3357971	total: 2m 40s	remaining: 46.1s
777:	learn: 0.3355996	total: 2m 40s	remaining: 45.9s
778:	learn: 0.3354420	total: 2m 40s	remaining: 45.6s
779:	learn: 0.3352570	total: 2m 41s	remaining: 45.4s
780:	learn: 0.3350877	total: 2m 41s	remaining: 45.2s
781:	learn: 0.3349282	total: 2m 41s	remaining: 45s
782:	learn: 0.3347367	total: 2m 41s	remaining: 44.8s
783:	learn: 0.3345776	total: 2m 41s	remaining: 44.6s
784:	learn: 0.3343953	total: 2m 41s	remaining: 44.4s
785:	learn: 0.3342080	total: 2m 42s	remaining: 44.1s
786:	learn: 0.3340404	total: 2m 42s	remaining: 43.9s
787:	learn: 0.3338623	total: 2m 42s	remaining: 43.7s
788:	learn: 0.3337295	total: 2m 42s	remaining: 43.5s
789:	learn: 0.3335536	total: 2m 42s	remaining: 43.3s
790:	learn: 0.3334279	total: 2m 43s	remaining: 43.1s
791:	learn: 0.3332684	total: 2m 43s	remaining: 42.9s
792:	learn: 0.3330888	total: 2m 43s	remaining: 42.7s
793:	learn: 0.3329154	total: 2m 43s	remaining: 42.4s
794:	learn: 0.3327535	total: 2m 43s	remaining: 4

934:	learn: 0.3129299	total: 3m 13s	remaining: 13.5s
935:	learn: 0.3127818	total: 3m 13s	remaining: 13.2s
936:	learn: 0.3126496	total: 3m 13s	remaining: 13s
937:	learn: 0.3125304	total: 3m 14s	remaining: 12.8s
938:	learn: 0.3123601	total: 3m 14s	remaining: 12.6s
939:	learn: 0.3122231	total: 3m 14s	remaining: 12.4s
940:	learn: 0.3120904	total: 3m 14s	remaining: 12.2s
941:	learn: 0.3119925	total: 3m 14s	remaining: 12s
942:	learn: 0.3118384	total: 3m 15s	remaining: 11.8s
943:	learn: 0.3116881	total: 3m 15s	remaining: 11.6s
944:	learn: 0.3115761	total: 3m 15s	remaining: 11.4s
945:	learn: 0.3114389	total: 3m 15s	remaining: 11.2s
946:	learn: 0.3112824	total: 3m 15s	remaining: 11s
947:	learn: 0.3111179	total: 3m 16s	remaining: 10.8s
948:	learn: 0.3109728	total: 3m 16s	remaining: 10.6s
949:	learn: 0.3108338	total: 3m 16s	remaining: 10.3s
950:	learn: 0.3106927	total: 3m 16s	remaining: 10.1s
951:	learn: 0.3105730	total: 3m 16s	remaining: 9.93s
952:	learn: 0.3104705	total: 3m 17s	remaining: 9.72s

<catboost.core.CatBoost at 0x1174480f0>

In [28]:
test_model(catboost_ranking_model_5)

0.45589150629840774
0.4850019949763752


In [29]:
param = {'loss_function':'PairLogit'}
catboost_ranking_model_6 = CatBoost(param)
catboost_ranking_model_6.fit(train_pool)

0:	learn: 0.6856554	total: 121ms	remaining: 2m 1s
1:	learn: 0.6776269	total: 214ms	remaining: 1m 47s
2:	learn: 0.6701220	total: 290ms	remaining: 1m 36s
3:	learn: 0.6632468	total: 385ms	remaining: 1m 35s
4:	learn: 0.6577993	total: 464ms	remaining: 1m 32s
5:	learn: 0.6511662	total: 542ms	remaining: 1m 29s
6:	learn: 0.6445451	total: 631ms	remaining: 1m 29s
7:	learn: 0.6393837	total: 720ms	remaining: 1m 29s
8:	learn: 0.6343466	total: 797ms	remaining: 1m 27s
9:	learn: 0.6295682	total: 881ms	remaining: 1m 27s
10:	learn: 0.6250226	total: 958ms	remaining: 1m 26s
11:	learn: 0.6207548	total: 1.03s	remaining: 1m 25s
12:	learn: 0.6169321	total: 1.13s	remaining: 1m 25s
13:	learn: 0.6127876	total: 1.22s	remaining: 1m 26s
14:	learn: 0.6092561	total: 1.31s	remaining: 1m 26s
15:	learn: 0.6057971	total: 1.41s	remaining: 1m 26s
16:	learn: 0.6030641	total: 1.5s	remaining: 1m 26s
17:	learn: 0.5996601	total: 1.58s	remaining: 1m 26s
18:	learn: 0.5962311	total: 1.66s	remaining: 1m 25s
19:	learn: 0.5931852	tot

158:	learn: 0.5076641	total: 13.7s	remaining: 1m 12s
159:	learn: 0.5073392	total: 13.8s	remaining: 1m 12s
160:	learn: 0.5070563	total: 13.9s	remaining: 1m 12s
161:	learn: 0.5068546	total: 13.9s	remaining: 1m 12s
162:	learn: 0.5066625	total: 14s	remaining: 1m 12s
163:	learn: 0.5063847	total: 14.1s	remaining: 1m 11s
164:	learn: 0.5061383	total: 14.2s	remaining: 1m 11s
165:	learn: 0.5059334	total: 14.3s	remaining: 1m 11s
166:	learn: 0.5055643	total: 14.3s	remaining: 1m 11s
167:	learn: 0.5053517	total: 14.4s	remaining: 1m 11s
168:	learn: 0.5050591	total: 14.5s	remaining: 1m 11s
169:	learn: 0.5047332	total: 14.6s	remaining: 1m 11s
170:	learn: 0.5044926	total: 14.7s	remaining: 1m 11s
171:	learn: 0.5043090	total: 14.7s	remaining: 1m 10s
172:	learn: 0.5040806	total: 14.8s	remaining: 1m 10s
173:	learn: 0.5038005	total: 14.9s	remaining: 1m 10s
174:	learn: 0.5035475	total: 15s	remaining: 1m 10s
175:	learn: 0.5034188	total: 15.1s	remaining: 1m 10s
176:	learn: 0.5032083	total: 15.1s	remaining: 1m 1

318:	learn: 0.4768379	total: 27.1s	remaining: 57.8s
319:	learn: 0.4767099	total: 27.2s	remaining: 57.7s
320:	learn: 0.4765244	total: 27.2s	remaining: 57.6s
321:	learn: 0.4763888	total: 27.3s	remaining: 57.6s
322:	learn: 0.4762486	total: 27.4s	remaining: 57.5s
323:	learn: 0.4760548	total: 27.5s	remaining: 57.4s
324:	learn: 0.4759448	total: 27.6s	remaining: 57.3s
325:	learn: 0.4757223	total: 27.7s	remaining: 57.2s
326:	learn: 0.4756184	total: 27.7s	remaining: 57.1s
327:	learn: 0.4755039	total: 27.8s	remaining: 57s
328:	learn: 0.4753008	total: 27.9s	remaining: 57s
329:	learn: 0.4751859	total: 28s	remaining: 56.9s
330:	learn: 0.4750627	total: 28.1s	remaining: 56.8s
331:	learn: 0.4748915	total: 28.2s	remaining: 56.7s
332:	learn: 0.4747134	total: 28.3s	remaining: 56.7s
333:	learn: 0.4744619	total: 28.4s	remaining: 56.6s
334:	learn: 0.4742693	total: 28.5s	remaining: 56.5s
335:	learn: 0.4740868	total: 28.6s	remaining: 56.4s
336:	learn: 0.4739342	total: 28.6s	remaining: 56.4s
337:	learn: 0.4737

478:	learn: 0.4576109	total: 40.4s	remaining: 44s
479:	learn: 0.4575599	total: 40.5s	remaining: 43.9s
480:	learn: 0.4575020	total: 40.6s	remaining: 43.8s
481:	learn: 0.4575013	total: 40.6s	remaining: 43.6s
482:	learn: 0.4575012	total: 40.7s	remaining: 43.5s
483:	learn: 0.4572880	total: 40.8s	remaining: 43.5s
484:	learn: 0.4572827	total: 40.8s	remaining: 43.3s
485:	learn: 0.4571652	total: 40.9s	remaining: 43.3s
486:	learn: 0.4571649	total: 41s	remaining: 43.1s
487:	learn: 0.4571641	total: 41s	remaining: 43s
488:	learn: 0.4570478	total: 41.1s	remaining: 42.9s
489:	learn: 0.4570422	total: 41.1s	remaining: 42.8s
490:	learn: 0.4570422	total: 41.2s	remaining: 42.7s
491:	learn: 0.4567073	total: 41.3s	remaining: 42.6s
492:	learn: 0.4565719	total: 41.3s	remaining: 42.5s
493:	learn: 0.4565623	total: 41.4s	remaining: 42.4s
494:	learn: 0.4564613	total: 41.5s	remaining: 42.3s
495:	learn: 0.4564609	total: 41.5s	remaining: 42.2s
496:	learn: 0.4564485	total: 41.6s	remaining: 42.1s
497:	learn: 0.456324

637:	learn: 0.4446636	total: 51.3s	remaining: 29.1s
638:	learn: 0.4446635	total: 51.4s	remaining: 29s
639:	learn: 0.4444665	total: 51.5s	remaining: 28.9s
640:	learn: 0.4443604	total: 51.5s	remaining: 28.9s
641:	learn: 0.4443157	total: 51.6s	remaining: 28.8s
642:	learn: 0.4443136	total: 51.7s	remaining: 28.7s
643:	learn: 0.4442276	total: 51.8s	remaining: 28.6s
644:	learn: 0.4440408	total: 51.9s	remaining: 28.6s
645:	learn: 0.4440358	total: 52s	remaining: 28.5s
646:	learn: 0.4440209	total: 52s	remaining: 28.4s
647:	learn: 0.4440206	total: 52.1s	remaining: 28.3s
648:	learn: 0.4440204	total: 52.2s	remaining: 28.2s
649:	learn: 0.4440094	total: 52.3s	remaining: 28.1s
650:	learn: 0.4439344	total: 52.4s	remaining: 28.1s
651:	learn: 0.4438574	total: 52.4s	remaining: 28s
652:	learn: 0.4438573	total: 52.5s	remaining: 27.9s
653:	learn: 0.4438490	total: 52.5s	remaining: 27.8s
654:	learn: 0.4437716	total: 52.6s	remaining: 27.7s
655:	learn: 0.4437632	total: 52.7s	remaining: 27.6s
656:	learn: 0.443737

799:	learn: 0.4359437	total: 1m 2s	remaining: 15.7s
800:	learn: 0.4359183	total: 1m 2s	remaining: 15.6s
801:	learn: 0.4358184	total: 1m 2s	remaining: 15.5s
802:	learn: 0.4357631	total: 1m 2s	remaining: 15.4s
803:	learn: 0.4357619	total: 1m 3s	remaining: 15.4s
804:	learn: 0.4357618	total: 1m 3s	remaining: 15.3s
805:	learn: 0.4356377	total: 1m 3s	remaining: 15.2s
806:	learn: 0.4356313	total: 1m 3s	remaining: 15.1s
807:	learn: 0.4354956	total: 1m 3s	remaining: 15.1s
808:	learn: 0.4352966	total: 1m 3s	remaining: 15s
809:	learn: 0.4352531	total: 1m 3s	remaining: 14.9s
810:	learn: 0.4352156	total: 1m 3s	remaining: 14.8s
811:	learn: 0.4352155	total: 1m 3s	remaining: 14.7s
812:	learn: 0.4350958	total: 1m 3s	remaining: 14.7s
813:	learn: 0.4350957	total: 1m 3s	remaining: 14.6s
814:	learn: 0.4348825	total: 1m 3s	remaining: 14.5s
815:	learn: 0.4348817	total: 1m 3s	remaining: 14.4s
816:	learn: 0.4348816	total: 1m 3s	remaining: 14.3s
817:	learn: 0.4348781	total: 1m 4s	remaining: 14.2s
818:	learn: 0.

956:	learn: 0.4287361	total: 1m 13s	remaining: 3.31s
957:	learn: 0.4287361	total: 1m 13s	remaining: 3.23s
958:	learn: 0.4286407	total: 1m 13s	remaining: 3.15s
959:	learn: 0.4286381	total: 1m 13s	remaining: 3.07s
960:	learn: 0.4286380	total: 1m 13s	remaining: 3s
961:	learn: 0.4285576	total: 1m 13s	remaining: 2.92s
962:	learn: 0.4283589	total: 1m 13s	remaining: 2.84s
963:	learn: 0.4283588	total: 1m 14s	remaining: 2.77s
964:	learn: 0.4283584	total: 1m 14s	remaining: 2.69s
965:	learn: 0.4282020	total: 1m 14s	remaining: 2.61s
966:	learn: 0.4282020	total: 1m 14s	remaining: 2.53s
967:	learn: 0.4282019	total: 1m 14s	remaining: 2.45s
968:	learn: 0.4282010	total: 1m 14s	remaining: 2.38s
969:	learn: 0.4282010	total: 1m 14s	remaining: 2.3s
970:	learn: 0.4281974	total: 1m 14s	remaining: 2.22s
971:	learn: 0.4281973	total: 1m 14s	remaining: 2.14s
972:	learn: 0.4281857	total: 1m 14s	remaining: 2.07s
973:	learn: 0.4281857	total: 1m 14s	remaining: 1.99s
974:	learn: 0.4281828	total: 1m 14s	remaining: 1.9

<catboost.core.CatBoost at 0x117448a90>

In [30]:
test_model(catboost_ranking_model_6)

0.4708135017415518
0.49388111741441454


In [40]:
all_X, all_Y, _, _ = train_test_split(X, Y, test_size=0)
grouped_all_X, grouped_all_Y, grouped_all_doc_ids = grouped(all_X, all_Y)
all_pool = Pool(grouped_all_X, grouped_all_Y, group_id=grouped_all_doc_ids)

In [42]:
param = {'loss_function':'YetiRankPairwise'}
best_model = CatBoost(param)
best_model.fit(all_pool)

0:	total: 65.1ms	remaining: 1m 5s
1:	total: 133ms	remaining: 1m 6s
2:	total: 202ms	remaining: 1m 7s
3:	total: 280ms	remaining: 1m 9s
4:	total: 360ms	remaining: 1m 11s
5:	total: 432ms	remaining: 1m 11s
6:	total: 507ms	remaining: 1m 11s
7:	total: 580ms	remaining: 1m 11s
8:	total: 648ms	remaining: 1m 11s
9:	total: 718ms	remaining: 1m 11s
10:	total: 788ms	remaining: 1m 10s
11:	total: 856ms	remaining: 1m 10s
12:	total: 927ms	remaining: 1m 10s
13:	total: 998ms	remaining: 1m 10s
14:	total: 1.07s	remaining: 1m 10s
15:	total: 1.13s	remaining: 1m 9s
16:	total: 1.21s	remaining: 1m 9s
17:	total: 1.28s	remaining: 1m 9s
18:	total: 1.36s	remaining: 1m 10s
19:	total: 1.44s	remaining: 1m 10s
20:	total: 1.51s	remaining: 1m 10s
21:	total: 1.59s	remaining: 1m 10s
22:	total: 1.66s	remaining: 1m 10s
23:	total: 1.73s	remaining: 1m 10s
24:	total: 1.81s	remaining: 1m 10s
25:	total: 1.88s	remaining: 1m 10s
26:	total: 1.95s	remaining: 1m 10s
27:	total: 2.02s	remaining: 1m 10s
28:	total: 2.1s	remaining: 1m 10s
29

240:	total: 19.1s	remaining: 1m
241:	total: 19.1s	remaining: 60s
242:	total: 19.2s	remaining: 59.9s
243:	total: 19.3s	remaining: 59.8s
244:	total: 19.4s	remaining: 59.8s
245:	total: 19.5s	remaining: 59.7s
246:	total: 19.6s	remaining: 59.7s
247:	total: 19.7s	remaining: 59.6s
248:	total: 19.8s	remaining: 59.6s
249:	total: 19.9s	remaining: 59.6s
250:	total: 19.9s	remaining: 59.5s
251:	total: 20s	remaining: 59.5s
252:	total: 20.1s	remaining: 59.5s
253:	total: 20.2s	remaining: 59.4s
254:	total: 20.3s	remaining: 59.4s
255:	total: 20.4s	remaining: 59.3s
256:	total: 20.5s	remaining: 59.3s
257:	total: 20.6s	remaining: 59.2s
258:	total: 20.7s	remaining: 59.2s
259:	total: 20.8s	remaining: 59.1s
260:	total: 20.9s	remaining: 59s
261:	total: 20.9s	remaining: 59s
262:	total: 21s	remaining: 58.9s
263:	total: 21.1s	remaining: 58.9s
264:	total: 21.2s	remaining: 58.8s
265:	total: 21.3s	remaining: 58.8s
266:	total: 21.4s	remaining: 58.7s
267:	total: 21.5s	remaining: 58.6s
268:	total: 21.5s	remaining: 58.6

479:	total: 41s	remaining: 44.4s
480:	total: 41.1s	remaining: 44.3s
481:	total: 41.2s	remaining: 44.2s
482:	total: 41.3s	remaining: 44.2s
483:	total: 41.3s	remaining: 44.1s
484:	total: 41.4s	remaining: 44s
485:	total: 41.5s	remaining: 43.9s
486:	total: 41.6s	remaining: 43.8s
487:	total: 41.7s	remaining: 43.8s
488:	total: 41.8s	remaining: 43.7s
489:	total: 41.9s	remaining: 43.6s
490:	total: 42s	remaining: 43.5s
491:	total: 42.1s	remaining: 43.5s
492:	total: 42.2s	remaining: 43.4s
493:	total: 42.3s	remaining: 43.3s
494:	total: 42.4s	remaining: 43.2s
495:	total: 42.4s	remaining: 43.1s
496:	total: 42.5s	remaining: 43s
497:	total: 42.6s	remaining: 43s
498:	total: 42.7s	remaining: 42.9s
499:	total: 42.8s	remaining: 42.8s
500:	total: 42.9s	remaining: 42.7s
501:	total: 43s	remaining: 42.6s
502:	total: 43.1s	remaining: 42.5s
503:	total: 43.1s	remaining: 42.5s
504:	total: 43.2s	remaining: 42.4s
505:	total: 43.3s	remaining: 42.3s
506:	total: 43.4s	remaining: 42.2s
507:	total: 43.5s	remaining: 42.

719:	total: 1m 2s	remaining: 24.2s
720:	total: 1m 2s	remaining: 24.1s
721:	total: 1m 2s	remaining: 24s
722:	total: 1m 2s	remaining: 23.9s
723:	total: 1m 2s	remaining: 23.8s
724:	total: 1m 2s	remaining: 23.8s
725:	total: 1m 2s	remaining: 23.7s
726:	total: 1m 2s	remaining: 23.6s
727:	total: 1m 2s	remaining: 23.5s
728:	total: 1m 2s	remaining: 23.4s
729:	total: 1m 3s	remaining: 23.3s
730:	total: 1m 3s	remaining: 23.2s
731:	total: 1m 3s	remaining: 23.2s
732:	total: 1m 3s	remaining: 23.1s
733:	total: 1m 3s	remaining: 23s
734:	total: 1m 3s	remaining: 22.9s
735:	total: 1m 3s	remaining: 22.8s
736:	total: 1m 3s	remaining: 22.7s
737:	total: 1m 3s	remaining: 22.6s
738:	total: 1m 3s	remaining: 22.6s
739:	total: 1m 3s	remaining: 22.5s
740:	total: 1m 4s	remaining: 22.4s
741:	total: 1m 4s	remaining: 22.3s
742:	total: 1m 4s	remaining: 22.2s
743:	total: 1m 4s	remaining: 22.1s
744:	total: 1m 4s	remaining: 22.1s
745:	total: 1m 4s	remaining: 22s
746:	total: 1m 4s	remaining: 21.9s
747:	total: 1m 4s	remainin

953:	total: 1m 23s	remaining: 4.01s
954:	total: 1m 23s	remaining: 3.92s
955:	total: 1m 23s	remaining: 3.83s
956:	total: 1m 23s	remaining: 3.75s
957:	total: 1m 23s	remaining: 3.66s
958:	total: 1m 23s	remaining: 3.57s
959:	total: 1m 23s	remaining: 3.48s
960:	total: 1m 23s	remaining: 3.4s
961:	total: 1m 23s	remaining: 3.31s
962:	total: 1m 23s	remaining: 3.22s
963:	total: 1m 23s	remaining: 3.14s
964:	total: 1m 24s	remaining: 3.05s
965:	total: 1m 24s	remaining: 2.96s
966:	total: 1m 24s	remaining: 2.88s
967:	total: 1m 24s	remaining: 2.79s
968:	total: 1m 24s	remaining: 2.7s
969:	total: 1m 24s	remaining: 2.61s
970:	total: 1m 24s	remaining: 2.53s
971:	total: 1m 24s	remaining: 2.44s
972:	total: 1m 24s	remaining: 2.35s
973:	total: 1m 24s	remaining: 2.27s
974:	total: 1m 24s	remaining: 2.18s
975:	total: 1m 25s	remaining: 2.09s
976:	total: 1m 25s	remaining: 2s
977:	total: 1m 25s	remaining: 1.92s
978:	total: 1m 25s	remaining: 1.83s
979:	total: 1m 25s	remaining: 1.74s
980:	total: 1m 25s	remaining: 1.6

<catboost.core.CatBoost at 0x1174f9860>

In [43]:
def save_prediction(model, filename, dataset):
    res = dataset[['docid', 'quid', 'feature_0']].copy()
    res['feature_0'] = model.predict(dataset[dataset.columns.delete((0, 1))])
    res.head()
    csv = res.sort_values(['quid', 'feature_0'], ascending=False)[['docid', 'quid']]
    csv.to_csv(filename, index=False)

In [44]:
test = pd.read_csv('test.tsv', sep='\t')
test.head()

Unnamed: 0,docid,quid,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,...,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45
0,1,7968,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0
1,2,7968,0.205882,0.0,0.0,0.0,0.205882,0.0,0.0,0.0,...,0.916516,0.686669,0.674704,0.708795,0.217889,0.272727,0.0,0.333333,0.42,0.0
2,3,7968,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.031069,0.090909,0.092593,0.5,0.68,0.0
3,4,7968,0.088235,0.0,0.0,0.0,0.088235,0.0,0.0,0.0,...,0.900721,0.789495,0.536695,0.789458,0.562992,0.045455,0.0,0.666667,0.86,0.0
4,5,7968,0.558824,0.0,0.0,0.0,0.558824,0.0,0.0,0.0,...,0.557493,0.452582,0.660139,0.38995,0.120613,0.772727,0.0,0.5,0.46,0.0


In [45]:
save_prediction(best_model, "YetiRankPairwise.tsv", test)