In [133]:
import math
import optuna
import pickle
from typing import List, Tuple

import numpy as np
import torch
from catboost.datasets import msrank_10k
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from torch import Tensor


class Solution:
    def __init__(self, n_estimators: int = 231,
                 lr: float = 0.39127022258826616,
                 ndcg_top_k: int = 10,
                 subsample: float = 0.53005362889185,
                 colsample_bytree: float = 0.6304279014613413,
                 max_depth: int = 9,
                 min_samples_leaf: int = 25):
        self._prepare_data()
        self.num_input_features = self.X_train.shape[1]
        self.num_train_objects = self.X_train.shape[0]
        self.unique_train_groups = np.unique(self.query_ids_train)
        self.num_test_objects = self.X_test.shape[0]
        self.num_features_to_choice = int(colsample_bytree *
                                          self.num_input_features)
        self.num_groups_to_choice = int(
            subsample * len(self.unique_train_groups))

        self.ndcg_top_k = ndcg_top_k
        self.n_estimators = n_estimators
        self.lr = lr
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf

        self.trees = []
        self.best_ndcg = -1

    def _get_data(self) -> List[np.ndarray]:
        try:
            train_df, test_df = msrank_10k()
        except:
            train_df, test_df = msrank_10k()

        X_train = train_df.drop([0, 1], axis=1).values
        y_train = train_df[0].values
        query_ids_train = train_df[1].values.astype(int)

        X_test = test_df.drop([0, 1], axis=1).values
        y_test = test_df[0].values
        query_ids_test = test_df[1].values.astype(int)

        return [X_train, y_train, query_ids_train, X_test, y_test,
                query_ids_test]

    def _prepare_data(self) -> None:
        (X_train, y_train, self.query_ids_train,
         X_test, y_test, self.query_ids_test) = self._get_data()
        # допишите ваш код здесь
        X_train = self._scale_features_in_query_groups(
            X_train, self.query_ids_train
        )
        X_test = self._scale_features_in_query_groups(
            X_test, self.query_ids_test
        )

        self.X_train = torch.from_numpy(X_train).type(torch.FloatTensor)
        self.ys_train = torch.from_numpy(y_train).type(
            torch.FloatTensor).reshape(-1, 1)
        self.X_test = torch.from_numpy(X_test).type(torch.FloatTensor)
        self.ys_test = torch.from_numpy(y_test).type(
            torch.FloatTensor).reshape(-1, 1)

    def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
                                        inp_query_ids: np.ndarray) -> \
            np.ndarray:
        # допишите ваш код здесь
        for query_id in np.unique(inp_query_ids):
            mask = inp_query_ids == query_id
            scaler = StandardScaler()
            scaled_part = scaler.fit_transform(inp_feat_array[mask])
            inp_feat_array[mask] = scaled_part
        return inp_feat_array

    def _train_one_tree(self, cur_tree_idx: int,
                        train_preds: torch.FloatTensor
                        ) -> Tuple[DecisionTreeRegressor, np.ndarray]:
        # допишите ваш код здесь
        lambdas = np.zeros((self.num_train_objects, 1))
        groups_to_train_on = np.random.choice(self.unique_train_groups,
                                              size=self.num_groups_to_choice,
                                              replace=False)
        for query_id in groups_to_train_on:
            mask = self.query_ids_train == query_id
            group_y = self.ys_train[mask]
            group_preds = train_preds[mask]
            group_lambdas = self._compute_lambdas(group_y, group_preds)
            lambdas[mask] = group_lambdas.numpy()

        dt = DecisionTreeRegressor(max_depth=self.max_depth,
                                   min_samples_leaf=self.min_samples_leaf,
                                   random_state=cur_tree_idx)

        rows = np.isin(self.query_ids_train, groups_to_train_on)
        cols = np.random.choice(np.arange(self.num_input_features),
                                size=self.num_features_to_choice,
                                replace=False)

        sample_X_train = self.X_train[rows][:, cols].numpy()
        lambdas = lambdas[rows]
        dt.fit(sample_X_train, lambdas)
        return dt, cols

    def fit(self):
        np.random.seed(0)
        # допишите ваш код здесь
        best_ndcg_ind = max_ndcg = 0
        prev_preds = torch.zeros(
            self.num_train_objects, 1).type(torch.FloatTensor)
        valid_preds = torch.zeros(
            self.num_test_objects, 1).type(torch.FloatTensor)

        for idx in range(1, self.n_estimators + 1):
            dt, train_cols = self._train_one_tree(idx, prev_preds)
            self.trees.append((dt, train_cols))
            prev_preds -= self.lr * torch.FloatTensor(dt.predict(
                self.X_train[:, train_cols].numpy())).reshape(-1, 1)
            valid_preds -= self.lr * torch.FloatTensor(dt.predict(
                self.X_test[:, train_cols].numpy())).reshape(-1, 1)
            ndcg = self._calc_data_ndcg(self.query_ids_test, self.ys_test,
                                        valid_preds)

            if ndcg > self.best_ndcg:
                best_ndcg_ind = idx
                self.best_ndcg = ndcg
                
        self.trees = self.trees[:best_ndcg_ind]

    def predict(self, data: torch.FloatTensor) -> torch.FloatTensor:
        preds = torch.zeros(data.shape[0], 1).type(torch.FloatTensor)
        for dt, cols in self.trees:
            tmp_preds = dt.predict(data[:, cols].numpy())
            preds -= self.lr * torch.FloatTensor(tmp_preds).reshape(-1, 1)
        return preds

    def _compute_lambdas(self, y_true: torch.FloatTensor,
                         y_pred: torch.FloatTensor) -> Tensor:
        def compute_ideal_dcg(ys_true: torch.Tensor) -> float:
            ys_true, _ = torch.sort(ys_true, dim=0, descending=True)

            sum_dcg = 0
            for i, y_true in enumerate(ys_true, 1):
                sum_dcg += (2 ** y_true - 1) / math.log2(i + 1)
            return sum_dcg

        def compute_labels_in_batch(y_true):
            rel_diff = y_true - y_true.t()
            pos_pairs = (rel_diff > 0).type(torch.float32)
            neg_pairs = (rel_diff < 0).type(torch.float32)
            Sij = pos_pairs - neg_pairs
            return Sij

        _, rank_order = torch.sort(y_true, descending=True, dim=0)
        rank_order += 1

        pos_pairs_score_diff = 1.0 + torch.exp((y_pred - y_pred.t()))

        Sij = compute_labels_in_batch(y_true)

        gain_diff = torch.pow(2.0, y_true) - torch.pow(2.0, y_true.t())
        decay_diff = (1.0 / torch.log2(rank_order + 1.0)) - (
                1.0 / torch.log2(rank_order.t() + 1.0))
        ideal_dcg = compute_ideal_dcg(y_true)
        N = 1 / (ideal_dcg + 1)
        delta_ndcg = torch.abs(N * gain_diff * decay_diff)

        lambda_update = (0.5 * (
                    1 - Sij) - 1 / pos_pairs_score_diff) * delta_ndcg
        lambda_update = torch.sum(lambda_update, dim=1, keepdim=True)
        return lambda_update

    def _dcg(self, ys_true: torch.Tensor, ys_pred: torch.Tensor,
             k: int) -> float:
        ys_pred, indices = torch.sort(ys_pred, dim=0, descending=True)
        ys_true = ys_true[indices[:k]]

        sum_dcg = 0
        for i, y_true in enumerate(ys_true, 1):
            sum_dcg += (2 ** y_true - 1) / math.log2(i + 1)
        return sum_dcg

    def _ndcg_k(self, ys_true: torch.Tensor, ys_pred: torch.Tensor,
                ndcg_top_k: int) -> float:
        ideal_dcg = self._dcg(ys_true, ys_true, ndcg_top_k)
        case_dcg = self._dcg(ys_true, ys_pred, ndcg_top_k)
        return float(case_dcg / ideal_dcg)

    def _calc_data_ndcg(self, queries_list: np.ndarray,
                        true_labels: torch.FloatTensor,
                        preds: torch.FloatTensor) -> float:
        # допишите ваш код здесь
        unique_queries = np.unique(queries_list)
        ndcgs = []
        for query_id in unique_queries:
            group_y = true_labels[queries_list == query_id]
            y_pred = preds[queries_list == query_id]
            group_dcg = self._ndcg_k(group_y, y_pred, self.ndcg_top_k)
            if np.isnan(group_dcg):
                ndcgs.append(0)
                continue
            ndcgs.append(group_dcg)
        return float(np.mean(ndcgs))

    def save_model(self, path: str):
        state = {
            'trees': self.trees,
            'lr': self.lr,
            'best_ndcg': self.best_ndcg
        }
        f = open(path, 'wb')
        pickle.dump(state, f)

    def load_model(self, path: str):
        f = open(path, 'rb')
        state = pickle.load(f)
        self.trees = state['trees']
        self.lr = state['lr']
        self.best_ndcg = state['best_ndcg']


In [134]:
train_df, test_df = msrank_10k()
X_train = train_df.drop([0, 1], axis=1).values
y_train = train_df[0].values
query_ids_train = train_df[1].values.astype(int)

X_test = test_df.drop([0, 1], axis=1).values
y_test = test_df[0].values
query_ids_test = test_df[1].values.astype(int)

In [138]:
X_test = torch.from_numpy(X_test)
y_test = torch.from_numpy(y_test).reshape(-1, 1)

In [135]:
best_params = {'lr': 0.39127022258826616, 'subsample': 0.53005362889185, 'colsample_bytree': 0.6304279014613413, 'n_estimators': 231, 'max_depth': 9, 'min_samples_leaf': 25}

In [136]:
slt = Solution(**best_params)
slt.fit()

In [132]:
slt.save_model('model.pickle')

In [139]:
valid_preds = slt.predict(X_test)
ndcg = slt._calc_data_ndcg(query_ids_test, y_test, valid_preds)
ndcg

0.405036643242732

In [108]:
def objective(trial, X_test=X_test, y_test=y_test, query_ids_test=query_ids_test):
    param = {
        'lr': trial.suggest_float('lr', 1e-2, 7e-1),
        'subsample': trial.suggest_float('subsample', 5e-1, 75e-2),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 5e-1, 75e-2),
        'n_estimators': trial.suggest_categorical('n_estimators', list(range(100, 300))),
        'max_depth': trial.suggest_categorical('max_depth', list(range(3, 10))),
        'min_samples_leaf': trial.suggest_categorical('min_samples_leaf', list(range(5, 30))),
    }
    
    model = Solution(**param)
    model.fit()
    
    valid_preds = model.predict(X_test)
    ndcg = model._calc_data_ndcg(query_ids_test, y_test, valid_preds)
    return ndcg

In [109]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2022-12-01 16:07:03,719][0m A new study created in memory with name: no-name-fada752e-2b29-49ad-b315-95289a4d9cbb[0m
[32m[I 2022-12-01 16:08:33,965][0m Trial 0 finished with value: 0.3325798035402666 and parameters: {'lr': 0.6733902726356289, 'subsample': 0.7184164792440743, 'colsample_bytree': 0.6514877160809808, 'n_estimators': 171, 'max_depth': 6, 'min_samples_leaf': 20}. Best is trial 0 with value: 0.3325798035402666.[0m
[32m[I 2022-12-01 16:09:46,371][0m Trial 1 finished with value: 0.36110045451198264 and parameters: {'lr': 0.07022961889627585, 'subsample': 0.6433092225710995, 'colsample_bytree': 0.5296259152124865, 'n_estimators': 156, 'max_depth': 7, 'min_samples_leaf': 18}. Best is trial 1 with value: 0.36110045451198264.[0m
[32m[I 2022-12-01 16:10:53,655][0m Trial 2 finished with value: 0.36390404752005795 and parameters: {'lr': 0.5163773071376371, 'subsample': 0.5428969217641779, 'colsample_bytree': 0.6302990292761107, 'n_estimators': 153, 'max_depth': 7, '

[32m[I 2022-12-01 16:44:47,586][0m Trial 26 finished with value: 0.3588065614068063 and parameters: {'lr': 0.5635832426466061, 'subsample': 0.5657192650974765, 'colsample_bytree': 0.6491170815452761, 'n_estimators': 154, 'max_depth': 9, 'min_samples_leaf': 24}. Best is trial 3 with value: 0.39505608451747115.[0m
[32m[I 2022-12-01 16:46:51,532][0m Trial 27 finished with value: 0.29667944886641545 and parameters: {'lr': 0.6202677715903917, 'subsample': 0.5246543262472545, 'colsample_bytree': 0.6023382267421029, 'n_estimators': 287, 'max_depth': 9, 'min_samples_leaf': 5}. Best is trial 3 with value: 0.39505608451747115.[0m
[32m[I 2022-12-01 16:48:28,979][0m Trial 28 finished with value: 0.3788683789177119 and parameters: {'lr': 0.5218864061699272, 'subsample': 0.5559687487095291, 'colsample_bytree': 0.6720793224442154, 'n_estimators': 216, 'max_depth': 8, 'min_samples_leaf': 23}. Best is trial 3 with value: 0.39505608451747115.[0m
[32m[I 2022-12-01 16:49:22,154][0m Trial 29 fin

[32m[I 2022-12-01 17:17:24,117][0m Trial 52 finished with value: 0.3632308896182124 and parameters: {'lr': 0.4550425215277508, 'subsample': 0.5020670764952132, 'colsample_bytree': 0.6086113270150338, 'n_estimators': 294, 'max_depth': 3, 'min_samples_leaf': 7}. Best is trial 3 with value: 0.39505608451747115.[0m
[32m[I 2022-12-01 17:18:00,326][0m Trial 53 finished with value: 0.3529370538535778 and parameters: {'lr': 0.594156594281773, 'subsample': 0.5124910000554491, 'colsample_bytree': 0.5660069449278542, 'n_estimators': 109, 'max_depth': 3, 'min_samples_leaf': 11}. Best is trial 3 with value: 0.39505608451747115.[0m
[32m[I 2022-12-01 17:19:37,809][0m Trial 54 finished with value: 0.36233737310340675 and parameters: {'lr': 0.3319202941440617, 'subsample': 0.5426913345196381, 'colsample_bytree': 0.5897169295726097, 'n_estimators': 237, 'max_depth': 8, 'min_samples_leaf': 22}. Best is trial 3 with value: 0.39505608451747115.[0m
[32m[I 2022-12-01 17:21:30,918][0m Trial 55 fini

[32m[I 2022-12-01 19:31:33,488][0m Trial 78 finished with value: 0.3492112833809804 and parameters: {'lr': 0.26816322881524074, 'subsample': 0.5657460012920917, 'colsample_bytree': 0.6505711126024449, 'n_estimators': 221, 'max_depth': 9, 'min_samples_leaf': 20}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 19:33:10,648][0m Trial 79 finished with value: 0.37146683042180456 and parameters: {'lr': 0.361788093056469, 'subsample': 0.5362538540376904, 'colsample_bytree': 0.66536282365261, 'n_estimators': 223, 'max_depth': 9, 'min_samples_leaf': 24}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 19:34:24,500][0m Trial 80 finished with value: 0.36709132729765187 and parameters: {'lr': 0.33728863678188453, 'subsample': 0.5523224989140021, 'colsample_bytree': 0.6444349338133104, 'n_estimators': 163, 'max_depth': 9, 'min_samples_leaf': 18}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 19:35:39,480][0m Trial 81 fi

[32m[I 2022-12-01 20:17:22,454][0m Trial 104 finished with value: 0.38294853549412017 and parameters: {'lr': 0.49555879501234695, 'subsample': 0.5388147022880513, 'colsample_bytree': 0.6307755536872297, 'n_estimators': 139, 'max_depth': 9, 'min_samples_leaf': 25}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 20:18:34,685][0m Trial 105 finished with value: 0.3642857678217488 and parameters: {'lr': 0.374090829530839, 'subsample': 0.5079781308044262, 'colsample_bytree': 0.6032328751442716, 'n_estimators': 175, 'max_depth': 9, 'min_samples_leaf': 27}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 20:19:17,801][0m Trial 106 finished with value: 0.3478158802795384 and parameters: {'lr': 0.6438460992023878, 'subsample': 0.5277058056600936, 'colsample_bytree': 0.6625598937356052, 'n_estimators': 105, 'max_depth': 5, 'min_samples_leaf': 8}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 20:20:36,767][0m Trial 107

[32m[I 2022-12-01 20:55:48,717][0m Trial 130 finished with value: 0.35772130505193767 and parameters: {'lr': 0.2788894355912752, 'subsample': 0.5385301101512113, 'colsample_bytree': 0.6413885340342238, 'n_estimators': 142, 'max_depth': 9, 'min_samples_leaf': 20}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 20:57:57,479][0m Trial 131 finished with value: 0.3882730575531955 and parameters: {'lr': 0.3048333739097296, 'subsample': 0.5300527115015934, 'colsample_bytree': 0.6306127414550369, 'n_estimators': 290, 'max_depth': 9, 'min_samples_leaf': 26}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 21:00:07,729][0m Trial 132 finished with value: 0.3722251332867427 and parameters: {'lr': 0.2987640968601289, 'subsample': 0.556343440660245, 'colsample_bytree': 0.6314117021711022, 'n_estimators': 279, 'max_depth': 9, 'min_samples_leaf': 26}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 21:01:06,601][0m Trial 133

[32m[I 2022-12-01 21:36:58,347][0m Trial 156 finished with value: 0.3454605637596689 and parameters: {'lr': 0.45280578850287395, 'subsample': 0.5459175387947164, 'colsample_bytree': 0.6335295510614434, 'n_estimators': 240, 'max_depth': 9, 'min_samples_leaf': 25}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 21:39:04,693][0m Trial 157 finished with value: 0.39113495950066707 and parameters: {'lr': 0.4363575091175289, 'subsample': 0.537126886213129, 'colsample_bytree': 0.6515590653673028, 'n_estimators': 292, 'max_depth': 9, 'min_samples_leaf': 25}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 21:40:22,163][0m Trial 158 finished with value: 0.385092616856014 and parameters: {'lr': 0.4271356849084547, 'subsample': 0.5366942415960759, 'colsample_bytree': 0.6625826645777237, 'n_estimators': 181, 'max_depth': 8, 'min_samples_leaf': 25}. Best is trial 75 with value: 0.4036155929598163.[0m
[32m[I 2022-12-01 21:42:30,315][0m Trial 159

[32m[I 2022-12-01 22:15:08,821][0m Trial 182 finished with value: 0.28533526012106003 and parameters: {'lr': 0.4044433401276457, 'subsample': 0.5793773381883969, 'colsample_bytree': 0.6406489860899919, 'n_estimators': 165, 'max_depth': 9, 'min_samples_leaf': 12}. Best is trial 172 with value: 0.405036643242732.[0m
[32m[I 2022-12-01 22:16:12,813][0m Trial 183 finished with value: 0.38078772628726976 and parameters: {'lr': 0.42473419835779763, 'subsample': 0.5276134328761706, 'colsample_bytree': 0.6552566037226921, 'n_estimators': 132, 'max_depth': 9, 'min_samples_leaf': 25}. Best is trial 172 with value: 0.405036643242732.[0m
[32m[I 2022-12-01 22:17:34,011][0m Trial 184 finished with value: 0.37968537781387374 and parameters: {'lr': 0.38293464354938067, 'subsample': 0.5353971894701481, 'colsample_bytree': 0.6690343073895301, 'n_estimators': 169, 'max_depth': 9, 'min_samples_leaf': 20}. Best is trial 172 with value: 0.405036643242732.[0m
[32m[I 2022-12-01 22:18:26,621][0m Tria

Number of finished trials: 200
Best trial: {'lr': 0.39127022258826616, 'subsample': 0.53005362889185, 'colsample_bytree': 0.6304279014613413, 'n_estimators': 231, 'max_depth': 9, 'min_samples_leaf': 25}


### STM solution

In [113]:
import math
import pickle
import random
from typing import List, Tuple

import numpy as np
import torch
from catboost.datasets import msrank_10k
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from tqdm.auto import tqdm


class Solution:
    def __init__(self, n_estimators: int = 100, lr: float = 0.5, ndcg_top_k: int = 10,
                 subsample: float = 0.6, colsample_bytree: float = 0.9,
                 max_depth: int = 7, min_samples_leaf: int = 8):
        self._prepare_data()
        self.num_input_features = self.X_train.shape[1]
        self.num_train_objects = self.X_train.shape[0]
        self.num_test_objects = self.X_test.shape[0]

        self.features_to_choice = int(
            self.num_input_features * colsample_bytree)
        self.objects_to_choice = int(self.num_train_objects * subsample)

        self.ndcg_top_k = ndcg_top_k
        self.n_estimators = n_estimators
        self.lr = lr
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf

        self.trees = None
        self.trees_feat_idxs = None
        self.best_ndcg = -1
        self.best_iter_idx = -1

    def _get_data(self) -> List[np.array]:
        try:
            train_df, test_df = msrank_10k()
        except:
            train_df, test_df = msrank_10k()
        X_train = train_df.drop([0, 1], axis=1).values
        y_train = train_df[0].values
        query_ids_train = train_df[1].values.astype(int)

        X_test = test_df.drop([0, 1], axis=1).values
        y_test = test_df[0].values
        query_ids_test = test_df[1].values.astype(int)

        return [X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]

    def _prepare_data(self) -> None:
        (X_train, y_train, self.query_ids_train,
            X_test, y_test, self.query_ids_test) = self._get_data()

        X_train = self._scale_features_in_query_groups(
            X_train, self.query_ids_train)
        X_test = self._scale_features_in_query_groups(
            X_test, self.query_ids_test)

        self.X_train = torch.FloatTensor(X_train)
        self.X_test = torch.FloatTensor(X_test)

        self.ys_train = torch.FloatTensor(y_train).reshape(-1, 1)
        self.ys_test = torch.FloatTensor(y_test).reshape(-1, 1)

    def _scale_features_in_query_groups(self, inp_feat_array: np.array,
                                        inp_query_ids: np.array) -> np.array:
        for cur_id in np.unique(inp_query_ids):
            mask = inp_query_ids == cur_id
            tmp_array = inp_feat_array[mask]
            scaler = StandardScaler()
            inp_feat_array[mask] = scaler.fit_transform(tmp_array)

        return inp_feat_array

    def _train_one_tree(self, cur_tree_idx: int,
                        train_preds: torch.FloatTensor
                        ) -> Tuple[DecisionTreeRegressor, np.array]:
        lambdas = torch.zeros(self.num_train_objects, 1)
        for cur_id in np.unique(self.query_ids_train):
            train_mask = self.query_ids_train == cur_id
            lambda_update = self._compute_lambdas(
                self.ys_train[train_mask], train_preds[train_mask])
            if any(torch.isnan(lambda_update)):
                lambda_update = torch.zeros_like(lambda_update)
            lambdas[train_mask] = lambda_update

        tree = DecisionTreeRegressor(
            max_depth=self.max_depth, min_samples_leaf=self.min_samples_leaf, random_state=cur_tree_idx)

        this_tree_feats = np.random.choice(
            list(range(self.num_input_features)), self.features_to_choice, replace=False)
        this_tree_objs = np.random.choice(
            list(range(self.num_train_objects)), self.objects_to_choice, replace=False)

        tree.fit(
            self.X_train[this_tree_objs.reshape(-1)
                         ][:, this_tree_feats].numpy(),
            -lambdas[this_tree_objs.reshape(-1), :].numpy()
        )

        return tree, this_tree_feats

    def _calc_data_ndcg(self, queries_list: np.array,
                        true_labels: torch.FloatTensor, preds: torch.FloatTensor) -> float:
        ndcgs = []
        for cur_id in np.unique(queries_list):
            mask = queries_list == cur_id
            cur_ndcg = self._ndcg_k(
                true_labels[mask], preds[mask], self.ndcg_top_k)
            if np.isnan(cur_ndcg):
                ndcgs.append(0)
                continue
            ndcgs.append(cur_ndcg)
        return np.mean(ndcgs)

    def fit(self):
        np.random.seed(0)
        self.trees = []
        self.trees_feat_idxs = []
        self.best_ndcg = -1
        self.best_iter_idx = -1

        train_preds = torch.zeros(self.num_train_objects, 1)
        test_preds = torch.zeros(self.num_test_objects, 1)

        train_ndcgs, test_ndcgs = [], []

        #p_bar = tqdm(range(self.n_estimators))
        for cur_tree_idx in range(self.n_estimators):
            tree, this_tree_feats = self._train_one_tree(
                cur_tree_idx, train_preds)
            self.trees.append(tree)
            self.trees_feat_idxs.append(this_tree_feats)

            cur_tree_train_data = self.X_train[:, this_tree_feats].numpy()
            train_preds += self.lr * \
                torch.FloatTensor(tree.predict(
                    cur_tree_train_data)).reshape(-1, 1)
            train_ndcg = self._calc_data_ndcg(
                self.query_ids_train, self.ys_train, train_preds)

            cur_tree_test_data = self.X_test[:, this_tree_feats].numpy()
            test_preds += self.lr * \
                torch.FloatTensor(tree.predict(
                    cur_tree_test_data)).reshape(-1, 1)
            test_ndcg = self._calc_data_ndcg(
                self.query_ids_test, self.ys_test, test_preds)

            if self.best_ndcg < test_ndcg:
                self.best_ndcg = test_ndcg
                self.best_iter_idx = cur_tree_idx

            train_ndcgs.append(train_ndcg)
            test_ndcgs.append(test_ndcg)
#             p_bar.set_description_str(
#                 f'Test nDCG@{self.ndcg_top_k}={round(test_ndcg, 5)}')

        cut_idx = self.best_iter_idx + 1
        self.trees = self.trees[:cut_idx]
        self.trees_feat_idxs = self.trees_feat_idxs[:cut_idx]

    def predict(self, data: torch.FloatTensor) -> torch.FloatTensor:
        preds = torch.zeros(data.shape[0], 1)
        for cur_tree_idx in range(len(self.trees)):
            tree = self.trees[cur_tree_idx]
            feat_idx = self.trees_feat_idxs[cur_tree_idx]
            tmp_preds = tree.predict(data[:, feat_idx].numpy())
            preds += self.lr * torch.FloatTensor(tmp_preds).reshape(-1, 1)

        return preds

    def _compute_ideal_dcg(self, ys_true: torch.FloatTensor) -> float:
        def dcg(ys_true, ys_pred):
            _, argsort = torch.sort(ys_pred, descending=True, dim=0)
            ys_true_sorted = ys_true[argsort]
            ret = 0
            for i, l in enumerate(ys_true_sorted, 1):
                ret += (2 ** l - 1) / np.log2(1 + i)
            return ret
        ideal_dcg = dcg(ys_true, ys_true)
        return ideal_dcg

    def _compute_lambdas(self, y_true, y_pred):
        # рассчитаем нормировку, IdealDCG
        ideal_dcg = self._compute_ideal_dcg(y_true)
        N = 1 / ideal_dcg

        # рассчитаем порядок документов согласно оценкам релевантности
        _, rank_order = torch.sort(y_true, descending=True, axis=0)
        rank_order += 1

        with torch.no_grad():
            # получаем все попарные разницы скоров в батче
            pos_pairs_score_diff = 1.0 + torch.exp((y_pred - y_pred.t()))

            # поставим разметку для пар, 1 если первый документ релевантнее
            # -1 если второй документ релевантнее
            Sij = self._compute_labels_in_batch(y_true)
            # посчитаем изменение gain из-за перестановок
            gain_diff = self._compute_gain_diff(y_true)

            # посчитаем изменение знаменателей-дискаунтеров
            decay_diff = (1.0 / torch.log2(rank_order + 1.0)) - \
                (1.0 / torch.log2(rank_order.t() + 1.0))
            # посчитаем непосредственное изменение nDCG
            delta_ndcg = torch.abs(N * gain_diff * decay_diff)
            # посчитаем лямбды
            lambda_update = (0.5 * (1 - Sij) - 1 /
                             pos_pairs_score_diff) * delta_ndcg
            lambda_update = torch.sum(lambda_update, dim=1, keepdim=True)

            return lambda_update

    def _compute_labels_in_batch(self, y_true):
        rel_diff = y_true - y_true.t()
        pos_pairs = (rel_diff > 0).type(torch.float32)
        neg_pairs = (rel_diff < 0).type(torch.float32)
        Sij = pos_pairs - neg_pairs
        return Sij

    def _compute_gain_diff(self, y_true):
        gain_diff = torch.pow(2.0, y_true) - torch.pow(2.0, y_true.t())
        return gain_diff

    def _ndcg_k(self, ys_true, ys_pred, ndcg_top_k) -> float:
        def dcg(ys_true, ys_pred):
            _, argsort = torch.sort(ys_pred, descending=True, dim=0)
            argsort = argsort[:ndcg_top_k]
            ys_true_sorted = ys_true[argsort]
            ret = 0
            for i, l in enumerate(ys_true_sorted, 1):
                ret += (2 ** l - 1) / math.log2(1 + i)
            return ret
        ideal_dcg = dcg(ys_true, ys_true)
        pred_dcg = dcg(ys_true, ys_pred)
        return (pred_dcg / ideal_dcg).item()

    def save_model(self, path: str):
        state = {
            'trees': self.trees,
            'trees_feat_idxs': self.trees_feat_idxs,
            'best_ndcg': self.best_ndcg,
            'lr': self.lr
        }
        f = open(path, 'wb')
        pickle.dump(state, f)

    def load_model(self, path: str):
        f = open(path, 'rb')
        state = pickle.load(f)
        self.trees = state['trees']
        self.trees_feat_idxs = state['trees_feat_idxs']
        self.best_ndcg = state['best_ndcg']
        self.lr = state['lr']

In [114]:
def objective(trial, X_test=X_test, y_test=y_test, query_ids_test=query_ids_test):
    param = {
        'lr': trial.suggest_float('lr', 1e-2, 7e-1),
        'subsample': trial.suggest_float('subsample', 5e-1, 75e-2),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 5e-1, 75e-2),
        'n_estimators': trial.suggest_categorical('n_estimators', list(range(100, 300))),
        'max_depth': trial.suggest_categorical('max_depth', list(range(3, 10))),
        'min_samples_leaf': trial.suggest_categorical('min_samples_leaf', list(range(5, 30))),
    }
    
    model = Solution(**param)
    model.fit()
    
    valid_preds = model.predict(X_test)
    ndcg = model._calc_data_ndcg(query_ids_test, y_test, valid_preds)
    return ndcg

In [115]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2022-12-01 22:48:11,157][0m A new study created in memory with name: no-name-9296a0d0-b416-47e4-949a-f0d413323920[0m
[32m[I 2022-12-01 22:50:16,053][0m Trial 0 finished with value: 0.3968041932000024 and parameters: {'lr': 0.32066850216370657, 'subsample': 0.7191939783019947, 'colsample_bytree': 0.5788282663912702, 'n_estimators': 178, 'max_depth': 3, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-01 22:54:02,885][0m Trial 1 finished with value: 0.33377637144430206 and parameters: {'lr': 0.3553411749148994, 'subsample': 0.6621899338488452, 'colsample_bytree': 0.7324755777520062, 'n_estimators': 278, 'max_depth': 8, 'min_samples_leaf': 23}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-01 22:56:23,017][0m Trial 2 finished with value: 0.36798600296973966 and parameters: {'lr': 0.23320464557464765, 'subsample': 0.5139990073771055, 'colsample_bytree': 0.5372293543064008, 'n_estimators': 210, 'max_depth': 4, '

[32m[I 2022-12-02 00:08:24,891][0m Trial 26 finished with value: 0.38998624739189774 and parameters: {'lr': 0.45885949910141793, 'subsample': 0.6363380270617899, 'colsample_bytree': 0.556873273965026, 'n_estimators': 214, 'max_depth': 3, 'min_samples_leaf': 16}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 00:10:22,247][0m Trial 27 finished with value: 0.37437749489555167 and parameters: {'lr': 0.35072614151620174, 'subsample': 0.6354132834768735, 'colsample_bytree': 0.5551453040270729, 'n_estimators': 121, 'max_depth': 9, 'min_samples_leaf': 16}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 00:13:10,954][0m Trial 28 finished with value: 0.36229364655529256 and parameters: {'lr': 0.5855560790441392, 'subsample': 0.6084666066400632, 'colsample_bytree': 0.6218807471069333, 'n_estimators': 197, 'max_depth': 4, 'min_samples_leaf': 6}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 00:16:44,543][0m Trial 29 fin

[32m[I 2022-12-02 07:50:49,052][0m Trial 52 finished with value: 0.34764510947123256 and parameters: {'lr': 0.49573129000235433, 'subsample': 0.7317848443814603, 'colsample_bytree': 0.5711309653915507, 'n_estimators': 142, 'max_depth': 3, 'min_samples_leaf': 14}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 07:52:06,464][0m Trial 53 finished with value: 0.3397889449004722 and parameters: {'lr': 0.41718908151864936, 'subsample': 0.7235693991966778, 'colsample_bytree': 0.5533469945736422, 'n_estimators': 112, 'max_depth': 3, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 07:53:26,163][0m Trial 54 finished with value: 0.35803398900730393 and parameters: {'lr': 0.3655661030263622, 'subsample': 0.747386817259006, 'colsample_bytree': 0.5867185286283099, 'n_estimators': 113, 'max_depth': 3, 'min_samples_leaf': 19}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 07:55:40,335][0m Trial 55 fini

[32m[I 2022-12-02 08:47:07,357][0m Trial 78 finished with value: 0.34733805918586486 and parameters: {'lr': 0.5009848893842999, 'subsample': 0.5023519289096037, 'colsample_bytree': 0.5765253121251992, 'n_estimators': 164, 'max_depth': 8, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 08:48:48,939][0m Trial 79 finished with value: 0.36467839926070006 and parameters: {'lr': 0.18560070680241703, 'subsample': 0.6266163852986824, 'colsample_bytree': 0.7184539778638545, 'n_estimators': 149, 'max_depth': 3, 'min_samples_leaf': 28}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 08:52:03,491][0m Trial 80 finished with value: 0.3809790577873725 and parameters: {'lr': 0.4052672025150617, 'subsample': 0.5820320735197279, 'colsample_bytree': 0.7426337031557444, 'n_estimators': 296, 'max_depth': 3, 'min_samples_leaf': 24}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 08:54:28,209][0m Trial 81 fin

[32m[I 2022-12-02 09:58:34,336][0m Trial 104 finished with value: 0.3579367682870012 and parameters: {'lr': 0.391298752897742, 'subsample': 0.6199462841164796, 'colsample_bytree': 0.5736519154174027, 'n_estimators': 141, 'max_depth': 3, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 09:59:58,666][0m Trial 105 finished with value: 0.3377231456330811 and parameters: {'lr': 0.27398056976063606, 'subsample': 0.7281169530143239, 'colsample_bytree': 0.5561527484246128, 'n_estimators': 103, 'max_depth': 5, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 10:01:37,705][0m Trial 106 finished with value: 0.3669615743949375 and parameters: {'lr': 0.45195112127620884, 'subsample': 0.6123496467906684, 'colsample_bytree': 0.5466175200372382, 'n_estimators': 132, 'max_depth': 3, 'min_samples_leaf': 26}. Best is trial 0 with value: 0.3968041932000024.[0m
[32m[I 2022-12-02 10:04:18,150][0m Trial 107 f

[32m[I 2022-12-02 11:11:05,026][0m Trial 130 finished with value: 0.38115242536939226 and parameters: {'lr': 0.5269837608905068, 'subsample': 0.6580495387135008, 'colsample_bytree': 0.6601884337408834, 'n_estimators': 261, 'max_depth': 3, 'min_samples_leaf': 15}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 11:14:09,132][0m Trial 131 finished with value: 0.38646852311548036 and parameters: {'lr': 0.5439747157384796, 'subsample': 0.6628060461726663, 'colsample_bytree': 0.6425352374406426, 'n_estimators': 268, 'max_depth': 3, 'min_samples_leaf': 13}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 11:16:26,085][0m Trial 132 finished with value: 0.3853887093449097 and parameters: {'lr': 0.4997800024273253, 'subsample': 0.6701898191663251, 'colsample_bytree': 0.6464779913090137, 'n_estimators': 198, 'max_depth': 3, 'min_samples_leaf': 15}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 11:18:13,866][0m T

[32m[I 2022-12-02 12:18:35,547][0m Trial 156 finished with value: 0.381971523479078 and parameters: {'lr': 0.5226469494793189, 'subsample': 0.6309793153630332, 'colsample_bytree': 0.5186650834324664, 'n_estimators': 244, 'max_depth': 3, 'min_samples_leaf': 10}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 12:21:02,497][0m Trial 157 finished with value: 0.3699212933714602 and parameters: {'lr': 0.32674034324071666, 'subsample': 0.6577694707506687, 'colsample_bytree': 0.5077838122485993, 'n_estimators': 207, 'max_depth': 3, 'min_samples_leaf': 12}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 12:23:16,207][0m Trial 158 finished with value: 0.3397040564069444 and parameters: {'lr': 0.4363282350741084, 'subsample': 0.640180584487794, 'colsample_bytree': 0.5136260372792001, 'n_estimators': 175, 'max_depth': 6, 'min_samples_leaf': 14}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 12:24:39,349][0m Tria

[32m[I 2022-12-02 13:14:38,918][0m Trial 182 finished with value: 0.370241001075809 and parameters: {'lr': 0.4818732321684118, 'subsample': 0.6761502272071541, 'colsample_bytree': 0.5417881461638091, 'n_estimators': 253, 'max_depth': 3, 'min_samples_leaf': 16}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 13:17:47,283][0m Trial 183 finished with value: 0.3704082067447189 and parameters: {'lr': 0.5180519392078061, 'subsample': 0.6703501098097121, 'colsample_bytree': 0.5398750518437708, 'n_estimators': 288, 'max_depth': 3, 'min_samples_leaf': 16}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 13:19:34,923][0m Trial 184 finished with value: 0.37038304790879306 and parameters: {'lr': 0.4913412347208978, 'subsample': 0.6689853489330039, 'colsample_bytree': 0.5574811842237661, 'n_estimators': 163, 'max_depth': 3, 'min_samples_leaf': 16}. Best is trial 111 with value: 0.40230476581879465.[0m
[32m[I 2022-12-02 13:22:37,551][0m Tri

Number of finished trials: 200
Best trial: {'lr': 0.5099655429753616, 'subsample': 0.6540233528557746, 'colsample_bytree': 0.6343128384876874, 'n_estimators': 260, 'max_depth': 3, 'min_samples_leaf': 12}


In [116]:
best_params = study.best_trial.params

In [117]:
slt = Solution(**best_params)
slt.fit()

In [118]:
valid_preds = slt.predict(X_test)
ndcg = slt._calc_data_ndcg(query_ids_test, y_test, valid_preds)
ndcg

0.40230476581879465

In [119]:
slt.save_model('stm.pickle')