In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-0.26.1-cp37-none-manylinux1_x86_64.whl (67.4 MB)
[K     |████████████████████████████████| 67.4 MB 88 kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.26.1


In [None]:
import math

import numpy as np
import torch
from catboost.datasets import msrank_10k
from sklearn.preprocessing import StandardScaler

from typing import List

In [None]:
class ListNet(torch.nn.Module):
    """Two-layer perceptron that maps each document feature vector to one score.

    Architecture: Linear(num_input_features, hidden_dim) -> ReLU ->
    Linear(hidden_dim, 1).

    The ``weights_init_*`` static methods are optional initialisers intended to
    be passed to ``torch.nn.Module.apply``, for example
    ``net.model.apply(ListNet.weights_init_normal)``. None of them is applied
    by default, so the layers keep PyTorch's default initialisation.

    Args:
        num_input_features: size of the last dimension of the input tensor.
        hidden_dim: width of the hidden layer.
    """

    def __init__(self, num_input_features, hidden_dim=10):
        super().__init__()
        self.hidden_dim = hidden_dim
        # The network has to be built here: at class-body level neither `self`
        # nor `num_input_features` exists, so defining it there raises NameError.
        self.model = torch.nn.Sequential(
            torch.nn.Linear(num_input_features, self.hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(self.hidden_dim, 1),
        )
        # Optional custom weight initialisation (disabled, as in the original):
        # self.model.apply(self.weights_init_normal)

    @staticmethod
    def weights_init_uniform(module):
        """Initialise a Linear layer with U(0, 1) weights and a zero bias.

        Modules that are not ``torch.nn.Linear`` are left untouched, which makes
        the function safe to use with ``Module.apply``.
        """
        if isinstance(module, torch.nn.Linear):
            torch.nn.init.uniform_(module.weight, 0.0, 1.0)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)

    @staticmethod
    def weights_init_uniform_rule(module):
        """Initialise a Linear layer with U(-y, y) weights, y = 1/sqrt(in_features).

        The bias is set to zero; non-Linear modules are left untouched.
        """
        if isinstance(module, torch.nn.Linear):
            bound = 1.0 / math.sqrt(module.in_features)
            torch.nn.init.uniform_(module.weight, -bound, bound)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)

    @staticmethod
    def weights_init_normal(module):
        """Initialise a Linear layer with N(0, 1/sqrt(in_features)) weights.

        The second parameter is the standard deviation. The bias is set to
        zero; non-Linear modules are left untouched.
        """
        if isinstance(module, torch.nn.Linear):
            std = 1.0 / math.sqrt(module.in_features)
            torch.nn.init.normal_(module.weight, 0.0, std)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)

    def forward(self, input_1: torch.Tensor) -> torch.Tensor:
        """Return one score per input row; the last output dimension has size 1."""
        logits = self.model(input_1)
        return logits

In [None]:
class Solution:
    """Trains a ListNet ranker on the MSRANK 10k sample and tracks test NDCG@k.

    The constructor loads the data, standardises the features inside every
    query group, builds the model and an Adam optimizer. ``fit`` then trains
    query by query and evaluates on the test split after each epoch.

    Args:
        n_epochs: number of passes over the training queries per ``fit`` call.
        listnet_hidden_dim: hidden layer width passed to ``ListNet``.
        lr: learning rate for Adam.
        ndcg_top_k: cut-off ``k`` for the NDCG metric.
    """

    def __init__(self, n_epochs: int = 6, listnet_hidden_dim: int = 30,
                 lr: float = 0.001, ndcg_top_k: int = 10):
        self._prepare_data()
        self.num_input_features = self.X_train.shape[1]
        self.ndcg_top_k = ndcg_top_k
        self.n_epochs = n_epochs

        self.model = self._create_model(
            self.num_input_features, listnet_hidden_dim)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    def _get_data(self) -> List[np.ndarray]:
        """Load the train/test frames and split them into features, targets and query ids.

        Column 0 is used as the target, column 1 as the query id, and all
        remaining columns as features.

        Returns:
            ``[X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]``.
        """
        train_df, test_df = msrank_10k()

        X_train = train_df.drop([0, 1], axis=1).values
        y_train = train_df[0].values
        query_ids_train = train_df[1].values.astype(int)

        X_test = test_df.drop([0, 1], axis=1).values
        y_test = test_df[0].values
        query_ids_test = test_df[1].values.astype(int)

        return [X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]

    def _prepare_data(self) -> None:
        """Load the data, scale features per query and store everything as float tensors."""
        (X_train, y_train, self.query_ids_train,
         X_test, y_test, self.query_ids_test) = self._get_data()

        self.X_train = torch.FloatTensor(
            self._scale_features_in_query_groups(X_train, self.query_ids_train)
        )
        self.ys_train = torch.FloatTensor(y_train)

        self.X_test = torch.FloatTensor(
            self._scale_features_in_query_groups(X_test, self.query_ids_test)
        )
        self.ys_test = torch.FloatTensor(y_test)

    def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
                                        inp_query_ids: np.ndarray) -> np.ndarray:
        """Standardise every feature column separately inside each query group.

        Args:
            inp_feat_array: feature matrix with one row per document.
            inp_query_ids: query id of every row, aligned with ``inp_feat_array``.

        Returns:
            A new float array of the same shape; the input is not modified.
        """
        # Work on a float copy so the caller's array is not overwritten and an
        # integer input cannot truncate the scaled values.
        scaled = np.array(inp_feat_array, dtype=float)
        # Iterate over unique ids: looping over every row would refit the same
        # group once per document.
        for query_id in np.unique(inp_query_ids):
            group_mask = inp_query_ids == query_id
            scaled[group_mask] = StandardScaler().fit_transform(scaled[group_mask])
        return scaled

    def _create_model(self, listnet_num_input_features: int,
                      listnet_hidden_dim: int) -> torch.nn.Module:
        """Build the ListNet model with a fixed seed so initial weights are reproducible."""
        torch.manual_seed(0)
        net = ListNet(num_input_features=listnet_num_input_features,
                      hidden_dim=listnet_hidden_dim)
        return net

    def fit(self) -> List[float]:
        """Train for ``n_epochs`` epochs and return the test NDCG measured after each.

        The model is not re-created here, so calling ``fit`` again continues
        training from the current weights.
        """
        scores = []
        for _ in range(self.n_epochs):
            self._train_one_epoch()
            scores.append(self._eval_test_set())
        return scores

    def _calc_loss(self, batch_ys: torch.FloatTensor,
                   batch_pred: torch.FloatTensor) -> torch.FloatTensor:
        """ListNet loss: KL divergence between the softmax of targets and predictions.

        Args:
            batch_ys: ground-truth relevance of the documents of one query.
            batch_pred: predicted scores for the same documents, same shape.

        Returns:
            Scalar tensor ``sum(P_y * (log P_y - log P_z))``.
        """
        # log_softmax stays finite where log(softmax(z) / softmax(y)) turns into
        # -inf or NaN because a probability underflowed to zero.
        log_p_true = torch.log_softmax(batch_ys, dim=0)
        log_p_pred = torch.log_softmax(batch_pred, dim=0)
        return -torch.sum(log_p_true.exp() * (log_p_pred - log_p_true))

    def _train_one_epoch(self) -> None:
        """Run one optimisation step per training query (one query = one batch).

        The loss of the last processed query is kept in ``self.batch_loss``.
        """
        self.model.train()

        for query_id in np.unique(self.query_ids_train):
            rows = torch.as_tensor(
                np.flatnonzero(self.query_ids_train == query_id), dtype=torch.long)
            batch_X = self.X_train[rows]
            batch_ys = self.ys_train[rows]

            self.optimizer.zero_grad()
            # Flatten the (n, 1) model output so it lines up with the targets.
            batch_pred = self.model(batch_X).reshape(-1)
            self.batch_loss = self._calc_loss(batch_ys, batch_pred)
            # The graph is rebuilt for every query, so it need not be retained.
            self.batch_loss.backward()
            self.optimizer.step()

    def _eval_test_set(self) -> float:
        """Return the mean NDCG@``ndcg_top_k`` over all test queries."""
        self.model.eval()
        ndcgs = []
        with torch.no_grad():
            for query_id in np.unique(self.query_ids_test):
                rows = torch.as_tensor(
                    np.flatnonzero(self.query_ids_test == query_id), dtype=torch.long)
                valid_pred = self.model(self.X_test[rows])
                # _ndcg_k handles short and all-irrelevant queries itself, so no
                # blanket try/except is needed and real errors stay visible.
                ndcgs.append(
                    self._ndcg_k(self.ys_test[rows], valid_pred, self.ndcg_top_k))
        return float(np.mean(ndcgs))

    def _ndcg_k(self, ys_true: torch.Tensor, ys_pred: torch.Tensor,
                ndcg_top_k: int) -> float:
        """NDCG@k with exponential gain ``2**rel - 1`` and ``log2(rank + 1)`` discount.

        Args:
            ys_true: ground-truth relevance, shape ``(n,)`` or ``(n, 1)``.
            ys_pred: predicted scores, shape ``(n,)`` or ``(n, 1)``.
            ndcg_top_k: number of top positions to evaluate.

        Returns:
            DCG of the predicted order divided by the ideal DCG. Queries with
            fewer than ``ndcg_top_k`` documents are evaluated on the documents
            they have. Returns 0.0 when there are no documents or the ideal
            DCG is zero (no relevant document).
        """
        ys_true = ys_true.reshape(-1)
        ys_pred = ys_pred.reshape(-1)

        top_k = min(int(ndcg_top_k), ys_true.shape[0])
        if top_k <= 0:
            return 0.0

        # Position i (0-based) is discounted by log2(i + 2).
        discounts = torch.log2(torch.arange(2, top_k + 2, dtype=torch.float64))

        def dcg(order: torch.Tensor) -> float:
            gains = torch.pow(2.0, ys_true[order[:top_k]].double()) - 1.0
            return float((gains / discounts).sum())

        ideal_dcg = dcg(torch.argsort(ys_true, descending=True))
        if ideal_dcg == 0.0:
            return 0.0
        return dcg(torch.argsort(ys_pred, descending=True)) / ideal_dcg

In [None]:
# Build the pipeline with default hyper-parameters; the constructor loads and
# scales the data, then creates the model and optimizer.
s = Solution()

In [None]:
# Train for n_epochs epochs; the result is the mean test NDCG after each epoch.
s.fit()

[0.4181002571962249,
 0.42562789990111294,
 0.4274442735655671,
 0.43125976846512676,
 0.42806892845563294,
 0.4315980496560358]

In [None]:
# fit() does not re-create the model, so this call keeps training the same
# network for another n_epochs epochs.
s.fit()

[0.4009657486203707,
 0.4222865833483446,
 0.423861283114252,
 0.4231192179988211,
 0.4257273528713354,
 0.4281771176780357]

In [None]:
# Another n_epochs epochs on the same model instance (fit() never resets it).
s.fit()

[0.38129495256327245,
 0.39070755353332404,
 0.39646020027496437,
 0.39569807760592857,
 0.40528041657807473,
 0.4044198775719709]

In [None]:
# Score a few scaled test documents with the trained network. The original
# argument `x_` is not defined anywhere in the notebook, so the cell raised
# NameError on a fresh kernel.
s.model(s.X_test[:5])

Exploring the data preparation step

In [None]:
# Load the raw frames again for step-by-step inspection: column 0 is taken as
# the target, column 1 as the query id, everything else as features.
train_df, test_df = msrank_10k()

y_train = train_df[0].values
query_ids_train = train_df[1].values.astype(int)
X_train = train_df.drop(columns=[0, 1]).values

y_test = test_df[0].values
query_ids_test = test_df[1].values.astype(int)
X_test = test_df.drop(columns=[0, 1]).values

In [None]:
# Check which container type holds the query ids.
type(query_ids_train)

numpy.ndarray

In [None]:
# Look at the query id assigned to every training row.
query_ids_train

array([   1,    1,    1, ..., 1291, 1291, 1291])

In [None]:
# Row indices of the training documents that belong to query 1291.
i_temp = (query_ids_train==1291).nonzero()[0].ravel()

In [None]:
# Small toy array for illustrating how the mask -> nonzero() index lookup works.
t = np.array([1, 2, 2, 3, 4, 3])
print(t)

[1 2 2 3 4 3]


In [None]:
# nonzero() on a boolean mask returns a tuple holding one index array per dimension.
(t == 3).nonzero()

(array([3, 5]),)

In [None]:
# Taking element [0] of that tuple and flattening it gives the matching positions.
(t == 3).nonzero()[0].ravel()

array([3, 5])

In [None]:
# Show the row indices found for query 1291.
i_temp

array([9876, 9877, 9878, 9879, 9880, 9881, 9882, 9883, 9884, 9885, 9886,
       9887, 9888, 9889, 9890, 9891, 9892, 9893, 9894, 9895, 9896, 9897,
       9898, 9899, 9900, 9901, 9902, 9903, 9904, 9905, 9906, 9907, 9908,
       9909, 9910, 9911, 9912, 9913, 9914, 9915, 9916, 9917, 9918, 9919,
       9920, 9921, 9922, 9923, 9924, 9925, 9926, 9927, 9928, 9929, 9930,
       9931, 9932, 9933, 9934, 9935, 9936, 9937, 9938, 9939, 9940, 9941,
       9942, 9943, 9944, 9945, 9946, 9947, 9948, 9949, 9950, 9951, 9952,
       9953, 9954, 9955, 9956, 9957, 9958, 9959, 9960, 9961, 9962, 9963,
       9964, 9965, 9966, 9967, 9968, 9969, 9970, 9971, 9972, 9973, 9974,
       9975, 9976, 9977, 9978, 9979, 9980, 9981, 9982, 9983, 9984, 9985,
       9986, 9987, 9988, 9989, 9990, 9991, 9992, 9993, 9994, 9995, 9996,
       9997, 9998, 9999])

In [None]:
# Raw (unscaled) feature rows of query 1291.
X_train[i_temp]

array([[2., 0., 1., ..., 0., 0., 0.],
       [2., 0., 1., ..., 0., 0., 0.],
       [2., 0., 1., ..., 0., 0., 0.],
       ...,
       [2., 0., 2., ..., 0., 0., 0.],
       [2., 0., 1., ..., 0., 0., 0.],
       [2., 1., 1., ..., 0., 0., 0.]])

In [None]:
# Standardise the feature columns of query 1291 using only that query's rows.
StandardScaler().fit_transform(X_train[i_temp])

array([[ 0.47864437, -0.41208169, -0.06750527, ...,  0.        ,
        -0.15519424, -0.37209505],
       [ 0.47864437, -0.41208169, -0.06750527, ...,  0.        ,
        -0.15519424, -0.37209505],
       [ 0.47864437, -0.41208169, -0.06750527, ...,  0.        ,
        -0.15519424, -0.37209505],
       ...,
       [ 0.47864437, -0.41208169,  1.32760372, ...,  0.        ,
        -0.15519424, -0.37209505],
       [ 0.47864437, -0.41208169, -0.06750527, ...,  0.        ,
        -0.15519424, -0.37209505],
       [ 0.47864437,  1.71700705, -0.06750527, ...,  0.        ,
        -0.15519424, -0.37209505]])