In [2]:
import torch
import numpy as np
from metrics import compute_ideal_dcg, ndcg

In [3]:
# в y_true лежат оценки релевантности
y_true = torch.LongTensor([[5, 3, 2, 5, 1, 1]]).reshape(-1, 1)
y_pred = torch.FloatTensor([3.2, 0.4, -0.1, -2.1, 0.5, 0.01]).reshape(-1, 1)

In [4]:
y_pred

tensor([[ 3.2000],
        [ 0.4000],
        [-0.1000],
        [-2.1000],
        [ 0.5000],
        [ 0.0100]])

In [5]:
%%latex
$$\lambda = \left(0.5 * (1 - S_{ij}) - \frac {1} {1 + e^{s_i - s_j}}\right) |\Delta nDCG|$$

<IPython.core.display.Latex object>

In [6]:
%%latex
$$\Delta nDCG = \frac {1} {IdealDCG} (2^i - 2^j) \left(\frac {1} {log_2(1+i)} - \frac {1} {log_2(1+j)}\right)$$

<IPython.core.display.Latex object>

In [26]:
def compute_lambdas(y_true, y_pred, ndcg_scheme='exp2'):
    # рассчитаем нормировку, IdealDCG
    ideal_dcg = compute_ideal_dcg(y_true, ndcg_scheme=ndcg_scheme)
    N = 1 / ideal_dcg
    
    # рассчитаем порядок документов согласно оценкам релевантности
    _, rank_order = torch.sort(y_true, descending=True, axis=0)
    rank_order += 1
    
    with torch.no_grad():
        # получаем все попарные разницы скоров в батче
        pos_pairs_score_diff = 1.0 + torch.exp((y_pred - y_pred.t()))
        
        # поставим разметку для пар, 1 если первый документ релевантнее
        # -1 если второй документ релевантнее
        Sij = compute_labels_in_batch(y_true)
        # посчитаем изменение gain из-за перестановок
        gain_diff = compute_gain_diff(y_true, ndcg_scheme)
        
        # посчитаем изменение знаменателей-дискаунтеров
        decay_diff = (1.0 / torch.log2(rank_order + 1.0)) - (1.0 / torch.log2(rank_order.t() + 1.0))
        # посчитаем непосредственное изменение nDCG
        delta_ndcg = torch.abs(N * gain_diff * decay_diff)
        # посчитаем лямбды
        lambda_update =  (0.5 * (1 - Sij) - 1 / pos_pairs_score_diff) * delta_ndcg
        lambda_update = torch.sum(lambda_update, dim=1, keepdim=True)
        
        return Sij, gain_diff, decay_diff, delta_ndcg, lambda_update
    
    
def compute_labels_in_batch(y_true):
    # разница релевантностей каждого с каждым объектом
    rel_diff = y_true - y_true.t()
    
    # 1 в этой матрице - объект более релевантен
    pos_pairs = (rel_diff > 0).type(torch.float32)
    
    # 1 тут - объект менее релевантен
    neg_pairs = (rel_diff < 0).type(torch.float32)
    Sij = pos_pairs - neg_pairs
    return Sij

def compute_gain_diff(y_true, gain_scheme):
    if gain_scheme == "exp2":
        gain_diff = torch.pow(2.0, y_true) - torch.pow(2.0, y_true.t())
    elif gain_scheme == "diff":
        gain_diff = y_true - y_true.t()
    else:
        raise ValueError(f"{gain_scheme} method not supported")
    return gain_diff

In [46]:
y_pred - y_pred.t()

tensor([[ 0.0000,  2.8000,  3.3000,  5.3000,  2.7000,  3.1900],
        [-2.8000,  0.0000,  0.5000,  2.5000, -0.1000,  0.3900],
        [-3.3000, -0.5000,  0.0000,  2.0000, -0.6000, -0.1100],
        [-5.3000, -2.5000, -2.0000,  0.0000, -2.6000, -2.1100],
        [-2.7000,  0.1000,  0.6000,  2.6000,  0.0000,  0.4900],
        [-3.1900, -0.3900,  0.1100,  2.1100, -0.4900,  0.0000]])

In [47]:
y_true - y_true.t()

tensor([[ 0,  2,  3,  0,  4,  4],
        [-2,  0,  1, -2,  2,  2],
        [-3, -1,  0, -3,  1,  1],
        [ 0,  2,  3,  0,  4,  4],
        [-4, -2, -1, -4,  0,  0],
        [-4, -2, -1, -4,  0,  0]])

In [18]:
# в y_true лежат оценки релевантности
y_true = torch.LongTensor([[5, 3, 2, 5, 1, 1]]).reshape(-1, 1)
y_pred = torch.FloatTensor([3.2, 0.4, -0.1, -2.1, 0.5, 0.01]).reshape(-1, 1)

In [27]:
Sij, gain_diff, decay_diff, delta_ndcg, lambda_update = compute_lambdas(y_true, y_pred)

In [29]:
y_true

tensor([[5],
        [3],
        [2],
        [5],
        [1],
        [1]])

In [25]:
# N = 1 / (ideal_dcg + 1)
lambda_update

tensor([[-0.0297],
        [ 0.0164],
        [ 0.0318],
        [-0.1138],
        [ 0.0461],
        [ 0.0491]])

In [28]:
# N = 1 / ideal_dcg
lambda_update

tensor([[-0.0300],
        [ 0.0166],
        [ 0.0322],
        [-0.1149],
        [ 0.0466],
        [ 0.0496]])

In [49]:
Sij

tensor([[ 0.,  1.,  1.,  0.,  1.,  1.],
        [-1.,  0.,  1., -1.,  1.,  1.],
        [-1., -1.,  0., -1.,  1.,  1.],
        [ 0.,  1.,  1.,  0.,  1.,  1.],
        [-1., -1., -1., -1.,  0.,  0.],
        [-1., -1., -1., -1.,  0.,  0.]])

In [50]:
gain_diff

tensor([[  0.,  24.,  28.,   0.,  30.,  30.],
        [-24.,   0.,   4., -24.,   6.,   6.],
        [-28.,  -4.,   0., -28.,   2.,   2.],
        [  0.,  24.,  28.,   0.,  30.,  30.],
        [-30.,  -6.,  -2., -30.,   0.,   0.],
        [-30.,  -6.,  -2., -30.,   0.,   0.]])

In [51]:
# пример вычисления элемента gain diff для первого (релевантность 5) и последнего документа (1); 
# для первого (5) и второго (3) документа
(2**5 - 1) - (2**1 -1), (2**5 - 1) - (2**3-1)

(30, 24)

In [52]:
decay_diff

tensor([[ 0.0000,  0.5693,  0.3691,  0.5000,  0.6131,  0.6438],
        [-0.5693,  0.0000, -0.2003, -0.0693,  0.0438,  0.0745],
        [-0.3691,  0.2003,  0.0000,  0.1309,  0.2441,  0.2747],
        [-0.5000,  0.0693, -0.1309,  0.0000,  0.1131,  0.1438],
        [-0.6131, -0.0438, -0.2441, -0.1131,  0.0000,  0.0306],
        [-0.6438, -0.0745, -0.2747, -0.1438, -0.0306,  0.0000]])

In [53]:
# посчитаем изменение знаменателей-дискаунтеров для первого и последнего документа
(1 / np.log2(1+1)) - (1 / np.log2(1+6))

0.6437928128919779

In [54]:
delta_ndcg

tensor([[0.0000, 0.1334, 0.1009, 0.0000, 0.1796, 0.1885],
        [0.1334, 0.0000, 0.0078, 0.0162, 0.0026, 0.0044],
        [0.1009, 0.0078, 0.0000, 0.0358, 0.0048, 0.0054],
        [0.0000, 0.0162, 0.0358, 0.0000, 0.0331, 0.0421],
        [0.1796, 0.0026, 0.0048, 0.0331, 0.0000, 0.0000],
        [0.1885, 0.0044, 0.0054, 0.0421, 0.0000, 0.0000]])

In [44]:
lambda_update

tensor([[-0.0300],
        [ 0.0166],
        [ 0.0322],
        [-0.1149],
        [ 0.0466],
        [ 0.0496]])

In [21]:
lambda_update

tensor([[-0.0297],
        [ 0.0164],
        [ 0.0318],
        [-0.1138],
        [ 0.0461],
        [ 0.0491]])

In [56]:
for _ in range(100):
    _, _, _, _, lambda_update = compute_lambdas(y_true, y_pred)
    y_pred -= lambda_update

In [57]:
rank_indexes = torch.argsort(y_pred, dim=0, descending=True)

In [58]:
y_pred

tensor([[ 4.1108],
        [-0.1161],
        [-0.9574],
        [ 1.5135],
        [-1.1228],
        [-1.5180]])

In [20]:
y_true[rank_indexes]

tensor([[[5]],

        [[5]],

        [[3]],

        [[2]],

        [[1]],

        [[1]]])

In [21]:
# полностью правильное ранжирование
torch.sort(y_true, dim=0, descending=True)[0]

tensor([[5],
        [5],
        [3],
        [2],
        [1],
        [1]])

In [22]:
y_true = torch.LongTensor([[5,3,2,5,1,1]]).reshape(-1,1)

# совсем плохие предсказанные скоры в начале
y_pred = torch.FloatTensor([-3.0, 2.0, 3.0, -4.0, 6.0, 8.5]).reshape(-1,1)

In [23]:
ndcg(y_true, y_pred)

tensor([[1.]])

In [24]:
for _ in range(10):
    _, _, _, _, lambda_update = compute_lambdas(y_true, y_pred)
    y_pred -= lambda_update

In [25]:
ndcg(y_true, y_pred)

tensor([[1.]])

In [26]:
# полностью правильное ранжирование при увеличении количества итераций
y_pred

tensor([[ 2.4742],
        [ 0.9774],
        [ 1.8288],
        [-2.7303],
        [ 3.8481],
        [ 6.1018]])