In [58]:
# https://www.microsoft.com/en-us/research/uploads/prod/2016/02/MSR-TR-2010-82.pdf

In [1]:
import torch
import numpy as np
from utils import ndcg, compute_ideal_dcg

In [31]:
# в y_true лежат оценки релевантности
y_true = torch.LongTensor([[5, 3, 2, 5, 1, 1]]).reshape(-1, 1)
y_pred = torch.FloatTensor([3.2, 0.4, -0.1, -2.1, 0.5, 0.01]).reshape(-1, 1)

In [32]:
y_pred

tensor([[ 3.2000],
        [ 0.4000],
        [-0.1000],
        [-2.1000],
        [ 0.5000],
        [ 0.0100]])

$$\lambda = \left(0.5 * (1 - S_{ij}) - \frac {1} {1 + e^{s_i - s_j}}\right) |\Delta nDCG|$$

$$\Delta nDCG = \frac {1} {IdealDCG} (2^i - 2^j) \left(\frac {1} {log_2(1+i)} - \frac {1} {log_2(1+j)}\right)$$

In [35]:
def compute_lambdas(y_true, y_pred, gain_scheme='exp2'):
    # рассчитаем нормировку, IdealDCG
    ideal_dcg = compute_ideal_dcg(y_true, gain_scheme=gain_scheme)
    N = 1 / ideal_dcg
    
    # рассчитаем порядок документов согласно оценкам релевантности
    _, rank_order = torch.sort(y_true, descending=True, axis=0)
    rank_order += 1
    
    with torch.no_grad():
        # получаем все попарные разницы скоров в батче
        pos_pairs_score_diff = 1.0 + torch.exp((y_pred - y_pred.t()))
        
        # поставим разметку для пар, 1 если первый документ релевантнее
        # -1 если второй документ релевантнее
        Sij = compute_labels_in_batch(y_true)
        # посчитаем изменение gain из-за перестановок
        gain_diff = compute_gain_diff(y_true, gain_scheme)
        
        # посчитаем изменение знаменателей-дискаунтеров
        decay_diff = (1.0 / torch.log2(rank_order + 1.0)) - (1.0 / torch.log2(rank_order.t() + 1.0))
        # посчитаем непосредственное изменение nDCG
        delta_ndcg = torch.abs(N * gain_diff * decay_diff)
        # посчитаем лямбды
        lambda_update =  (0.5 * (1 - Sij) - 1 / pos_pairs_score_diff) * delta_ndcg
        lambda_update = torch.sum(lambda_update, dim=1, keepdim=True)
        
        return Sij, gain_diff, decay_diff, delta_ndcg, lambda_update
    
    
def compute_labels_in_batch(y_true):
    
    # разница релевантностей каждого с каждым объектом
    rel_diff = y_true - y_true.t()
    
    # 1 в этой матрице - объект более релевантен
    pos_pairs = (rel_diff > 0).type(torch.float32)
    
    # 1 тут - объект менее релевантен
    neg_pairs = (rel_diff < 0).type(torch.float32)
    Sij = pos_pairs - neg_pairs
    return Sij

def compute_gain_diff(y_true, gain_scheme):
    if gain_scheme == "exp2":
        gain_diff = torch.pow(2.0, y_true) - torch.pow(2.0, y_true.t())
    elif gain_scheme == "const":
        gain_diff = y_true - y_true.t()
    else:
        raise ValueError(f"{gain_scheme} method not supported")
    return gain_diff

In [36]:
# вручную

# считаем идеальный DCG
ideal_dcg = compute_ideal_dcg(y_true, gain_scheme='exp2')
print(f'Ideal DCG:{ideal_dcg}', end='\n\n')

correct_ranks = torch.sort(y_true, descending=True, axis=0)[1]

print('True and predicted:', y_true, y_pred, sep='\n', end='\n\n')
elem = 0

print(f'Correct ranking indexes:\n{correct_ranks}')

s_ij = (y_true < y_true[elem]).type(torch.float32)
print(f'S_ij for elem={elem}:', s_ij, sep='\n', end='\n\n')

s_i_j = y_pred[elem] - y_pred
print(f'Diff preds:\n{s_i_j}', end='\n\n')

gain_diff = torch.pow(2.0, y_true[elem]) - torch.pow(2.0, y_true)
print(f'Diff in gain:\n{gain_diff}', end='\n\n')

decay_diff = (1.0 / torch.log2(correct_ranks[elem]+2)) - (1.0 / torch.log2(correct_ranks+2))
print(f'Decay diff:\n{decay_diff}', end='\n\n')

lambda_i = (0.5 * (1 - s_ij) - 1/(1+torch.exp(s_i_j))) * (1/ideal_dcg) * gain_diff * decay_diff
print(f'Lambda_i for each j:\n{lambda_i}', end='\n\n')

lambda_i = torch.sum(lambda_i)
print(f'Lambda_i:\n{lambda_i}', end='\n\n')

Ideal DCG:56.09391403198242

True and predicted:
tensor([[5],
        [3],
        [2],
        [5],
        [1],
        [1]])
tensor([[ 3.2000],
        [ 0.4000],
        [-0.1000],
        [-2.1000],
        [ 0.5000],
        [ 0.0100]])

Correct ranking indexes:
tensor([[0],
        [3],
        [1],
        [2],
        [4],
        [5]])
S_ij for elem=0:
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]])

Diff preds:
tensor([[0.0000],
        [2.8000],
        [3.3000],
        [5.3000],
        [2.7000],
        [3.1900]])

Diff in gain:
tensor([[ 0.],
        [24.],
        [28.],
        [ 0.],
        [30.],
        [30.]])

Decay diff:
tensor([[0.0000],
        [0.5693],
        [0.3691],
        [0.5000],
        [0.6131],
        [0.6438]])

Lambda_i for each j:
tensor([[ 0.0000],
        [-0.0140],
        [-0.0066],
        [ 0.0000],
        [-0.0207],
        [-0.0136]])

Lambda_i:
-0.05478230118751526



In [37]:
y_pred - y_pred.t()

tensor([[ 0.0000,  2.8000,  3.3000,  5.3000,  2.7000,  3.1900],
        [-2.8000,  0.0000,  0.5000,  2.5000, -0.1000,  0.3900],
        [-3.3000, -0.5000,  0.0000,  2.0000, -0.6000, -0.1100],
        [-5.3000, -2.5000, -2.0000,  0.0000, -2.6000, -2.1100],
        [-2.7000,  0.1000,  0.6000,  2.6000,  0.0000,  0.4900],
        [-3.1900, -0.3900,  0.1100,  2.1100, -0.4900,  0.0000]])

In [38]:
y_true - y_true.t()

tensor([[ 0,  2,  3,  0,  4,  4],
        [-2,  0,  1, -2,  2,  2],
        [-3, -1,  0, -3,  1,  1],
        [ 0,  2,  3,  0,  4,  4],
        [-4, -2, -1, -4,  0,  0],
        [-4, -2, -1, -4,  0,  0]])

In [39]:
Sij, gain_diff, decay_diff, delta_ndcg, lambda_update = compute_lambdas(y_true, y_pred)

In [40]:
lambda_update

tensor([[-0.0548],
        [ 0.0303],
        [ 0.0587],
        [-0.2099],
        [ 0.0851],
        [ 0.0906]])

In [41]:
Sij

tensor([[ 0.,  1.,  1.,  0.,  1.,  1.],
        [-1.,  0.,  1., -1.,  1.,  1.],
        [-1., -1.,  0., -1.,  1.,  1.],
        [ 0.,  1.,  1.,  0.,  1.,  1.],
        [-1., -1., -1., -1.,  0.,  0.],
        [-1., -1., -1., -1.,  0.,  0.]])

In [42]:
gain_diff

tensor([[  0.,  24.,  28.,   0.,  30.,  30.],
        [-24.,   0.,   4., -24.,   6.,   6.],
        [-28.,  -4.,   0., -28.,   2.,   2.],
        [  0.,  24.,  28.,   0.,  30.,  30.],
        [-30.,  -6.,  -2., -30.,   0.,   0.],
        [-30.,  -6.,  -2., -30.,   0.,   0.]])

In [43]:
# пример вычисления элемента gain diff для первого (релевантность 5) и последнего документа (1); 
# для первого (5) и второго (3) документа
(2**5 - 1) - (2**1 -1), (2**5 - 1) - (2**3-1)

(30, 24)

In [44]:
decay_diff

tensor([[ 0.0000,  0.5693,  0.3691,  0.5000,  0.6131,  0.6438],
        [-0.5693,  0.0000, -0.2003, -0.0693,  0.0438,  0.0745],
        [-0.3691,  0.2003,  0.0000,  0.1309,  0.2441,  0.2747],
        [-0.5000,  0.0693, -0.1309,  0.0000,  0.1131,  0.1438],
        [-0.6131, -0.0438, -0.2441, -0.1131,  0.0000,  0.0306],
        [-0.6438, -0.0745, -0.2747, -0.1438, -0.0306,  0.0000]])

In [45]:
# посчитаем изменение знаменателей-дискаунтеров для первого и последнего документа
(1 / np.log2(1+1)) - (1 / np.log2(1+6))

0.6437928128919779

In [46]:
delta_ndcg

tensor([[0.0000, 0.2436, 0.1842, 0.0000, 0.3279, 0.3443],
        [0.2436, 0.0000, 0.0143, 0.0297, 0.0047, 0.0080],
        [0.1842, 0.0143, 0.0000, 0.0654, 0.0087, 0.0098],
        [0.0000, 0.0297, 0.0654, 0.0000, 0.0605, 0.0769],
        [0.3279, 0.0047, 0.0087, 0.0605, 0.0000, 0.0000],
        [0.3443, 0.0080, 0.0098, 0.0769, 0.0000, 0.0000]])

In [47]:
lambda_update

tensor([[-0.0548],
        [ 0.0303],
        [ 0.0587],
        [-0.2099],
        [ 0.0851],
        [ 0.0906]])

In [48]:
for _ in range(100):
    _, _, _, _, lambda_update = compute_lambdas(y_true, y_pred)
    y_pred -= lambda_update

In [49]:
rank_indexes = torch.argsort(y_pred, dim=0, descending=True)

In [50]:
y_pred

tensor([[ 4.3905],
        [-0.1219],
        [-1.0463],
        [ 2.0759],
        [-1.5135],
        [-1.8746]])

In [51]:
y_true[rank_indexes]

tensor([[[5]],

        [[5]],

        [[3]],

        [[2]],

        [[1]],

        [[1]]])

In [52]:
# полностью правильное ранжирование
torch.sort(y_true, dim=0, descending=True)[0]

tensor([[5],
        [5],
        [3],
        [2],
        [1],
        [1]])

In [53]:
y_true = torch.LongTensor([[5,3,2,5,1,1]]).reshape(-1,1)

# совсем плохие предсказанные скоры в начале
y_pred = torch.FloatTensor([-3.0, 2.0, 3.0, -4.0, 6.0, 8.5]).reshape(-1,1)

In [54]:
ndcg(y_true, y_pred)

0.6784099365897729

In [55]:
for _ in range(100):
    _, _, _, _, lambda_update = compute_lambdas(y_true, y_pred)
    y_pred -= lambda_update

In [56]:
ndcg(y_true, y_pred)

1.0

In [57]:
# полностью правильное ранжирование при увеличении количества итераций
y_pred

tensor([[6.2690],
        [1.2374],
        [0.5772],
        [3.6842],
        [0.3482],
        [0.3840]])