In [46]:
import pandas as pd
import numpy as np
from random import randint, seed

In [47]:
# Fix the RNG state so that the generated prices are reproducible
random_state = 42
seed(random_state)

# Ids of the recommended items, in ranking order
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43]

# One random price per recommended item (same order as recommended_list)
recommended_prices = [randint(10, 10000) for _ in recommended_list]

# Ids of the items that were actually bought
bought_list = [521, 32, 143, 991]

# Prices of the bought items: reuse the price of a recommended item when the
# bought item was recommended, otherwise draw a new random price
bought_prices = []
for item in bought_list:
    if item in recommended_list:
        price = recommended_prices[recommended_list.index(item)]
    else:
        price = randint(10, 10000)
    bought_prices.append(price)

In [48]:
# Generated prices, one per item of recommended_list and in the same order
recommended_prices

[1834, 419, 4516, 4022, 3667, 2296, 1689, 8945, 1434, 9684]

In [49]:
# Prices of the bought items: taken from recommended_prices when the item was
# recommended, otherwise randomly generated
bought_prices

[6922, 530, 1834, 4022]

## Задание 1. Реализовать метрики Recall@k и Money Recall@k

*Recall* - доля рекомендованных товаров среди релевантных = Какой % купленных товаров был среди рекомендованных

$$\Large Recall@K(i) = \frac {\sum_{j=1}^{K}\mathbb{1}_{r_{ij}}}{|Rel_i|}$$

$\Large |Rel_i|$ -- количество релевантных товаров для пользователя $i$

$$\Large MoneyRecall@K(i) = \frac {\sum_{j=1}^{K}\mathbb{1}_{r_{ij}}\cdot Price(j)}{\sum_{s\in Rel_i}Price(s)}$$

In [74]:
def indicate_at_k(recommended_list: list, bought_list: list, k=-1):
    """Build the hit mask of the top-k recommendations.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: ids of the items that were bought.
        k: number of leading recommendations to keep; -1 (default) keeps
            the whole list.

    Returns:
        Boolean numpy array with one entry per kept recommendation, True
        where that recommended item is present in ``bought_list``.
    """
    top_items = np.asarray(recommended_list)
    if k != -1:
        # Only the first k recommendations take part in the comparison
        top_items = top_items[:k]

    return np.isin(top_items, np.asarray(bought_list))
    

In [75]:
def recall_at_k(recommended_list: list, bought_list: list, k=-1):
    """Recall@k: hits among the top-k recommendations divided by the number of bought items.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: ids of the items that were bought.
        k: cutoff forwarded to ``indicate_at_k``; -1 (default) uses the whole list.

    Returns:
        Number of top-k recommendations found in ``bought_list`` divided by
        ``len(bought_list)``, or 0 when ``bought_list`` is empty.
    """
    # Nothing was bought: the ratio is undefined, report 0
    if len(bought_list) == 0:
        return 0

    hits = indicate_at_k(recommended_list, bought_list, k=k)
    return hits.sum() / len(bought_list)

In [76]:
def money_recall_at_k(recommended_list: list, bought_list: list, recommended_prices: list, bought_prices: list, k=-1):
    """Money Recall@k: price of the top-k hits divided by the total price of the bought items.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: ids of the items that were bought.
        recommended_prices: prices aligned position-by-position with
            ``recommended_list``.
        bought_prices: prices of the bought items.
        k: cutoff applied to both the recommendations and their prices;
            -1 (default) uses the whole list.

    Returns:
        Sum of the prices of the top-k recommendations that were bought,
        divided by the sum of ``bought_prices``. Returns 0 when nothing was
        bought or when the bought items cost 0 in total.
    """
    if len(bought_list) == 0:
        return 0

    total_spent = np.asarray(bought_prices).sum()
    # The divisor is the money spent, not the number of bought items, so it
    # has to be checked on its own to avoid an inf/nan result
    if total_spent == 0:
        return 0

    rec_prices = np.asarray(recommended_prices) if k == -1 else np.asarray(recommended_prices)[:k]
    indication = indicate_at_k(recommended_list, bought_list, k=k)

    # The boolean mask zeroes out the prices of recommendations that were not bought
    return np.sum(indication * rec_prices) / total_spent
        

Examples

In [77]:
# Hit mask over the whole recommendation list (k defaults to -1, i.e. no cutoff):
# True where the recommended item is also in bought_list
result = indicate_at_k(recommended_list, bought_list)
result

array([ True, False, False,  True, False, False, False, False, False,
       False])

In [78]:
# Same hit mask restricted to the first 5 recommendations
result_5 = indicate_at_k(recommended_list, bought_list, k = 5)
result_5

array([ True, False, False,  True, False])

In [81]:
# Recall over the whole recommendation list
recall_result = recall_at_k(recommended_list, bought_list)
recall_result # 2 of the 4 bought items were recommended => recall = 2 / 4 = 0.5

0.5

In [86]:
# Recall@3
recall_at_3 = recall_at_k(recommended_list, bought_list, k=3)
recall_at_3 # 1 hit in the top 3, 4 items bought => recall = 1 / 4 = 0.25

0.25

In [88]:
# Money recall over the whole recommendation list
money_result = money_recall_at_k(recommended_list, bought_list, recommended_prices=recommended_prices, bought_prices=bought_prices)
money_result # hits cost 1834 + 4022 = 5856, all purchases cost 13308 => 5856 / 13308 ~ 0.44

0.4400360685302074

In [89]:
# Money recall@3
m_recall_at_3 = money_recall_at_k(recommended_list, bought_list, recommended_prices=recommended_prices, bought_prices=bought_prices, k=3)
m_recall_at_3 # the only hit in the top 3 costs 1834, all purchases cost 13308 => 1834 / 13308 ~ 0.138

0.13781184250075143

## Задание 2. Реализовать метрику MRR@k

Mean Reciprocal Rank

- Считаем для первых k рекомендаций
- Найти ранг первого релевантного предсказания $\Large rank_j$
- Посчитать reciprocal rank = $\Large\frac{1}{rank_j}$

$$\Large MRR@k(i)=\frac {1}{\min\limits_{j\in Rel(i)} rank_j}$$

In [109]:
def mrr_at_k(recommended_list, bought_list, k=-1):
    """Reciprocal rank of the first bought item among the top-k recommendations.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: ids of the items that were bought.
        k: cutoff forwarded to ``indicate_at_k``; -1 (default) uses the whole list.

    Returns:
        ``1 / rank`` where ``rank`` is the 1-based position of the first
        recommendation present in ``bought_list``; 0 when there is no such
        recommendation, including when the top-k slice is empty.
    """
    indication = indicate_at_k(recommended_list, bought_list, k=k)

    # Positions of all hits. Unlike np.argmax this does not raise on an empty
    # mask and needs no special case to tell "hit at index 0" from "no hit".
    hit_positions = np.flatnonzero(indication)
    if hit_positions.size == 0:
        return 0

    # Positions are 0-based, ranks are 1-based
    return 1 / (hit_positions[0] + 1)

In [110]:
# The first recommended item (143) is in bought_list, so the first hit has rank 1
test = mrr_at_k(recommended_list, bought_list)
test

1.0

In [111]:
# Purchases whose only recommended items (11 and 43) sit at the end of recommended_list
test_bought_list = [11, 43, 20, 18]
test = mrr_at_k(recommended_list, test_bought_list)
test # first hit is item 11 at 0-based index 8, i.e. rank 9 => reciprocal rank = 1 / 9

0.1111111111111111

In [113]:
# Same purchases, but only the top 5 recommendations are considered
test = mrr_at_k(recommended_list, test_bought_list, k = 5)
test # no bought items in top 5 => 0

0

Strictly speaking, MRR@k is the mean of the reciprocal ranks over all users $i$; the examples here use a single user, so MRR equals the reciprocal rank (RR).

## Задание 3*. Реализовать метрику nDCG@k
Normalized discounted cumulative gain. Эту метрику реализовать будет немного сложнее.

$$\Large DCG@K(i) = \sum_{j=1}^{K}\frac{\mathbb{1}_{r_{ij}}}{\log_2 (j+1)}$$


$\Large \mathbb{1}_{r_{ij}}$ -- индикаторная функция показывает что пользователь $i$ провзаимодействовал с продуктом $j$

Для подсчета $nDCG$ нам необходимо найти максимально возможный $DCG$ для пользователя $i$  и рекомендаций длины $K$.
Максимальный $DCG$ достигается когда мы порекомендовали максимально возможное количество релевантных продуктов и все они в начале списка рекомендаций.

$$\Large IDCG@K(i) = max(DCG@K(i)) = \sum_{j=1}^{K}\frac{\mathbb{1}_{j\le|Rel_i|}}{\log_2 (j+1)}$$

$$\Large nDCG@K(i) = \frac {DCG@K(i)}{IDCG@K(i)}$$

$\Large |Rel_i|$ -- количество релевантных продуктов для пользователя $i$



In [128]:
def discount(j):
    """Return the DCG position discount ``1 / log2(j + 1)``.

    Args:
        j: 1-based rank of the item. ``j = 0`` makes the denominator
            ``log2(1) = 0``, so ranks are expected to start at 1.

    Returns:
        The discount for rank ``j``; 1.0 for the first position.
    """
    return 1/np.log2(j+1)


# Element-wise version of ``discount``. ``otypes`` is set explicitly because
# np.vectorize otherwise infers the output type by calling the function on the
# first element, which makes it raise ValueError on empty inputs.
vec_disc = np.vectorize(discount, otypes=[float])

In [144]:
def nDCG_at_k(recommended_list, bought_list, k = -1):
    """Normalized discounted cumulative gain of the top-k recommendations.

    DCG sums ``1 / log2(rank + 1)`` over the top-k recommendations that were
    bought. IDCG is the same sum for an ideal ranking, in which the first
    ``min(K, len(bought_list))`` positions are all hits.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: ids of the items that were bought.
        k: cutoff K; -1 (default) uses the whole recommendation list, in
            which case K is the length of that list.

    Returns:
        ``DCG / IDCG``; 0.0 when nothing was bought, when ``k`` is 0 or when
        there are no recommendations to score.
    """
    n_relevant = len(bought_list)
    # With no relevant items or an empty cutoff IDCG is 0: report 0 instead
    # of dividing by zero
    if n_relevant == 0 or k == 0:
        return 0.0

    indication = indicate_at_k(recommended_list, bought_list, k=k)
    n_ranked = indication.shape[0]
    if n_ranked == 0:
        return 0.0

    # Without an explicit non-negative cutoff, K is the number of scored recommendations
    cutoff = n_ranked if k is None or k < 0 else k
    # An ideal list cannot contain more hits than K or than the number of bought items
    ideal_len = min(cutoff, n_relevant)

    dcg = (indication * vec_disc(range(1, n_ranked + 1))).sum()
    idcg = vec_disc(range(1, ideal_len + 1)).sum()

    return dcg / idcg

In [145]:
# nDCG@5: hits at ranks 1 and 4; the ideal ranking places all 4 bought items first
test = nDCG_at_k(recommended_list, bought_list, k=5)
test

0.5585075862632192

In [146]:
# nDCG@3: a single hit at rank 1; the ideal ranking fills all 3 positions with bought items
test = nDCG_at_k(recommended_list, bought_list, k = 3)
test

0.46927872602275644