### Домашнее задание:

Реализовать стандартные метрики для рекомендаций:
- hit rate at k
- money_precision_at_k
- recall_at_k
- money_recall_at_k
- map@k

Метрики ранжирования*: (использовать torch)
- Mean Reciprocal Rank
- NDCG@k - для оценок пользователей (1-3) по учебному примеру из лекции (можно не в PyTorch, а в NumPy - в лекции есть пример массивов)

In [1]:
import pandas as pd
import numpy as np
import torch

# hit rate at k

In [2]:
def hit_rate_at_k(recommended_list, bought_list, k=5):
    """Tell whether at least one purchased item appears in the top-k recommendations.

    Args:
        recommended_list: Recommended item ids; the first ``k`` entries are used.
        bought_list: Item ids the user actually bought.
        k: Number of leading recommendations to inspect.

    Returns:
        1 if any purchased item is among the first ``k`` recommendations,
        otherwise 0 (as a NumPy integer).
    """
    top_k = np.array(recommended_list)[:k]
    purchased = np.array(bought_list)

    # One boolean per purchased item: was it recommended within the top-k?
    was_recommended = np.isin(purchased, top_k)

    # Multiplying the NumPy boolean by 1 converts it to an integer 0/1.
    return was_recommended.any() * 1

In [3]:
# Sample data for hit_rate_at_k: ten recommended item ids (the functions treat
# list order as rank via the [:k] slice) and four purchased item ids.
recommended_list = [156, 1134, 143, 991, 27, 1543, 3345, 533, 11, 43]
bought_list = [521, 32, 143, 991]

In [4]:
# Neither of the first two recommendations (156, 1134) was purchased, so this evaluates to 0.
hit_rate_at_k(recommended_list, bought_list, k=2)

0

# money_precision_at_k

In [5]:
def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
    """Return the share of the top-k recommendations' value that was actually bought.

    Computed as (sum of prices of recommended-and-bought items in the top-k)
    divided by (sum of prices of all top-k recommended items).

    Args:
        recommended_list: Recommended item ids; the first ``k`` are used.
        bought_list: Item ids the user actually bought.
        prices_recommended: Prices aligned position-by-position with
            ``recommended_list``.
        k: Number of leading recommendations to evaluate. May exceed the
            number of available recommendations; the lists are then used in full.

    Returns:
        The money-weighted precision as a float in [0, 1] for non-negative
        prices, or 0.0 when the top-k recommendations have zero total price
        (including the empty case).
    """
    bought = np.asarray(bought_list)
    top_k = np.asarray(recommended_list[:k])
    top_k_prices = np.asarray(prices_recommended[:k], dtype=float)

    # Sum the prices that are actually present instead of multiplying by a
    # length-k vector of ones, which fails whenever fewer than k prices exist.
    total_value = top_k_prices.sum()
    if total_value == 0:
        return 0.0

    hits = np.isin(top_k, bought)
    return top_k_prices[hits].sum() / total_value

In [6]:
# Sample data for money_precision_at_k: prices are aligned by position with
# recommended_list; items 12 and 16 appear in both lists.
prices_recommended = [400, 60, 40, 40 , 90]
recommended_list=[12,13,14,15,16]
bought_list=[12, 100, 101, 102, 16]

In [7]:
# Bought-and-recommended value is 400 + 90 = 490 out of 630 recommended in total.
money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5)

0.7777777777777778

# recall_at_k
# money_recall_at_k

In [8]:
def recall_at_k(recommended_list, bought_list, k=5):
    """Return the fraction of purchased items found among the top-k recommendations.

    Args:
        recommended_list: Recommended item ids; the first ``k`` are used.
        bought_list: Item ids the user actually bought.
        k: Number of leading recommendations to evaluate.

    Returns:
        Number of purchased items present in the top-k, divided by the
        number of purchased items.
    """
    purchased = np.array(bought_list)
    top_k = np.array(recommended_list[:k])

    found_count = np.isin(purchased, top_k).sum()
    return found_count / len(purchased)


def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    """Return the share of the user's total spend covered by the top-k recommendations.

    Computed as (sum of prices of recommended-and-bought items in the top-k)
    divided by (sum of prices of all bought items).

    Args:
        recommended_list: Recommended item ids; the first ``k`` are used.
        bought_list: Item ids the user actually bought.
        prices_recommended: Prices aligned position-by-position with
            ``recommended_list``.
        prices_bought: Prices of the purchased items.
        k: Number of leading recommendations to evaluate. It does not need to
            match the number of purchased items.

    Returns:
        The money-weighted recall as a float, or 0.0 when the total spend
        is zero (including an empty purchase list).
    """
    bought = np.asarray(bought_list)
    top_k = np.asarray(recommended_list[:k])
    top_k_prices = np.asarray(prices_recommended[:k], dtype=float)

    # The denominator is the whole purchase value and is independent of k;
    # a dot product with np.ones(k) only works when k == len(prices_bought).
    total_spent = np.asarray(prices_bought, dtype=float).sum()
    if total_spent == 0:
        return 0.0

    hits = np.isin(top_k, bought)
    return top_k_prices[hits].sum() / total_spent

In [9]:
# Sample data for recall_at_k / money_recall_at_k: each price list is aligned
# by position with its id list; items 12 and 16 appear in both id lists.
prices_recommended = [400, 60, 40, 40 , 90]
recommended_list=[12,13,14,15,16]
bought_list=[12, 100, 101, 102, 16]
prices_bought=[400, 25, 150, 50, 90]

In [10]:
# With k=4 only item 12 of the five purchased items is in the top-k: 1 / 5 = 0.2.
recall_at_k(recommended_list, bought_list, k=4)

0.2

In [11]:
# Bought-and-recommended value is 400 + 90 = 490 out of a total spend of 715.
money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5)

0.6853146853146853

# map@k

In [12]:
def precision_at_k(recommended_list, bought_list, k=5):
    """Return the fraction of the top-k recommendations that were purchased.

    Args:
        recommended_list: Recommended item ids; the first ``k`` are used.
        bought_list: Item ids the user actually bought.
        k: Number of leading recommendations to evaluate.

    Returns:
        Number of purchased items present in the top-k, divided by the
        number of recommendations actually taken (at most ``k``).
    """
    top_k = np.array(recommended_list)[:k]
    purchased = np.array(bought_list)

    # Membership is tested from the purchased side: one flag per bought item.
    matched = np.isin(purchased, top_k).sum()
    return matched / len(top_k)


def ap_k(recommended_list, bought_list, k=5):
    """Return the average precision at k for a single user.

    For every position ``i`` (1-based) within the top-k that holds a purchased
    item, precision@i is taken as (number of hits in the first ``i``
    recommendations) / ``i``. The result is the mean of those precisions over
    the hits found in the top-k.

    Args:
        recommended_list: Recommended item ids, best first.
        bought_list: Item ids the user actually bought.
        k: Number of leading recommendations to evaluate. May exceed the
            length of ``recommended_list``; the whole list is then used.

    Returns:
        Average precision as a float, or 0.0 when no purchased item appears
        in the top-k.
    """
    # Restrict to the top-k first so hits beyond position k neither count
    # nor inflate the denominator.
    top_k = np.asarray(recommended_list)[:k]
    hits = np.isin(top_k, np.asarray(bought_list))

    n_hits = hits.sum()
    if n_hits == 0:
        return 0.0

    # Running hit count divided by rank gives precision@i for every position,
    # including the k-th one.
    ranks = np.arange(1, len(top_k) + 1)
    precision_at_rank = np.cumsum(hits) / ranks

    return float(precision_at_rank[hits].sum() / n_hits)

In [13]:
def map_k(recommended_list, bought_list, k=50, u=1):
    """Return the mean of ap_k over the first ``u`` users.

    Args:
        recommended_list: Per-user sequences of recommended item ids.
        bought_list: Per-user sequences of purchased item ids, indexed the
            same way as ``recommended_list``.
        k: Cut-off passed through to ``ap_k``.
        u: Number of users (taken from the start of both sequences) to average over.

    Returns:
        Sum of the per-user average precisions divided by ``u``.
    """
    per_user_ap = (
        ap_k(recommended_list[user], bought_list[user], k)
        for user in np.arange(u)
    )
    return sum(per_user_ap) / u

In [14]:
# Sample data for map_k: one list of recommendations and one list of purchases per user.
recommended_list = [[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43],
                    [146, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43],] # item ids
bought_list = [[521, 32, 143, 991], [146, 29]]

In [15]:
# Average ap_k over both users (u=2) using all ten recommendations (k=10).
map_k(recommended_list, bought_list, k=10, u=2)

0.875

# Mean Reciprocal Rank

In [16]:
def reciprocal_rank(recommended_list, bought_list):
    """Return the mean reciprocal position of the purchased items.

    Each purchased item contributes 1 / (1-based position of its first
    occurrence in ``recommended_list``), or nothing if it was not recommended.
    The contributions are averaged over all purchased items.

    NOTE(review): the classic reciprocal rank uses only the first relevant
    recommendation; this function averages over every purchased item instead.

    Args:
        recommended_list: Recommended item ids, best first.
        bought_list: Item ids the user actually bought.

    Returns:
        Sum of the reciprocal positions divided by ``len(bought_list)``.
    """
    def first_hit_score(item):
        # Reciprocal of the first matching position; 0 when never recommended.
        return next(
            (1 / position
             for position, candidate in enumerate(recommended_list, 1)
             if item == candidate),
            0,
        )

    total = sum(first_hit_score(item) for item in bought_list)
    return total / len(bought_list)

In [17]:
# Sample data for reciprocal_rank: items 143 and 991 appear in both lists.
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43] # item ids
bought_list = [521, 32, 143, 991]

In [18]:
# 143 is at position 1 and 991 at position 4: (1 + 1/4) / 4 purchased items = 0.3125.
reciprocal_rank(recommended_list, bought_list)

0.3125

# NDCG@k

In [19]:
def compute_gain(y_value: float, gain_scheme: str) -> float:
    """Convert a relevance label into its DCG gain.

    Args:
        y_value: Relevance label of a single item.
        gain_scheme: ``"exp2"`` for ``2 ** y_value - 1`` or ``"const"`` to use
            the label unchanged.

    Returns:
        The gain as a Python float.

    Raises:
        ValueError: If ``gain_scheme`` is neither ``"exp2"`` nor ``"const"``.
    """
    if gain_scheme not in ("exp2", "const"):
        raise ValueError(f"{gain_scheme} method not supported, only exp2 and const.")

    raw_gain = y_value if gain_scheme == "const" else 2 ** y_value - 1
    return float(raw_gain)

In [20]:
def dcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str) -> float:
    """Return the discounted cumulative gain of ``ys_true`` ranked by ``ys_pred``.

    Args:
        ys_true: Relevance labels, indexed consistently with ``ys_pred``.
        ys_pred: Predicted scores; items are ranked by descending score.
        gain_scheme: Gain scheme forwarded to ``compute_gain``.

    Returns:
        Sum over ranked items of gain / log2(position + 1), positions
        starting at 1.
    """
    ranking = torch.sort(ys_pred, descending=True, dim=0).indices
    labels_in_rank_order = ys_true[ranking]

    total = 0
    # Enumerating from 2 yields (position + 1) directly, i.e. the log2 argument.
    for log_arg, label in enumerate(labels_in_rank_order, start=2):
        total += compute_gain(label, gain_scheme) / np.log2(log_arg)
    return total

In [21]:
def ndcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str = 'const') -> float:
    """Return the DCG of the predicted ranking normalised by the ideal DCG.

    Args:
        ys_true: Relevance labels.
        ys_pred: Predicted scores used to rank the items.
        gain_scheme: Gain scheme forwarded to ``dcg``; defaults to ``'const'``.

    Returns:
        ``dcg`` of the predicted ordering divided by ``dcg`` of the ordering
        obtained by ranking on the true labels themselves.
    """
    # Ranking by the true labels gives the best achievable DCG.
    best_possible = dcg(ys_true, ys_true, gain_scheme)
    achieved = dcg(ys_true, ys_pred, gain_scheme)
    return achieved / best_possible

In [22]:
# Sample data for ndcg: seven predicted scores (already in descending order)
# and the matching relevance labels, which take values 1-3.
y_pred = torch.tensor([0.7,0.6,0.5,0.45,0.44,0.35,0.2])
y_true = torch.tensor([3,2,1,1,3,1,2])

In [23]:
# Uses the default gain scheme ('const'), i.e. the raw labels as gains.
ndcg(y_true, y_pred)

0.9419493913323128