In [1]:
import pandas as pd
import numpy as np

### 1. Hit rate

In [4]:
#Hit rate = (был ли хотя бы 1 релевантный товар среди рекомендованных)   
def hit_rate(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    return (flags.sum() > 0) * 1

#Hit rate@k = (был ли хотя бы 1 релевантный товар среди топ-k рекомендованных)
def hit_rate_at_k(recommended_list, bought_list, k=5):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    flags = np.isin(bought_list, recommended_list)
    return (flags.sum() > 0) * 1

In [5]:
recommended_list = [1430,156, 1134, 27, 1543, 143, 533, 11, 43] #id товаров
bought_list = [521, 32, 143, 991]

In [6]:
hit_rate(recommended_list, bought_list)

1

In [7]:
hit_rate_at_k(recommended_list, bought_list)

0

### 2. Precision

In [8]:
recommended_list = [1430, 156, 1134, 991, 27, 1543, 143, 533, 11, 43] #id товаров
bought_list = [521, 32, 143, 991]
prices_recommended= [1,2,3,4,5,6,7,8,9,10]
prices_bought=[11,12,7,4]

In [9]:
#Precision= (# of recommended items that are relevant) / (# of recommended items)
def precision(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(recommended_list)

#Precision@k = (# of recommended items @k that are relevant) / (# of recommended i##tems @k)
def precision_at_k(recommended_list, bought_list, k=5):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(recommended_list)


#Money Precision@k = (revenue of recommended items @k that are relevant) / (revenue of recommended items @k)
def money_precision_at_k_(recommended_list, bought_list, prices_recommended, k=5):
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)[:k]
    flags = np.isin(recommend_list, bought_list)
    precision = np.dot(flags, prices_recommended).sum() / prices_recommended.sum()
    return precision

In [10]:
precision(recommended_list, bought_list)

0.2

In [11]:
precision_at_k(recommended_list, bought_list, k=5)

0.2

In [12]:
money_precision_at_k_(recommended_list, bought_list, prices_recommended)

0.26666666666666666

### 3. Recall

In [13]:
#Recall= (# of recommended items that are relevant) / (# of relevant items)  
def recall(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(bought_list)
    
#Recall@k = (# of recommended items @k that are relevant) / (# of relevant items)
def recall_at_k(recommended_list, bought_list, k=5):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(bought_list)


#Money Recall@k = (revenue of recommended items @k that are relevant) / (revenue of relevant items)  
def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)[:k]
    flags = np.isin(recommend_list, bought_list)
    recall = np.dot(flags, prices_recommended).sum() / prices_recommended.sum()
    return recall

In [14]:
recall(recommended_list, bought_list)

0.5

In [15]:
recall_at_k(recommended_list, bought_list)

0.25

In [16]:
money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought)

0.26666666666666666

## AP@k
AP@k - average precision at k

$$AP@k = \frac{1}{r} \sum{[recommended_{relevant_i}] * precision@k}$$

- r - кол-во релевантный среди рекомендованных
- Суммируем по всем релевантным товарам
- Зависит от порядка реокмендаций

In [17]:
recommended_list = [110,210,3,4,234,234,234,666] #id товаров
bought_list = [1,2,3,4,5,6,7,8,9]

In [18]:
def ap_k(recommended_list, bought_list, k=5):    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    relevant_indexes = np.nonzero(np.isin(recommended_list, bought_list))[0]    
    if len(relevant_indexes) == 0:
        return 0    
    amount_relevant = len(relevant_indexes)    
    sum_ = sum([precision_at_k(recommended_list, bought_list, k=index_relevant+1) for index_relevant in relevant_indexes])
    return sum_/amount_relevant

In [19]:
ap_k(recommended_list, bought_list, k=5)

0.41666666666666663

### MAP@k

$$MAP@k = \frac{1}{|U|} \sum_u{AP_k}$$

In [20]:
# теперь список из 3 пользователей
recommended_list_3_users = [[143, 1403, 1134, 991, 27, 1543, 3345, 533, 11, 43], 
                    [1134, 533, 14, 4, 15, 1543, 1, 99, 27, 3345],
                    [991, 3345, 27, 533, 43, 143, 1543, 156, 1134, 11]
                           ]

bought_list_3_users = [[521, 32, 143],  # юзер 1
                       [143, 156, 533, 4, 11], # юзер 2
                       [1,2]] # юзер 3

In [21]:
def map_k(recommended_lists, bought_lists, k=5):
    apk_sum=0
    for i in range(len(recommended_lists)):
        apk=ap_k(recommended_lists[:][i], bought_lists[:][i], k=k)
        apk_sum+=apk
    map_kk=apk_sum/len(recommended_lists)
    map_kk
    return apk_sum/len(recommended_lists)

In [22]:
map_k(recommended_list_3_users, bought_list_3_users)

0.5

### Normalized discounted cumulative gain ( NDCG@k)


$$DCG = \frac{1}{|r|} \sum_u{\frac{[bought fact]}{discount(i)}}$$  
$$NDCG = \frac{DCG}{ideal DCG}$$

In [23]:
#Список документов ранжированных от 0(неважные) до 3(важные)
docs=[3,2,3,0,1,2]

In [24]:
from  math  import  log2
def DCG(y_true, ideal=False, k=6):
    if ideal==True:
        y_true=sorted(y_true, reverse=True)
    ind=0
    logs=[]
    for k in y_true[:k]:
        ind+=1
        logs.append(k/log2(ind+1))
    return sum(logs)

# нормализованный DCG
def ndcg_at_k(y_true, k=6):
    return DCG(docs, k=k)/DCG(docs, ideal=True,k=k)

In [25]:
DCG(docs)

6.861126688593502

In [26]:
DCG(docs, ideal=True)

7.1409951840957

In [27]:
ndcg_at_k(docs)

0.9608081943360617

### Mean Reciprocal Rank ( MRR@k )


- Считаем для первых k рекоммендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [28]:
def reciprocal_rank(recommended_list, bought_list, k=5):
    recommended_list=recommended_list[:k]
    rang=[]
    for i in range(len(recommended_list)):
        indexs=np.nonzero(np.isin(recommended_list[:][i], bought_list[:][i]))[0]
        if len(indexs)>0:
            rang.append(indexs.min()+1)
        else:
            rang.append(0)
    return np.array(rang).mean()
    # сделать дома

In [29]:
reciprocal_rank(recommended_list_3_users, bought_list_3_users, k=5)

1.0