# Вебинар 1. Введение, примеры задач, бизнес- и ML-метрики 

In [1]:
import pandas as pd
import numpy as np

# ML-mетрики качества

Представим, что в магазине всего 10 товаров

In [2]:
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43] #id товаров
bought_list = [521, 32, 143, 991]

### 1. Hit rate

Hit rate = был ли хотя бы 1 релевантный товар среди рекомендованных

- Иногда применяется, когда продаются достаточно дрогие товары (например, бытовая техника) 

----
Hit rate = (был ли хотя бы 1 релевантный товар среди рекомендованных)   

Hit rate@k = (был ли хотя бы 1 релевантный товар среди топ-k рекомендованных)

In [3]:
def hit_rate(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    
    hit_rate = (flags.sum() > 0) * 1
    
    return hit_rate


def hit_rate_at_k(recommended_list, bought_list, k=5):
    
    return hit_rate(recommended_list[:k], bought_list)

In [4]:
hit_rate(recommended_list, bought_list)

1

In [5]:
hit_rate_at_k(recommended_list, bought_list)

1

### 2. Precision

In [6]:
def precision(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    
    precision = flags.sum() / len(recommended_list)
    
    return precision


def precision_at_k(recommended_list, bought_list, k=5):
       
    return precision(recommended_list[:k], bought_list)


def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
        
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)[:k]
    
    flags = np.isin(recommend_list, bought_list)
    
    precision = np.dot(flags, prices_recommended).sum() / prices_recommended.sum()
    
    return precision

In [7]:
precision(recommended_list, bought_list)

0.2

In [8]:
precision_at_k(recommended_list, bought_list, k=5)

0.4

In [9]:
precision_at_k(recommended_list, bought_list, k=3)

0.3333333333333333

### 3. Recall

In [10]:
def recall(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    
    recall = flags.sum() / len(bought_list)
    
    return recall


def recall_at_k(recommended_list, bought_list, k=5):
    
    return recall(recommended_list[:k], bought_list)


def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)[:k]
    prices_bought = np.array(prices_bought)
    
    flags = np.isin(recommend_list, bought_list)
    
    recall = np.dot(flags, prices_recommended).sum() / prices_bought.sum()
    
    return recall

In [11]:
recall(recommended_list, bought_list)

0.5

In [13]:
recall_at_k(recommended_list, bought_list)

0.5

In [15]:
prices_recommended = [100, 40, 60, 50, 40, 200, 400, 10, 25, 55]
prices_bought = [70, 110, 100, 50]

money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought)

0.45454545454545453

# Метрики ранжирования

## AP@k
AP@k - average precision at k

$$AP@k = \frac{1}{r} \sum{[recommended_{relevant_i}] * precision@k}$$

- r - кол-во релевантный среди рекомендованных
- Суммируем по всем релевантным товарам
- Зависит от порядка реокмендаций

In [26]:
recommended_list = [22, 21, 4, 1, 234, 232432,234234,666] #id товаров
bought_list = [1, 221, 3, 4,]

In [27]:
def ap_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    relevant_indexes = np.nonzero(np.isin(recommended_list, bought_list))[0]
    if len(relevant_indexes) == 0:
        return 0
    
    amount_relevant = len(relevant_indexes)
            
    sum_ = sum([precision_at_k(recommended_list, bought_list, k=index_relevant+1) for index_relevant in relevant_indexes])
    return sum_/amount_relevant

In [28]:
ap_k(recommended_list, bought_list, k=5)

0.41666666666666663

### MAP@k

MAP@k (Mean Average Precision@k)  
Среднее AP@k по всем юзерам
- Показывает средневзвешенную точность рекомендаций

$$MAP@k = \frac{1}{|U|} \sum_u{AP_k}$$
  
|U| - кол-во юзеров

In [29]:
# теперь список из 3 пользователей
recommended_list_3_users = [[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43], 
                    [1134, 533, 14, 4, 15, 1543, 1, 99, 27, 3345],
                    [991, 3345, 27, 533, 43, 143, 1543, 156, 1134, 11]
                    ]

bought_list_3_users = [[521, 32, 143],  # юзер 1
                       [143, 156, 991, 43, 11], # юзер 2
                       [1,2]] # юзер 3

In [30]:
def map_k(recommended_list, bought_list, k=5):
    
    users_count = len(recommended_list_3_users)
    result = 0
    
    for i in range(users_count):
        result += ap_k(recommended_list_3_users[i], bought_list_3_users[i], k=5)
    
    result = result / users_count
    
    return result

In [31]:
map_k(recommended_list, bought_list)

0.3333333333333333

### MRR@k
Mean Reciprocal Rank

- Считаем для первых k рекоммендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [32]:
def reciprocal_rank(recommended_list, bought_list):
    
    rank_list = []
    
    for i in range(len(recommended_list_3_users)):
        relevant_index = np.nonzero(np.isin(recommended_list_3_users[i][:k], bought_list_3_users[i]))[0]
        if relevant_index.size:
            rank = (relevant_index[0] + 1) / len(recommended_list_3_users[i][:k])
            rank_list.append(rank)
        else:
            rank_list.append(0)
       
    return np.mean(rank_list)
#     return result

In [33]:
mean_reciprocal_rank(recommended_list_3_users, bought_list_3_users)

NameError: name 'mean_reciprocal_rank' is not defined