In [None]:
!pip install rectools[visuals]==0.12.0

# Метрики

## Imports

In [None]:
import pandas as pd
import numpy as np
import numba as nb
import requests

from tqdm.auto import tqdm
from rectools import Columns


In [None]:
np.random.seed(23)


## Get KION dataset

In [None]:
url = 'https://github.com/irsafilo/KION_DATASET/raw/f69775be31fa5779907cf0a92ddedb70037fb5ae/data_original.zip'


In [None]:
# Stream the archive to disk in 1 MiB chunks.
# The response, the output file and the progress bar are all context-managed,
# so they are released even if the download fails midway.
with requests.get(url, stream=True, timeout=60) as req:
    # Fail loudly on HTTP errors instead of saving an error page as kion.zip.
    req.raise_for_status()
    # Content-Length may be absent; fall back to 0 as the progress bar total.
    total_size_in_bytes = int(req.headers.get('Content-Length', 0))
    with open('kion.zip', 'wb') as fd, tqdm(
        desc='kion dataset download',
        total=total_size_in_bytes,
        unit='iB',
        unit_scale=True,
    ) as progress_bar:
        for chunk in req.iter_content(chunk_size=2 ** 20):
            fd.write(chunk)
            progress_bar.update(len(chunk))


kion dataset download:   0%|          | 0.00/78.8M [00:00<?, ?iB/s]

In [None]:
import zipfile as zf

# Unpack the downloaded archive into the current working directory.
# The context manager closes the archive even if extraction raises.
with zf.ZipFile('kion.zip', 'r') as files:
    files.extractall()


## Read data

In [None]:
# Load the raw interactions and map the dataset-specific column names to the
# RecTools ones. Chained (no inplace=True), so the cell is safe to re-run.
interactions = (
    pd.read_csv('data_original/interactions.csv', parse_dates=['last_watch_dt'])
    .rename(
        columns={
            'last_watch_dt': Columns.Datetime,
            'total_dur': Columns.Weight,
        }
    )
)


In [None]:
def headtail(df, n=5):
    """Return the first and last ``n`` rows of ``df`` as a single frame.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to preview.
    n : int, default 5
        Number of rows to take from each end.

    Returns
    -------
    pd.DataFrame
        ``df.head(n)`` followed by ``df.tail(n)``. When the frame has at most
        ``2 * n`` rows the two slices would overlap, so a copy of the whole
        frame is returned instead of showing duplicated rows.
    """
    if len(df) <= 2 * n:
        return df.copy()
    return pd.concat([df.head(n), df.tail(n)])

headtail(interactions)


Unnamed: 0,user_id,item_id,datetime,weight,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0
5476246,648596,12225,2021-08-13,76,0.0
5476247,546862,9673,2021-04-13,2308,49.0
5476248,697262,15297,2021-08-20,18307,63.0
5476249,384202,16197,2021-04-19,6203,100.0
5476250,319709,4436,2021-08-15,3921,45.0


In [None]:
interactions.info(memory_usage='deep')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5476251 entries, 0 to 5476250
Data columns (total 5 columns):
 #   Column       Dtype         
---  ------       -----         
 0   user_id      int64         
 1   item_id      int64         
 2   datetime     datetime64[ns]
 3   weight       int64         
 4   watched_pct  float64       
dtypes: datetime64[ns](1), float64(1), int64(3)
memory usage: 208.9 MB


Выделим небольшой кусок из данных, чтобы не слишком страдать. Представим, что это наши тестовые интеракции

In [None]:
sample_users = [57607, 403227, 70720]
df = interactions[interactions[Columns.User].isin(sample_users)].sort_values("user_id").reset_index(drop=True)
del df[Columns.Datetime], df[Columns.Weight], df['watched_pct']
df


Unnamed: 0,user_id,item_id
0,57607,4151
1,57607,10440
2,57607,13865
3,70720,4880
4,70720,4881
5,70720,6327
6,403227,6353
7,403227,1736
8,403227,5336
9,403227,181


In [None]:
print('Users', df[Columns.User].unique())
sample_items = df[Columns.Item].unique()
print('Items', sample_items)


Users [ 57607  70720 403227]
Items [ 4151 10440 13865  4880  4881  6327  6353  1736  5336   181]


## Regression

С регрессией всё относительно просто. Для пары (user, item) мы знаем таргет (чаще всего это рейтинг) и для той же пары предсказываем его

In [None]:
df['target'] = np.random.choice([3, 4, 5], df.shape[0])
df['predict'] = np.random.rand(df.shape[0]) * 3 + 2
df


Unnamed: 0,user_id,item_id,target,predict
0,57607,4151,5,2.658958
1,57607,10440,3,4.500017
2,57607,13865,4,3.280296
3,70720,4880,5,4.263719
4,70720,4881,3,4.990597
5,70720,6327,4,2.000249
6,403227,6353,3,4.927865
7,403227,1736,5,3.257364
8,403227,5336,4,4.044672
9,403227,181,5,4.257889


Общая оценка

In [None]:
mae = (df['target'] - df['predict']).abs().mean()
print(mae)


1.374467645324227


Оценка по пользователю с последующим усреднением

In [None]:
df['diff'] = (df['target'] - df['predict']).abs()
average_mae = df.groupby(Columns.User)['diff'].mean()
print(average_mae.mean())
average_mae


1.4033728201405007


Unnamed: 0_level_0,diff
user_id,Unnamed: 1_level_1
57607,1.520254
70720,1.575543
403227,1.114321


Видно, что в данном случае метрики близки друг к другу, но это не всегда так

In [None]:
del df['target'], df['predict'], df['diff']


## Classification

Сгенерируем случайные рекомендации.

In [None]:
top_k = 5
recs = np.array([
    np.random.choice(sample_items, top_k, replace=False),
    np.random.choice(sample_items, top_k, replace=False),
    np.random.choice(sample_items, top_k, replace=False),
])
recs


array([[ 4151,  6353, 13865,  6327,  4880],
       [  181,  6327,  4151,  1736,  4881],
       [10440,  4880,  4881, 13865,  6327]])

Преобразуем в длинный датафрейм и проставим ранг

In [None]:
df_recs = pd.DataFrame({
    Columns.User: np.repeat(sample_users, top_k),
    Columns.Item: recs.ravel()
})
df_recs[Columns.Rank] = df_recs.groupby(Columns.User).cumcount() + 1
df_recs


Unnamed: 0,user_id,item_id,rank
0,57607,4151,1
1,57607,6353,2
2,57607,13865,3
3,57607,6327,4
4,57607,4880,5
5,403227,181,1
6,403227,6327,2
7,403227,4151,3
8,403227,1736,4
9,403227,4881,5


Ключевой момент. Именно ради него преобразовывали данные и именно это позволяет считать метрики быстрее.

Мы получили датафрейм с тестовыми интеракциями и рангом, предсказанным моделью.

In [None]:
df_recs = df.merge(df_recs, how='left', left_on=Columns.UserItem, right_on=Columns.UserItem)
df_recs = df_recs.sort_values(by=[Columns.User, Columns.Rank])
df_recs


Unnamed: 0,user_id,item_id,rank
0,57607,4151,1.0
2,57607,13865,3.0
1,57607,10440,
3,70720,4880,2.0
4,70720,4881,3.0
5,70720,6327,5.0
9,403227,181,1.0
7,403227,1736,4.0
6,403227,6353,
8,403227,5336,


### Precision@K

In [None]:
# A test interaction is a hit when the model ranked it within the top-k list;
# rows without a recommendation have NaN rank and compare as False.
df_recs['TP@5'] = df_recs['rank'] <= top_k
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5
0,57607,4151,1.0,True
2,57607,13865,3.0,True
1,57607,10440,,False
3,70720,4880,2.0,True
4,70720,4881,3.0,True
5,70720,6327,5.0,True
9,403227,181,1.0,True
7,403227,1736,4.0,True
6,403227,6353,,False
8,403227,5336,,False


In [None]:
df_recs[df_recs[Columns.Rank].notnull()]


Unnamed: 0,user_id,item_id,rank,TP@5
0,57607,4151,1.0,True
2,57607,13865,3.0,True
3,70720,4880,2.0,True
4,70720,4881,3.0,True
5,70720,6327,5.0,True
9,403227,181,1.0,True
7,403227,1736,4.0,True


Посчитаем вручную Precision@5 (усредняем по юзерам): (2/5 + 3/5 + 2/5) / 3

In [None]:
(2/5 + 3/5 + 2/5) / 3


0.4666666666666666

Посчитаем через groupby

In [None]:
df_recs['TP@5/5'] = df_recs['TP@5'] / top_k

p5 = df_recs.groupby(Columns.User)['TP@5/5'].sum().mean()

print(f'Precision@5 = {p5}')


Precision@5 = 0.4666666666666666


In [None]:
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5
0,57607,4151,1.0,True,0.2
2,57607,13865,3.0,True,0.2
1,57607,10440,,False,0.0
3,70720,4880,2.0,True,0.2
4,70720,4881,3.0,True,0.2
5,70720,6327,5.0,True,0.2
9,403227,181,1.0,True,0.2
7,403227,1736,4.0,True,0.2
6,403227,6353,,False,0.0
8,403227,5336,,False,0.0


Используем тот факт, что мы знаем количество пользователей, а значит, groupby не нужен

In [None]:
p5 = df_recs['TP@5/5'].sum() / len(sample_users)
print(f'Precision@5 = {p5}')


Precision@5 = 0.46666666666666673


### Recall@K

In [None]:
df_recs['actual'] = df_recs.groupby(Columns.User)[Columns.Item].transform('count')
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual
0,57607,4151,1.0,True,0.2,3
2,57607,13865,3.0,True,0.2,3
1,57607,10440,,False,0.0,3
3,70720,4880,2.0,True,0.2,3
4,70720,4881,3.0,True,0.2,3
5,70720,6327,5.0,True,0.2,3
9,403227,181,1.0,True,0.2,4
7,403227,1736,4.0,True,0.2,4
6,403227,6353,,False,0.0,4
8,403227,5336,,False,0.0,4


In [None]:
df_recs['TP@5/actual'] = df_recs['TP@5'] / df_recs['actual']
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual
0,57607,4151,1.0,True,0.2,3,0.333333
2,57607,13865,3.0,True,0.2,3,0.333333
1,57607,10440,,False,0.0,3,0.0
3,70720,4880,2.0,True,0.2,3,0.333333
4,70720,4881,3.0,True,0.2,3,0.333333
5,70720,6327,5.0,True,0.2,3,0.333333
9,403227,181,1.0,True,0.2,4,0.25
7,403227,1736,4.0,True,0.2,4,0.25
6,403227,6353,,False,0.0,4,0.0
8,403227,5336,,False,0.0,4,0.0


In [None]:
(2/3 + 3/3 + 2/4) / 3


0.7222222222222222

In [None]:
r5 = df_recs.groupby(Columns.User)['TP@5/actual'].sum().mean()
print(f'Recall@5 = {r5}')


Recall@5 = 0.7222222222222222


In [None]:
r5 = df_recs['TP@5/actual'].sum() / len(sample_users)
print(f'Recall@5 = {r5}')


Recall@5 = 0.7222222222222222


## Ranking: MAP@K


$MAP@k = \frac{1}{|U|}\sum_{u \in U}AP_u@k$

$AP_u@k = \frac{1}{|R_u|}(\sum_{i=1}^{k} Precision@i * rel_u(i))$

In [None]:
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual
0,57607,4151,1.0,True,0.2,3,0.333333
2,57607,13865,3.0,True,0.2,3,0.333333
1,57607,10440,,False,0.0,3,0.0
3,70720,4880,2.0,True,0.2,3,0.333333
4,70720,4881,3.0,True,0.2,3,0.333333
5,70720,6327,5.0,True,0.2,3,0.333333
9,403227,181,1.0,True,0.2,4,0.25
7,403227,1736,4.0,True,0.2,4,0.25
6,403227,6353,,False,0.0,4,0.0
8,403227,5336,,False,0.0,4,0.0


In [None]:
df_recs['cumTP@5'] = df_recs.groupby(Columns.User)['TP@5'].cumsum()
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual,cumTP@5
0,57607,4151,1.0,True,0.2,3,0.333333,1
2,57607,13865,3.0,True,0.2,3,0.333333,2
1,57607,10440,,False,0.0,3,0.0,2
3,70720,4880,2.0,True,0.2,3,0.333333,1
4,70720,4881,3.0,True,0.2,3,0.333333,2
5,70720,6327,5.0,True,0.2,3,0.333333,3
9,403227,181,1.0,True,0.2,4,0.25,1
7,403227,1736,4.0,True,0.2,4,0.25,2
6,403227,6353,,False,0.0,4,0.0,2
8,403227,5336,,False,0.0,4,0.0,2


$AP_u@k = \frac{1}{|R_u|}(\sum_{i=1}^{k} Precision@i * rel_u(i))$

In [None]:
df_recs['Prec@i'] = df_recs['cumTP@5'] / df_recs[Columns.Rank]
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual,cumTP@5,Prec@i
0,57607,4151,1.0,True,0.2,3,0.333333,1,1.0
2,57607,13865,3.0,True,0.2,3,0.333333,2,0.666667
1,57607,10440,,False,0.0,3,0.0,2,
3,70720,4880,2.0,True,0.2,3,0.333333,1,0.5
4,70720,4881,3.0,True,0.2,3,0.333333,2,0.666667
5,70720,6327,5.0,True,0.2,3,0.333333,3,0.6
9,403227,181,1.0,True,0.2,4,0.25,1,1.0
7,403227,1736,4.0,True,0.2,4,0.25,2,0.5
6,403227,6353,,False,0.0,4,0.0,2,
8,403227,5336,,False,0.0,4,0.0,2,


$AP_u@k = \sum_{i=1}^{k} \frac{Precision@i * rel_u(i)}{|R_u|}$

In [None]:
df_recs['Prec@i/actual'] = df_recs['Prec@i'] / df_recs['actual']
df_recs


Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual,cumTP@5,Prec@i,Prec@i/actual
0,57607,4151,1.0,True,0.2,3,0.333333,1,1.0,0.333333
2,57607,13865,3.0,True,0.2,3,0.333333,2,0.666667,0.222222
1,57607,10440,,False,0.0,3,0.0,2,,
3,70720,4880,2.0,True,0.2,3,0.333333,1,0.5,0.166667
4,70720,4881,3.0,True,0.2,3,0.333333,2,0.666667,0.222222
5,70720,6327,5.0,True,0.2,3,0.333333,3,0.6,0.2
9,403227,181,1.0,True,0.2,4,0.25,1,1.0,0.25
7,403227,1736,4.0,True,0.2,4,0.25,2,0.5,0.125
6,403227,6353,,False,0.0,4,0.0,2,,
8,403227,5336,,False,0.0,4,0.0,2,,


In [None]:
ap = df_recs.groupby(Columns.User)['Prec@i/actual'].sum()
print(f"Mean Average Precision: {ap.mean()}")
ap


Mean Average Precision: 0.5064814814814814


Unnamed: 0_level_0,Prec@i/actual
user_id,Unnamed: 1_level_1
57607,0.555556
70720,0.588889
403227,0.375


## Naive vs Numba vs Pandas

In [None]:
df = interactions[Columns.UserItem]
df


Unnamed: 0,user_id,item_id
0,176549,9506
1,699317,1659
2,656683,7107
3,864613,7638
4,964868,9506
...,...,...
5476246,648596,12225
5476247,546862,9673
5476248,697262,15297
5476249,384202,16197


In [None]:
target = df.values
target


array([[176549,   9506],
       [699317,   1659],
       [656683,   7107],
       ...,
       [697262,  15297],
       [384202,  16197],
       [319709,   4436]])

In [None]:
target[target[:, 0] == 176549][:, 1]  # таргет для 1 случайного юзера


array([ 9506, 15469,  9164, 12250,  5695, 11345,  7582, 10881,  5051,
        7453,  3258,  7626, 13243, 10761,  5600,  5533, 16197, 12302,
        6626,  9352, 10605,  1343,  8972,  6649, 11919,  1132,   899,
        5087, 14684,  4387,  4756, 15096,  1659,  1641, 10954,  2358,
       13018,  1105, 13424, 10732,  4360, 14689,  8211, 12324,   349,
       11654, 14006,  2956,  8437, 12770,  2722,   149, 10688, 14217,
        8283,  7000,  3182, 12104,  5171,  5411, 15915,  5956,  3834,
       11494,  6870, 15719,  2505, 15464, 14317, 11689, 10544, 15531,
       12448,  9728,   101,  7102, 11539, 16166,  4718,  4273,  7401,
       14470])

In [None]:
recs


array([[ 4151,  6353, 13865,  6327,  4880],
       [  181,  6327,  4151,  1736,  4881],
       [10440,  4880,  4881, 13865,  6327]])

In [None]:
def precision_naive(target, users, recs, k):
    """Compute mean Precision@k with plain Python loops.

    Parameters
    ----------
    target : np.ndarray
        2-D array of interactions; column 0 is read as the user id and
        column 1 as the item id.
    users : sequence
        User ids; ``recs[i]`` holds the recommendations for ``users[i]``.
    recs : np.ndarray
        2-D array of recommended item ids, one row per user.
    k : int
        Cut-off. Only the first ``k`` recommendations of each row are scored.

    Returns
    -------
    float
        Per-user Precision@k averaged over ``users``.
    """
    precision = []
    for i, user in enumerate(users):
        user_target = target[target[:, 0] == user][:, 1]
        # Score only the top-k items: counting extra columns while dividing
        # by k would let the metric exceed 1.
        hits = sum(1 for rec in recs[i][:k] if rec in user_target)
        precision.append(hits / k)
    return sum(precision) / len(users)


In [None]:
precision_naive(target, sample_users, recs, 5)


0.4666666666666666

In [None]:
@nb.njit(cache=True, parallel=True)
def precision_numba(target, users, recs, k):
    """Compute mean Precision@k with a Numba-compiled loop over users.

    Parameters
    ----------
    target : np.ndarray
        2-D array of interactions; column 0 is read as the user id and
        column 1 as the item id.
    users : np.ndarray
        User ids; ``recs[i]`` holds the recommendations for ``users[i]``.
    recs : np.ndarray
        2-D array of recommended item ids, one row per user.
    k : int
        Cut-off. Only the first ``k`` recommendations of each row are scored.

    Returns
    -------
    float
        Per-user Precision@k averaged over ``users``.
    """
    precision = np.zeros(len(users))
    # Each iteration writes only its own slot of `precision`.
    for i in nb.prange(len(users)):
        user = users[i]
        hits = 0
        user_target = target[target[:, 0] == user][:, 1]
        # Score only the top-k items so the result matches Precision@k even
        # when `recs` has more than k columns.
        for rec in recs[i][:k]:
            if rec in user_target:
                hits += 1
        precision[i] = hits / k
    return precision.mean()


In [None]:
precision_numba(target, np.array(sample_users), recs, 5)


0.4666666666666666

In [None]:
def precision_pandas(df, users, recs, k):
    """Compute mean Precision@k with a single pandas merge.

    Parameters
    ----------
    df : pd.DataFrame
        Test interactions containing the ``Columns.UserItem`` columns.
    users : sequence
        User ids; ``recs[i]`` holds the recommendations for ``users[i]``.
    recs : np.ndarray
        2-D array of recommended item ids, one row per user.
    k : int
        Cut-off. Only the first ``k`` recommendations of each row are scored.

    Returns
    -------
    float
        Total number of hits divided by ``k`` and by the number of users.
    """
    # Keep only the top-k columns so the user and item columns always have
    # matching lengths, whatever the width of `recs`.
    top_recs = recs[:, :k]
    df_recs = pd.DataFrame({
        Columns.User: np.repeat(users, top_recs.shape[1]),
        Columns.Item: top_recs.ravel()
    })
    df_recs[Columns.Rank] = df_recs.groupby(Columns.User).cumcount() + 1
    # Left merge keeps one row per test interaction; interactions that were
    # not recommended get NaN rank.
    # NOTE(review): a (user, item) pair repeated in `df` is counted once per
    # repetition - confirm the test interactions are de-duplicated.
    df_recs = df.merge(df_recs, how='left', on=Columns.UserItem)
    tp_k = f'TP@{k}'
    df_recs[tp_k] = df_recs[Columns.Rank] <= k  # NaN rank compares as False
    p = df_recs[tp_k].sum() / k / len(users)
    return p


In [None]:
precision_pandas(df, sample_users, recs, 5)


0.4666666666666666

Посмотрим через `timeit`

In [None]:
%timeit precision_naive(target, sample_users, recs, 5)


52.6 ms ± 276 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%timeit precision_numba(target, sample_users, recs, 5)


28.6 ms ± 388 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%timeit precision_pandas(df, sample_users, recs, 5)


2.58 s ± 268 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Посчитаем на больших данных

In [None]:
def generate_subsample(users_count, top_k):
    """Draw a random subset of users with their interactions and random recs.

    Reads the notebook-level ``interactions`` frame.

    Parameters
    ----------
    users_count : int
        Number of distinct users to sample (without replacement).
    top_k : int
        Number of recommended items generated per user.

    Returns
    -------
    tuple
        ``(df, users, recs)``: the sampled users' interactions without the
        datetime, weight and ``watched_pct`` columns, the sampled user ids,
        and a ``(users_count, top_k)`` array of item ids.
    """
    all_users = interactions[Columns.User].unique()
    users = np.random.choice(all_users, users_count, replace=False)

    user_mask = interactions[Columns.User].isin(users)
    df = (
        interactions[user_mask]
        .reset_index(drop=True)
        .drop(columns=[Columns.Datetime, Columns.Weight, 'watched_pct'])
    )

    # Items are drawn from the interaction rows with replacement, so one
    # user's list may contain the same item more than once.
    recs = np.random.choice(df[Columns.Item], size=(users_count, top_k))
    return df, users, recs


In [None]:
top_k = 10
df, users, recs = generate_subsample(10000, top_k)
target = df.values


In [None]:
%timeit precision_naive(target, users, recs, top_k)


2.13 s ± 268 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit precision_numba(target, users, recs, top_k) # ~2.3x faster than precision_naive in the recorded run (2.13 s -> 943 ms)


943 ms ± 188 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit precision_pandas(df, users, recs, top_k) # ~55x faster than precision_naive in the recorded run (2.13 s -> 39 ms)


39 ms ± 1.53 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# RecTools: метрики

Рассмотрим, как использовать библиотеку от МТС для подсчета метрик.

Полный гайд тут - [RecTools/examples/3_metrics.ipynb](https://github.com/MobileTeleSystems/RecTools/blob/main/examples/3_metrics.ipynb)

Вначале вспомним, какие данные есть у нас

In [None]:
df.shape, users.shape, recs.shape


((56805, 2), (10000,), (10000, 10))

In [None]:
from rectools.metrics import Precision, Recall, MAP, calc_metrics
metrics = {
    "prec@1": Precision(k=1),
    "prec@10": Precision(k=10),
    "recall@10": Recall(k=10),
    "MAP@5": MAP(k=5),
    "MAP@10": MAP(k=10),
}


In [None]:
df_recs = pd.DataFrame({
    Columns.User: np.repeat(users, top_k),
    Columns.Item: recs.ravel()
})
df_recs[Columns.Rank] = df_recs.groupby(Columns.User).cumcount() + 1

df_recs

Unnamed: 0,user_id,item_id,rank
0,628072,12849,1
1,628072,9728,2
2,628072,10878,3
3,628072,15373,4
4,628072,9419,5
...,...,...,...
99995,28484,12756,6
99996,28484,4740,7
99997,28484,4475,8
99998,28484,7793,9


In [None]:
metric_values = calc_metrics(
    metrics,
    reco=df_recs,  # actual reco
    interactions=df,  # test interactions
)
metric_values


{'prec@1': 0.0289,
 'prec@10': 0.03048,
 'recall@10': 0.08462718883430417,
 'MAP@5': 0.01938148253052879,
 'MAP@10': 0.026219275497237186}

Как посчитать одну метрику

In [None]:
Precision(10).calc(df_recs, df)

In [None]:
metrics['prec@10'].calc(df_recs, df)


0.03048

In [None]:
%timeit metrics['prec@10'].calc(df_recs, df)


47.9 ms ± 7.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
metrics['prec@10'].calc_per_user(df_recs, df)


Unnamed: 0_level_0,0
user_id,Unnamed: 1_level_1
24,0.0
263,0.0
477,0.0
509,0.1
522,0.0
...,...
1097062,0.0
1097151,0.1
1097227,0.0
1097343,0.0


# RecTools: валидация "из коробки"

In [None]:
from rectools.models import PopularModel, RandomModel
from rectools.model_selection import cross_validate, TimeRangeSplitter
from rectools.dataset import Dataset
from rectools.metrics import Intersection, MeanInvUserFreq, Serendipity, CatalogCoverage, NDCG, Precision, AvgRecPopularity, CoveredUsers
from rectools.visuals import MetricsApp

In [None]:
dataset = Dataset.construct(interactions)

models = {
    "popular_all": PopularModel(),
    "popular_30": PopularModel.from_params({"period.days": 30}),
    "random": RandomModel(random_state=32),
}

splitter = TimeRangeSplitter(
    test_size="7D",
    n_splits=3,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
    )

K = 10
metrics = {
    "precision": Precision(K), # классификация
    "ndcg": NDCG(K, divide_by_achievable=True),  # ранжирование
    "miuf": MeanInvUserFreq(K),  # новизна
    "arp": AvgRecPopularity(K),  # popularity bias
    "serendipity": Serendipity(K),  # WOW effect
    "catalog_coverage": CatalogCoverage(K),  # AggregateDiversity
    "covered_users": CoveredUsers(K),  # Data Quality
    "intersections": Intersection(K),  # Пересечение рекомендаций
}

In [None]:
res = cross_validate(dataset, splitter, metrics, models, K, filter_viewed=True, ref_models=["popular_all"])

In [None]:
pd.DataFrame(res["splits"])

Unnamed: 0,i_split,start,end,train,train_users,train_items,test,test_users,test_items
0,0,2021-08-02,2021-08-09,4266013,797423,15237,263681,98184,6602
1,1,2021-08-09,2021-08-16,4649162,850489,15415,279422,103511,6698
2,2,2021-08-16,2021-08-23,5051815,906071,15577,298878,110076,6679


In [None]:
metrics_df = pd.DataFrame(res["metrics"])
metrics_df

Unnamed: 0,model,i_split,precision,ndcg,miuf,catalog_coverage,arp,serendipity,intersections_popular_all,covered_users
0,popular_30,0,0.040093,0.138811,4.021955,31,66654.115971,1.2e-05,0.71485,1.0
1,random,0,0.000158,0.000364,15.594118,15237,261.624694,5e-06,0.000608,1.0
2,popular_30,1,0.036951,0.12836,4.096282,34,70177.410793,1.2e-05,0.650747,1.0
3,random,1,0.000195,0.000407,15.609541,15415,284.238802,6e-06,0.000616,1.0
4,popular_30,2,0.03621,0.124493,4.15208,36,73925.58424,1e-05,0.604543,1.0
5,random,2,0.000167,0.000345,15.633068,15577,307.026866,6e-06,0.000639,1.0


In [None]:
metrics_df.drop(columns=["i_split"]).groupby("model").mean()

Unnamed: 0_level_0,precision,ndcg,miuf,catalog_coverage,arp,serendipity,intersections_popular_all,covered_users
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
popular_30,0.037751,0.130555,4.090106,33.666667,70252.370335,1.1e-05,0.656713,1.0
random,0.000173,0.000372,15.612242,15409.666667,284.296787,6e-06,0.000621,1.0


In [None]:
app = MetricsApp.construct(metrics_df)

VBox(children=(Tab(children=(VBox(children=(HBox(children=(Checkbox(value=True, description='Average folds'), …

In [None]:
app.fig