# Open files

In [None]:
import os
import numpy as np 
import pandas as pd 
import scipy.sparse as sp
from itertools import islice, cycle
from more_itertools import pairwise
from tqdm.auto import tqdm
from datetime import datetime

In [None]:
print('Dataset:')
# os.walk yields (directory, subdirectories, file names) for every directory under 'input'.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('input')
    for name in names
)
for path in input_files:
    print(path)

Dataset:


In [None]:
# Load the three preprocessed tables from the working directory.
data_inter, data_users, data_items = (
    pd.read_csv(csv_name)
    for csv_name in (
        'interactions_preprocessed.csv',
        'users_preprocessed.csv',
        'items_preprocessed.csv',
    )
)
# read_csv leaves 'start_date' as object dtype; convert it to datetime.
data_inter['start_date'] = pd.to_datetime(data_inter['start_date'])
tuple(frame.shape for frame in (data_inter, data_users, data_items))

((461101, 5), (137254, 3), (63759, 5))

In [None]:
# Add placeholder interactions for user ids that are missing from the
# interactions table. Only `user_id` matters here; the remaining fields are
# arbitrary filler (rating and date are copied from the first row).
filler_rating = data_inter['rating'][0]
filler_date = data_inter['start_date'][0]
placeholder_rows = pd.DataFrame([
    {'user_id': 10309, 'item_id': 82910, 'progress': 0, 'rating': filler_rating, 'start_date': filler_date},
    {'user_id': 156235, 'item_id': 257548, 'progress': 0, 'rating': filler_rating, 'start_date': filler_date},
    {'user_id': 102147, 'item_id': 331068, 'progress': 0, 'rating': filler_rating, 'start_date': filler_date},
])
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and adds all rows in a single pass.
data_inter = pd.concat([data_inter, placeholder_rows], ignore_index=True)
data_inter

Unnamed: 0,user_id,item_id,progress,rating,start_date
0,90133,82910.0,100.0,,2018-01-01
1,159130,331068.0,70.0,,2018-01-01
2,80061,26540.0,69.0,4.0,2018-01-01
3,12811,301895.0,16.0,,2018-01-01
4,5778,127872.0,100.0,,2018-01-01
...,...,...,...,...,...
461099,154587,270227.0,100.0,,2018-08-06
461100,835,,,,NaT
461101,10309,82910.0,0.0,,2018-01-01
461102,156235,257548.0,0.0,,2018-01-01


In [None]:
# user_id -> dense index (order of first appearance in the interactions), and the inverse lookup.
users_mapping = {user: row for row, user in enumerate(data_inter['user_id'].unique())}
users_inv_mapping = {row: user for user, row in users_mapping.items()}
len(users_mapping)

139664

In [None]:
# item_id -> dense index (order of first appearance in the interactions), and the inverse lookup.
# NOTE(review): a missing item_id in data_inter would get its own NaN key here — confirm that is intended.
items_mapping = {item: col for col, item in enumerate(data_inter['item_id'].unique())}
items_inv_mapping = {col: item for item, col in items_mapping.items()}
len(items_mapping)

63384

In [None]:
# Normalise titles to lower case.
data_items.loc[:, 'title'] = data_items['title'].str.lower()

In [None]:
# Lookup table: item id -> (lower-cased) title.
item_titles = dict(zip(data_items['id'], data_items['title']))
(len(item_titles), item_titles[248031])

(63759, 'ворон-челобитчик')

In [None]:
# For every title, collect the list of item ids that carry it.
title_items = (
    data_items
    .groupby('title')['id']
    .agg(list)
)
title_items

title
"бегущий по лезвию 2049", «между нами горы", «борг/макинрой", «жизнь впереди"            [277167.0]
"железная леди" маргарет тетчер - величайшая женщина хх века                             [203030.0]
"зверский детектив" и "боги манго", "мия", "заяц на взлетной полосе", "страница один"    [232845.0]
"рэд 2", «смурфики 2", «византия" и др.                                                  [123247.0]
"сабля, водка, конь гусарский". история гусаров                                          [173381.0]
                                                                                            ...    
…чума на оба ваши дома!                                                                  [341578.0]
№ 12, или история одного прекрасного юноши                                               [386131.0]
伦巴德人的故事                                                                                  [278588.0]
地球への旅                                                                                    [3725

In [None]:
# Number of item ids per title, then how often each count occurs.
title_count = title_items.apply(len)
title_count.value_counts()

1     53608
2      3623
3       488
4       151
5        61
6        23
7        17
8         8
9         5
18        1
10        1
11        1
12        1
35        1
13        1
15        1
51        1
Name: id, dtype: int64

In [None]:
# Raise or lower the threshold (here 9) to select titles shared by more or fewer items.
title_items.loc[title_count.gt(9)].tail(20)

title
возвращение                      [98549.0, 79870.0, 34953.0, 157205.0, 147753.0...
исповедь                         [316236.0, 131052.0, 219194.0, 105144.0, 28253...
полное собрание стихотворений    [68120.0, 316168.0, 201535.0, 195985.0, 49392....
противостояние                   [32261.0, 281928.0, 340975.0, 78019.0, 162415....
рассказы                         [20836.0, 168843.0, 135746.0, 129471.0, 80132....
сказки                           [183603.0, 176799.0, 145698.0, 134323.0, 14955...
стихи                            [105529.0, 65221.0, 94553.0, 96281.0, 261236.0...
стихотворения                    [212134.0, 167446.0, 211242.0, 235727.0, 36292...
Name: id, dtype: object

In [None]:
# All catalogue entries that share this one title.
data_items.loc[data_items['title'].eq('исповедь')]

Unnamed: 0,id,title,genres,authors,year
5,316236.0,исповедь,"Стихи и поэзия,Литература 19 века",Михаил Лермонтов,1829-1830
136,131052.0,исповедь,Русская классика,Максим Горький,1908
279,219194.0,исповедь,"Литература 19 века,Русская классика",Лев Толстой,1882
810,105144.0,исповедь,Русская классика,Надежда Тэффи,
16423,282532.0,исповедь,"Книги по философии,Зарубежная образовательная ...",Жан-Жак Руссо,1765-1770
22183,360054.0,исповедь,"Зарубежная религиозная литература,Духовная лит...",Блаженный Августин,
22320,199400.0,исповедь,Зарубежная религиозная и эзотерическая литература,Блаженный Августин,
27645,308300.0,исповедь,Русская классика,Антон Чехов,
35491,191841.0,исповедь,Современная русская литература,Сабит Алиев,
40959,253632.0,исповедь,Русская классика,Максим Горький,


# Кросс-валидация

## class TimeRangeSplit()

In [None]:
class TimeRangeSplit():
    """Time-ordered cross-validation splitter for interaction logs.

    Fold boundaries are produced by ``pandas.date_range``; every pair of
    consecutive boundaries ``(start, end)`` defines one fold. Rows dated
    strictly before ``start`` (and not earlier than ``train_min_date``, when
    given) form the train part; rows dated in ``[start, end)`` form the test
    part, optionally filtered.

    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html

    Parameters
    ----------
    start_date, end_date, freq, periods, tz, normalize
        Forwarded unchanged to ``pandas.date_range``.
    closed : None, 'left' or 'right'
        Which endpoints of the range to keep; ``None`` keeps both. Passed as
        ``inclusive`` on pandas versions that no longer accept ``closed``.
    train_min_date
        Optional lower bound (inclusive) for train rows; parsed with
        ``pandas.to_datetime``.
    filter_cold_users, filter_cold_items
        Drop test rows whose user / item does not occur in the fold's train rows.
    filter_already_seen
        Drop test rows whose (user, item) pair already occurs in the train rows.

    Raises
    ------
    ValueError
        If neither ``end_date`` nor ``periods`` is given, or if the resulting
        date range has fewer than two boundaries.
    """
    def __init__(self, 
                 start_date, 
                 end_date=None, 
                 freq='D', 
                 periods=None, 
                 tz=None, 
                 normalize=False, 
                 closed=None, 
                 train_min_date=None,
                 filter_cold_users=True, 
                 filter_cold_items=True, 
                 filter_already_seen=True):
        
        self.start_date = start_date
        if end_date is None and periods is None:
            raise ValueError("Either 'end_date' or 'periods' must be non-zero, not both at the same time.")

        self.end_date = end_date
        self.freq = freq
        self.periods = periods
        self.tz = tz
        self.normalize = normalize
        self.closed = closed
        self.train_min_date = pd.to_datetime(train_min_date, errors='raise')
        self.filter_cold_users = filter_cold_users
        self.filter_cold_items = filter_cold_items
        self.filter_already_seen = filter_already_seen

        range_kwargs = dict(
            start=start_date,
            end=end_date,
            freq=freq,
            periods=periods,
            tz=tz,
            normalize=normalize)
        if closed is None:
            # Keeping both endpoints is the default in every pandas version,
            # so the keyword can simply be omitted.
            self.date_range = pd.date_range(**range_kwargs)
        else:
            # pandas 1.4 renamed `closed` to `inclusive` and pandas 2.0 removed
            # `closed`; try the new keyword first, fall back to the old one.
            try:
                self.date_range = pd.date_range(inclusive=closed, **range_kwargs)
            except TypeError:
                self.date_range = pd.date_range(closed=closed, **range_kwargs)

        self.max_n_splits = max(0, len(self.date_range) - 1)
        if self.max_n_splits == 0:
            raise ValueError("Provided parametrs set an empty date range.") 

    @staticmethod
    def _drop_cold(df, column, train_idx, test_idx, test_mask):
        """Remove test rows whose `column` value never occurs in the train rows.

        Returns the reduced test index, the matching boolean mask over
        ``df.index``, the array of unseen values and the index of dropped rows.
        """
        new = np.setdiff1d(
            df.loc[test_idx, column].unique(), 
            df.loc[train_idx, column].unique())
        new_idx = df.index[test_mask & df[column].isin(new)]
        test_idx = np.setdiff1d(test_idx, new_idx)
        test_mask = df.index.isin(test_idx)
        return test_idx, test_mask, new, new_idx

    def split(self, 
              df, 
              user_column='user_id',
              item_column='item_id',
              datetime_column='date',
              fold_stats=False):
        """Yield ``(train_idx, test_idx, fold_info)`` for every fold.

        ``train_idx`` and ``test_idx`` are index labels of ``df``.
        ``fold_info`` always holds 'Start date' and 'End date'; with
        ``fold_stats=True`` it also holds row counts for the train part, for
        each enabled filter, and for the final test part.
        """
        df_datetime = df[datetime_column]
        if self.train_min_date is not None:
            train_min_mask = df_datetime >= self.train_min_date
        else:
            # No lower bound: every row with a known date may be used for training.
            train_min_mask = df_datetime.notnull()

        # Keep only the boundaries that lie inside the dates present in df.
        date_range = self.date_range[(self.date_range >= df_datetime.min()) & 
                                     (self.date_range <= df_datetime.max())]

        # Consecutive boundary pairs: (d0, d1), (d1, d2), ...
        for start, end in zip(date_range[:-1], date_range[1:]):
            fold_info = {
                'Start date': start,
                'End date': end
            }
            train_mask = train_min_mask & (df_datetime < start)
            train_idx = df.index[train_mask]
            if fold_stats:
                fold_info['Train'] = len(train_idx)

            test_mask = (df_datetime >= start) & (df_datetime < end)
            test_idx = df.index[test_mask]
            
            if self.filter_cold_users:
                test_idx, test_mask, new, new_idx = self._drop_cold(
                    df, user_column, train_idx, test_idx, test_mask)
                if fold_stats:
                    fold_info['New users'] = len(new)
                    fold_info['New users interactions'] = len(new_idx)

            if self.filter_cold_items:
                test_idx, test_mask, new, new_idx = self._drop_cold(
                    df, item_column, train_idx, test_idx, test_mask)
                if fold_stats:
                    fold_info['New items'] = len(new)
                    fold_info['New items interactions'] = len(new_idx)

            if self.filter_already_seen:
                # Compare (user, item) pairs through MultiIndex objects.
                user_item = [user_column, item_column]
                train_pairs = df.loc[train_idx, user_item].set_index(user_item).index
                test_pairs = df.loc[test_idx, user_item].set_index(user_item).index
                intersection = train_pairs.intersection(test_pairs)
                test_idx = test_idx[~test_pairs.isin(intersection)]
                if fold_stats:
                    fold_info['Known interactions'] = len(intersection)

            if fold_stats:
                fold_info['Test'] = len(test_idx)

            yield (train_idx, test_idx, fold_info)

    def get_n_splits(self, df, datetime_column='date'):
        """Return how many folds the configured boundaries give for the dates in ``df``."""
        df_datetime = df[datetime_column]
        if self.train_min_date is not None:
            df_datetime = df_datetime[df_datetime >= self.train_min_date]

        date_range = self.date_range[(self.date_range >= df_datetime.min()) & 
                                     (self.date_range <= df_datetime.max())]

        return max(0, len(date_range) - 1)

## Сама кросс-валидация

In [None]:
# Use the last `folds` days of the log as one-day validation windows.
folds = 7
last_date = data_inter['start_date'].max().normalize()
start_date = last_date - pd.Timedelta(days=folds)
(start_date, last_date)

(Timestamp('2018-07-30 00:00:00'), Timestamp('2018-08-06 00:00:00'))

In [None]:
# Custom time-based cross-validation: `folds + 1` daily boundaries give `folds` one-day windows.
cv = TimeRangeSplit(start_date=start_date, periods=folds + 1)

n_splits_for_data = cv.get_n_splits(data_inter, datetime_column='start_date')
(cv.max_n_splits, n_splits_for_data)

(7, 7)

In [None]:
# Fold boundaries generated by the splitter.
cv.date_range

DatetimeIndex(['2018-07-30', '2018-07-31', '2018-08-01', '2018-08-02',
               '2018-08-03', '2018-08-04', '2018-08-05', '2018-08-06'],
              dtype='datetime64[ns]', freq='D')

In [None]:
# Materialise every fold together with its statistics.
fold_iterator = cv.split(
    data_inter,
    user_column='user_id',
    item_column='item_id',
    datetime_column='start_date',
    fold_stats=True,
)
folds_with_stats = list(fold_iterator)

# One row of statistics per fold (the third element of each fold tuple).
folds_info_with_stats = pd.DataFrame([fold[2] for fold in folds_with_stats])

In [None]:
# Per-fold statistics: train/test sizes and how many rows each filter removed.
folds_info_with_stats

Unnamed: 0,Start date,End date,Train,New users,New users interactions,New items,New items interactions,Known interactions,Test
0,2018-07-30,2018-07-31,444581,189,217,8,8,0,1970
1,2018-07-31,2018-08-01,446776,175,193,7,7,0,2007
2,2018-08-01,2018-08-02,448983,170,247,5,5,0,1961
3,2018-08-02,2018-08-03,451196,181,202,10,10,0,1947
4,2018-08-03,2018-08-04,453355,182,218,9,9,0,1962
5,2018-08-04,2018-08-05,455544,160,192,9,9,0,2053
6,2018-08-05,2018-08-06,457798,160,180,10,10,0,2004


In [None]:
# (start, end) boundaries of every fold.
fold_dates = []
for *_indices, fold_info in folds_with_stats:
    fold_dates.append((fold_info['Start date'], fold_info['End date']))
fold_dates

[(Timestamp('2018-07-30 00:00:00', freq='D'),
  Timestamp('2018-07-31 00:00:00', freq='D')),
 (Timestamp('2018-07-31 00:00:00', freq='D'),
  Timestamp('2018-08-01 00:00:00', freq='D')),
 (Timestamp('2018-08-01 00:00:00', freq='D'),
  Timestamp('2018-08-02 00:00:00', freq='D')),
 (Timestamp('2018-08-02 00:00:00', freq='D'),
  Timestamp('2018-08-03 00:00:00', freq='D')),
 (Timestamp('2018-08-03 00:00:00', freq='D'),
  Timestamp('2018-08-04 00:00:00', freq='D')),
 (Timestamp('2018-08-04 00:00:00', freq='D'),
  Timestamp('2018-08-05 00:00:00', freq='D')),
 (Timestamp('2018-08-05 00:00:00', freq='D'),
  Timestamp('2018-08-06 00:00:00', freq='D'))]

In [None]:
def compute_metrics(df_true, df_pred, top_N):
    """Compute ranking metrics for recommendations against held-out interactions.

    Parameters
    ----------
    df_true : DataFrame with 'user_id' and 'item_id' columns (relevant pairs).
    df_pred : DataFrame with 'user_id', 'item_id' and 'rank' columns.
    top_N : int, largest cut-off k for Precision@k / Recall@k.

    Returns
    -------
    pandas.Series with Precision@k and Recall@k for k = 1..top_N, followed by
    MAP@top_N and MRR. All values are averaged over the users in ``df_true``.
    """
    pair_key = ['user_id', 'item_id']
    # Left join: relevant pairs that were not recommended get a NaN rank.
    scored = (
        df_true.set_index(pair_key)
        .join(df_pred.set_index(pair_key))
        .sort_values(by=['user_id', 'rank'])
    )

    rank = scored['rank']
    # Relevant items per user (np.size also counts the NaN-rank rows).
    n_relevant = scored.groupby(level='user_id')['rank'].transform(np.size)
    reciprocal = (1 / rank).fillna(0)
    # Rows are sorted by rank within each user, so the running count at a hit
    # divided by that hit's rank is the precision at that position.
    hits_so_far = scored.groupby(level='user_id').cumcount() + 1
    precision_at_hit = hits_so_far / rank

    n_users = scored.index.get_level_values('user_id').nunique()

    metrics = {}
    for k in range(1, top_N + 1):
        hit = rank <= k
        metrics[f'Precision@{k}'] = (hit / k).sum() / n_users
        metrics[f'Recall@{k}'] = (hit / n_relevant).sum() / n_users

    metrics[f'MAP@{top_N}'] = (precision_at_hit / n_relevant).sum() / n_users
    metrics['MRR'] = reciprocal.groupby(level='user_id').max().mean()
    return pd.Series(metrics)

# Тут сама модель машинного обучения

In [None]:
# Work with the first fold from the list.
train_idx, test_idx, info = folds_with_stats[0]

train, test = data_inter.loc[train_idx], data_inter.loc[test_idx]
(train.shape, test.shape)

((444581, 5), (1970, 5))

In [None]:
# Sanity check: the fold statistics should match the shapes of train/test above.
info['Train'], info['Test']

(444581, 1970)

In [None]:
def get_coo_matrix(df, 
                   user_col='user_id', 
                   item_col='item_id',
                   weight_col=None, 
                   users_mapping=users_mapping, 
                   items_mapping=items_mapping,
                   shape=None):
    """Build a sparse user x item interaction matrix in COO format.

    Parameters
    ----------
    df : DataFrame with one interaction per row.
    user_col, item_col : names of the columns holding user and item ids.
    weight_col : optional column with interaction weights; when None every
        interaction gets weight 1.0 (float32).
    users_mapping, items_mapping : dicts mapping raw ids to row / column indices.
    shape : optional ``(n_rows, n_cols)``. When None (default) scipy infers the
        shape from the largest indices present in ``df``, so matrices built
        from different subsets can differ in size. Pass e.g.
        ``(len(users_mapping), len(items_mapping))`` for a fixed size.

    Returns
    -------
    scipy.sparse.coo_matrix

    Raises
    ------
    ValueError
        If some ids in ``df`` are missing from the corresponding mapping.
    """
    if weight_col is None:
        weights = np.ones(len(df), dtype=np.float32)
    else:
        weights = df[weight_col].astype(np.float32)

    rows = df[user_col].map(users_mapping.get)
    cols = df[item_col].map(items_mapping.get)
    # Unmapped ids become NaN and cannot be used as matrix indices;
    # fail with a clear message instead of an obscure scipy error.
    for col_name, indices in ((user_col, rows), (item_col, cols)):
        if indices.isna().any():
            raise ValueError(
                f"Column '{col_name}' contains ids that are missing from the mapping.")

    interaction_matrix = sp.coo_matrix((weights, (rows, cols)), shape=shape)
    return interaction_matrix

In [None]:
# Build the interaction matrix from the TRAIN part of the fold only: using the
# full `data_inter` would leak the held-out test interactions into the model
# that is evaluated on them.
train_mat = get_coo_matrix(train).tocsr()
train_mat

<139664x63312 sparse matrix of type '<class 'numpy.float32'>'
	with 444581 stored elements in Compressed Sparse Row format>

In [None]:
pip install implicit

Collecting implicit
[?25l  Downloading https://files.pythonhosted.org/packages/bc/07/c0121884722d16e2c5beeb815f6b84b41cbf22e738e4075f1475be2791bc/implicit-0.4.4.tar.gz (1.1MB)
[K     |▎                               | 10kB 14.3MB/s eta 0:00:01[K     |▋                               | 20kB 22.0MB/s eta 0:00:01[K     |▉                               | 30kB 25.6MB/s eta 0:00:01[K     |█▏                              | 40kB 20.7MB/s eta 0:00:01[K     |█▌                              | 51kB 15.5MB/s eta 0:00:01[K     |█▊                              | 61kB 12.1MB/s eta 0:00:01[K     |██                              | 71kB 13.2MB/s eta 0:00:01[K     |██▍                             | 81kB 14.0MB/s eta 0:00:01[K     |██▋                             | 92kB 14.1MB/s eta 0:00:01[K     |███                             | 102kB 12.9MB/s eta 0:00:01[K     |███▎                            | 112kB 12.9MB/s eta 0:00:01[K     |███▌                            | 122kB 12.9MB/s eta

In [None]:
from implicit.nearest_neighbours import CosineRecommender, BM25Recommender, TFIDFRecommender

In [None]:
# NOTE(review): the variable is named `cosine_model` but holds a BM25Recommender.
cosine_model = BM25Recommender(K=10)
cosine_model.fit(train_mat.T)  # fit on the transposed (item x user) matrix

HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))




In [None]:
# Number of recommendations to request per user.
top_N = 10
# Take the first user of the test fold and look up their matrix row.
user_id = test['user_id'].iloc[0]
row_id = users_mapping[user_id]
print(f'Рекомендации для пользователя {user_id}, номер строки - {row_id}')

Рекомендации для пользователя 109966, номер строки - 87512


In [None]:
# Top-N recommendations for the chosen row, excluding items the user already has.
raw_recs = cosine_model.recommend(
    row_id,
    train_mat,
    N=top_N,
    filter_already_liked_items=True,
)
recs = pd.DataFrame(raw_recs, columns=['col_id', 'similarity'])
recs

Unnamed: 0,col_id,similarity
0,19457,128.63194
1,26671,128.63194
2,19963,120.663653
3,52037,113.624992
4,52204,96.040138
5,19948,86.819328
6,55551,86.819328
7,13509,86.253647
8,59167,84.478973
9,51304,79.690929


In [None]:
# Translate matrix columns back to item ids, then item ids to titles.
recs = recs.assign(
    item_id=lambda frame: frame['col_id'].map(items_inv_mapping.get),
    title=lambda frame: frame['item_id'].map(item_titles.get),
)
recs

Unnamed: 0,col_id,similarity,item_id,title
0,19457,128.63194,9683.0,тренинг на основе идей экхарта толле. новая жи...
1,26671,128.63194,36132.0,сорок пять. часть первая
2,19963,120.663653,117805.0,странница
3,52037,113.624992,179200.0,средневековье. самые известные герои истории
4,52204,96.040138,78405.0,108 техник медитаций
5,19948,86.819328,114073.0,линия жизни
6,55551,86.819328,296064.0,12 лучших художников возрождения
7,13509,86.253647,19180.0,рак (21.06 – 22.07)
8,59167,84.478973,240526.0,карьера как стартап. книга 1 – сильные и слабы...
9,51304,79.690929,51004.0,перекресток


In [None]:
def generate_implicit_recs_mapper(model, train_matrix, N, user_mapping, item_inv_mapping):
    """Return a function mapping a raw user id to a list of recommended item ids.

    The returned function looks the user up in ``user_mapping``, asks
    ``model.recommend`` for ``N`` items (already-liked items filtered out) and
    translates the recommended indices back through ``item_inv_mapping``.
    ``model.recommend`` must return an iterable of ``(item_index, score)`` pairs.
    A user missing from ``user_mapping`` raises ``KeyError``.
    """
    def _recs_mapper(user):
        row = user_mapping[user]
        scored = model.recommend(row, train_matrix, N=N, filter_already_liked_items=True)
        item_ids = []
        for col, _score in scored:
            item_ids.append(item_inv_mapping[col])
        return item_ids
    return _recs_mapper

In [None]:
# Bind the fitted model and the id mappings into a user_id -> recommended item ids function.
mapper = generate_implicit_recs_mapper(
    model=cosine_model,
    train_matrix=train_mat,
    N=top_N,
    user_mapping=users_mapping,
    item_inv_mapping=items_inv_mapping,
)
mapper

<function __main__.generate_implicit_recs_mapper.<locals>._recs_mapper>

In [None]:
%time
recs = pd.DataFrame({
    'user_id': test['user_id'].unique()
})
recs['item_id'] = recs['user_id'].map(mapper)
recs.head()

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 9.3 µs


Unnamed: 0,user_id,item_id
0,109966,"[9683.0, 36132.0, 117805.0, 179200.0, 78405.0,..."
1,112336,"[284929.0, 383952.0, 233172.0, 159878.0, 81323..."
2,113743,"[255613.0, 3006.0, 193191.0, 236885.0, 190409...."
3,53141,"[235860.0, 293880.0, 369465.0, 225238.0, 13025..."
4,63013,"[119944.0, 102117.0, 145270.0, 309304.0, 19088..."


In [None]:
# Build recommendations for every user listed in the sample submission file.
data_test = pd.read_csv('sample_submission2.csv')
submission_users = data_test['user_id'].unique()
recs = pd.DataFrame({'user_id': submission_users}).assign(
    item_id=lambda frame: frame['user_id'].map(mapper)
)
recs.head(51)

Unnamed: 0,user_id,item_id
0,10001,"[296208, 297168, 235765, 237354, 234997, 27019..."
1,10002,"[378939, 43309, 327688, 240408, 54436, 234926,..."
2,100152,"[82211, 5526, 167562, 210478, 113057, 137774, ..."
3,100197,"[179505, 268152, 267417, 247795, 285127, 35120..."
4,100284,"[343815, 168733, 273724, 312572, 240559, 17074..."
5,10031,"[227479, 199170, 215433, 80963, 195671, 38454,..."
6,100412,"[44860, 129693, 254902, 153176, 382520, 255942..."
7,100428,"[291238, 328949, 155018, 334268, 105894, 15684..."
8,100450,"[81011, 126529, 203071, 213553, 303683, 212290..."
9,100562,"[292437, 138468, 359453, 219762, 313230, 16007..."


In [None]:
# Save the recommendations; at this point each 'item_id' cell still holds a list.
recs.to_csv('implict.csv', index=False)

# Выгружаем данные

In [None]:
# Rebuild the recommendations for the TEST users before ranking them: the
# preceding cells reuse the name `recs` for the submission users, while the
# metrics below are computed against `test`.
recs = pd.DataFrame({
    'user_id': test['user_id'].unique()
})
recs['item_id'] = recs['user_id'].map(mapper)
# One row per (user, recommended item); rank follows the order of the list.
recs = recs.explode('item_id')
recs['rank'] = recs.groupby('user_id').cumcount() + 1
recs.head(top_N + 2)

Unnamed: 0,user_id,item_id,rank
0,109966,9683,1
0,109966,36132,2
0,109966,117805,3
0,109966,179200,4
0,109966,78405,5
0,109966,114073,6
0,109966,296064,7
0,109966,19180,8
0,109966,240526,9
0,109966,51004,10


In [None]:
# Score the ranked recommendations against the held-out test interactions.
metrics = compute_metrics(df_true=test, df_pred=recs, top_N=top_N)
metrics

Precision@1     0.009560
Recall@1        0.006612
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.008073
Recall@3        0.017182
Precision@4     0.007011
Recall@4        0.019598
Precision@5     0.006501
Recall@5        0.022732
Precision@6     0.005736
Recall@6        0.023768
Precision@7     0.005281
Recall@7        0.025467
Precision@8     0.004860
Recall@8        0.026503
Precision@9     0.004674
Recall@9        0.028946
Precision@10    0.004398
Recall@10       0.030540
MAP@10          0.014176
MRR             0.016904
dtype: float64

# Тестируем параметры

In [None]:
# Sweep the neighbourhood size K of the BM25 model and collect the metrics of
# every run in one table instead of printing them one by one.
top_N = 10
test_users = test['user_id'].unique()  # identical for every K, so computed once

k_values = []
metric_rows = []
for k in range(1, 25):
    cosine_model = BM25Recommender(K=k)
    cosine_model.fit(train_mat.T)
    mapper = generate_implicit_recs_mapper(cosine_model, train_mat, top_N, users_mapping, items_inv_mapping)
    recs = pd.DataFrame({
        'user_id': test_users
    })
    recs['item_id'] = recs['user_id'].map(mapper)
    recs = recs.explode('item_id')
    recs['rank'] = recs.groupby('user_id').cumcount() + 1
    metrics = compute_metrics(test, recs, top_N)
    k_values.append(k)
    metric_rows.append(metrics)

# One row per K, one column per metric.
metrics_by_k = pd.DataFrame(metric_rows, index=pd.Index(k_values, name='K'))
metrics_by_k

K =  1


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.0
Recall@1        0.0
Precision@2     0.0
Recall@2        0.0
Precision@3     0.0
Recall@3        0.0
Precision@4     0.0
Recall@4        0.0
Precision@5     0.0
Recall@5        0.0
Precision@6     0.0
Recall@6        0.0
Precision@7     0.0
Recall@7        0.0
Precision@8     0.0
Recall@8        0.0
Precision@9     0.0
Recall@9        0.0
Precision@10    0.0
Recall@10       0.0
MAP@10          0.0
MRR             0.0
dtype: float64
K =  2


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.006373
Recall@1        0.004169
Precision@2     0.005099
Recall@2        0.007462
Precision@3     0.004461
Recall@3        0.009905
Precision@4     0.003983
Recall@4        0.011658
Precision@5     0.003569
Recall@5        0.013145
Precision@6     0.003187
Recall@6        0.014420
Precision@7     0.002823
Recall@7        0.014579
Precision@8     0.002549
Recall@8        0.014898
Precision@9     0.002337
Recall@9        0.015110
Precision@10    0.002103
Recall@10       0.015110
MAP@10          0.007718
MRR             0.010503
dtype: float64
K =  3


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.011472
Recall@1        0.008949
Precision@2     0.008286
Recall@2        0.012641
Precision@3     0.007436
Recall@3        0.017846
Precision@4     0.006055
Recall@4        0.019758
Precision@5     0.005226
Recall@5        0.020926
Precision@6     0.004568
Recall@6        0.021245
Precision@7     0.004097
Recall@7        0.022520
Precision@8     0.003744
Recall@8        0.023051
Precision@9     0.003612
Recall@9        0.025175
Precision@10    0.003314
Recall@10       0.025494
MAP@10          0.014007
MRR             0.016781
dtype: float64
K =  4


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.011472
Recall@1        0.008843
Precision@2     0.008604
Recall@2        0.012853
Precision@3     0.007861
Recall@3        0.018138
Precision@4     0.006851
Recall@4        0.021112
Precision@5     0.006119
Recall@5        0.023980
Precision@6     0.005311
Recall@6        0.024777
Precision@7     0.004826
Recall@7        0.025945
Precision@8     0.004382
Recall@8        0.026423
Precision@9     0.003966
Recall@9        0.027061
Precision@10    0.003697
Recall@10       0.028017
MAP@10          0.014795
MRR             0.017589
dtype: float64
K =  5


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008286
Recall@2        0.011897
Precision@3     0.007223
Recall@3        0.015907
Precision@4     0.007011
Recall@4        0.020554
Precision@5     0.006373
Recall@5        0.024379
Precision@6     0.005736
Recall@6        0.026291
Precision@7     0.005372
Recall@7        0.028362
Precision@8     0.004700
Recall@8        0.028362
Precision@9     0.004249
Recall@9        0.028574
Precision@10    0.003888
Recall@10       0.029212
MAP@10          0.014259
MRR             0.017304
dtype: float64
K =  6


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010198
Recall@1        0.007250
Precision@2     0.009242
Recall@2        0.013809
Precision@3     0.007011
Recall@3        0.015270
Precision@4     0.007489
Recall@4        0.022467
Precision@5     0.006246
Recall@5        0.023210
Precision@6     0.005736
Recall@6        0.025760
Precision@7     0.005372
Recall@7        0.028309
Precision@8     0.004860
Recall@8        0.029584
Precision@9     0.004461
Recall@9        0.030062
Precision@10    0.004015
Recall@10       0.030062
MAP@10          0.014453
MRR             0.017364
dtype: float64
K =  7


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.009560
Recall@1        0.006612
Precision@2     0.010198
Recall@2        0.014765
Precision@3     0.007436
Recall@3        0.015588
Precision@4     0.007170
Recall@4        0.020554
Precision@5     0.006119
Recall@5        0.021935
Precision@6     0.005630
Recall@6        0.024033
Precision@7     0.005099
Recall@7        0.025627
Precision@8     0.004860
Recall@8        0.028176
Precision@9     0.004532
Recall@9        0.029769
Precision@10    0.004270
Recall@10       0.030311
MAP@10          0.014386
MRR             0.017087
dtype: float64
K =  8


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.009560
Recall@1        0.006612
Precision@2     0.009879
Recall@2        0.014128
Precision@3     0.007861
Recall@3        0.016545
Precision@4     0.006692
Recall@4        0.018642
Precision@5     0.006119
Recall@5        0.022148
Precision@6     0.005417
Recall@6        0.022971
Precision@7     0.004917
Recall@7        0.024352
Precision@8     0.004780
Recall@8        0.027857
Precision@9     0.004674
Recall@9        0.031044
Precision@10    0.004270
Recall@10       0.031363
MAP@10          0.014307
MRR             0.016987
dtype: float64
K =  9


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.009560
Recall@1        0.006612
Precision@2     0.009242
Recall@2        0.013172
Precision@3     0.007861
Recall@3        0.016545
Precision@4     0.006851
Recall@4        0.018961
Precision@5     0.006373
Recall@5        0.022573
Precision@6     0.005842
Recall@6        0.024086
Precision@7     0.005372
Recall@7        0.026105
Precision@8     0.004939
Recall@8        0.027459
Precision@9     0.004816
Recall@9        0.030965
Precision@10    0.004653
Recall@10       0.033195
MAP@10          0.014508
MRR             0.017216
dtype: float64
K =  10


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.009560
Recall@1        0.006612
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.008073
Recall@3        0.017182
Precision@4     0.007011
Recall@4        0.019598
Precision@5     0.006501
Recall@5        0.022732
Precision@6     0.005736
Recall@6        0.023768
Precision@7     0.005281
Recall@7        0.025467
Precision@8     0.004860
Recall@8        0.026503
Precision@9     0.004674
Recall@9        0.028946
Precision@10    0.004398
Recall@10       0.030540
MAP@10          0.014176
MRR             0.016904
dtype: float64
K =  11


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.009560
Recall@1        0.006612
Precision@2     0.009242
Recall@2        0.013172
Precision@3     0.007436
Recall@3        0.015270
Precision@4     0.006533
Recall@4        0.018005
Precision@5     0.006119
Recall@5        0.021829
Precision@6     0.005630
Recall@6        0.023290
Precision@7     0.005281
Recall@7        0.025467
Precision@8     0.004939
Recall@8        0.027140
Precision@9     0.004603
Recall@9        0.028309
Precision@10    0.004334
Recall@10       0.029345
MAP@10          0.013917
MRR             0.016572
dtype: float64
K =  12


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.009242
Recall@2        0.013172
Precision@3     0.007223
Recall@3        0.014951
Precision@4     0.006214
Recall@4        0.017049
Precision@5     0.005991
Recall@5        0.020661
Precision@6     0.005630
Recall@6        0.023077
Precision@7     0.005281
Recall@7        0.025945
Precision@8     0.005019
Recall@8        0.027778
Precision@9     0.004745
Recall@9        0.029477
Precision@10    0.004461
Recall@10       0.030194
MAP@10          0.014599
MRR             0.017122
dtype: float64
K =  13


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010198
Recall@1        0.007250
Precision@2     0.009560
Recall@2        0.013491
Precision@3     0.007223
Recall@3        0.014526
Precision@4     0.006373
Recall@4        0.016943
Precision@5     0.005864
Recall@5        0.020448
Precision@6     0.005736
Recall@6        0.023502
Precision@7     0.005463
Recall@7        0.026158
Precision@8     0.005178
Recall@8        0.028309
Precision@9     0.004674
Recall@9        0.028521
Precision@10    0.004398
Recall@10       0.029876
MAP@10          0.014196
MRR             0.017049
dtype: float64
K =  14


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010198
Recall@1        0.007250
Precision@2     0.008923
Recall@2        0.012216
Precision@3     0.007223
Recall@3        0.014526
Precision@4     0.006373
Recall@4        0.016943
Precision@5     0.005609
Recall@5        0.019174
Precision@6     0.005417
Recall@6        0.021590
Precision@7     0.005281
Recall@7        0.025202
Precision@8     0.005178
Recall@8        0.028309
Precision@9     0.004674
Recall@9        0.028521
Precision@10    0.004461
Recall@10       0.030513
MAP@10          0.013942
MRR             0.016789
dtype: float64
K =  15


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.007011
Recall@3        0.013889
Precision@4     0.006214
Recall@4        0.016306
Precision@5     0.005609
Recall@5        0.019174
Precision@6     0.005417
Recall@6        0.021590
Precision@7     0.005190
Recall@7        0.024564
Precision@8     0.004939
Recall@8        0.027194
Precision@9     0.004603
Recall@9        0.028203
Precision@10    0.004398
Recall@10       0.029876
MAP@10          0.014134
MRR             0.016914
dtype: float64
K =  16


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.007223
Recall@3        0.014526
Precision@4     0.006055
Recall@4        0.015987
Precision@5     0.005609
Recall@5        0.019174
Precision@6     0.005417
Recall@6        0.021590
Precision@7     0.005099
Recall@7        0.023927
Precision@8     0.004860
Recall@8        0.026556
Precision@9     0.004603
Recall@9        0.028203
Precision@10    0.004270
Recall@10       0.028920
MAP@10          0.014056
MRR             0.016788
dtype: float64
K =  17


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.006586
Recall@3        0.013358
Precision@4     0.005895
Recall@4        0.015349
Precision@5     0.005481
Recall@5        0.018536
Precision@6     0.005524
Recall@6        0.022228
Precision@7     0.005190
Recall@7        0.024246
Precision@8     0.005099
Recall@8        0.027725
Precision@9     0.004745
Recall@9        0.029212
Precision@10    0.004398
Recall@10       0.029929
MAP@10          0.014134
MRR             0.016584
dtype: float64
K =  18


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.006798
Recall@3        0.013995
Precision@4     0.006055
Recall@4        0.015987
Precision@5     0.005736
Recall@5        0.019492
Precision@6     0.005736
Recall@6        0.023502
Precision@7     0.005554
Recall@7        0.026795
Precision@8     0.005258
Recall@8        0.028999
Precision@9     0.004957
Recall@9        0.031124
Precision@10    0.004589
Recall@10       0.031841
MAP@10          0.014568
MRR             0.016996
dtype: float64
K =  19


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.006798
Recall@3        0.013995
Precision@4     0.005895
Recall@4        0.015774
Precision@5     0.005991
Recall@5        0.020767
Precision@6     0.005630
Recall@6        0.022865
Precision@7     0.005554
Recall@7        0.026795
Precision@8     0.005338
Recall@8        0.029637
Precision@9     0.004957
Recall@9        0.031124
Precision@10    0.004525
Recall@10       0.031204
MAP@10          0.014529
MRR             0.016937
dtype: float64
K =  20


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.006798
Recall@3        0.013995
Precision@4     0.005895
Recall@4        0.015774
Precision@5     0.005736
Recall@5        0.020236
Precision@6     0.005417
Recall@6        0.021909
Precision@7     0.005463
Recall@7        0.026158
Precision@8     0.005258
Recall@8        0.028999
Precision@9     0.004816
Recall@9        0.029849
Precision@10    0.004525
Recall@10       0.031204
MAP@10          0.014455
MRR             0.016830
dtype: float64
K =  21


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.006586
Recall@3        0.013358
Precision@4     0.005895
Recall@4        0.015456
Precision@5     0.005481
Recall@5        0.018961
Precision@6     0.005417
Recall@6        0.021909
Precision@7     0.005281
Recall@7        0.024883
Precision@8     0.005178
Recall@8        0.028362
Precision@9     0.004816
Recall@9        0.029849
Precision@10    0.004398
Recall@10       0.029929
MAP@10          0.014200
MRR             0.016543
dtype: float64
K =  22


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012535
Precision@3     0.006586
Recall@3        0.013358
Precision@4     0.005736
Recall@4        0.015137
Precision@5     0.005609
Recall@5        0.019598
Precision@6     0.005417
Recall@6        0.022334
Precision@7     0.005372
Recall@7        0.025839
Precision@8     0.005099
Recall@8        0.027725
Precision@9     0.004886
Recall@9        0.030487
Precision@10    0.004525
Recall@10       0.031204
MAP@10          0.014358
MRR             0.016669
dtype: float64
K =  23


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008604
Recall@2        0.011897
Precision@3     0.006586
Recall@3        0.013358
Precision@4     0.005577
Recall@4        0.014500
Precision@5     0.005609
Recall@5        0.019598
Precision@6     0.005417
Recall@6        0.022334
Precision@7     0.005372
Recall@7        0.025839
Precision@8     0.004939
Recall@8        0.026769
Precision@9     0.004886
Recall@9        0.030487
Precision@10    0.004461
Recall@10       0.030566
MAP@10          0.014143
MRR             0.016450
dtype: float64
K =  24


HBox(children=(FloatProgress(value=0.0, max=63312.0), HTML(value='')))


Precision@1     0.010835
Recall@1        0.007887
Precision@2     0.008923
Recall@2        0.012110
Precision@3     0.006798
Recall@3        0.013570
Precision@4     0.005736
Recall@4        0.014712
Precision@5     0.005609
Recall@5        0.019174
Precision@6     0.005417
Recall@6        0.021909
Precision@7     0.005372
Recall@7        0.025839
Precision@8     0.004860
Recall@8        0.026556
Precision@9     0.004745
Recall@9        0.029159
Precision@10    0.004589
Recall@10       0.031363
MAP@10          0.014239
MRR             0.016762
dtype: float64
