## Import libraries and constants

In [1]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from rectools import Columns
from rectools.dataset import Interactions, Dataset
from rectools.models.popular import PopularModel
from rectools.models.random import RandomModel
from rectools.metrics import Precision, Recall, MRR, MAP, MeanInvUserFreq, Serendipity, calc_metrics
from rectools.model_selection import TimeRangeSplitter
from utils import MetricCalculator, VisualAnalyzer

In [2]:
# Constants

# Input CSV locations
INTERACTIONS_PATH = "./data/interactions.csv"
ITEMS_PATH = "./data/items.csv"

# Evaluation setup
K_RECOMMENDATIONS = 10  # forwarded to MetricCalculator and VisualAnalyzer
N_TEST_FOLDS = 3  # n_splits of the TimeRangeSplitter
FOLD_TIME_DURATION = "7D"  # test_size of each fold: 1 week or 7 days

# Inputs of the visual analysis: users to inspect and item columns to show
TEST_USERS = [
    666262,
    672861,
    955527,
]
TEST_ITEM_DATA = [
    "title",
    "genres",
    "watch_count",
]

## Loading and preparing data

In [3]:
# Read the raw interaction log and rename its weight/datetime columns
# in a single chained expression.
interactions_df = (
    pd.read_csv(INTERACTIONS_PATH)
    .rename(columns={"watched_pct": "weight", "last_watch_dt": "datetime"})
)
interactions_df.head(5)

Unnamed: 0,user_id,item_id,datetime,total_dur,weight
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0


In [4]:
items_df = pd.read_csv(ITEMS_PATH)
# Number of interaction rows (with a non-null user_id) per item, keyed by item_id
items_count_mapping = interactions_df.groupby("item_id")["user_id"].count().to_dict()
# Vectorized lookup instead of a per-row Python lambda: items that never occur
# in the interaction log come back as NaN from the lookup and are set to 0.
items_df["watch_count"] = (
    items_df["item_id"].map(items_count_mapping).fillna(0).astype("int64")
)
items_df.head(5)

Unnamed: 0,item_id,content_type,title,title_orig,release_year,genres,countries,for_kids,age_rating,studios,directors,actors,description,keywords,watch_count
0,10711,film,Поговори с ней,Hable con ella,2002.0,"драмы, зарубежные, детективы, мелодрамы",Испания,,16.0,,Педро Альмодовар,"Адольфо Фернандес, Ана Фернандес, Дарио Гранди...",Мелодрама легендарного Педро Альмодовара «Пого...,"Поговори, ней, 2002, Испания, друзья, любовь, ...",5
1,2508,film,Голые перцы,Search Party,2014.0,"зарубежные, приключения, комедии",США,,16.0,,Скот Армстронг,"Адам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ...",Уморительная современная комедия на популярную...,"Голые, перцы, 2014, США, друзья, свадьбы, прео...",9
2,10716,film,Тактическая сила,Tactical Force,2011.0,"криминал, зарубежные, триллеры, боевики, комедии",Канада,,16.0,,Адам П. Калтраро,"Адриан Холмс, Даррен Шалави, Джерри Вассерман,...",Профессиональный рестлер Стив Остин («Все или ...,"Тактическая, сила, 2011, Канада, бандиты, ганг...",6
3,7868,film,45 лет,45 Years,2015.0,"драмы, зарубежные, мелодрамы",Великобритания,,16.0,,Эндрю Хэй,"Александра Риддлстон-Барретт, Джеральдин Джейм...","Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей...","45, лет, 2015, Великобритания, брак, жизнь, лю...",2
4,16268,film,Все решает мгновение,,1978.0,"драмы, спорт, советские, мелодрамы",СССР,,12.0,Ленфильм,Виктор Садовский,"Александр Абдулов, Александр Демьяненко, Алекс...",Расчетливая чаровница из советского кинохита «...,"Все, решает, мгновение, 1978, СССР, сильные, ж...",1


## Metrics calculation

In [5]:
# Models under comparison, keyed by the name shown in the logs and the report.
# The random model is seeded so its recommendations are repeatable.
models = dict(
    random=RandomModel(random_state=32),
    popular=PopularModel(),
)

In [6]:
# Metric classes keyed by the short name used in the report column labels
metrics_map = {
    # Classification metrics
    "precision": Precision,
    "recall": Recall,
    # Ranking metrics
    "mrr": MRR,
    "map": MAP,
    # Beyond-accuracy metrics
    "mean_inv_user_freq": MeanInvUserFreq,
    "serendipity": Serendipity,
}
# One metric instance per (cutoff, metric) pair, keyed "<name>@<k>".
# The cutoff is the outer loop, so all metrics at k=1 come first, then k=5, then k=10.
metrics = {
    f"{name}@{cutoff}": metric_cls(k=cutoff)
    for cutoff in (1, 5, 10)
    for name, metric_cls in metrics_map.items()
}

In [7]:
# Time-based cross-validation: N_TEST_FOLDS test folds of FOLD_TIME_DURATION each,
# with all three filtering options of the splitter switched on.
splitter = TimeRangeSplitter(
    n_splits=N_TEST_FOLDS,
    test_size=FOLD_TIME_DURATION,
    filter_cold_users=True,
    filter_cold_items=True,
    filter_already_seen=True,
)

In [8]:
# Wrap the interaction table in the rectools container handed to MetricCalculator
interactions = Interactions(df=interactions_df)

In [9]:
# Evaluate every model on every fold; show_logs=True prints per-fold
# fit/predict timings.
mc = MetricCalculator(
    models,
    metrics,
    splitter,
    K_RECOMMENDATIONS,
    interactions,
)
report_df = mc.generate_report(show_logs=True)

Test fold: 1, model: random, fitted in: 0 milliseconds, predicted in: 552 milliseconds
Test fold: 1, model: popular, fitted in: 149 milliseconds, predicted in: 463 milliseconds
Test fold: 2, model: random, fitted in: 0 milliseconds, predicted in: 496 milliseconds
Test fold: 2, model: popular, fitted in: 495 milliseconds, predicted in: 934 milliseconds
Test fold: 3, model: random, fitted in: 0 milliseconds, predicted in: 348 milliseconds
Test fold: 3, model: popular, fitted in: 903 milliseconds, predicted in: 401 milliseconds


In [10]:
# Preview the metric report (one row per model); 5 is the default row count of head()
report_df.head(5)

Unnamed: 0_level_0,precision@1,recall@1,precision@5,recall@5,precision@10,recall@10,mrr@1,mrr@5,mrr@10,map@1,map@5,map@10,mean_inv_user_freq@1,mean_inv_user_freq@5,mean_inv_user_freq@10,serendipity@1,serendipity@5,serendipity@10
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
random,0.000221,7.2e-05,0.000202,0.000365,0.000193,0.000693,0.000221,0.000485,0.000604,7.2e-05,0.000169,0.000211,15.614137,15.612989,15.613009,6e-06,7e-06,7e-06
popular,0.076432,0.04272,0.052402,0.137413,0.033903,0.173492,0.076432,0.131669,0.138603,0.04272,0.078295,0.084109,2.377055,3.066979,3.71339,2e-06,3e-06,2e-06


## Visual analysis

In [11]:
# For popular model: build a dataset from the whole interaction table
# and fit a fresh PopularModel on it.
dataset = Dataset.construct(interactions_df)
model = PopularModel()
model.fit(dataset)

<rectools.models.popular.PopularModel at 0x140ff854be0>

In [12]:
# Collect watch history and top-K recommendations for the selected test users
va = VisualAnalyzer(
    model,
    dataset,
    TEST_USERS,
    TEST_ITEM_DATA,
    K_RECOMMENDATIONS,
    items_df,
)
history_and_reco = va.get_history_and_recommendation_dataframes()
history_df, reco_df = history_and_reco

In [13]:
# Show up to the first 10 history rows of the test users
history_df.head(n=10)

Unnamed: 0,user_id,item_id,weight,datetime,title,genres,watch_count
0,666262,93,33.0,2021-07-21,Дом ночных призраков,"зарубежные, криминал, детективы, ужасы",1
1,672861,25,90.0,2021-07-26,Медвежонок Винни и его друзья,"мюзиклы, мультфильм, приключения, комедии",47
2,672861,32,100.0,2021-08-01,В ритме сердца,"драмы, мюзиклы, мелодрамы",181
3,955527,21,63.0,2021-07-20,Признание 5,для взрослых,12


In [14]:
# Show K_RECOMMENDATIONS rows per test user (3 users x 10 = 30 with the current
# constants); derived from the constants so the preview stays in sync with them.
reco_df.head(len(TEST_USERS) * K_RECOMMENDATIONS)

Unnamed: 0,user_id,item_id,score,rank,title,genres,watch_count
0,666262,10440,202457.0,1,Хрустальный,"триллеры, детективы",202457
3,666262,15297,193123.0,2,Клиника счастья,"драмы, мелодрамы",193123
6,666262,9728,132865.0,3,Гнев человеческий,"боевики, триллеры",132865
9,666262,13865,122119.0,4,Девятаев,"драмы, военные, приключения",122119
12,666262,4151,91167.0,5,Секреты семейной жизни,комедии,91167
15,666262,3734,74803.0,6,Прабабушка легкого поведения,комедии,74803
18,666262,2657,68581.0,7,Подслушано,"драмы, триллеры",68581
21,666262,4880,55043.0,8,Афера,комедии,55043
24,666262,142,45367.0,9,Маша,"драмы, триллеры",45367
27,666262,6809,40372.0,10,Дуров,документальное,40372
