# **Задание №1**

In [2]:
import pandas as pd
import numpy as np
from itertools import islice, cycle

In [4]:
# Reference tables describing users and items.
users_df = pd.read_csv('/content/users.csv')
items_df = pd.read_csv('/content/items.csv')

# Full interaction log; 'last_watch_dt' is parsed to datetimes on load.
interactions_df = pd.read_csv(
    '/content/interactions.csv',
    parse_dates=['last_watch_dt'],
)

# Pre-computed train / test splits (loaded without date parsing).
train_df = pd.read_csv('/content/interactions_train.csv')
test_df = pd.read_csv('/content/interactions_test.csv')

**Helpers**

In [7]:
def calc_map10(train, test, recs, top_N):
    """Compute MAP@top_N of ``recs`` against the held-out interactions ``test``.

    For every test row that was recommended within the first ``top_N``
    positions, the precision at that position (hits so far / rank) is
    taken. These values are summed per user and divided by the number of
    test rows of that user, then averaged over all users present in
    ``test``. Users without any hit contribute 0.

    Parameters
    ----------
    train : any
        Unused; kept so existing callers do not break.
    test : pandas.DataFrame
        Ground truth with at least ``user_id`` and ``item_id`` columns.
    recs : pandas.DataFrame
        Recommendations with ``user_id``, ``item_id`` and a 1-based ``rank``.
    top_N : int
        Cut-off; recommendations with ``rank > top_N`` are ignored.

    Returns
    -------
    float
        The metric value; ``0.0`` when ``test`` has no rows.
    """
    key_cols = ['user_id', 'item_id']

    # Without ground truth there are no users to average over (avoids 0 / 0).
    if len(test) == 0:
        return 0.0

    # Enforce the cut-off and keep only the column the metric needs, so extra
    # columns in ``recs`` cannot collide with columns of ``test`` in the join.
    top_recs = recs.loc[recs['rank'] <= top_N, key_cols + ['rank']]

    # Left join: test rows that were not recommended get rank = NaN.
    test_recs = test.set_index(key_cols).join(top_recs.set_index(key_cols))
    # NaN ranks sort last, so within a user the hits come first, best rank first.
    test_recs = test_recs.sort_values(by=['user_id', 'rank'])

    by_user = test_recs.groupby(level='user_id')
    # 'size' counts every test row of the user, including misses (NaN rank).
    users_item_count = by_user['rank'].transform('size')
    # Running number of hits divided by the hit's rank = precision at that rank;
    # misses yield NaN and are skipped by ``sum`` below.
    precision_at_hit = (by_user.cumcount() + 1) / test_recs['rank']

    users_count = test_recs.index.get_level_values('user_id').nunique()

    return float((precision_at_hit / users_item_count).sum() / users_count)
# Baseline
class PopularRecommender():
    """Non-personalised baseline: the most frequent items of a recent time window.

    ``fit`` keeps at most ``max_K`` item ids, ordered by how often they occur
    in the rows whose timestamp falls inside the last ``days`` days of the
    data. ``recommend`` serves a prefix of that list.
    """

    def __init__(self, max_K=10, days=30, item_column='item_id', dt_column='date'):
        """Store the configuration; no recommendations exist until ``fit`` runs."""
        self.max_K, self.days = max_K, days
        self.item_column, self.dt_column = item_column, dt_column
        self.recommendations = []

    def fit(self, df):
        """Rank items by occurrence count inside the recent window of ``df``.

        The window starts ``days`` days before midnight of the latest
        timestamp in ``dt_column``; only rows strictly after that start count.
        """
        timestamps = df[self.dt_column]
        window_start = timestamps.max().normalize() - pd.DateOffset(days=self.days)
        recent_items = df.loc[timestamps > window_start, self.item_column]
        popularity = recent_items.value_counts()
        self.recommendations = popularity.head(self.max_K).index.values

    def recommend(self, users=None, N=10):
        """Return the first ``N`` fitted items.

        With ``users`` given, the same top-N sequence is repeated once per
        user and returned as a list of length ``len(users)``; otherwise the
        top-N sequence itself is returned.
        """
        top_items = self.recommendations[:N]
        if users is not None:
            # Every entry refers to the same top-N object, one per user.
            return [top_items] * len(users)
        return top_items

In [19]:
# Rebuild both splits from the date-parsed interaction log by user membership:
# a row goes to a split when its user_id occurs in the corresponding split file.
# NOTE(review): if the same user can appear in both split files, the two frames
# below overlap and the metric is computed on leaked data - confirm how
# interactions_train.csv / interactions_test.csv were produced.
train = interactions_df[interactions_df['user_id'].isin(train_df['user_id'])]
test = interactions_df[interactions_df['user_id'].isin(test_df['user_id'])]

In [20]:
# Popularity baseline counted over the last 7 days of `train`, using
# 'last_watch_dt' as the timestamp column; keeps the default top 10 items.
pop_model = PopularRecommender(days=7, dt_column='last_watch_dt')
pop_model.fit(train)

In [21]:
# Without `users` the model returns the fitted top-N item ids themselves
# (N defaults to 10); the bare name on the last line displays them.
top10_recs = pop_model.recommend()
top10_recs

array([14488,  9728, 15297, 12192, 10440,   341,   512, 12360, 13865,
        4151])

In [22]:
# Lookup table item_id -> title, used to show recommendations in readable form.
item_titles = items_df.set_index('item_id')['title'].to_dict()

In [23]:
# Titles of the recommended items; ids missing from the lookup map to None.
[item_titles.get(item_id) for item_id in top10_recs]

['Мастер меча',
 'Гнев человеческий',
 'Клиника счастья',
 'Фемида видит',
 'Хрустальный',
 'Лето - это море',
 'Рядовой Чээрин',
 'Круэлла',
 'Девятаев',
 'Секреты семейной жизни']

In [24]:
# Number of recommendations served to every user.
top_N = 10

# One row per distinct test user; 'item_id' holds that user's whole top-N
# sequence (identical for everybody, since the model is not personalised).
recs = (
    pd.DataFrame({'user_id': test['user_id'].unique()})
    .assign(item_id=lambda users: pop_model.recommend(users['user_id'], N=top_N))
)
recs.head()

Unnamed: 0,user_id,item_id
0,176549,"[14488, 9728, 15297, 12192, 10440, 341, 512, 1..."
1,699317,"[14488, 9728, 15297, 12192, 10440, 341, 512, 1..."
2,1016458,"[14488, 9728, 15297, 12192, 10440, 341, 512, 1..."
3,203219,"[14488, 9728, 15297, 12192, 10440, 341, 512, 1..."
4,988709,"[14488, 9728, 15297, 12192, 10440, 341, 512, 1..."


In [25]:
# Turn the per-user item sequences into long format: one row per
# (user_id, item_id) pair; the original row index is repeated for each item.
recs = recs.explode('item_id')

In [26]:
# 1-based position of each item within its user's list. This relies on the
# rows still being in the order produced by explode (most popular item first).
recs['rank'] = recs.groupby('user_id').cumcount() + 1
# Show a little more than one user's worth of rows.
recs.head(top_N + 2)

Unnamed: 0,user_id,item_id,rank
0,176549,14488,1
0,176549,9728,2
0,176549,15297,3
0,176549,12192,4
0,176549,10440,5
0,176549,341,6
0,176549,512,7
0,176549,12360,8
0,176549,13865,9
0,176549,4151,10


In [27]:
# Score the baseline. The cut-off is the same `top_N` that was used to build
# `recs`, so the two values cannot drift apart if the constant is changed.
'MAP@10', calc_map10(train, test, recs, top_N)

('MAP@10', 0.07228057530973409)