# **Задание №1**

In [23]:
import pandas as pd
import numpy as np
from itertools import islice, cycle

In [24]:
# Load the three source tables. Only the interactions table carries a
# timestamp column, which is parsed into datetimes at read time.
users_df = pd.read_csv('./users.csv')
items_df = pd.read_csv('./items.csv')
interactions_df = pd.read_csv(
    './interactions.csv',
    parse_dates=['last_watch_dt'],
)

**Helpers**

In [50]:
def calc_map10(train, test, recs, top_N):
    """Compute MAP@top_N of ranked recommendations against held-out interactions.

    Parameters
    ----------
    train : any
        Not used by the computation; kept so existing call sites keep working.
    test : pandas.DataFrame
        Held-out interactions; must contain ``user_id`` and ``item_id`` columns.
    recs : pandas.DataFrame
        Recommendations with ``user_id``, ``item_id`` and a 1-based ``rank`` column.
    top_N : int or None
        Cutoff of the metric: recommendations with ``rank > top_N`` are ignored.
        ``None`` disables the cutoff and scores every row of ``recs``.

    Returns
    -------
    float
        Per-user average precision (sum of precision at each hit divided by the
        number of test items of that user), averaged over all users in ``test``.
    """
    # Enforce the cutoff; without it, hits ranked below top_N would still be
    # counted and the value would no longer be MAP@top_N.
    if top_N is not None:
        recs = recs[recs['rank'] <= top_N]

    # Left join: test items that were not recommended get a NaN rank.
    test_recs = test.set_index(['user_id', 'item_id']).join(recs.set_index(['user_id', 'item_id']))
    # Within each user, order hits by rank; NaN ranks (misses) sort last.
    test_recs = test_recs.sort_values(by=['user_id', 'rank'])

    # Number of test items per user (misses included) -- the AP denominator.
    test_recs['users_item_count'] = test_recs.groupby(level='user_id')['rank'].transform('size')
    # Precision at each hit: (hits so far) / rank. NaN for misses.
    test_recs['cumulative_rank'] = (test_recs.groupby(level='user_id').cumcount() + 1) / test_recs['rank']

    users_count = test_recs.index.get_level_values('user_id').nunique()

    # .sum() skips the NaN rows, so misses contribute nothing.
    result = (test_recs['cumulative_rank'] / test_recs['users_item_count']).sum() / users_count

    return result
# Baseline
class PopularRecommender():
    def __init__(self, max_K=10, days=30, item_column='item_id', dt_column='date'):
        self.max_K = max_K
        self.days = days
        self.item_column = item_column
        self.dt_column = dt_column
        self.recommendations = []

    def fit(self, df, ):
        min_date = df[self.dt_column].max().normalize() - pd.DateOffset(days=self.days)
        self.recommendations = df.loc[df[self.dt_column] > min_date, self.item_column].value_counts().head(self.max_K).index.values

    def recommend(self, users=None, N=10):
        recs = self.recommendations[:N]
        if users is None:
            return recs
        else:
            return list(islice(cycle([recs]), len(users)))

In [27]:
# Hold-out split: rows stamped with the latest `last_watch_dt` value form the
# test set; everything strictly earlier is the training set.
test = interactions_df.loc[
    interactions_df['last_watch_dt'].eq(interactions_df['last_watch_dt'].max())
]
train = interactions_df.loc[
    interactions_df['last_watch_dt'].lt(interactions_df['last_watch_dt'].max())
]

In [43]:
# Popularity baseline fitted on the training interactions with a 7-day window.
pop_model = PopularRecommender(
    dt_column='last_watch_dt',
    days=7,
)
pop_model.fit(df=train)

In [44]:
# No users given: recommend() returns the fitted item ids themselves.
top10_recs = pop_model.recommend(users=None, N=10)
top10_recs

array([ 9728, 15297, 10440, 13865, 12360, 14488, 12192,   341,   512,
        4151])

In [45]:
# Lookup table: item_id -> title.
item_titles = items_df.set_index('item_id')['title'].to_dict()

In [46]:
# Titles of the recommended items (None for ids missing from the lookup).
[item_titles.get(item_id) for item_id in top10_recs]

['Гнев человеческий',
 'Клиника счастья',
 'Хрустальный',
 'Девятаев',
 'Круэлла',
 'Мастер меча',
 'Фемида видит',
 'Лето - это море',
 'Рядовой Чээрин',
 'Секреты семейной жизни']

In [47]:
# One row per distinct test user, each holding that user's list of
# recommended item ids (the same popular items for everyone).
top_N = 10
recs = pd.DataFrame({'user_id': test['user_id'].unique()})
recs['item_id'] = pop_model.recommend(users=recs['user_id'], N=top_N)
recs.head()

Unnamed: 0,user_id,item_id
0,203219,"[9728, 15297, 10440, 13865, 12360, 14488, 1219..."
1,125519,"[9728, 15297, 10440, 13865, 12360, 14488, 1219..."
2,626036,"[9728, 15297, 10440, 13865, 12360, 14488, 1219..."
3,1029980,"[9728, 15297, 10440, 13865, 12360, 14488, 1219..."
4,830261,"[9728, 15297, 10440, 13865, 12360, 14488, 1219..."


In [48]:
# Unpack each user's list into one row per (user_id, item_id) pair.
recs = recs.explode(column='item_id')

In [49]:
# 1-based position of every item within its user's recommendation list.
recs['rank'] = recs.groupby('user_id').cumcount().add(1)
recs.head(top_N + 2)

Unnamed: 0,user_id,item_id,rank
0,203219,9728,1
0,203219,15297,2
0,203219,10440,3
0,203219,13865,4
0,203219,12360,5
0,203219,14488,6
0,203219,12192,7
0,203219,341,8
0,203219,512,9
0,203219,4151,10


In [40]:
# Score the popularity baseline using the same cutoff constant that was used
# to build `recs`, so the label and the metric cannot drift apart.
f'MAP@{top_N}', calc_map10(train, test, recs, top_N)

('MAP@10', 0.08938255229733759)