In [24]:
# Third-party numeric / dataframe libraries.
import numpy as np
import pandas as pd

# Project-local helpers from the `src` package.
from src.utils import timepoint_split, transform_data
from src.evaluation.evaluation import downvote_seen_items, topn_recommendations, model_evaluate

from tqdm import tqdm
# The plotting API (`plot`, `subplots`, `show`, ...) lives in the `matplotlib.pyplot`
# submodule; aliasing the top-level `matplotlib` package as `plt` makes those calls fail.
import matplotlib.pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
# Load the MovieLens ratings together with their timestamps and preview the first rows.
# NOTE(review): `get_movielens_data` is not imported in any cell visible here — on a fresh
# kernel this line raises NameError unless the import exists elsewhere; confirm which
# module provides it and add the import to the imports cell.
data_movielens = get_movielens_data(include_time=True)
data_movielens.head(5)

Unnamed: 0,userid,movieid,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [59]:
# Read the tab-separated Yelp ratings file. The file has no header row, so the column
# names are supplied explicitly; they match the MovieLens column names shown above
# (the item column is called 'movieid' for both datasets).
yelp_columns = ['userid', 'movieid', 'rating', 'timestamp']
data_yelp = pd.read_csv('yelp.rating', sep='\t', names=yelp_columns)

# Preview the first rows (DataFrame.head defaults to 5).
data_yelp.head()

Unnamed: 0,userid,movieid,rating,timestamp
0,0,0,4.0,1329148800
1,0,0,4.0,1337011200
2,0,1,4.0,1335888000
3,0,2,4.0,1379260800
4,0,3,2.0,1367856000


In [72]:
# Labels printed above each dataset's metrics: index 0 is Yelp, index 1 is MovieLens.
datasets = [
    'Yelp: ',
    'MovieLens: ',
]

In [70]:
# Split each dataset with `timepoint_split` and feed the resulting pieces straight into
# `transform_data`. The two datasets use different `time_split_q` values (0.98 vs 0.95);
# presumably this is a quantile of the timestamp column — confirm in src.utils.
training_yelp, testset_yelp, holdout_yelp, data_index_yelp = transform_data(
    *timepoint_split(data_yelp, time_split_q=0.98)
)

training_movielens, testset_movielens, holdout_movielens, data_index_movielens = transform_data(
    *timepoint_split(data_movielens, time_split_q=0.95)
)

# Report the first dimension (row count) of the training / test / holdout parts.
print(
    'Shapes, yelp data: ',
    training_yelp.shape[0],
    testset_yelp.shape[0],
    holdout_yelp.shape[0],
)
print(
    'Shapes, movielens data: ',
    training_movielens.shape[0],
    testset_movielens.shape[0],
    holdout_movielens.shape[0],
)

Shapes, yelp data:  508436 197871 4688
Shapes, movielens data:  725370 221697 802


In [53]:
def _describe_data(data_index):
    """Build the data-description dict passed to the evaluation helpers.

    Both datasets previously built this dict by hand with identical structure;
    a single helper keeps the two descriptions from drifting apart.

    Parameters
    ----------
    data_index : mapping
        Must provide 'users' and 'items' entries, each exposing a ``.name``
        attribute and supporting ``len()``.

    Returns
    -------
    dict
        ``users`` / ``items``: the ``.name`` of the corresponding index entry;
        ``feedback``: always ``'rating'``;
        ``n_users`` / ``n_items``: the length of the corresponding index entry.
    """
    users_index = data_index['users']
    items_index = data_index['items']
    return dict(
        users=users_index.name,
        items=items_index.name,
        feedback='rating',
        n_users=len(users_index),
        n_items=len(items_index),
    )


data_description_yelp = _describe_data(data_index_yelp)
data_description_movielens = _describe_data(data_index_movielens)

In [71]:
# !!! scores_X is the output of model X, i.e. an np.array of shape
# scores_X.shape = (number of users in the holdout, total number of items),
# whose elements are the predicted item scores for each user (the scores themselves, not item indices).
# TODO: the `...` (Ellipsis) values below are placeholders — replace them with real model
# output before running the cells that follow, which pass these variables on to the
# evaluation helpers.
scores_yelp = ...
scores_movielens = ...

In [None]:
# Apply `downvote_seen_items` to each dataset's scores, Yelp first and MovieLens second.
# The return value is discarded, so the helper presumably modifies the score array
# in place — confirm in src.evaluation.evaluation.
for model_scores, seen_interactions, description in (
    (scores_yelp, testset_yelp, data_description_yelp),
    (scores_movielens, testset_movielens, data_description_movielens),
):
    downvote_seen_items(model_scores, seen_interactions, description)

In [None]:
# Length of each recommendation list; also used as the @N cutoff when printing metrics.
topn = 10

# Run `topn_recommendations` on both score arrays, Yelp first and MovieLens second.
recs_yelp, recs_movielens = [
    topn_recommendations(model_scores, topn=topn)
    for model_scores in (scores_yelp, scores_movielens)
]

In [None]:
# Evaluate the Yelp recommendations against the Yelp holdout.
# `model_evaluate` returns four values, unpacked here as HR, MRR, nDCG and coverage.
hr, mrr, ndcg, cov = model_evaluate(
    recs_yelp,
    holdout_yelp,
    data_description_yelp,
    topn=topn,
)

# Print the dataset label followed by one metric per line, each with 4 decimals.
print(
    datasets[0],
    f'HR@{topn} = {hr:.4f}',
    f'MRR@{topn} = {mrr:.4f}',
    f'nDCG@{topn} = {ndcg:.4f}',
    f'COV@{topn} = {cov:.4f}',
    sep='\n',
)

In [None]:
# Evaluate the MovieLens recommendations against the MovieLens holdout.
# `model_evaluate` returns four values, unpacked here as HR, MRR, nDCG and coverage.
hr, mrr, ndcg, cov = model_evaluate(
    recs_movielens,
    holdout_movielens,
    data_description_movielens,
    topn=topn,
)

# Print the dataset label followed by one metric per line, each with 4 decimals.
print(
    datasets[1],
    f'HR@{topn} = {hr:.4f}',
    f'MRR@{topn} = {mrr:.4f}',
    f'nDCG@{topn} = {ndcg:.4f}',
    f'COV@{topn} = {cov:.4f}',
    sep='\n',
)