# Evaluation using recsys_metrics library

In [1]:
import numpy as np
import pandas as pd
import dask.dataframe as dd
import recsys_metrics as rm
import torch

In [3]:
path = 'eva_netflix_full/'

In [None]:
# Convert one user's predictions into (preds, target) tensors for metric computation
def user_to_tensors(user_group, user_positives):
    """Build the ``(preds, target)`` tensor pair for one user's predictions.

    Parameters
    ----------
    user_group : pandas.DataFrame
        Rows belonging to a single user, with ``'item'`` and ``'score'``
        columns. Its ``name`` attribute is read as the user id.
    user_positives : mapping
        Maps a user id to an iterable of that user's relevant items. A user
        missing from the mapping is treated as having no relevant items.

    Returns
    -------
    tuple of torch.Tensor
        ``preds`` (the scores) and ``target`` (1.0 for a relevant item,
        0.0 otherwise). Both are float32 with a leading dimension of size 1
        and follow the rows ordered by descending score.
    """
    relevant = set(user_positives.get(user_group.name, []))

    ranked = user_group.sort_values('score', ascending=False)

    # One relevance flag per ranked item; nesting the list supplies the
    # leading dimension of size 1.
    hits = [float(item in relevant) for item in ranked['item'].values]
    target = torch.tensor([hits], dtype=torch.float32)

    # Indexing with None prepends the same leading dimension to the scores.
    preds = torch.tensor(ranked['score'].values, dtype=torch.float32)[None]

    return preds, target

#https://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.apply.html
def compute_rank_report(pred_df, user_positives, k=10):
    """Average the per-user ``rm.rank_report`` metrics over all users.

    Parameters
    ----------
    pred_df : dask.dataframe.DataFrame
        Predictions with ``'user'``, ``'item'`` and ``'score'`` columns. It is
        grouped by ``'user'`` and the grouped result is materialised with
        ``.compute()``.
    user_positives : mapping
        Maps a user id to that user's relevant items; forwarded to
        ``user_to_tensors``.
    k : int, default 10
        Forwarded to ``rm.rank_report`` as its ``k`` argument.

    Returns
    -------
    dict
        Metric name -> mean of that metric across users.
    """
    # Output columns declared to Dask via `meta` (all float64).
    # NOTE(review): assumes rm.rank_report returns a mapping keyed by exactly
    # these names -- confirm against the installed recsys_metrics version.
    metric_names = [
        'precision',
        'recall',
        'mean_average_precision',
        'mean_reciprocal_rank',
        'hit_rate',
        'normalized_dcg',
    ]

    def _user_report(user_group):
        # Build the tensors once per user so each group is sorted and
        # converted a single time before being scored.
        preds, target = user_to_tensors(user_group, user_positives)
        return pd.Series(rm.rank_report(preds=preds, target=target, k=k))

    results = pred_df.groupby('user').apply(
        _user_report,
        meta={metric: 'f8' for metric in metric_names},
    ).compute()

    return results.mean().to_dict()

# Model output: one (user, item, score, label) row per scored item, loaded lazily with Dask.
pred_df = dd.read_csv(
    path + 'ranking.tsv',
    sep='\t',
    names=['user', 'item', 'score', 'label'],
)

# Held-out interactions, loaded eagerly with pandas.
test = pd.read_csv(
    path + 'test.tsv',
    sep='\t',
    names=['user', 'item', 'label', 'timestamp'],
)

# Distinct users and items present in the test split.
num_users, num_items = test['user'].nunique(), test['item'].nunique()
display(num_users, num_items)

# user id -> list of that user's test items (every test row is included,
# whatever its label).
user_positives = test.groupby('user')['item'].apply(list).to_dict()

# Mean ranking metrics over users, forwarding k=10 to the report.
metrics = compute_rank_report(pred_df, user_positives, k=10)
print(metrics)

142765

5331

{'precision': 0.06219336838723468, 'recall': 0.21261241621060742, 'mean_average_precision': 0.12065339700254683, 'mean_reciprocal_rank': 0.16964338305252635, 'hit_rate': 0.7881890045426906, 'normalized_dcg': 0.12822332224487604}
