In [19]:
import numpy as np
import pandas as pd
import dask.dataframe as dd
import recsys_metrics as rm
import torch

In [20]:
# Base directory (trailing slash included) that the ranking and test TSV
# files are read from; file names are appended by plain string concatenation.
path = 'ml_100k/'

In [None]:
def user_to_tensors(user_group, user_positives):
    """Convert one user's scored items into ``(preds, target)`` tensors.

    Args:
        user_group: DataFrame holding a single user's rows with ``item`` and
            ``score`` columns. Its ``name`` attribute is read as the user id
            (as set on each group by ``groupby(...).apply``).
        user_positives: Mapping from user id to an iterable of that user's
            positive items. A user missing from the mapping is treated as
            having no positives.

    Returns:
        Tuple ``(preds, target)`` of float32 tensors, each with a leading
        dimension of size 1 followed by one entry per row of ``user_group``,
        ordered by descending score. ``target`` is 1.0 where the item is one
        of the user's positives and 0.0 elsewhere.
    """
    relevant_items = set(user_positives.get(user_group.name, []))

    # Rank this user's rows from highest to lowest score.
    ranked = user_group.sort_values(by='score', ascending=False)

    preds = torch.tensor(ranked['score'].values, dtype=torch.float32).unsqueeze(0)

    # Binary relevance, aligned position-by-position with ``preds``.
    relevance = [float(item in relevant_items) for item in ranked['item'].values]
    target = torch.tensor(relevance, dtype=torch.float32).unsqueeze(0)

    return preds, target


def compute_rank_report(pred_df, user_positives, k=10):
    """Average the per-user ``rm.rank_report`` metrics over all users.

    Args:
        pred_df: Dask DataFrame with ``user``, ``item`` and ``score`` columns.
            The ``meta`` keyword and the trailing ``.compute()`` are
            Dask-specific, so a plain pandas frame is not supported here.
        user_positives: Mapping from user id to that user's positive items;
            forwarded unchanged to ``user_to_tensors``.
        k: Cut-off forwarded to ``rm.rank_report``.

    Returns:
        dict mapping each metric name to its mean over all users.
    """
    metric_names = [
        'precision',
        'recall',
        'mean_average_precision',
        'mean_reciprocal_rank',
        'hit_rate',
        'normalized_dcg',
    ]

    def _report_for_user(user_group):
        # Build the tensors once per user: the sort and tensor conversion in
        # user_to_tensors would otherwise run twice for every group.
        preds, target = user_to_tensors(user_group, user_positives)
        return pd.Series(rm.rank_report(preds=preds, target=target, k=k))

    results = pred_df.groupby('user').apply(
        _report_for_user,
        # Tells Dask the output schema up front: one float64 column per metric.
        meta={metric: 'f8' for metric in metric_names},
    ).compute()

    return results.mean().to_dict()

# Scored items, read as a Dask DataFrame (the file has no header row).
pred_df = dd.read_csv(
    f'{path}ranking.tsv',
    sep='\t',
    names=['user', 'item', 'score', 'pred_label'],
)

# Test interactions, read with pandas (the file has no header row).
test = pd.read_csv(
    f'{path}test.tsv',
    sep='\t',
    names=['user', 'item', 'label', 'timestamp'],
)

# user id -> list of every item that user has in the test file.
# NOTE(review): all test rows count as positives here regardless of `label`;
# confirm the file holds positive interactions only.
user_positives = (
    test.groupby('user')['item']
    .apply(list)
    .to_dict()
)

metrics = compute_rank_report(pred_df, user_positives, k=10)
print(metrics)

{'precision': 0.10318133616118777, 'recall': 0.2519216330858957, 'mean_average_precision': 0.2202162248144221, 'mean_reciprocal_rank': 0.3353185577942736, 'hit_rate': 0.9692470837751855, 'normalized_dcg': 0.2033918345705197}
