In [1]:
import tqdm

In [2]:
import sys
sys.path.append('/home/rinchin/recsys_benchmark/daisyRec')

daisyRec version 1.1.2

In [3]:
import daisy.utils

In [120]:
from daisy.utils.metrics import *

In [5]:
import pandas as pd

In [6]:
df_pred = pd.read_csv('./preds_full.csv')
df_test = pd.read_csv('./test.csv')

df_pred_new = pd.merge(
    df_pred, 
    df_test.loc[:, ['user_id', 'item_id', 'relevance']], on=['user_id', 'item_id'], how='left'
)

df_pred_new = df_pred_new[df_pred_new.user_id.isin(df_test.user_id)]

df_pred_new.fillna(0, inplace=True)

df_pred_new.relevance.value_counts()

0.0    21307932
4.5       25887
5.0       12924
Name: relevance, dtype: int64

In [7]:
df_pred_new.head()

Unnamed: 0,user_id,item_id,score,relevance
0,41648,2019,0.610297,0.0
1,41648,7361,0.530178,0.0
2,41648,5618,0.468824,0.0
3,41648,1617,0.427014,0.0
4,41648,5952,0.415578,0.0


In [8]:
df_test.head()

Unnamed: 0,user_id,item_id,rating,timestamp,relevance
0,1256,5378,5.0,2007-10-31 12:18:24,5.0
1,1256,778,4.5,2007-10-31 12:19:51,4.5
2,1256,8376,5.0,2007-10-31 12:30:59,5.0
3,1256,2594,5.0,2007-10-31 12:20:49,5.0
4,1256,30810,4.5,2007-10-31 12:32:24,4.5


In [9]:
recommendations = {}

for user, relevance in zip(df_pred_new.user_id, df_pred_new.relevance):
    if user in recommendations:
        recommendations[user] += [relevance]
    else:
        recommendations[user] = [relevance]

In [79]:
recommendations_top = {}

for user in tqdm.tqdm(recommendations):
    recommendations_top[user] = recommendations[user][:20]

100%|██████████| 2733/2733 [00:00<00:00, 98574.51it/s]


In [54]:
recommendations_top = {}

for user in tqdm.tqdm(df_pred_new.user_id.unique()):
    recommendations_top[user] = df_pred_new[df_pred_new.user_id == user].sort_values('score', ascending=False).relevance[:20]

100%|██████████| 2733/2733 [01:09<00:00, 40.37it/s]


In [113]:
recommendations_top_bin = {k: [1 if x > 0 else 0 for x in v] for k,v in recommendations_top.items()}

In [11]:
gt_recommendations = {}

for user, item_id in zip(df_test.user_id, df_test.item_id):
    if user in gt_recommendations:
        gt_recommendations[user] += [item_id]
    else:
        gt_recommendations[user] = [item_id] 

In [12]:
map_at_k(recommendations_top)

IndexError: invalid index to scalar variable.

Let's fix this small bug:

In [13]:
def map_at_k(rs):
    """
    Mean Average Precision calculation method
    Parameters
    ----------
    rs : Dict, {user : rank items} for test set
    Returns
    -------
    m_a_p : float, MAP value
    """
    m_a_p = np.mean([ap(r) for r in rs.values()])
    return m_a_p

In [14]:
map_at_k(recommendations_top)

0.022970972145948697

In [15]:
hr_at_k(recommendations_top, gt_recommendations)

0.47457006952067327

In [16]:
np.mean([precision_at_k(x, 20) for x in list(recommendations.values())])

0.0575192096597146

In [17]:
np.mean([
    ndcg_at_k(
        df_pred_new[df_pred_new.user_id == user].relevance.tolist(), 
        20
    ) 
    for user in recommendations
])

0.09326437896736813

In [18]:
recall_at_k(recommendations, gt_recommendations, 20)

0.09632131280310641

In [19]:
mrr_at_k(recommendations, 20)

0.2748816881385201

In [20]:
auc_at_k(recommendations_top)

ZeroDivisionError: float division by zero

Bug: https://github.com/AmazingDD/daisyRec/blob/1b7d693f07b892f26eb026d416d86a59a6c3e3a0/daisy/utils/metrics.py#L202

Also let's fix this with adding the condition

In [116]:
def auc_at_k(rs):
    """
    Area Under Curve calculation method
    Parameters
    ----------
    rs : Dict, {user : rank items} for test set
    Returns
    -------
    m_auc : float, AUC value
    """
    uauc = 0.
    for user in rs.keys():
        label_all = rs[user]

        pos_num = len(list(filter(lambda x: x == 1, label_all)))
        neg_num = len(label_all) - pos_num
        

        if pos_num * neg_num == 0:
            continue

        pos_rank_num = 0
        for j in range(len(label_all)):
            if label_all[j] == 1:
                pos_rank_num += j + 1

        auc = (pos_rank_num - pos_num * (pos_num + 1) / 2) / (pos_num * neg_num)
        uauc += auc

    m_auc = uauc / len(rs)

    return m_auc

In [117]:
auc_at_k(recommendations_top_bin)

0.19139559641952136

In [118]:
def auc_at_k(rs):
    """
    Area Under Curve calculation method
    Parameters
    ----------
    rs : Dict, {user : rank items} for test set
    Returns
    -------
    m_auc : float, AUC value
    """
    uauc = 0.
    users_to_exclude = 0
    
    for user in rs.keys():
        label_all = rs[user]

        pos_num = len(list(filter(lambda x: x == 1, label_all)))
        neg_num = len(label_all) - pos_num

        if pos_num * neg_num == 0:
            users_to_exclude += 1
            continue

        pos_rank_num = 0
        for j in range(len(label_all)):
            if label_all[j] == 1:
                pos_rank_num += j + 1

        auc = (pos_rank_num - pos_num * (pos_num + 1) / 2) / (pos_num * neg_num)
        uauc += auc

    m_auc = uauc / (len(rs) - users_to_exclude)

    return m_auc

In [119]:
auc_at_k(recommendations_top_bin)

0.4033031341669637

There may be some variations in the AP formula:

In [None]:
def ap(r, user):
    """
    Average precision calculation method
    Parameters
    ----------
    r : List, Relevance scores (list or numpy) in rank order (first element is the first item)
    Returns
    -------
    a_p : float, Average precision value
    """
    r = np.asarray(r) != 0
    out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
    if not out:
        return 0.
    a_p = np.sum(out) / len(gt_recommendations[user])

    return a_p


def map_at_k(rs):
    """
    Mean Average Precision calculation method
    Parameters
    ----------
    rs : Dict, {user : rank items} for test set
    Returns
    -------
    m_a_p : float, MAP value
    """
    
    return np.mean([ap(r, user) for user, r in rs.items()])

In [None]:
map_at_k(recommendations_top)

In [None]:
def ap(r, user):
    """
    Average precision calculation method
    Parameters
    ----------
    r : List, Relevance scores (list or numpy) in rank order (first element is the first item)
    Returns
    -------
    a_p : float, Average precision value
    """
    r = np.asarray(r) != 0
    out = [r[k] * precision_at_k(r, k + 1) for k in range(r.size)]
#     if not out:
#         return 0.

#     max_good = len(tmp_test[user])
#     max_good = len(r)
    max_good = min(len(gt_recommendations[user]), len(r))
    a_p = np.sum(out) / max_good

    return a_p

In [None]:
map_at_k(recommendations_top)