In [None]:
import numpy as np
import pandas as pd

import os
import tempfile
from itertools import islice, cycle
from more_itertools import pairwise
from lightfm import LightFM
from lightfm.data import Dataset

In [3]:
users_df = pd.read_csv(r"/data/users_en.csv")
interactions_df = pd.read_csv(r"/data/interactions.csv")
items_df = pd.read_csv(r"/data/items.csv")

In [121]:
users_df.drop_duplicates(inplace=True)
users_df.reset_index(drop=True, inplace=True)
interactions_df.dropna(inplace=True)
interactions_df.reset_index(drop=True, inplace=True)
items_df.drop_duplicates(inplace=True)
items_df.reset_index(drop=True, inplace=True)

Unnamed: 0,user_id,item_id,last_watch_dt,total_dur,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0
...,...,...,...,...,...
5475418,648596,12225,2021-08-13,76,0.0
5475419,546862,9673,2021-04-13,2308,49.0
5475420,697262,15297,2021-08-20,18307,63.0
5475421,384202,16197,2021-04-19,6203,100.0


In [13]:
interactions_df=inter_df
test = interactions_df[interactions_df['last_watch_dt'] == interactions_df['last_watch_dt'].max()]
train = interactions_df[interactions_df['last_watch_dt'] < interactions_df['last_watch_dt'].max()]
dataset = Dataset()
dataset.fit((x for x in interactions_df['user_id']), (x for x in interactions_df['item_id']))
(interactions_matrix, weights_matrix) = dataset.build_interactions(
    ((row['user_id'], row['item_id'], row['total_dur']) for idx, row in train.iterrows()))
model = LightFM(loss='warp')
model.fit(interactions_matrix,
          sample_weight=weights_matrix, epochs=30, num_threads=2)
user_ids = dataset.mapping()[0]
item_ids = dataset.mapping()[2]
def get_recommendations(user_id, model, n_items=10):
    user_x = user_ids[user_id]
    scores = model.predict(user_x, list(item_ids.values()))
    top_items = [x for x in np.argsort(-scores)][:n_items]
    return top_items
test['recommended_items'] = test['user_id'].apply(lambda x: get_recommendations(x, model))
print(test[['user_id', 'recommended_items']])

         user_id                               recommended_items
9         203219     [25, 16, 32, 21, 112, 42, 60, 370, 44, 174]
65        125519    [25, 21, 32, 16, 174, 84, 142, 93, 370, 122]
141       626036    [21, 25, 32, 16, 174, 84, 587, 370, 173, 28]
277      1029980       [32, 16, 25, 84, 11, 21, 51, 10, 44, 142]
336       830261    [25, 21, 32, 16, 84, 174, 142, 93, 370, 122]
...          ...                                             ...
5474995   793184    [25, 32, 21, 16, 84, 174, 235, 370, 93, 233]
5475131   376561  [21, 25, 16, 32, 2295, 1143, 84, 174, 338, 92]
5475176   721497    [25, 21, 32, 16, 174, 84, 142, 93, 370, 122]
5475217    48935    [25, 21, 32, 16, 84, 174, 142, 93, 370, 122]
5475331  1039219     [21, 25, 174, 32, 84, 10, 16, 131, 93, 142]

[71171 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['recommended_items'] = test['user_id'].apply(lambda x: get_recommendations(x, model))


In [19]:
from sklearn.metrics import label_ranking_average_precision_score

def apk(actual, predicted, k=10):
    """
    Computes the average precision at k between two lists of items.
    """
    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    if not actual:
        return 0.0

    return score / min(len(actual), k)

def mapk(actual, predicted, k=10):
    """
    Computes the mean average precision at k.
    """
    return np.mean([apk(a, p, k) for a, p in zip(actual, predicted)])

# Assuming test['item_id'] is a list of the actual items the user has interacted with
test['actual_items'] = test['user_id'].apply(lambda x: list(interactions_df[interactions_df['user_id'] == x]['item_id'].values))

# The get_recommendations function needs to map back the item ids to the original ids
def get_recommendations(user_id, model, n_items=10):
    user_x = user_ids.get(user_id, None)
    if user_x is None:
        return []
    scores = model.predict(user_x, np.arange(len(item_ids)))
    top_items = [x for x in np.argsort(-scores)][:n_items]
    return [list(item_ids.keys())[list(item_ids.values()).index(i)] for i in top_items]

test['recommended_items'] = test['user_id'].apply(lambda x: get_recommendations(x, model))

# Compute MAP@10
map_score = mapk(test['actual_items'].tolist(), test['recommended_items'].tolist(), k=10)
print(f"MAP@10: {map_score}")

# If you want to display the result for each user, you can do so like this:
test[['user_id', 'recommended_items', 'actual_items']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['actual_items'] = test['user_id'].apply(lambda x: list(interactions_df[interactions_df['user_id'] == x]['item_id'].values))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['recommended_items'] = test['user_id'].apply(lambda x: get_recommendations(x, model))


MAP@10: 0.19086796327946584


Unnamed: 0,user_id,recommended_items,actual_items
9,203219,"[10440, 13865, 9728, 15297, 4740, 9996, 4495, ...","[13582, 13865, 9996, 14814, 4976, 8693, 4880]"
65,125519,"[10440, 15297, 9728, 13865, 4151, 3734, 4880, ...","[4583, 3475, 10440]"
141,626036,"[15297, 10440, 9728, 13865, 4151, 3734, 14470,...","[11109, 10770, 13865, 10912, 11345, 8893, 2043..."
277,1029980,"[9728, 13865, 10440, 3734, 12173, 15297, 7626,...","[12225, 12173, 15297, 896, 3734, 1287, 3594, 1..."
336,830261,"[10440, 15297, 9728, 13865, 3734, 4151, 4880, ...","[6646, 9070, 341, 7793]"
...,...,...,...
5474995,793184,"[10440, 9728, 15297, 13865, 3734, 4151, 7829, ...","[13865, 11778, 12192, 6402, 15297, 9728, 7059,..."
5475131,376561,"[15297, 10440, 13865, 9728, 5115, 7825, 3734, ...","[9627, 13865, 5115, 10440, 696, 4179, 7825, 71..."
5475176,721497,"[10440, 15297, 9728, 13865, 4151, 3734, 4880, ...",[512]
5475217,48935,"[10440, 15297, 9728, 13865, 3734, 4151, 4880, ...",[14598]


In [25]:
print(f"MAP@10: {map_score}")

MAP@10: 0.19086796327946584
