In [1]:
import pandas as pd

In [2]:
# model output: load the predictions CSV into a DataFrame

PREDICTION_CSV = 'predictions/prediction_nmf.csv'
df_prediction = pd.read_csv(PREDICTION_CSV)

In [5]:
# all movie ids vs titles

# header=0 tells pandas that the first line of the file is a header row and
# lets `names` replace it. Without it, passing `names` makes pandas treat the
# header line as a data record, which put a bogus 'movieId' -> 'title' entry
# into the lookup and is what turned the ids into strings.
# movieId is read as str on purpose: the lookup functions in this notebook
# query the dict with str(int(movie_id)) keys.
df_movies = pd.read_csv(
  'ml-latest-small/movies.csv',
  usecols=[0, 1],
  names=['movieId', 'title'],
  header=0,
  dtype={'movieId': str},
)

# movieId (as string) -> title
all_movies_dict = dict(zip(df_movies['movieId'], df_movies['title']))

In [6]:
# peek at the first five prediction rows
df_prediction.head(n=5)

Unnamed: 0,userId,movieId,predicted_rating,rating
0,1,157,3.201665,5.0
1,1,362,3.930156,5.0
2,1,457,4.612365,5.0
3,1,543,4.011487,4.0
4,1,590,4.099457,4.0


In [7]:
# every distinct userId present in the prediction (validation) frame,
# in order of first appearance

distinct_user_ids = df_prediction['userId'].unique()
all_users = distinct_user_ids.tolist()

In [8]:
# recommendations as per "predicted ratings" in the validation set

# .copy() yields an independent frame, so adding a column below does not write
# into a slice of df_prediction (the un-copied slice triggers pandas'
# SettingWithCopyWarning).
df_prediction1 = df_prediction[['userId', 'movieId', 'predicted_rating']].copy()

# Pair every movie with its predicted rating: (movieId, predicted_rating).
# zip() over the two columns replaces a row-wise apply(axis=1), which is slow
# and hands each row over as a single-dtype Series (integer ids become floats
# when mixed with float ratings).
df_prediction1['predicted_movies'] = list(
  zip(df_prediction1['movieId'], df_prediction1['predicted_rating']))

df_prediction2 = df_prediction1[['userId', 'predicted_movies']]

# one row per user, holding the list of that user's (movieId, predicted_rating) pairs
df_prediction_formatted = df_prediction2 \
  .groupby('userId')['predicted_movies'].apply(list).reset_index(name='recommendation')

# .assign() builds a new frame instead of aliasing df_prediction_formatted and
# overwriting its column as a side effect. sorted() is stable, so movies with
# equal predicted ratings keep their original relative order.
df_prediction_sorted = df_prediction_formatted.assign(
  recommendation=df_prediction_formatted['recommendation'].apply(
    lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True)))

# userId -> [(movieId, predicted_rating), ...] in descending predicted-rating order
sorted_reco_by_userid = dict(
  zip(df_prediction_sorted['userId'], df_prediction_sorted['recommendation']))

In [9]:
# Top-N lookup by predicted rating for a single user; N defaults to 10.

def get_top_n_recommendations(user, N=10):
  """Return the titles for the first N entries of sorted_reco_by_userid[user].

  Position 0 of each entry is taken as the movie id and converted with
  str(int(...)) before it is looked up in all_movies_dict.
  Raises KeyError if the user or a movie id is missing from its dict.
  """
  titles = []
  for entry in sorted_reco_by_userid[user][:N]:
    title_key = str(int(entry[0]))
    titles.append(all_movies_dict[title_key])

  return titles

In [10]:
# recommendations as per "true ratings" in the validation set

# .copy() yields an independent frame, so adding a column below does not write
# into a slice of df_prediction (the un-copied slice triggers pandas'
# SettingWithCopyWarning).
df_true1 = df_prediction[['userId', 'movieId', 'rating']].copy()

# Pair every movie with its true rating: (movieId, rating).
# zip() over the two columns replaces a row-wise apply(axis=1), which is slow
# and hands each row over as a single-dtype Series (integer ids become floats
# when mixed with float ratings).
df_true1['movies'] = list(zip(df_true1['movieId'], df_true1['rating']))

df_true2 = df_true1[['userId', 'movies']]

# one row per user, holding the list of that user's (movieId, rating) pairs
df_true_formatted = df_true2 \
  .groupby('userId')['movies'].apply(list).reset_index(name='recommendation')

# .assign() builds a new frame instead of aliasing df_true_formatted and
# overwriting its column as a side effect. sorted() is stable, so movies with
# equal ratings keep their original relative order.
df_true_sorted = df_true_formatted.assign(
  recommendation=df_true_formatted['recommendation'].apply(
    lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True)))

# userId -> [(movieId, rating), ...] in descending true-rating order
sorted_true_ratings_by_userid = dict(
  zip(df_true_sorted['userId'], df_true_sorted['recommendation']))

In [11]:
# Top-N lookup by true rating for a single user; N defaults to 10.

def get_top_n_true_ratings(user, N=10):
  """Return the titles for the first N entries of sorted_true_ratings_by_userid[user].

  Position 0 of each entry is taken as the movie id and converted with
  str(int(...)) before it is looked up in all_movies_dict.
  Raises KeyError if the user or a movie id is missing from its dict.
  """
  titles = []
  for entry in sorted_true_ratings_by_userid[user][:N]:
    title_key = str(int(entry[0]))
    titles.append(all_movies_dict[title_key])

  return titles

In [12]:
# Evaluate every user in the validation set: collect the top-N recommended
# titles and score them against the top-N titles by true rating.
#
# NOTE(review): both title lists are cut from the same validation rows, so
# whenever they hold the same number of distinct titles precision equals
# recall, and a user with at most N rated movies scores 1.0 on both.

top_n_recommendations_by_user = {}
user_out = []

for user in all_users:
  # top N recommendations (N = 10 by default), also stored for later lookup
  top_n_reco_for_user = get_top_n_recommendations(user)
  top_n_recommendations_by_user[user] = top_n_reco_for_user

  top_n_true_ratings_for_user = get_top_n_true_ratings(user)

  # compare as sets of titles
  recommended_titles = set(top_n_reco_for_user)
  relevant_titles = set(top_n_true_ratings_for_user)
  hits = recommended_titles & relevant_titles

  # precision: hits over everything recommended (true + false positives)
  precision_for_user = len(hits) / float(len(recommended_titles))

  # recall: hits over everything relevant (true positives + false negatives)
  recall_for_user = len(hits) / float(len(relevant_titles))

  # one row per user: [userId, precision, recall]
  user_out.append([user, precision_for_user, recall_for_user])

In [13]:
# Per-user precision / recall table for the whole validation set

EVAL_COLUMNS = ['userId', 'precision_user', 'recall_user']
df_out = pd.DataFrame(data=user_out, columns=EVAL_COLUMNS)
df_out

Unnamed: 0,userId,precision_user,recall_user
0,1,0.4,0.4
1,2,1.0,1.0
2,3,1.0,1.0
3,4,0.4,0.4
4,5,1.0,1.0
...,...,...,...
605,606,0.3,0.3
606,607,0.1,0.1
607,608,0.1,0.1
608,609,1.0,1.0


In [None]:
# example: the stored top-N recommended titles for one user

example_user = 7
top_n_recommendations_by_user[example_user]