In [40]:
from tensorflow_addons.metrics import RSquare
from keras.models import load_model
import pandas as pd
import joblib

### Mean Average Precision - MAP@k

In [41]:
# modified from:
# author: Ben Hamner
# author's github: benhamner
# link to github: https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py 

def apk(actual, predicted, k=10):
    """Average precision at k for a single ranked prediction list.

    Parameters
    ----------
    actual : sequence
        Items considered relevant.
    predicted : sequence
        Ranked predictions, best first. Only the first ``k`` are scored.
    k : int, optional
        Cut-off for the prediction list (default 10).

    Returns
    -------
    float
        Sum of the precision values at every position holding a relevant,
        not previously seen item, divided by ``min(len(actual), k)``.
        ``0.0`` when ``actual`` is empty.
    """
    top_predictions = predicted[:k] if len(predicted) > k else predicted

    hits = 0.0
    precision_total = 0.0

    for rank, item in enumerate(top_predictions, start=1):
        if item not in actual:
            continue
        # A repeated prediction is only credited the first time it appears.
        if item in top_predictions[:rank - 1]:
            continue
        hits += 1.0
        precision_total += hits / rank

    if actual:
        return precision_total / min(len(actual), k)

    return 0.0

def mapk(actual, predicted, k=10):
    """Mean average precision at k over all users in ``actual``.

    Parameters
    ----------
    actual : dict
        Maps each user to the list of items relevant for that user.
    predicted : dict
        Maps each user to that user's ranked list of predicted items.
    k : int, optional
        Cut-off passed on to ``apk`` (default 10).

    Returns
    -------
    float
        Sum of the per-user ``apk`` scores divided by the number of users in
        ``actual``. Users missing from ``predicted`` contribute 0 to the sum
        but still count in the denominator. ``0.0`` when ``actual`` is empty.
    """
    # No users to evaluate: return 0.0 instead of dividing by zero.
    if not actual:
        return 0.0

    apk_sum = 0.0
    for user, relevant_items in actual.items():
        if user in predicted:
            apk_sum += apk(relevant_items, predicted[user], k)

    return apk_sum / len(actual)

### Mean Reciprocal Rank - MRR

In [42]:
def mrr(actual, predicted):
    """Mean reciprocal rank over all users in ``actual``.

    Parameters
    ----------
    actual : dict
        Maps each user to the collection of items relevant for that user.
    predicted : dict
        Maps each user to that user's ranked list of predicted items.

    Returns
    -------
    float
        For every user the reciprocal of the 1-based position of the first
        relevant item in the prediction list (0 when there is none, or when
        the user has no predictions), averaged over the users in ``actual``.
        ``0.0`` when ``actual`` is empty.
    """
    # No users to evaluate: return 0.0 instead of dividing by zero.
    if not actual:
        return 0.0

    mrr_sum = 0.0
    for user, relevant_items in actual.items():
        if user not in predicted:
            continue
        for rank, movie in enumerate(predicted[user], start=1):
            if movie in relevant_items:
                # Only the first relevant item counts for this user.
                mrr_sum += 1.0 / rank
                break

    return mrr_sum / len(actual)

-----------

In [43]:
# Evaluation configurations, keyed by a short experiment name.
# Each entry provides:
#   test_split_data_path - CSV file with the test split to evaluate on
#   recsys_data_path     - directory prefix that `scaler_file` is appended to when the scaler is loaded
#   nn_model_path        - saved Keras model to evaluate
#   scaler_file          - file name of the persisted scaler
EXPERIMENT_CONFIGS = {
    ################### Experiment 4 - 1m ###################

    ## 1m No context
    '1m_no_context': {
        'test_split_data_path': 'eval_data/test_split_1ml_no_context.csv',
        'recsys_data_path': '../data/transform_data/1m_no_context/',
        'nn_model_path': '../model/experiment_4/arch5_1m_no_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '1m_no_context_scaler.pkl',
    },

    ## 1m MovieLens context
    '1m_movielens_context': {
        'test_split_data_path': 'eval_data/test_split_1ml_movielens_context.csv',
        'recsys_data_path': '../data/transform_data/1m_movielens_context/',
        'nn_model_path': '../model/experiment_4/arch5_1m_movielens_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '1m_movielens_context_scaler.pkl',
    },

    ## 1m Added IMDb context
    '1m_added_imdb_context': {
        'test_split_data_path': 'eval_data/test_split_1ml_added_imdb_context.csv',
        'recsys_data_path': '../data/transform_data/1m/',
        'nn_model_path': '../model/experiment_4/arch5_1m_added_imdb_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '1m_added_imdb_context_scaler.pkl',
    },

    ################### Experiment 4 - 25m ###################

    ## 25m Added IMDb context
    '25m_added_imdb_context': {
        'test_split_data_path': 'eval_data/test_split_25ml_added_imdb_context.csv',
        'recsys_data_path': '../data/transform_data/25m/',
        'nn_model_path': '../model/experiment_4/arch5_25m_added_imdb_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '25m_added_imdb_context_scaler.pkl',
    },

    ## 25m MovieLens context
    '25m_movielens_context': {
        'test_split_data_path': 'eval_data/test_split_25ml_movielens_context.csv',
        'recsys_data_path': '../data/transform_data/25m_movielens_context/',
        'nn_model_path': '../model/experiment_4/arch5_25m_movielens_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '25m_movielens_context_scaler.pkl',
    },

    ## 25m No context
    '25m_no_context': {
        'test_split_data_path': 'eval_data/test_split_25ml_no_context.csv',
        'recsys_data_path': '../data/transform_data/25m_no_context/',
        'nn_model_path': '../model/experiment_4/arch5_25m_no_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '25m_no_context_scaler.pkl',
    },

    ################### 25m - profiles ###################

    ## 25m Added IMDb context - PROFILE 1
    'profile_1': {
        'test_split_data_path': 'eval_data/test_split_profile_1.csv',
        'recsys_data_path': '../data/transform_data/profile_1/',
        'nn_model_path': '../model/profiles/arch10_25m_profile_1_gn_trained.keras',
        'scaler_file': 'scaler_profile_1.pkl',
    },

    ## 25m Added IMDb context - PROFILE 2
    'profile_2': {
        'test_split_data_path': 'eval_data/test_split_profile_2.csv',
        'recsys_data_path': '../data/transform_data/profile_2/',
        'nn_model_path': '../model/profiles/arch10_25m_profile_2_gn_trained.keras',
        'scaler_file': 'scaler_profile_2.pkl',
    },

    ## 25m Added IMDb context - PROFILE 3
    'profile_3': {
        'test_split_data_path': 'eval_data/test_split_profile_3.csv',
        'recsys_data_path': '../data/transform_data/profile_3/',
        'nn_model_path': '../model/profiles/arch10_25m_profile_3_gn_trained.keras',
        'scaler_file': 'scaler_profile_3.pkl',
    },

    ################### 25m - Architecture 8 vs Architecture 10 ###################

    ## 25m Added IMDb context - Architecture 8
    'arch8': {
        'test_split_data_path': 'eval_data/test_split_25ml_added_imdb_context.csv',
        'recsys_data_path': '../data/transform_data/25m/',
        'nn_model_path': '../model/arch8_arch10/arch8_25m_added_imdb_context_max_abs_scaler_hpt_gn_trained.keras',
        'scaler_file': '25m_added_imdb_context_scaler.pkl',
    },

    ## 25m Added IMDb context - Architecture 10
    'arch10': {
        'test_split_data_path': 'eval_data/test_split_25ml_added_imdb_context.csv',
        'recsys_data_path': '../data/transform_data/25m/',
        'nn_model_path': '../model/arch8_arch10/arch10_25m_added_imdb_context_max_abs_scaler_gn_trained.keras',
        'scaler_file': '25m_added_imdb_context_scaler.pkl',
    },

    ################### FINAL ################### (25m Added IMDb context)
    'final': {
        'test_split_data_path': 'eval_data/test_split_25ml_added_imdb_context.csv',
        'recsys_data_path': '../data/transform_data/25m/',
        'nn_model_path': '../model/global_recommending_model.keras',
        'scaler_file': '25m_added_imdb_context_scaler.pkl',
    },
}

# Name of the configuration to evaluate; change only this line to switch experiments.
SELECTED_EXPERIMENT = 'final'

selected_config = EXPERIMENT_CONFIGS[SELECTED_EXPERIMENT]
test_split_data_path = selected_config['test_split_data_path']
recsys_data_path = selected_config['recsys_data_path']
nn_model_path = selected_config['nn_model_path']
scaler_file = selected_config['scaler_file']

In [44]:
# Read the test split selected in the configuration cell into a DataFrame.
test_ratings = pd.read_csv(test_split_data_path)
# Bare last expression: the notebook renders the frame as the cell output.
test_ratings

Unnamed: 0,userId,movieId,day,isWeekday,season,partOfDay,holiday,movieYear,titleType,isAdult,...,genreNews,genreReality-tv,genreRomance,genreSci-fi,genreShort,genreSport,genreThriller,genreWar,genreWestern,rating
0,43093,1923,3,1,2,4,3,1998,0,0,...,0,0,1,0,0,0,0,0,0,4.0
1,58800,57669,3,1,1,2,2,2008,0,0,...,0,0,0,0,0,0,1,0,0,4.5
2,134109,69075,5,1,2,4,3,1997,0,0,...,0,0,0,0,0,0,0,0,0,2.5
3,141503,1663,5,1,4,1,2,1981,0,0,...,0,0,0,0,0,0,0,1,0,4.5
4,147198,1136,4,1,4,4,2,1975,0,0,...,0,0,0,0,0,0,0,0,0,3.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2498342,107639,3977,6,0,3,4,2,2000,0,0,...,0,0,0,0,0,0,0,0,0,2.5
2498343,22136,2870,1,1,3,4,2,1967,0,0,...,0,0,0,0,0,0,0,0,0,4.0
2498344,162047,7883,7,0,1,3,2,1943,0,0,...,0,0,0,0,0,0,0,0,0,3.5
2498345,99479,54995,1,1,3,4,2,2007,0,0,...,0,0,0,1,0,0,0,0,0,3.0


In [45]:
# Evaluation settings
n_users = 10       # how many users are evaluated
top_k = 10         # how many recommendations are kept per user
low_rating = 4.0   # ratings at or above this value count as positive

# value_counts() orders user IDs by descending number of test ratings,
# so the first n_users entries are the users with the most ratings.
test_ratings_all_users = test_ratings['userId'].value_counts().index[:n_users].tolist()

# Load the trained network; RSquare is passed as a custom object for loading.
nn_model = load_model(
    nn_model_path,
    compile=True,
    custom_objects={'RSquare': RSquare()},
)

In [46]:
# Scalers already loaded in this kernel session, keyed by file path, so that
# repeated scale_data() calls do not unpickle the same file again.
# Re-running this cell empties the cache.
_SCALER_CACHE = {}


def scale_data(data):
    """Scale ``data`` with the persisted scaler of the selected configuration.

    The scaler file is located at ``recsys_data_path + scaler_file`` and is
    loaded from disk only on the first call for a given path; later calls
    reuse the cached object.

    Parameters
    ----------
    data
        Feature rows handed to the scaler's ``transform`` method.

    Returns
    -------
    The result of ``scaler.transform(data)``.
    """
    scaler_path = recsys_data_path + scaler_file

    scaler = _SCALER_CACHE.get(scaler_path)
    if scaler is None:
        # SECURITY: joblib.load unpickles the file, which can execute arbitrary
        # code. Only load scaler files that come from a trusted source.
        scaler = joblib.load(scaler_path)
        _SCALER_CACHE[scaler_path] = scaler

    # Return scaled data
    return scaler.transform(data)

In [47]:
# Show which user IDs were selected for evaluation (first n_users of the value_counts ordering).
print(f"Top {n_users} users IDs: {test_ratings_all_users}")

Top 10 users IDs: [72315, 137293, 80974, 33844, 20055, 92046, 49403, 109731, 115102, 75309]


In [48]:
# Per-user ground truth and ranked predictions, both keyed by user ID.
actual, predicted = {}, {}

for user_id in test_ratings_all_users:

    # All test rows that belong to this user.
    user_test_ratings = test_ratings.loc[test_ratings['userId'] == user_id]

    # Ground truth: movies the user rated at or above the positive threshold.
    actual_ratings = user_test_ratings.loc[user_test_ratings['rating'] >= low_rating]
    actual[user_id] = actual_ratings['movieId'].tolist()

    # Model input: every column except the target, passed through scale_data().
    movie_indices = user_test_ratings['movieId'].values
    scaled_user_test_ratings = scale_data(user_test_ratings.drop(columns=['rating']))

    predictions = nn_model.predict(scaled_user_test_ratings, verbose=0).flatten()

    # Rank the user's test movies by predicted rating, best first, and keep the top_k.
    predicted_movies = (
        pd.DataFrame({'movieId': movie_indices, 'rating': predictions})
        .sort_values(by='rating', ascending=False)
    )
    predicted[user_id] = predicted_movies['movieId'].iloc[:top_k].tolist()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


----

In [49]:
# MRR
# Mean reciprocal rank over the evaluated users, using the actual/predicted dicts built above.
mrr_fin = mrr(actual, predicted)
print('MRR: ', mrr_fin) 

MRR:  1.0


In [50]:
# MAP@K
# Mean average precision over the evaluated users at cut-offs k=3 and k=10.
mapa3 = mapk(actual, predicted, 3)
mapa10 = mapk(actual, predicted, 10)
print('MAP@3:  ', mapa3)
print('MAP@10: ', mapa10)

MAP@3:   0.9
MAP@10:  0.7318015873015873
