# CZ4032 Project 2 - Neural Collaborative Filtering (NCF) Experiments

The code used in this notebook is based on the codebase provided by Microsoft Research. The following is the reference:

Microsoft, “microsoft/recommenders: Best Practices on Recommendation Systems,” GitHub. [Online]. Available: https://github.com/microsoft/recommenders. [Accessed: 22-Nov-2021].



## Initial Setup, Installs, and Imports

In [None]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [None]:
cd recommenders/

/content/recommenders


In [None]:
!pip install .

In [None]:
import sys
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)
from recommenders.utils.constants import SEED as DEFAULT_SEED

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.7.12 (default, Sep 10 2021, 00:21:48) 
[GCC 7.5.0]
Pandas version: 1.1.5
Tensorflow version: 1.15.2


In [None]:
import warnings
warnings.filterwarnings("ignore")

## Initial Model Parameters

In [None]:
# Number of items recommended per user; also passed as the cutoff k to the ranking metrics.
TOP_K = 10

# MovieLens variant to load: one of 100k, 1m, 10m, or 20m.
MOVIELENS_DATA_SIZE = '100k'

# Baseline training settings. Later experiment cells reassign some of these globals
# (BATCH_SIZE, EPOCHS), so re-run this cell before repeating the baseline.
EPOCHS = 50
BATCH_SIZE = 256

# Seed handed to both NCFDataset and NCF below.
SEED = DEFAULT_SEED

In [None]:
# Load the MovieLens ratings together with a movie_title column.
rating_columns = ["userID", "itemID", "rating", "timestamp"]
df = movielens.load_pandas_df(
    title_col="movie_title",
    header=rating_columns,
    size=MOVIELENS_DATA_SIZE,
)

df.head()

100%|██████████| 4.81k/4.81k [00:00<00:00, 11.1kKB/s]


Unnamed: 0,userID,itemID,rating,timestamp,movie_title
0,196,242,3.0,881250949,Kolya (1996)
1,63,242,3.0,875747190,Kolya (1996)
2,226,242,5.0,883888671,Kolya (1996)
3,154,242,3.0,879138235,Kolya (1996)
4,306,242,5.0,876503793,Kolya (1996)


In [None]:
df.shape

(100000, 5)

In [None]:
# Chronological train/test split. 0.75 is presumably the train fraction
# (second positional argument of python_chrono_split) — confirm against the library docs.
train, test = python_chrono_split(df, 0.75)

In [None]:
# Wrap both splits in the dataset object that model.fit() consumes; SEED is forwarded to it.
data = NCFDataset(train=train, test=test, seed=SEED)

In [None]:
# Baseline NeuMF configuration; the experiment sections below vary these settings.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    # optimisation
    learning_rate=1e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    # seeding / logging
    seed=SEED,
    verbose=10,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 305.7687 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.2560 seconds for prediction.


## Helper to recommend the top-k items per user

In [None]:
def recommend_k_items(all_predictions, top_k=10, df=df):
    """Return each user's ``top_k`` highest-scored items, annotated with movie titles.

    Args:
        all_predictions (pd.DataFrame): Scored candidates with at least the columns
            ``userID``, ``itemID`` and ``prediction``. It is not modified.
        top_k (int): Maximum number of rows kept per user. Must be non-negative;
            ``0`` yields an empty frame.
        df (pd.DataFrame): Lookup table with ``itemID`` and ``movie_title`` columns.
            Defaults to the ratings frame that was bound when this cell was executed.

    Returns:
        pd.DataFrame: Rows ordered by ascending ``userID`` and, within a user, by
        descending ``prediction``, with a fresh 0..n-1 index. ``movie_title`` holds
        the first title listed in ``df`` for the item, or ``''`` (after printing a
        notice) when ``df`` has no row for that item.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative, got {}".format(top_k))

    # One multi-key sort replaces the per-group sort previously done via groupby().apply().
    ranked = all_predictions.sort_values(['userID', 'prediction'], ascending=[True, False])

    # head() keeps the first top_k rows of every user in sorted order; reset_index()
    # returns a new frame, so the column assignment below never targets a slice of
    # another DataFrame.
    top_items = ranked.groupby('userID', sort=False).head(top_k).reset_index(drop=True)

    # First title listed for each itemID (same first-occurrence rule as before).
    title_by_id = df.drop_duplicates(subset='itemID').set_index('itemID')['movie_title']

    has_title = top_items['itemID'].isin(title_by_id.index)
    for missing_id in top_items.loc[~has_title, 'itemID']:
        print("No movie title exists for itemID:", missing_id)

    # Single whole-column assignment instead of chained `frame[col].iloc[i] = ...`,
    # which pandas may apply to a temporary copy and silently discard.
    top_items['movie_title'] = top_items['itemID'].map(title_by_id).where(has_title, '')

    return top_items

# Experiments

## Top k = 10 (Original)

In [None]:
TOP_K = 10

# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.051711
NDCG:	0.203581
Precision@K:	0.180806
Recall@K:	0.102204


In [None]:
# Per-user top-K table (with titles) for the baseline model's predictions.
df_sorted_top_10 = recommend_k_items(all_predictions, top_k=TOP_K)
df_sorted_top_10.head(50)

Unnamed: 0,userID,itemID,movie_title,prediction
0,1,100,Fargo (1996),0.946887
1,1,12,"Usual Suspects, The (1995)",0.902238
2,1,475,Trainspotting (1996),0.884382
3,1,405,Mission: Impossible (1996),0.822719
4,1,273,Heat (1995),0.815772
5,1,154,Monty Python's Life of Brian (1979),0.798393
6,1,288,Scream (1996),0.765276
7,1,209,This Is Spinal Tap (1984),0.764259
8,1,318,Schindler's List (1993),0.750901
9,1,210,Indiana Jones and the Last Crusade (1989),0.742695


## Top k = 20

In [None]:
# Larger cutoff: the cells below re-score the existing all_predictions; the model is not retrained.
TOP_K=20

In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.068953
NDCG:	0.214761
Precision@K:	0.159279
Recall@K:	0.177134


In [None]:
# Per-user top-20 table built from the same baseline predictions.
df_sorted_top_20 = recommend_k_items(all_predictions, top_k=TOP_K)
df_sorted_top_20.head(60)

Unnamed: 0,userID,itemID,movie_title,prediction
0,1,100,Fargo (1996),0.946887
1,1,12,"Usual Suspects, The (1995)",0.902238
2,1,475,Trainspotting (1996),0.884382
3,1,405,Mission: Impossible (1996),0.822719
4,1,273,Heat (1995),0.815772
5,1,154,Monty Python's Life of Brian (1979),0.798393
6,1,288,Scream (1996),0.765276
7,1,209,This Is Spinal Tap (1984),0.764259
8,1,318,Schindler's List (1993),0.750901
9,1,210,Indiana Jones and the Last Crusade (1989),0.742695


## Top k = 5

In [None]:
# Smaller cutoff: again re-scores the existing all_predictions without retraining.
TOP_K = 5

In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.038237
NDCG:	0.213644
Precision@K:	0.205302
Recall@K:	0.059235


In [None]:
# Per-user top-5 table built from the same baseline predictions.
df_sorted_top_5 = recommend_k_items(all_predictions, top_k=TOP_K)

In [None]:
df_sorted_top_5.head(25)

Unnamed: 0,userID,itemID,movie_title,prediction
0,1,100,Fargo (1996),0.946887
1,1,12,"Usual Suspects, The (1995)",0.902238
2,1,475,Trainspotting (1996),0.884382
3,1,405,Mission: Impossible (1996),0.822719
4,1,273,Heat (1995),0.815772
5,2,283,Emma (1996),0.972814
6,2,124,Lone Star (1996),0.958005
7,2,9,Dead Man Walking (1995),0.953126
8,2,15,Mr. Holland's Opus (1995),0.9523
9,2,137,Big Night (1996),0.950962


## Batch Size = 2048

In [None]:
# Batch-size experiment: BATCH_SIZE is raised from the 256 used for the first model.
# TOP_K is reset to 10 because the previous section left it at 5.
TOP_K = 10
BATCH_SIZE = 2048

In [None]:
from recommenders.models.ncf.ncf_singlenode import NCF

In [None]:
# Rebuild NeuMF with the current BATCH_SIZE; factors, layers and learning rate match the first model.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    # optimisation
    learning_rate=1e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 150.2523 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3340 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.048676
NDCG:	0.196176
Precision@K:	0.172428
Recall@K:	0.098011


In [None]:
# Per-user top-K table for the model trained with BATCH_SIZE = 2048.
df_sorted_batch_size_2048 = recommend_k_items(all_predictions, top_k=TOP_K)

In [None]:
df_sorted_batch_size_2048.head(20)

Unnamed: 0,userID,itemID,movie_title,prediction
0,1,100,Fargo (1996),0.902827
1,1,423,E.T. the Extra-Terrestrial (1982),0.857376
2,1,88,Sleepless in Seattle (1993),0.843532
3,1,433,Heathers (1989),0.814742
4,1,12,"Usual Suspects, The (1995)",0.797866
5,1,655,Stand by Me (1986),0.79731
6,1,367,Clueless (1995),0.789017
7,1,451,Grease (1978),0.787894
8,1,496,It's a Wonderful Life (1946),0.777362
9,1,403,Batman (1989),0.775711


## Batch Size = 512

In [None]:
# Batch-size experiment: BATCH_SIZE is set to 512 (the first model used 256).
TOP_K = 10
BATCH_SIZE = 512

In [None]:
# Rebuild NeuMF with the current BATCH_SIZE; factors, layers and learning rate match the first model.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    # optimisation
    learning_rate=1e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 218.6817 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.4520 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.051295
NDCG:	0.205208
Precision@K:	0.183139
Recall@K:	0.103683


In [None]:
# Per-user top-K table for the model trained with BATCH_SIZE = 512.
df_sorted_batch_size_512 = recommend_k_items(all_predictions, top_k=TOP_K)

In [None]:
df_sorted_batch_size_512.head(20)

Unnamed: 0,userID,itemID,movie_title,prediction
0,1,100,Fargo (1996),0.90476
1,1,12,"Usual Suspects, The (1995)",0.814309
2,1,210,Indiana Jones and the Last Crusade (1989),0.783533
3,1,222,Star Trek: First Contact (1996),0.75702
4,1,405,Mission: Impossible (1996),0.742362
5,1,568,Speed (1994),0.739283
6,1,318,Schindler's List (1993),0.728522
7,1,496,It's a Wonderful Life (1946),0.720418
8,1,357,One Flew Over the Cuckoo's Nest (1975),0.71647
9,1,154,Monty Python's Life of Brian (1979),0.716328


## Layer sizes = [32,16,8] (batch size 1024)

In [None]:
# Layer-size experiment. NOTE(review): BATCH_SIZE is 1024 here, not the 256 used for the
# first model, so this run differs from that baseline in batch size as well as layer sizes.
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [32,16,8]

In [None]:
# NeuMF with layer sizes and batch size taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=1e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 174.9608 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3413 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.049375
NDCG:	0.198526
Precision@K:	0.177413
Recall@K:	0.103253


In [None]:
# Per-user top-K table for the model trained with LAYER_SIZES = [32,16,8].
df_sorted_layer_sizes_2x = recommend_k_items(all_predictions, top_k=TOP_K)

## Layer sizes = [8,4,2] (batch size 1024)

In [None]:
# Layer-size experiment. NOTE(review): BATCH_SIZE is 1024 here, not the 256 used for the
# first model, so this run differs from that baseline in batch size as well as layer sizes.
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [8,4,2]

In [None]:
# NeuMF with layer sizes and batch size taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=1e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 174.7787 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3051 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.046363
NDCG:	0.195142
Precision@K:	0.178579
Recall@K:	0.097783


In [None]:
# Per-user top-K table for the model trained with LAYER_SIZES = [8,4,2].
df_sorted_layer_sizes_half = recommend_k_items(all_predictions, top_k=TOP_K)

## Layer sizes = [32,16,8] - Custom setup (100 epochs, learning rate 5e-4)

In [None]:
# Same layer sizes as the earlier [32,16,8] run, but with more epochs and a smaller learning
# rate (the earlier cells trained with EPOCHS = 50 and learning_rate=1e-3).
# EPOCHS is a notebook-wide global, so this assignment stays in effect until a later cell
# reassigns it.
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [32,16,8]
EPOCHS=100
LR = 5e-4

In [None]:
# NeuMF with layer sizes, epochs, batch size and learning rate taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=LR,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 350.2023 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3214 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.046421
NDCG:	0.194284
Precision@K:	0.175398
Recall@K:	0.099244


## Layer sizes = [64,32,16,8,4] (100 epochs, batch size 1024)

In [None]:
# Five-entry layer list. EPOCHS stays at 100 and the learning rate is set to 1e-3;
# BATCH_SIZE is 1024 (the first model used 256).
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [64,32,16,8,4]
EPOCHS=100
LR = 1e-3

In [None]:
# NeuMF with layer sizes, epochs, batch size and learning rate taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=LR,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 353.6428 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3215 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.037569
NDCG:	0.166569
Precision@K:	0.155143
Recall@K:	0.087315


## Number of Factors = 8

In [None]:
# n_factors experiment; EPOCHS is set back to 50.
# NOTE(review): besides N_FACTORS, this run uses LAYER_SIZES [32,16,8], BATCH_SIZE 1024 and
# LR 5e-4, all of which differ from the first model (layers [16,8,4], batch 256, lr 1e-3).
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [32,16,8]
EPOCHS=50
LR = 5e-4
N_FACTORS = 8

In [None]:
# NeuMF with every tunable setting taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=N_FACTORS,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=LR,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 176.2697 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3132 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.051182
NDCG:	0.206517
Precision@K:	0.183245
Recall@K:	0.102796


## Number of Factors = 16

In [None]:
# n_factors experiment. NOTE(review): LAYER_SIZES is doubled to [64,32,16] together with
# N_FACTORS, so the change versus the 8-factor run is not limited to the factor count.
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [64,32,16]
EPOCHS=50
LR = 5e-4
N_FACTORS = 16

In [None]:
# NeuMF with every tunable setting taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=N_FACTORS,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=LR,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 174.9821 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.3067 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.051469
NDCG:	0.209400
Precision@K:	0.184093
Recall@K:	0.105175


## Number of Factors = 32

In [None]:
# n_factors experiment. NOTE(review): EPOCHS (150) and LR (1e-4) change here in addition to
# N_FACTORS and LAYER_SIZES, so this run is not directly comparable with the 8- and
# 16-factor runs, which used 50 epochs and LR 5e-4.
TOP_K = 10
BATCH_SIZE = 1024
LAYER_SIZES = [128,64,32]
EPOCHS=150
LR = 1e-4
N_FACTORS = 32

In [None]:
# NeuMF with every tunable setting taken from the config cell above.
model = NCF(
    # problem size, read from the prepared dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=N_FACTORS,
    layer_sizes=LAYER_SIZES,
    # optimisation
    learning_rate=LR,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    seed=SEED,
)

In [None]:
# Fit the model built in the previous cell; Timer captures the elapsed time for the report below.
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

Took 531.6639 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every training item for every training user, one user per predict() call.
    candidate_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(candidate_items)
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with train and keep only rows without a rating, i.e. pairs that have
    # no match in train (assuming train ratings are never null).
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    unseen_pairs = merged["rating"].isna()
    all_predictions = merged[unseen_pairs].drop(columns=["rating", "timestamp"])

print("Took {} seconds for prediction.".format(test_time))

Took 3.2893 seconds for prediction.


In [None]:
# Ranking metrics of all_predictions against the held-out test split, cut off at TOP_K.
metric_kwargs = dict(col_prediction='prediction', k=TOP_K)
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report, sep='\n')

MAP:	0.049884
NDCG:	0.206284
Precision@K:	0.186638
Recall@K:	0.102784


# End of Notebook