## Global imports and dataset

In [81]:
import sys

import cornac
import recommenders.models.cornac.cornac_utils as cornac_utils
import recommenders.models.surprise.surprise_utils as surprise_utils
import surprise
from recommenders.datasets import movielens
from recommenders.evaluation.python_evaluation import rmse, ndcg_at_k
from recommenders.utils.constants import SEED
from recommenders.utils.timer import Timer
from sklearn.model_selection import train_test_split

In [71]:
# Configuration: notebook-wide settings read by the cells below.

# When True, explicit ratings (1-5) are converted to binary implicit ratings
# and the Surprise reader is built with a 0-1 rating scale.
implicit_ratings = False
# Fraction of rows assigned to the training set (passed as `train_size`
# to train_test_split).
training_split = 0.75

# Number of recommendations for each user (evaluation); passed as `k` to ndcg_at_k.
k = 10

In [72]:
# Load the MovieLens 100k ratings as a pandas DataFrame, using the column
# names the rest of the notebook relies on.
data = movielens.load_pandas_df(
    size='100k',
    header=['userID', 'itemID', 'rating'],
)
data.head()

100%|██████████| 4.81k/4.81k [00:02<00:00, 1.64kKB/s]


Unnamed: 0,userID,itemID,rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0


In [73]:
# Optionally binarise the ratings: values above 3 become 1, everything else 0.
# NOTE: the `rating` column is overwritten in place, so re-running this cell on
# already-converted data maps every rating to 0 (1 > 3 is false).
if implicit_ratings:
    data['rating'] = data['rating'].map(lambda rating: 1 if rating > 3 else 0)

data.head()

Unnamed: 0,userID,itemID,rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0


In [74]:
# Split the dataset into training and testing sets. `training_split` is the
# training fraction; the remaining rows form the test set.
# A fixed random_state makes the split (and every metric derived from it)
# reproducible across kernel restarts.
train, test = train_test_split(data, train_size=training_split, random_state=SEED)

train.shape

(75000, 3)

## Surprise Recommenders

In [75]:
# Build the Surprise reader with the rating scale that matches the data:
# 0-1 when ratings were converted to implicit (binary), 1-5 otherwise.
reader = surprise.Reader(rating_scale=(0, 1) if implicit_ratings else (1, 5))

In [76]:
# Wrap the training DataFrame in a Surprise dataset and build the full trainset from it
trainset = (
    surprise.Dataset
    .load_from_df(train, reader=reader)
    .build_full_trainset()
)

trainset

<surprise.trainset.Trainset at 0x7f243471fc70>

### SVD

In [77]:
# Hyperparameters for the Surprise SVD model; both are forwarded by name
# to the surprise.SVD constructor in the training cell.
n_factors = 20
n_epochs = 30

In [78]:
# Fit SVD on the Surprise trainset and report the wall-clock training time
svd = surprise.SVD(
    n_factors=n_factors,
    n_epochs=n_epochs,
    random_state=0,
    verbose=True,
)

with Timer() as train_time:
    svd.fit(trainset)

print(f"Training took {train_time.interval} seconds")

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Training took 2.0705203999987134 seconds


In [83]:
# Run the fitted SVD model over the test set and preview the predictions
predictions = surprise_utils.predict(
    svd,
    test,
    usercol='userID',
    itemcol='itemID',
)

predictions.head()

Unnamed: 0,userID,itemID,prediction
0,130,294,3.875115
1,361,657,4.319471
2,916,720,2.262653
3,488,520,3.716537
4,221,550,3.18165


In [84]:
# Compute the ranking predictions used for NDCG and time the computation.
# `train` is passed with remove_seen=True — presumably so that pairs already
# present in the training data are excluded; confirm against the recommenders docs.
with Timer() as test_time:
    all_predictions = surprise_utils.compute_ranking_predictions(
        svd,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print(f"Testing took {test_time.interval} seconds")

Testing took 33.70395140000073 seconds


In [85]:
# Evaluate: RMSE on the test-set predictions, NDCG@k on the ranking predictions
eval_rmse = rmse(test, predictions)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)

# Report the cutoff actually used (`k`) rather than a hard-coded 10
print(f"SVD (Surprise) Top@{k} results:")
print('----')
print("RMSE:\t%f" % eval_rmse)
print("NDCG:\t%f" % eval_ndcg)

SVD (Surprise) Top@10 results:
----
RMSE:	0.934061
NDCG:	0.116506


### SVD++

In [86]:
# Hyperparameters for the Surprise SVD++ model; both are forwarded by name
# to the surprise.SVDpp constructor in the training cell.
n_factors = 20
n_epochs = 30

In [87]:
# Fit SVD++ on the Surprise trainset and report the wall-clock training time
svdpp = surprise.SVDpp(
    n_factors=n_factors,
    n_epochs=n_epochs,
    random_state=0,
    verbose=True,
)

with Timer() as train_time:
    svdpp.fit(trainset)

print(f"Training took {train_time.interval} seconds")

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
 processing epoch 20
 processing epoch 21
 processing epoch 22
 processing epoch 23
 processing epoch 24
 processing epoch 25
 processing epoch 26
 processing epoch 27
 processing epoch 28
 processing epoch 29
Training took 54.23632120000002 seconds


In [88]:
# Run the fitted SVD++ model over the test set and preview the predictions
predictions = surprise_utils.predict(
    svdpp,
    test,
    usercol='userID',
    itemcol='itemID',
)

predictions.head()

Unnamed: 0,userID,itemID,prediction
0,130,294,4.076305
1,361,657,4.4301
2,916,720,1.92638
3,488,520,3.834405
4,221,550,2.947133


In [89]:
# Compute the ranking predictions used for NDCG with SVD++ and time the computation
with Timer() as test_time:
    all_predictions = surprise_utils.compute_ranking_predictions(
        svdpp,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print(f"Testing took {test_time.interval} seconds")

Testing took 215.69523010000012 seconds


In [91]:
# Evaluate: RMSE on the test-set predictions, NDCG@k on the ranking predictions
eval_rmse = rmse(test, predictions)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)

# Report the cutoff actually used (`k`) rather than a hard-coded 10
print(f"SVD++ (Surprise) Top@{k} results:")
print('----')
print("RMSE:\t%f" % eval_rmse)
print("NDCG:\t%f" % eval_ndcg)

SVD++ (Surprise) Top@10 results:
----
RMSE:	0.939785
NDCG:	0.115138


### NMF

In [90]:
# Hyperparameters for the Surprise NMF model; both are forwarded by name
# to the surprise.NMF constructor in the training cell.
n_factors = 20
n_epochs = 30

In [92]:
# Fit NMF on the Surprise trainset and report the wall-clock training time
nmf = surprise.NMF(
    n_factors=n_factors,
    n_epochs=n_epochs,
    random_state=0,
    verbose=True,
)

with Timer() as train_time:
    nmf.fit(trainset)

print(f"Training took {train_time.interval} seconds")

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Training took 2.591307599999709 seconds


In [93]:
# Run the fitted NMF model over the test set and preview the predictions
predictions = surprise_utils.predict(
    nmf,
    test,
    usercol='userID',
    itemcol='itemID',
)

predictions.head()

Unnamed: 0,userID,itemID,prediction
0,130,294,4.235743
1,361,657,4.714417
2,916,720,3.297722
3,488,520,3.716121
4,221,550,4.271358


In [94]:
# Compute the ranking predictions used for NDCG with NMF and time the computation
with Timer() as test_time:
    all_predictions = surprise_utils.compute_ranking_predictions(
        nmf,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print(f"Testing took {test_time.interval} seconds")

Testing took 27.45095670000046 seconds


In [95]:
# Evaluate: RMSE on the test-set predictions, NDCG@k on the ranking predictions
eval_rmse = rmse(test, predictions)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)

# Report the cutoff actually used (`k`) rather than a hard-coded 10
print(f"NMF (Surprise) Top@{k} results:")
print('----')
print("RMSE:\t%f" % eval_rmse)
print("NDCG:\t%f" % eval_ndcg)

NMF (Surprise) Top@10 results:
----
RMSE:	1.015566
NDCG:	0.050782


## Cornac Recommenders

In [96]:
# Build the Cornac dataset from the rows of the training split.
# NOTE: this rebinds `trainset`, which previously held the Surprise trainset.
trainset = cornac.data.Dataset.from_uir(
    train.itertuples(index=False),
    seed=SEED,
)

In [None]:
# ratio_split = cornac.eval_methods.RatioSplit(
#     data=data,
#     test_size=0.2,
#     exclude_unknowns=True,
#     verbose=True,
#     seed=123,
#     rating_threshold=0.8,
# )

### MostPop

In [None]:
# Hyperparameters

# None :)

In [10]:
# Most-popular baseline from Cornac, constructed with its default arguments
most_pop = cornac.models.MostPop()

In [11]:
# Fit the MostPop baseline on the Cornac dataset and report the wall-clock training time
with Timer() as train_time:
    most_pop.fit(trainset)

print(f"Training took {train_time.interval} seconds")

Training took 0.045236100000693114 seconds


In [97]:
# Run the fitted MostPop model over the test set and preview the predictions
predictions = cornac_utils.predict(
    most_pop,
    test,
    usercol='userID',
    itemcol='itemID',
)

predictions.head()

Unnamed: 0,userID,itemID,prediction
0,130,294,5.0
1,361,657,5.0
2,916,720,5.0
3,488,520,5.0
4,221,550,5.0


In [98]:
# Compute the ranking predictions used for NDCG with MostPop and time the computation
with Timer() as test_time:
    all_predictions = cornac_utils.predict_ranking(
        most_pop,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print(f"Testing took {test_time.interval} seconds")

Testing took 1.6240087000005587 seconds


In [99]:
# Evaluate: RMSE on the test-set predictions, NDCG@k on the ranking predictions
eval_rmse = rmse(test, predictions)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)

# Report the cutoff actually used (`k`) rather than a hard-coded 10
print(f"MostPop (Cornac) Top@{k} results:")
print('----')
print("RMSE:\t%f" % eval_rmse)
print("NDCG:\t%f" % eval_ndcg)


MostPop (Cornac) Top@10 results:
----
RMSE:	1.829153
NDCG:	0.253725


## EASE

In [115]:
# Hyperparameters for EASE (forwarded by name to the EASE constructor)

# No trailing comma: `lamb=500,` would bind the one-element tuple (500,)
# instead of the number 500.
lamb = 500
posB = False

In [116]:
# Make the local ./algorithms directory importable so the project's EASE
# implementation can be loaded. `sys` comes from the notebook's import cell;
# the membership check keeps re-runs of this cell from appending duplicate
# entries to sys.path.
if './algorithms' not in sys.path:
    sys.path.append('./algorithms')
from EASE import EASE

ease = EASE(lamb=lamb, posB=posB)

In [117]:
# Fit EASE on the Cornac dataset and report the wall-clock training time
with Timer() as train_time:
    ease.fit(trainset)

print(f"Training took {train_time.interval} seconds")

Training took 0.5557379999991099 seconds


In [125]:
# Debugging aid: size of the uid_map held by the fitted MostPop model's
# train_set (943 in the saved output). Note that it inspects `most_pop`, not `ease`.
len(most_pop.train_set.uid_map)

943

In [121]:
# Run the fitted EASE model over the test set and preview the predictions.
# NOTE(review): the saved run of this cell ended in a ValueError (ambiguous
# truth value of an array); the root cause is not established here.
predictions = cornac_utils.predict(
    ease,
    test,
    usercol='userID',
    itemcol='itemID',
)

predictions.head()

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all().

In [105]:
# Compute the ranking predictions used for NDCG with EASE and time the computation.
# Two fixes relative to the previous version of this cell:
#   * the call was missing the `cornac_utils.` module prefix, which is a
#     NameError on a fresh kernel;
#   * it was given `test`, whereas the equivalent MostPop and Surprise cells
#     pass `train` together with remove_seen=True.
with Timer() as test_time:
    all_predictions = cornac_utils.predict_ranking(
        ease,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print(f"Testing took {test_time.interval} seconds")

ValueError: All arrays must be of the same length

In [None]:
# Evaluate EASE: NDCG@k on the ranking predictions
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)

# The header previously read "MostPop (Cornac)", copied from the MostPop
# section; it now names the model evaluated here and the cutoff `k` in use.
print(f"EASE Top@{k} results:")
print('----')
print("NDCG:\t%f" % eval_ndcg)

## BPR

In [None]:
# BPR model from Cornac with its hyperparameters set inline.
# `k` here is a BPR constructor argument given the literal 200; it is not the
# evaluation cutoff `k` from the configuration cell.
bpr = cornac.models.BPR(
    k=200,
    max_iter=100,
    learning_rate=0.01,
    lambda_reg=0.001,
    seed=SEED,
    verbose=True,
)

In [None]:
# Fit BPR on the Cornac dataset and report the wall-clock training time
with Timer() as train_time:
    bpr.fit(trainset)

print(f"Training took {train_time.interval} seconds")