In [52]:
from utils.load_dataset import load_dataset
from spotlight.interactions import Interactions
from spotlight.cross_validation import random_train_test_split, user_based_train_test_split
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.evaluation import rmse_score, mrr_score, precision_recall_score, sequence_mrr_score, sequence_precision_recall_score
import numpy as np
from spotlight.sequence.implicit import ImplicitSequenceModel

In [53]:
# Number of distinct users drawn from the full ratings table.
USERS_NUMBER = 10_000
# Seed passed as random_state when sampling users, so the subset is repeatable.
RANDOM_STATE = 42

In [54]:
# Load the dataset and keep only the ratings made by a seeded random sample
# of USERS_NUMBER distinct users.
ds_df = load_dataset()
all_ratings = ds_df['ratings']
distinct_user_ids = all_ratings['UserID'].drop_duplicates()
users = distinct_user_ids.sample(USERS_NUMBER, random_state=RANDOM_STATE)
sampled_rows_mask = all_ratings['UserID'].isin(users)
ds_df_filter = all_ratings[sampled_rows_mask]
# Fraction of all rating rows that belong to the sampled users.
len(ds_df_filter) / len(all_ratings)

0.14446593433054805

In [55]:
# Inspect the filtered ratings table (UserID, MovieID, Rating, Timestamp columns).
ds_df_filter

Unnamed: 0,UserID,MovieID,Rating,Timestamp
4435,40,34,5.0,945889233
4436,40,36,4.0,945889346
4437,40,50,5.0,945889117
4438,40,150,3.0,945889313
4439,40,174,3.0,945876902
...,...,...,...,...
9999946,71565,3789,4.0,974295682
9999947,71565,3808,4.0,974295467
9999948,71565,3811,4.0,974295234
9999949,71565,3812,4.0,974294786


In [56]:
# Convert each column of the filtered ratings into a typed NumPy array, then
# wrap them in a Spotlight Interactions object.
sampled_user_ids = ds_df_filter['UserID'].to_numpy(dtype=np.int32)
sampled_item_ids = ds_df_filter['MovieID'].to_numpy(dtype=np.int32)
sampled_ratings = ds_df_filter['Rating'].to_numpy(dtype=np.float32)
sampled_timestamps = ds_df_filter['Timestamp'].to_numpy(dtype=np.int32)

interactions = Interactions(
    user_ids=sampled_user_ids,
    item_ids=sampled_item_ids,
    ratings=sampled_ratings,
    timestamps=sampled_timestamps,
)

In [57]:
# Hold out 10% of the interactions at random for evaluation.
# The split is seeded with RANDOM_STATE so that the same train/test partition
# is produced on every Restart & Run All; without it each run shuffles differently.
train, test = random_train_test_split(
    interactions,
    test_percentage=0.1,
    random_state=np.random.RandomState(RANDOM_STATE),
)

In [58]:
# Number of interactions in the test and train splits, in that order.
len(test), len(train)

(144467, 1300200)

## ExplicitFactorizationModel

In [71]:
# Explicit-feedback factorization model, trained for 10 epochs.
# random_state seeds the model so that repeated runs give the same result.
# NOTE(review): use_cuda=True presumably requires a CUDA-capable device — confirm
# before running on a CPU-only machine.
ef_model = ExplicitFactorizationModel(
    n_iter=10,
    use_cuda=True,
    random_state=np.random.RandomState(RANDOM_STATE),
)

In [72]:
# Train on the training split; verbose=True prints the loss after every epoch.
ef_model.fit(train, verbose=True)

Epoch 0: loss 1.486768904857031
Epoch 1: loss 0.9071326319404793
Epoch 2: loss 0.8639572547232689
Epoch 3: loss 0.8432541418953378
Epoch 4: loss 0.8290304625144646
Epoch 5: loss 0.8252826363675635
Epoch 6: loss 0.8215568530838845
Epoch 7: loss 0.8213962489670481
Epoch 8: loss 0.8168330821119723
Epoch 9: loss 0.8171691264569184


In [73]:
# RMSE score of the explicit model on the held-out test split.
ef_rmse = rmse_score(ef_model, test)

In [74]:
# MRR score of the explicit model on the held-out test split.
# The result is an array (it is printed and then reduced with np.mean / np.std
# in later cells).
ef_mrr = mrr_score(ef_model, test)

In [75]:
# Precision/recall score with k=10 for the explicit model on the test split.
# The result is indexed as ef_pr[0] and ef_pr[1] in later cells.
ef_pr = precision_recall_score(ef_model, test, k=10)

In [76]:
# Show the explicit model's test RMSE.
print(ef_rmse)

0.97768897


In [77]:
# Show the raw MRR array for the explicit model (NumPy abbreviates long arrays).
print(ef_mrr)

[0.00041529 0.00040294 0.00021148 ... 0.0005501  0.00021661 0.00050254]


In [78]:
# Mean and standard deviation of the explicit model's MRR values.
print(np.mean(ef_mrr), np.std(ef_mrr))

0.0003903954615485635 0.000410927553458754


In [79]:
# Mean and standard deviation of ef_pr[0] — presumably precision@10;
# confirm the return order of precision_recall_score in the Spotlight docs.
print(np.mean(ef_pr[0]), np.std(ef_pr[0]))

0.00010209290454313425 0.003193566578788576


In [80]:
# Mean and standard deviation of ef_pr[1] — presumably recall@10;
# confirm the return order of precision_recall_score in the Spotlight docs.
print(np.mean(ef_pr[1]), np.std(ef_pr[1]))

2.0753514862900123e-05 0.0008413224929900213


## ImplicitFactorizationModel

In [81]:
# Implicit-feedback factorization model, trained for 10 epochs.
# random_state seeds the model so that repeated runs give the same result.
# NOTE(review): use_cuda=True presumably requires a CUDA-capable device — confirm
# before running on a CPU-only machine.
if_model = ImplicitFactorizationModel(
    n_iter=10,
    use_cuda=True,
    random_state=np.random.RandomState(RANDOM_STATE),
)

In [82]:
# Train on the same training split as the explicit model; verbose=True prints
# the loss after every epoch.
if_model.fit(train, verbose=True)

Epoch 0: loss 0.1704829562580839
Epoch 1: loss 0.10005596098490976
Epoch 2: loss 0.09231385266066161
Epoch 3: loss 0.0862474008469741
Epoch 4: loss 0.08103210529997185
Epoch 5: loss 0.07653650188744127
Epoch 6: loss 0.07298732838965842
Epoch 7: loss 0.06998453474379702
Epoch 8: loss 0.0671835051092911
Epoch 9: loss 0.06526695993138676


In [83]:
# RMSE score of the implicit model on the held-out test split.
# NOTE(review): if_model is an implicit-feedback model, so its predictions are
# presumably not on the rating scale; this value is likely not comparable to
# ef_rmse — confirm whether RMSE is a meaningful metric here.
if_rmse = rmse_score(if_model, test)

In [84]:
# MRR score of the implicit model on the held-out test split (an array; it is
# reduced with np.mean / np.std in a later cell).
if_mrr = mrr_score(if_model, test)

In [85]:
# Precision/recall score with k=10 for the implicit model on the test split.
# The result is indexed as if_pr[0] and if_pr[1] in later cells.
if_pr = precision_recall_score(if_model, test, k=10)

In [86]:
# Show the implicit model's test RMSE.
print(if_rmse)

37.853374


In [87]:
# Show the raw MRR array for the implicit model (NumPy abbreviates long arrays).
print(if_mrr)

[0.04133573 0.00281147 0.06450852 ... 0.00513433 0.17176614 0.00344302]


In [88]:
# Mean and standard deviation of the implicit model's MRR values.
print(np.mean(if_mrr), np.std(if_mrr))

0.013948668381919981 0.03315449451630753


In [89]:
# Mean and standard deviation of if_pr[0] — presumably precision@10;
# confirm the return order of precision_recall_score in the Spotlight docs.
print(np.mean(if_pr[0]), np.std(if_pr[0]))

0.02082695252679939 0.047361524527483545


In [90]:
# Mean and standard deviation of if_pr[1] — presumably recall@10;
# confirm the return order of precision_recall_score in the Spotlight docs.
print(np.mean(if_pr[1]), np.std(if_pr[1]))

0.024785313779040523 0.08008777106750527


## Sequence models

In [91]:
# User-based train/test split for the sequence models, with a 10% test share.
# The split is seeded with RANDOM_STATE so the partition is identical on every run.
pre_seq_train, pre_seq_test = user_based_train_test_split(
    interactions,
    test_percentage=0.1,
    random_state=np.random.RandomState(RANDOM_STATE),
)

In [92]:
# Convert both user-based splits to sequence form (default to_sequence settings).
seq_train, seq_test = (
    split.to_sequence() for split in (pre_seq_train, pre_seq_test)
)

### Pooling

In [93]:
# Sequence model with the 'pooling' representation, trained for 20 epochs.
# random_state seeds the model so that repeated runs give the same result.
seq_pool_model = ImplicitSequenceModel(
    n_iter=20,
    representation='pooling',
    random_state=np.random.RandomState(RANDOM_STATE),
)
# verbose=True prints the loss after every epoch.
seq_pool_model.fit(seq_train, verbose=True)

Epoch 0: loss 0.29253047211768507
Epoch 1: loss 0.16279941417645138
Epoch 2: loss 0.1460761603520397
Epoch 3: loss 0.1341611057689888
Epoch 4: loss 0.12490408521981294
Epoch 5: loss 0.11780699059197204
Epoch 6: loss 0.11120910405716063
Epoch 7: loss 0.10532643219647299
Epoch 8: loss 0.10136260065345709
Epoch 9: loss 0.09756145653219278
Epoch 10: loss 0.0944088946022235
Epoch 11: loss 0.09112286757493654
Epoch 12: loss 0.08885273401104905
Epoch 13: loss 0.08599191792167638
Epoch 14: loss 0.08375510940250334
Epoch 15: loss 0.08161894685301944
Epoch 16: loss 0.07942156349893079
Epoch 17: loss 0.07737731471827729
Epoch 18: loss 0.07620817096372748
Epoch 19: loss 0.07400390063146901


In [94]:
# Sequence MRR score of the pooling model on the test sequences.
seq_pool_mrr = sequence_mrr_score(seq_pool_model, seq_test)

In [95]:
# Mean and standard deviation of the pooling model's sequence MRR values.
print(np.mean(seq_pool_mrr), np.std(seq_pool_mrr))

0.012973217849541157 0.06601030551110476


### LSTM

In [96]:
# Sequence model with the 'lstm' representation, trained for 20 epochs.
# random_state seeds the model so that repeated runs give the same result.
seq_lstm_model = ImplicitSequenceModel(
    n_iter=20,
    representation='lstm',
    random_state=np.random.RandomState(RANDOM_STATE),
)
# verbose=True prints the loss after every epoch.
seq_lstm_model.fit(seq_train, verbose=True)

Epoch 0: loss 0.16993749386740728
Epoch 1: loss 0.10828359423651442
Epoch 2: loss 0.10666997267549481
Epoch 3: loss 0.10611408567870524
Epoch 4: loss 0.1066096091480083
Epoch 5: loss 0.10602178578990948
Epoch 6: loss 0.10585490445617034
Epoch 7: loss 0.1059329775746331
Epoch 8: loss 0.10599722992274697
Epoch 9: loss 0.10599673175369832
Epoch 10: loss 0.10590842204742106
Epoch 11: loss 0.10581618700417275
Epoch 12: loss 0.10586496170596024
Epoch 13: loss 0.10564454499023042
Epoch 14: loss 0.10525114229480123
Epoch 15: loss 0.10533809191600452
Epoch 16: loss 0.1060119821714811
Epoch 17: loss 0.10573159816883816
Epoch 18: loss 0.10574374046219166
Epoch 19: loss 0.10629778081983214


In [97]:
# Sequence MRR score of the LSTM model on the test sequences.
seq_lstm_mrr = sequence_mrr_score(seq_lstm_model, seq_test)

In [98]:
# Mean and standard deviation of the LSTM model's sequence MRR values.
print(np.mean(seq_lstm_mrr), np.std(seq_lstm_mrr))

0.00967996445874615 0.048870198892012925


### CNN

In [99]:
# Sequence model with the 'cnn' representation, trained for 20 epochs.
# random_state seeds the model so that repeated runs give the same result.
seq_cnn_model = ImplicitSequenceModel(
    n_iter=20,
    representation='cnn',
    random_state=np.random.RandomState(RANDOM_STATE),
)
# verbose=True prints the loss after every epoch.
seq_cnn_model.fit(seq_train, verbose=True)

Epoch 0: loss 0.16685462857777175
Epoch 1: loss 0.10863340487384977
Epoch 2: loss 0.10819457441625033
Epoch 3: loss 0.10742456298817246
Epoch 4: loss 0.10644947406003684
Epoch 5: loss 0.1057167087925931
Epoch 6: loss 0.10169929192320022
Epoch 7: loss 0.09756805802989368
Epoch 8: loss 0.09475171761481027
Epoch 9: loss 0.09222896708735041
Epoch 10: loss 0.09091078337041145
Epoch 11: loss 0.08921988074713787
Epoch 12: loss 0.08778431022915097
Epoch 13: loss 0.0864443590602494
Epoch 14: loss 0.0858871587499013
Epoch 15: loss 0.08466778539772722
Epoch 16: loss 0.08347535817512088
Epoch 17: loss 0.08235619545776128
Epoch 18: loss 0.08174779659894936
Epoch 19: loss 0.0806023338845021


In [100]:
# Sequence MRR score of the CNN model on the test sequences.
seq_cnn_mrr = sequence_mrr_score(seq_cnn_model, seq_test)

In [101]:
# Mean and standard deviation of the CNN model's sequence MRR values.
print(np.mean(seq_cnn_mrr), np.std(seq_cnn_mrr))

0.00819898759082587 0.05235291331049043
