In [1]:
import pandas as pd
import cornac

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
from cornac.models import MF, EMF, NEMF, ALS
from cornac.explainer import EMFExplainer, ALSExplainer, PHI4MFExplainer

In [3]:
path = '../tests/dataset/good_read_UIR_sample.csv'
df = pd.read_csv(path, sep='\t', header=0, names=['user_id', 'item_id', 'rating'])
data = df[['user_id', 'item_id', 'rating']].values
ratio_split = cornac.eval_methods.RatioSplit(data=data, test_size=0.2, verbose=True)

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 25692
Number of items = 29744
Number of ratings = 103377
Max rating = 5.0
Min rating = 0.0
Global mean = 3.8
---
Test data:
Number of users = 9110
Number of items = 6836
Number of ratings = 18934
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 25692
Total items = 29744


In [4]:
def experiment_mf(model):
    metrics = [cornac.metrics.MAE(), cornac.metrics.RMSE(), cornac.metrics.Precision(k=10), cornac.metrics.Recall(k=10), cornac.metrics.NDCG(k=10), cornac.metrics.AUC(), cornac.metrics.MAP()]
    cornac.Experiment(eval_method=ratio_split, models=[model], metrics=metrics).run()

In [5]:
mf = MF(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.001, verbose=True, seed=6, num_threads=6, early_stop=True)
emf = EMF(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, explain_reg=0.01, verbose=True, seed=6, num_threads=6, early_stop=True)
nemf = NEMF(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, explain_reg=0.01, novel_reg=0.01, verbose=True, seed=6, num_threads=6, early_stop=True)
als = ALS(k=10, max_iter=200, lambda_reg=0.001, alpha=1, verbose=True, seed=6, num_threads=6)

In [6]:
experiment_mf(mf)


[MF] Training started!


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!

[MF] Evaluation started!


Rating:   0%|          | 0/19075 [00:00<?, ?it/s]

Ranking:   0%|          | 0/9220 [00:00<?, ?it/s]


TEST:
...
   |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
-- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
MF | 0.8449 | 0.8972 | 0.5975 | 0.0020 |  0.0017 |       0.0008 |    0.0041 |    1.1122 |  64.8875



In [7]:
experiment_mf(emf)


[EMF] Training started!


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!

[EMF] Evaluation started!


Rating:   0%|          | 0/18971 [00:00<?, ?it/s]

Ranking:   0%|          | 0/9156 [00:00<?, ?it/s]


TEST:
...
    |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
EMF | 0.9702 | 1.0347 | 0.5401 | 0.0027 |  0.0026 |       0.0008 |    0.0054 |   99.4979 |  76.1230



In [8]:
experiment_mf(nemf)


[NEMF] Training started!
Start compute edge weight matrix...
Start compute novel matrix...
Matrix computation finished!


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!

[NEMF] Evaluation started!


Rating:   0%|          | 0/18971 [00:00<?, ?it/s]

Ranking:   0%|          | 0/9156 [00:00<?, ?it/s]


TEST:
...
     |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
---- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
NEMF | 0.8577 | 0.9138 | 0.5725 | 0.0023 |  0.0028 |       0.0013 |    0.0046 |  145.1590 |  77.6844



In [6]:
experiment_mf(als)


[ALS] Training started!




  0%|          | 0/200 [00:00<?, ?it/s]

INFO:implicit:Final training loss 0.0004



[ALS] Evaluation started!


Rating:   0%|          | 0/18934 [00:00<?, ?it/s]

Ranking:   0%|          | 0/9110 [00:00<?, ?it/s]


TEST:
...
    |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
ALS | 3.7788 | 3.8253 | 0.8116 | 0.0472 |  0.0592 |       0.0187 |    0.1044 |   27.8736 |  71.9747



In [33]:
users = {'5f0d7ea4515a98abebea35cec77f864c',
         '419d645596370839d4c723c0f8661b3b',
         'cc9203c7b89299484b9a6695d947869c',
         '7486fc29bb00a3b83f323214f7552ca6',
         'd02090eba4c74022bee5a3b661ef487d'}

In [35]:
recommendations = emf.recommend(users, 3)
emf_explainer = EMFExplainer(emf, ratio_split.train_set)
explanations = emf_explainer.explain_recommendations(recommendations)
print(explanations)

Computing explanations:   0%|          | 0/15 [00:00<?, ?it/s]

                             user_id   item_id  prediction  \
0   5f0d7ea4515a98abebea35cec77f864c  26723194    5.162625   
1   5f0d7ea4515a98abebea35cec77f864c    395962    4.965280   
2   5f0d7ea4515a98abebea35cec77f864c  23381014    4.889035   
3   cc9203c7b89299484b9a6695d947869c   3591262    5.118013   
4   cc9203c7b89299484b9a6695d947869c     92144    4.716733   
5   cc9203c7b89299484b9a6695d947869c   1118668    4.617853   
6   7486fc29bb00a3b83f323214f7552ca6  18594409    4.921811   
7   7486fc29bb00a3b83f323214f7552ca6  29095428    4.874328   
8   7486fc29bb00a3b83f323214f7552ca6  13623150    4.822907   
9   419d645596370839d4c723c0f8661b3b  33916178    5.208796   
10  419d645596370839d4c723c0f8661b3b  18809475    5.049128   
11  419d645596370839d4c723c0f8661b3b  26827419    4.944519   
12  d02090eba4c74022bee5a3b661ef487d     48002    4.917584   
13  d02090eba4c74022bee5a3b661ef487d  18594409    4.868793   
14  d02090eba4c74022bee5a3b661ef487d     22871    4.862644   

       

In [7]:
users = ['8e7e5b546a63cb9add8431ee6914cf59',
         'cb4527a42f29840bd0933a8a0a6f33f6',
         '5a1de0997778a6c9c4adff89a5f02de8',
         'fae5119b1177246f4f5fef9ca0c8fcc6',
         '5a1de0997778a6c9c4adff89a5f02de8',
         'd029dc3d13a17f66da3eda6454afd5d6']

In [9]:
recommendations = mf.recommend(users, 3)
phi_explainer = PHI4MFExplainer(mf, ratio_split.train_set, 0.001, 0.001, 0.01)
explanations = phi_explainer.explain_recommendations(recommendations)
print(explanations)

Computing explanations:   0%|          | 0/18 [00:00<?, ?it/s]

                             user_id   item_id  prediction  \
0   8e7e5b546a63cb9add8431ee6914cf59   8667848    6.352953   
1   8e7e5b546a63cb9add8431ee6914cf59  16068905    5.163914   
2   8e7e5b546a63cb9add8431ee6914cf59   2118745    4.686979   
3   cb4527a42f29840bd0933a8a0a6f33f6  16068905    4.696466   
4   cb4527a42f29840bd0933a8a0a6f33f6  17370618    4.494055   
5   cb4527a42f29840bd0933a8a0a6f33f6  11043410    4.446526   
6   5a1de0997778a6c9c4adff89a5f02de8  17370618    4.329241   
7   5a1de0997778a6c9c4adff89a5f02de8  11043410    4.285030   
8   5a1de0997778a6c9c4adff89a5f02de8    105986    4.267663   
9   fae5119b1177246f4f5fef9ca0c8fcc6  11857408    4.982469   
10  fae5119b1177246f4f5fef9ca0c8fcc6  16068905    4.777263   
11  fae5119b1177246f4f5fef9ca0c8fcc6  17370618    4.476998   
12  5a1de0997778a6c9c4adff89a5f02de8  17370618    4.329241   
13  5a1de0997778a6c9c4adff89a5f02de8  11043410    4.285030   
14  5a1de0997778a6c9c4adff89a5f02de8    105986    4.267663   
15  d029

In [8]:
recommendations = als.recommend(users, 3)
als_explainer = ALSExplainer(als, ratio_split.train_set)
explanations = als_explainer.explain_recommendations(recommendations)
print(explanations)

Computing explanations:   0%|          | 0/18 [00:00<?, ?it/s]

                             user_id   item_id  prediction  \
0   8e7e5b546a63cb9add8431ee6914cf59    100915    2.083831   
1   8e7e5b546a63cb9add8431ee6914cf59   5310515    1.826583   
2   8e7e5b546a63cb9add8431ee6914cf59  12127750    1.780168   
3   cb4527a42f29840bd0933a8a0a6f33f6  16068905    1.096844   
4   cb4527a42f29840bd0933a8a0a6f33f6  15749186    1.018936   
5   cb4527a42f29840bd0933a8a0a6f33f6  12812550    0.950120   
6   5a1de0997778a6c9c4adff89a5f02de8  16068905    1.374194   
7   5a1de0997778a6c9c4adff89a5f02de8   6482837    1.266620   
8   5a1de0997778a6c9c4adff89a5f02de8   6218281    1.033718   
9   fae5119b1177246f4f5fef9ca0c8fcc6  16151178    1.152069   
10  fae5119b1177246f4f5fef9ca0c8fcc6  11857408    1.066897   
11  fae5119b1177246f4f5fef9ca0c8fcc6   2213661    1.040462   
12  5a1de0997778a6c9c4adff89a5f02de8  16068905    1.374194   
13  5a1de0997778a6c9c4adff89a5f02de8   6482837    1.266620   
14  5a1de0997778a6c9c4adff89a5f02de8   6218281    1.033718   
15  d029

In [10]:
print(explanations.iloc[0])

user_id                          8e7e5b546a63cb9add8431ee6914cf59
item_id                                                    100915
prediction                                               2.083831
explanations    {5060378: 0.41022232288138677, 84119: 0.227793...
Name: 0, dtype: object


In [9]:
explanations['explanations'][0]

{5060378: 0.41022232288138677,
 84119: 0.22779380294845225,
 16068905: 0.2258606880351472,
 16164271: 0.18668785912044722,
 29801: 0.12652015763350594,
 118944: 0.12460275238002383,
 91714: 0.10825451261757725,
 7686667: 0.09496001821585659,
 11737306: 0.08034126710212261,
 20775020: 0.07811528910782888}