In [1]:
import cornac
from cornac.data import Reader
from cornac.metrics import *
from cornac.models import *
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
from cornac.datasets import amazon_digital_music

In [3]:
data = amazon_digital_music.load_review()
item_ids = list(map(lambda x: x[1], data))
docs = list(map(lambda x: x[2], data))
feedback = amazon_digital_music.load_feedback(reader=Reader(item_set=item_ids))

# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
# For more details, please refer to the tutorial on how to work with auxiliary data
item_text_modality = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=BaseTokenizer(stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)

# Define an evaluation method to split feedback into train and test sets
ratio_split = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 5541
Number of items = 3568
Number of ratings = 51764
Max rating = 5.0
Min rating = 1.0
Global mean = 4.2
---
Test data:
Number of users = 4466
Number of items = 3103
Number of ratings = 12942
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5541
Total items = 3568


In [4]:
cdl2=CDL(k=50, autoencoder_structure=[200], max_iter=30, lambda_u=0.1, lambda_v=1, lambda_w=0.1, lambda_n=1000, seed=123),
cdr2=CDR(k=50, autoencoder_structure=[200], max_iter=100, batch_size=128, lambda_u=0.01, lambda_v=0.1, 
        lambda_w=0.0001, lambda_n=5, learning_rate=0.001, vocab_size=8000, seed=123),
cvae2=CVAE(z_dim=50, vae_layers=[200, 100], act_fn="sigmoid", input_dim=8000, lr=0.001, batch_size=128, n_epochs=100, 
        lambda_u=1e-4, lambda_v=0.001, lambda_r=10, lambda_w=1e-4, seed=123, verbose=True),
convmf2=ConvMF(n_epochs=5, verbose=True, seed=123),
hft2= HFT(k=10, max_iter=40, grad_iter=5, l2_reg=0.001, lambda_text=0.01, vocab_size=5000, seed=123,),
ctr2= CTR(k=50, max_iter=50, lambda_v=1)

In [5]:
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=100),Precision(k=100), 
           Recall(k=100), NDCG(k=100), NCRR(k=100),
           MRR(),AUC(), MAP()]

In [6]:
cornac.Experiment(eval_method=ratio_split, models=cdr2, metrics=metrics).run()


[CDR] Training started!


100%|██████████| 100/100 [44:09<00:00, 26.49s/it, loss=0.179]  



Learning completed

[CDR] Evaluation started!


Rating: 100%|██████████| 12942/12942 [00:00<00:00, 154786.46it/s]
Ranking: 100%|██████████| 4466/4466 [01:57<00:00, 38.11it/s]


TEST:
...
    |    MAE |     MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDR | 3.2251 | 11.6496 | 3.2940 | 0.8829 | 0.0234 | 0.0530 | 0.0846 |   0.0610 |   0.1530 |        0.0123 |     0.4799 | 2652.9431 | 118.3474






In [14]:
cornac.Experiment(eval_method=ratio_split, models=cdl2, metrics=metrics).run()


[CDL] Training started!


100%|██████████| 30/30 [00:36<00:00,  1.22s/it, loss=151]


Learning completed!

[CDL] Evaluation started!


Rating: 100%|██████████| 12942/12942 [00:00<00:00, 153134.14it/s]
Ranking: 100%|██████████| 4466/4466 [00:07<00:00, 580.60it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDL | 2.8942 | 9.7014 | 2.9685 | 0.8629 | 0.0178 | 0.0326 | 0.0507 |   0.0358 |   0.1056 |        0.0094 |     0.3542 |   37.3302 |   8.5225






In [8]:
cornac.Experiment(eval_method=ratio_split, models=convmf2, metrics=metrics).run()


[ConvMF] Training started!
Epoch: 1/5


Optimizing CNN: 100%|██████████| 5/5 [00:26<00:00,  5.35s/it]


Loss: 448667136.56125 Elapsed: 80.6962s Converge: 44866713656124573865759919688861763787471452230998636363776.000000 
Epoch: 2/5


Optimizing CNN: 100%|██████████| 5/5 [00:27<00:00,  5.55s/it]


Loss: 46789272.56446 Elapsed: 79.9633s Converge: 0.895715 
Epoch: 3/5


Optimizing CNN: 100%|██████████| 5/5 [00:27<00:00,  5.52s/it]


Loss: 13668392.19111 Elapsed: 79.7703s Converge: 0.707873 
Epoch: 4/5


Optimizing CNN: 100%|██████████| 5/5 [00:27<00:00,  5.47s/it]


Loss: 7162603.63693 Elapsed: 80.2352s Converge: 0.475973 
Epoch: 5/5


Optimizing CNN: 100%|██████████| 5/5 [00:28<00:00,  5.64s/it]


Loss: 5181446.61303 Elapsed: 81.4464s Converge: 0.276597 

[ConvMF] Evaluation started!


Rating: 100%|██████████| 12942/12942 [00:00<00:00, 193014.70it/s]
Ranking: 100%|██████████| 4466/4466 [00:18<00:00, 244.89it/s]


TEST:
...
       |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
ConvMF | 0.8898 | 1.2446 | 0.9535 | 0.5947 | 0.0042 | 0.0085 | 0.0169 |   0.0093 |   0.0250 |        0.0023 |     0.0827 |  404.2961 |  18.9562






In [9]:
cornac.Experiment(eval_method=ratio_split, models=cvae2, metrics=metrics).run()


[CVAE] Training started!


100%|██████████| 100/100 [01:37<00:00,  1.03it/s, cf_loss=0.354, vae_loss=0.112]



[CVAE] Evaluation started!


Rating: 100%|██████████| 12942/12942 [00:00<00:00, 182982.53it/s]
Ranking: 100%|██████████| 4466/4466 [00:20<00:00, 213.26it/s]


TEST:
...
     |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
---- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CVAE | 2.4053 | 7.3115 | 2.5105 | 0.8562 | 0.0224 | 0.0476 | 0.0776 |   0.0552 |   0.1422 |        0.0118 |     0.4456 |   97.5220 |  21.6224






In [12]:
item_text_modality_hft = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=BaseTokenizer(stop_words="english"),
    max_vocab=5000,
    max_doc_freq=0.5,
)

# Define an evaluation method to split feedback into train and test sets
ratio_split_hft = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality_hft,
    verbose=True,
    seed=123,
)

cornac.Experiment(eval_method=ratio_split_hft, models=hft2, metrics=metrics).run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 5541
Number of items = 3568
Number of ratings = 51764
Max rating = 5.0
Min rating = 1.0
Global mean = 4.2
---
Test data:
Number of users = 4466
Number of items = 3103
Number of ratings = 12942
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5541
Total items = 3568

[HFT] Training started!


100%|██████████| 40/40 [03:12<00:00,  4.82s/it, loss=2.12e+4]


Learning completed!

[HFT] Evaluation started!


Rating: 100%|██████████| 12942/12942 [00:00<00:00, 154305.10it/s]
Ranking: 100%|██████████| 4466/4466 [01:35<00:00, 46.95it/s] 


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
HFT | 0.7441 | 1.1605 | 0.8353 | 0.4877 | 0.0025 | 0.0046 | 0.0109 |   0.0050 |   0.0133 |        0.0014 |     0.0440 |  193.4325 |  96.0480






In [13]:
cornac.Experiment(eval_method=ratio_split, models=[ctr2], metrics=metrics).run()


[CTR] Training started!


100%|██████████| 50/50 [33:40<00:00, 40.41s/it, cf_loss=6.02e+4, lda_likelihood=-6.22e+5]   


Learning completed!

[CTR] Evaluation started!


Rating: 100%|██████████| 12942/12942 [00:00<00:00, 164568.28it/s]
Ranking: 100%|██████████| 4466/4466 [00:05<00:00, 779.55it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CTR | 2.4186 | 7.3892 | 2.5263 | 0.8589 | 0.0238 | 0.0528 | 0.0874 |   0.0620 |   0.1558 |        0.0125 |     0.4837 | 2020.6736 |   6.5359




