In [1]:
import cornac
from cornac.data import Reader
from cornac.metrics import *
from cornac.models import *
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
from cornac.datasets import citeulike

In [3]:
# Load the CiteULike item texts and the user feedback. The Reader is given
# item_set=item_ids (presumably so only feedback on items that have text is
# kept -- confirm against the cornac Reader docs).
docs, item_ids = citeulike.load_text()
feedback = citeulike.load_feedback(
    reader=Reader(item_set=item_ids),
)

# Item text modality shared by the models evaluated on `ratio_split`.
# max_vocab=8000 agrees with vocab_size=8000 (CDR) and input_dim=8000 (CVAE)
# in the model-definition cell.
item_text_modality = TextModality(
    ids=item_ids,
    corpus=docs,
    max_vocab=8000,
    max_doc_freq=0.5,
    tokenizer=BaseTokenizer(stop_words="english"),
)

# Seeded split with test_size=0.2; the recorded output shows
# 168396 training ratings and 42053 test ratings.
ratio_split = RatioSplit(
    data=feedback,
    item_text=item_text_modality,
    test_size=0.2,
    rating_threshold=0.5,
    exclude_unknowns=True,
    seed=123,
    verbose=True,
)

rating_threshold = 0.5
exclude_unknowns = True




---
Training data:
Number of users = 5551
Number of items = 16949
Number of ratings = 168396
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 5444
Number of items = 14146
Number of ratings = 42053
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5551
Total items = 16949


In [4]:
# Text-aware recommenders compared in this notebook.
#
# NOTE: cdl1, cdr1, cvae1, convmf1 and hft1 are one-element tuples. The
# original assignments ended in a trailing comma, which Python parses as a
# tuple, and the experiment cells pass these names directly as `models=`.
# The tuple form is therefore kept, but written explicitly so it is no longer
# an accident of punctuation. ctr1 is a bare model; its experiment cell wraps
# it as `[ctr1]`.
cdl1 = (
    CDL(
        k=50,
        autoencoder_structure=[200],
        max_iter=30,
        lambda_u=0.1,
        lambda_v=1,
        lambda_w=0.1,
        lambda_n=1000,
        seed=123,
    ),
)

cdr1 = (
    CDR(
        k=50,
        autoencoder_structure=[200],
        max_iter=100,
        batch_size=128,
        lambda_u=0.01,
        lambda_v=0.1,
        lambda_w=0.0001,
        lambda_n=5,
        learning_rate=0.001,
        vocab_size=8000,  # same value as max_vocab of item_text_modality
        seed=123,
    ),
)

cvae1 = (
    CVAE(
        z_dim=50,
        vae_layers=[200, 100],
        act_fn="sigmoid",
        input_dim=8000,  # same value as max_vocab of item_text_modality
        lr=0.001,
        batch_size=128,
        n_epochs=100,
        lambda_u=1e-4,
        lambda_v=0.001,
        lambda_r=10,
        lambda_w=1e-4,
        seed=123,
        verbose=True,
    ),
)

convmf1 = (
    ConvMF(n_epochs=5, verbose=True, seed=123),
)

hft1 = (
    HFT(
        k=10,
        max_iter=40,
        grad_iter=5,
        l2_reg=0.001,
        lambda_text=0.01,
        vocab_size=5000,  # smaller than the shared modality's max_vocab=8000
        seed=123,
    ),
)

# seed=123 added: CTR was the only model constructed without a seed, which
# made its results non-reproducible and inconsistent with the other models.
ctr1 = CTR(k=50, max_iter=50, lambda_v=1, seed=123)


In [5]:
# Metrics reported for every experiment in this notebook.
# NOTE(review): the recorded result tables show MAE/MSE/RMSE = 0.0000 for
# every model, and the split summary reports Min rating = Max rating = 1.0,
# so the rating-error metrics do not appear to discriminate between models
# on this data -- confirm whether they are worth keeping.
metrics = [
    # Rating-prediction errors.
    MAE(),
    RMSE(),
    MSE(),
    # Ranking metrics with an explicit cutoff of k=100.
    FMeasure(k=100),
    Precision(k=100),
    Recall(k=100),
    NDCG(k=100),
    NCRR(k=100),
    # Ranking metrics constructed with their default arguments.
    MRR(),
    AUC(),
    MAP(),
]

In [6]:
# Train and evaluate CDL on the shared split.
# `cdl1` is a one-element tuple, so it is passed to `models=` as-is.
# The recorded run reports about 163 s of training time.
cornac.Experiment(
    eval_method=ratio_split,
    models=cdl1,
    metrics=metrics,
).run()


[CDL] Training started!


100%|██████████| 30/30 [02:40<00:00,  5.34s/it, loss=8.02]


Learning completed!

[CDL] Evaluation started!


Rating: 100%|██████████| 42053/42053 [00:00<00:00, 208796.17it/s]
Ranking: 100%|██████████| 5444/5444 [00:24<00:00, 218.78it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDL | 0.0000 | 0.0000 | 0.0000 | 0.9532 | 0.0449 | 0.0554 | 0.1474 |   0.0816 |   0.1638 |        0.0254 |     0.3760 |  163.3918 |  25.8039






In [7]:
# Train and evaluate CDR on the shared split.
# `cdr1` is a one-element tuple, so it is passed to `models=` as-is.
# Slow cell: the recorded run reports about 3183 s (~53 min) of training time.
cornac.Experiment(
    eval_method=ratio_split,
    models=cdr1,
    metrics=metrics,
).run()


[CDR] Training started!


100%|██████████| 100/100 [53:02<00:00, 31.82s/it, loss=0.0963]



Learning completed

[CDR] Evaluation started!


Rating: 100%|██████████| 42053/42053 [00:00<00:00, 199330.60it/s]
Ranking: 100%|██████████| 5444/5444 [00:24<00:00, 218.78it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDR | 0.0000 | 0.0000 | 0.0000 | 0.9390 | 0.0392 | 0.0405 | 0.1028 |   0.0558 |   0.1307 |        0.0222 |     0.3243 | 3183.3147 |  25.8420






In [8]:
# Train and evaluate CVAE on the shared split.
# `cvae1` is a one-element tuple, so it is passed to `models=` as-is.
# The recorded run reports about 402 s of training time.
cornac.Experiment(
    eval_method=ratio_split,
    models=cvae1,
    metrics=metrics,
).run()


[CVAE] Training started!


100%|██████████| 100/100 [06:41<00:00,  4.01s/it, cf_loss=0.0135, vae_loss=0.0452]



[CVAE] Evaluation started!


Rating: 100%|██████████| 42053/42053 [00:00<00:00, 204578.01it/s]
Ranking: 100%|██████████| 5444/5444 [00:25<00:00, 216.19it/s]


TEST:
...
     |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
---- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CVAE | 0.0000 | 0.0000 | 0.0000 | 0.9386 | 0.0462 | 0.0526 | 0.1402 |   0.0770 |   0.1645 |        0.0260 |     0.3974 |  402.3302 |  26.1593






In [9]:
# Train and evaluate ConvMF on the shared split.
# `convmf1` is a one-element tuple, so it is passed to `models=` as-is.
# The recorded run reports about 613 s of training time for 5 epochs.
cornac.Experiment(
    eval_method=ratio_split,
    models=convmf1,
    metrics=metrics,
).run()


[ConvMF] Training started!
Epoch: 1/5


Optimizing CNN: 100%|██████████| 5/5 [01:47<00:00, 21.58s/it]


Loss: 6297942083.08773 Elapsed: 124.7561s Converge: 629794208308772873223113158585979528876414846225182732845056.000000 
Epoch: 2/5


Optimizing CNN: 100%|██████████| 5/5 [01:47<00:00, 21.55s/it]


Loss: 341626001.25416 Elapsed: 118.3029s Converge: 0.945756 
Epoch: 3/5


Optimizing CNN: 100%|██████████| 5/5 [01:47<00:00, 21.48s/it]


Loss: 163915828.11621 Elapsed: 116.6738s Converge: 0.520189 
Epoch: 4/5


Optimizing CNN: 100%|██████████| 5/5 [01:47<00:00, 21.51s/it]


Loss: 120514343.16097 Elapsed: 118.3793s Converge: 0.264779 
Epoch: 5/5


Optimizing CNN: 100%|██████████| 5/5 [01:47<00:00, 21.51s/it]


Loss: 107048790.63736 Elapsed: 117.8420s Converge: 0.111734 

[ConvMF] Evaluation started!


Rating: 100%|██████████| 42053/42053 [00:00<00:00, 200978.63it/s]
Ranking: 100%|██████████| 5444/5444 [00:23<00:00, 234.83it/s]


TEST:
...
       |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
ConvMF | 0.0000 | 0.0000 | 0.0000 | 0.5630 | 0.0017 | 0.0014 | 0.0038 |   0.0011 |   0.0059 |        0.0009 |     0.0224 |  613.0717 |  24.1969






In [17]:
# HFT is configured with vocab_size=5000, so this cell builds a separate text
# modality with max_vocab=5000 (the shared one uses 8000) and a matching split
# -- presumably so the vocabulary size agrees with the model; confirm.
# NOTE(review): the tokenizer here uses sep="\t", whereas the shared modality
# uses BaseTokenizer's default separator. Confirm the CiteULike text is
# actually tab-delimited; the recorded HFT run scores AUC 0.4992.
item_text_modality_hft = TextModality(
    ids=item_ids,
    corpus=docs,
    max_vocab=5000,
    max_doc_freq=0.5,
    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
)

# Same split settings (test_size, threshold, seed) as `ratio_split`; only the
# item text modality differs.
ratio_split_hft = RatioSplit(
    data=feedback,
    item_text=item_text_modality_hft,
    test_size=0.2,
    rating_threshold=0.5,
    exclude_unknowns=True,
    seed=123,
    verbose=True,
)

# `hft1` is a one-element tuple, so it is passed to `models=` as-is.
cornac.Experiment(
    eval_method=ratio_split_hft,
    models=hft1,
    metrics=metrics,
).run()

rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 5551
Number of items = 16949
Number of ratings = 168396
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 5444
Number of items = 14146
Number of ratings = 42053
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5551
Total items = 16949

[HFT] Training started!


100%|██████████| 40/40 [01:59<00:00,  2.98s/it, loss=544]


Learning completed!

[HFT] Evaluation started!


Rating: 100%|██████████| 42053/42053 [00:00<00:00, 205517.42it/s]
Ranking: 100%|██████████| 5444/5444 [00:22<00:00, 243.22it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.4992 | 0.0007 | 0.0009 | 0.0023 |   0.0005 |   0.0018 |        0.0004 |     0.0048 |  119.3633 |  23.2491






In [13]:
# Train and evaluate CTR on the shared split.
# Unlike the other models, `ctr1` is a bare model object, so it is wrapped in
# a list before being handed to `models=`.
cornac.Experiment(
    eval_method=ratio_split,
    models=[ctr1],
    metrics=metrics,
).run()


[CTR] Training started!


100%|██████████| 50/50 [03:28<00:00,  4.18s/it, cf_loss=1.52e+4, lda_likelihood=-3.85e+6]


Learning completed!

[CTR] Evaluation started!


Rating: 100%|██████████| 42053/42053 [00:00<00:00, 199019.55it/s]
Ranking: 100%|██████████| 5444/5444 [00:23<00:00, 232.45it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.9379 | 0.0463 | 0.0531 | 0.1395 |   0.0774 |   0.1653 |        0.0261 |     0.3990 |  208.9737 |  24.3337




